rippled
codec.h
1 //------------------------------------------------------------------------------
2 /*
3  This file is part of rippled: https://github.com/ripple/rippled
4  Copyright (c) 2012, 2013 Ripple Labs Inc.
5 
6  Permission to use, copy, modify, and/or distribute this software for any
7  purpose with or without fee is hereby granted, provided that the above
8  copyright notice and this permission notice appear in all copies.
9 
10  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 //==============================================================================
19 
20 #ifndef RIPPLE_NODESTORE_CODEC_H_INCLUDED
21 #define RIPPLE_NODESTORE_CODEC_H_INCLUDED
22 
23 // Disable lz4 deprecation warning due to incompatibility with clang attributes
24 #define LZ4_DISABLE_DEPRECATE_WARNINGS
25 
#include <ripple/basics/contract.h>
#include <ripple/basics/safe_cast.h>
#include <ripple/nodestore/NodeObject.h>
#include <ripple/nodestore/impl/varint.h>
#include <ripple/protocol/HashPrefix.h>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <lz4.h>
#include <nudb/detail/field.hpp>
#include <stdexcept>
#include <string>
#include <utility>
37 
38 namespace ripple {
39 namespace NodeStore {
40 
41 template <class BufferFactory>
43 lz4_decompress(void const* in, std::size_t in_size, BufferFactory&& bf)
44 {
45  if (static_cast<int>(in_size) < 0)
46  Throw<std::runtime_error>("lz4_decompress: integer overflow (input)");
47 
48  std::size_t outSize = 0;
49 
50  auto const n = read_varint(
51  reinterpret_cast<std::uint8_t const*>(in), in_size, outSize);
52 
53  if (n == 0 || n >= in_size)
54  Throw<std::runtime_error>("lz4_decompress: invalid blob");
55 
56  if (static_cast<int>(outSize) <= 0)
57  Throw<std::runtime_error>("lz4_decompress: integer overflow (output)");
58 
59  void* const out = bf(outSize);
60 
61  if (LZ4_decompress_safe(
62  reinterpret_cast<char const*>(in) + n,
63  reinterpret_cast<char*>(out),
64  static_cast<int>(in_size - n),
65  static_cast<int>(outSize)) != static_cast<int>(outSize))
66  Throw<std::runtime_error>("lz4_decompress: LZ4_decompress_safe");
67 
68  return {out, outSize};
69 }
70 
71 template <class BufferFactory>
73 lz4_compress(void const* in, std::size_t in_size, BufferFactory&& bf)
74 {
75  using std::runtime_error;
76  using namespace nudb::detail;
79  auto const n = write_varint(vi.data(), in_size);
80  auto const out_max = LZ4_compressBound(in_size);
81  std::uint8_t* out = reinterpret_cast<std::uint8_t*>(bf(n + out_max));
82  result.first = out;
83  std::memcpy(out, vi.data(), n);
84  auto const out_size = LZ4_compress_default(
85  reinterpret_cast<char const*>(in),
86  reinterpret_cast<char*>(out + n),
87  in_size,
88  out_max);
89  if (out_size == 0)
90  Throw<std::runtime_error>("lz4 compress");
91  result.second = n + out_size;
92  return result;
93 }
94 
95 //------------------------------------------------------------------------------
96 
97 /*
98  object types:
99 
100  0 = Uncompressed
101  1 = lz4 compressed
102  2 = inner node compressed
103  3 = full inner node
104 */
105 
106 template <class BufferFactory>
108 nodeobject_decompress(void const* in, std::size_t in_size, BufferFactory&& bf)
109 {
110  using namespace nudb::detail;
111 
112  std::uint8_t const* p = reinterpret_cast<std::uint8_t const*>(in);
113  std::size_t type;
114  auto const vn = read_varint(p, in_size, type);
115  if (vn == 0)
116  Throw<std::runtime_error>("nodeobject decompress");
117  p += vn;
118  in_size -= vn;
119 
121  switch (type)
122  {
123  case 0: // uncompressed
124  {
125  result.first = p;
126  result.second = in_size;
127  break;
128  }
129  case 1: // lz4
130  {
131  result = lz4_decompress(p, in_size, bf);
132  break;
133  }
134  case 2: // compressed v1 inner node
135  {
136  auto const hs = field<std::uint16_t>::size; // Mask
137  if (in_size < hs + 32)
138  Throw<std::runtime_error>(
139  "nodeobject codec v1: short inner node size: " +
140  std::string("in_size = ") + std::to_string(in_size) +
141  " hs = " + std::to_string(hs));
142  istream is(p, in_size);
143  std::uint16_t mask;
144  read<std::uint16_t>(is, mask); // Mask
145  in_size -= hs;
146  result.second = 525;
147  void* const out = bf(result.second);
148  result.first = out;
149  ostream os(out, result.second);
150  write<std::uint32_t>(os, 0);
151  write<std::uint32_t>(os, 0);
152  write<std::uint8_t>(os, hotUNKNOWN);
153  write<std::uint32_t>(
154  os, static_cast<std::uint32_t>(HashPrefix::innerNode));
155  if (mask == 0)
156  Throw<std::runtime_error>(
157  "nodeobject codec v1: empty inner node");
158  std::uint16_t bit = 0x8000;
159  for (int i = 16; i--; bit >>= 1)
160  {
161  if (mask & bit)
162  {
163  if (in_size < 32)
164  Throw<std::runtime_error>(
165  "nodeobject codec v1: short inner node subsize: " +
166  std::string("in_size = ") +
167  std::to_string(in_size) +
168  " i = " + std::to_string(i));
169  std::memcpy(os.data(32), is(32), 32);
170  in_size -= 32;
171  }
172  else
173  {
174  std::memset(os.data(32), 0, 32);
175  }
176  }
177  if (in_size > 0)
178  Throw<std::runtime_error>(
179  "nodeobject codec v1: long inner node, in_size = " +
180  std::to_string(in_size));
181  break;
182  }
183  case 3: // full v1 inner node
184  {
185  if (in_size != 16 * 32) // hashes
186  Throw<std::runtime_error>(
187  "nodeobject codec v1: short full inner node, in_size = " +
188  std::to_string(in_size));
189  istream is(p, in_size);
190  result.second = 525;
191  void* const out = bf(result.second);
192  result.first = out;
193  ostream os(out, result.second);
194  write<std::uint32_t>(os, 0);
195  write<std::uint32_t>(os, 0);
196  write<std::uint8_t>(os, hotUNKNOWN);
197  write<std::uint32_t>(
198  os, static_cast<std::uint32_t>(HashPrefix::innerNode));
199  write(os, is(512), 512);
200  break;
201  }
202  default:
203  Throw<std::runtime_error>(
204  "nodeobject codec: bad type=" + std::to_string(type));
205  };
206  return result;
207 }
208 
/** Returns a pointer to a block of 32 zero bytes.

    The block is read-only and the same address is returned on every call.
*/
template <class = void>
void const*
zero32()
{
    // Value-initialised, so every element is zero; constexpr keeps the
    // block immutable and avoids a runtime-initialised static.
    static constexpr std::array<char, 32> zeros{};
    return zeros.data();
}
216 
217 template <class BufferFactory>
219 nodeobject_compress(void const* in, std::size_t in_size, BufferFactory&& bf)
220 {
221  using std::runtime_error;
222  using namespace nudb::detail;
223 
224  // Check for inner node v1
225  if (in_size == 525)
226  {
227  istream is(in, in_size);
228  std::uint32_t index;
229  std::uint32_t unused;
230  std::uint8_t kind;
231  std::uint32_t prefix;
232  read<std::uint32_t>(is, index);
233  read<std::uint32_t>(is, unused);
234  read<std::uint8_t>(is, kind);
235  read<std::uint32_t>(is, prefix);
236  if (safe_cast<HashPrefix>(prefix) == HashPrefix::innerNode)
237  {
238  std::size_t n = 0;
239  std::uint16_t mask = 0;
241  for (unsigned bit = 0x8000; bit; bit >>= 1)
242  {
243  void const* const h = is(32);
244  if (std::memcmp(h, zero32(), 32) == 0)
245  continue;
246  std::memcpy(vh.data() + 32 * n, h, 32);
247  mask |= bit;
248  ++n;
249  }
251  if (n < 16)
252  {
253  // 2 = v1 inner node compressed
254  auto const type = 2U;
255  auto const vs = size_varint(type);
256  result.second = vs + field<std::uint16_t>::size + // mask
257  n * 32; // hashes
258  std::uint8_t* out =
259  reinterpret_cast<std::uint8_t*>(bf(result.second));
260  result.first = out;
261  ostream os(out, result.second);
262  write<varint>(os, type);
263  write<std::uint16_t>(os, mask);
264  write(os, vh.data(), n * 32);
265  return result;
266  }
267  // 3 = full v1 inner node
268  auto const type = 3U;
269  auto const vs = size_varint(type);
270  result.second = vs + n * 32; // hashes
271  std::uint8_t* out =
272  reinterpret_cast<std::uint8_t*>(bf(result.second));
273  result.first = out;
274  ostream os(out, result.second);
275  write<varint>(os, type);
276  write(os, vh.data(), n * 32);
277  return result;
278  }
279  }
280 
282 
283  constexpr std::size_t codecType = 1;
284  auto const vn = write_varint(vi.data(), codecType);
286  switch (codecType)
287  {
288  // case 0 was uncompressed data; we always compress now.
289  case 1: // lz4
290  {
291  std::uint8_t* p;
292  auto const lzr = NodeStore::lz4_compress(
293  in, in_size, [&p, &vn, &bf](std::size_t n) {
294  p = reinterpret_cast<std::uint8_t*>(bf(vn + n));
295  return p + vn;
296  });
297  std::memcpy(p, vi.data(), vn);
298  result.first = p;
299  result.second = vn + lzr.second;
300  break;
301  }
302  default:
303  Throw<std::logic_error>(
304  "nodeobject codec: unknown=" + std::to_string(codecType));
305  };
306  return result;
307 }
308 
309 // Modifies an inner node to erase the ledger
310 // sequence and type information so the codec
311 // verification can pass.
312 //
313 template <class = void>
314 void
315 filter_inner(void* in, std::size_t in_size)
316 {
317  using namespace nudb::detail;
318 
319  // Check for inner node
320  if (in_size == 525)
321  {
322  istream is(in, in_size);
323  std::uint32_t index;
324  std::uint32_t unused;
325  std::uint8_t kind;
326  std::uint32_t prefix;
327  read<std::uint32_t>(is, index);
328  read<std::uint32_t>(is, unused);
329  read<std::uint8_t>(is, kind);
330  read<std::uint32_t>(is, prefix);
331  if (safe_cast<HashPrefix>(prefix) == HashPrefix::innerNode)
332  {
333  ostream os(in, 9);
334  write<std::uint32_t>(os, 0);
335  write<std::uint32_t>(os, 0);
336  write<std::uint8_t>(os, hotUNKNOWN);
337  }
338  }
339 }
340 
341 } // namespace NodeStore
342 } // namespace ripple
343 
344 #endif
ripple::NodeStore::nodeobject_decompress
std::pair< void const *, std::size_t > nodeobject_decompress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:108
ripple::hotUNKNOWN
@ hotUNKNOWN
Definition: NodeObject.h:33
std::string
STL class.
utility
cstring
std::pair
ripple::NodeStore::lz4_decompress
std::pair< void const *, std::size_t > lz4_decompress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:43
ripple::QualityDirection::in
@ in
ripple::NodeStore::write
void write(nudb::detail::ostream &os, std::size_t t)
Definition: varint.h:133
ripple::QualityDirection::out
@ out
ripple::HashPrefix::innerNode
@ innerNode
inner node in V1 tree
ripple::NodeStore::read_varint
std::size_t read_varint(void const *buf, std::size_t buflen, std::size_t &t)
Definition: varint.h:56
ripple::NodeStore::lz4_compress
std::pair< void const *, std::size_t > lz4_compress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:73
cstddef
std::to_string
T to_string(T... args)
ripple::NodeStore::filter_inner
void filter_inner(void *in, std::size_t in_size)
Definition: codec.h:315
std::array
STL class.
std::runtime_error
STL class.
std::uint8_t
ripple
Use hash_* containers for keys that do not need a cryptographically secure hashing algorithm.
Definition: RCLCensorshipDetector.h:29
ripple::NodeStore::zero32
void const * zero32()
Definition: codec.h:211
ripple::NodeStore::size_varint
std::size_t size_varint(T v)
Definition: varint.h:89
std::size_t
std::memcpy
T memcpy(T... args)
ripple::NodeStore::nodeobject_compress
std::pair< void const *, std::size_t > nodeobject_compress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:219
std::memcmp
T memcmp(T... args)
std::array::data
T data(T... args)
ripple::NodeStore::write_varint
std::size_t write_varint(void *p0, std::size_t v)
Definition: varint.h:102
std::memset
T memset(T... args)
string