From 3ba45c72771bf9ba62cbd301d89a5b5c6699967c Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Fri, 30 Jun 2017 11:20:26 -0700 Subject: [PATCH] Upgrade protozero to version 1.5.2 --- CHANGELOG.md | 5 + protozero/byteswap.hpp | 81 ++-- protozero/config.hpp | 11 - protozero/exception.hpp | 8 +- protozero/iterators.hpp | 328 ++++++++++++++ protozero/pbf_builder.hpp | 45 +- protozero/pbf_message.hpp | 16 +- protozero/pbf_reader.hpp | 891 ++++++++++++++++---------------------- protozero/pbf_writer.hpp | 328 ++++++++++---- protozero/types.hpp | 191 +++++++- protozero/varint.hpp | 149 +++++-- protozero/version.hpp | 22 +- version.hpp | 2 +- 13 files changed, 1337 insertions(+), 740 deletions(-) create mode 100644 protozero/iterators.hpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c9d802..17089b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,9 @@ +## 1.19.3 + +* Upgrade protozero to version 1.5.2 + ## 1.19.2 + * Ignore UTF-8 byte order mark if present ## 1.19.1 diff --git a/protozero/byteswap.hpp b/protozero/byteswap.hpp index a018c1c..bca4844 100644 --- a/protozero/byteswap.hpp +++ b/protozero/byteswap.hpp @@ -16,56 +16,69 @@ documentation. * @brief Contains functions to swap bytes in values (for different endianness). */ -#include #include +#include #include namespace protozero { +namespace detail { -/** - * Swap N byte value between endianness formats. This template function must - * be specialized to actually work. - */ -template -inline void byteswap(const char* /*data*/, char* /*result*/) { - static_assert(N == 1, "Can only swap 4 or 8 byte values"); -} - -/** - * Swap 4 byte value (int32_t, uint32_t, float) between endianness formats. - */ -template <> -inline void byteswap<4>(const char* data, char* result) { +inline uint32_t byteswap_impl(uint32_t value) noexcept { #ifdef PROTOZERO_USE_BUILTIN_BSWAP - *reinterpret_cast(result) = __builtin_bswap32(*reinterpret_cast(data)); + return __builtin_bswap32(value); #else - result[3] = data[0]; - result[2] = data[1]; - result[1] = data[2]; - result[0] = data[3]; + return ((value & 0xff000000) >> 24) | + ((value & 0x00ff0000) >> 8) | + ((value & 0x0000ff00) << 8) | + ((value & 0x000000ff) << 24); #endif } -/** - * Swap 8 byte value (int64_t, uint64_t, double) between endianness formats. - */ -template <> -inline void byteswap<8>(const char* data, char* result) { +inline uint64_t byteswap_impl(uint64_t value) noexcept { #ifdef PROTOZERO_USE_BUILTIN_BSWAP - *reinterpret_cast(result) = __builtin_bswap64(*reinterpret_cast(data)); + return __builtin_bswap64(value); #else - result[7] = data[0]; - result[6] = data[1]; - result[5] = data[2]; - result[4] = data[3]; - result[3] = data[4]; - result[2] = data[5]; - result[1] = data[6]; - result[0] = data[7]; + return ((value & 0xff00000000000000ULL) >> 56) | + ((value & 0x00ff000000000000ULL) >> 40) | + ((value & 0x0000ff0000000000ULL) >> 24) | + ((value & 0x000000ff00000000ULL) >> 8) | + ((value & 0x00000000ff000000ULL) << 8) | + ((value & 0x0000000000ff0000ULL) << 24) | + ((value & 0x000000000000ff00ULL) << 40) | + ((value & 0x00000000000000ffULL) << 56); #endif } +inline void byteswap_inplace(uint32_t* ptr) noexcept { + *ptr = byteswap_impl(*ptr); +} + +inline void byteswap_inplace(uint64_t* ptr) noexcept { + *ptr = byteswap_impl(*ptr); +} + +inline void byteswap_inplace(int32_t* ptr) noexcept { + auto bptr = reinterpret_cast(ptr); + *bptr = byteswap_impl(*bptr); +} + +inline void byteswap_inplace(int64_t* ptr) noexcept { + auto bptr = reinterpret_cast(ptr); + *bptr = byteswap_impl(*bptr); +} + +inline void byteswap_inplace(float* ptr) noexcept { + auto bptr = reinterpret_cast(ptr); + *bptr = byteswap_impl(*bptr); +} + +inline void byteswap_inplace(double* ptr) noexcept { + auto bptr = reinterpret_cast(ptr); + *bptr = byteswap_impl(*bptr); +} + +} // end namespace detail } // end namespace protozero #endif // PROTOZERO_BYTESWAP_HPP diff --git a/protozero/config.hpp b/protozero/config.hpp index 8465c96..6fc7749 100644 --- a/protozero/config.hpp +++ b/protozero/config.hpp @@ -35,17 +35,6 @@ documentation. # define PROTOZERO_BYTE_ORDER PROTOZERO_LITTLE_ENDIAN #endif -// On some ARM machines and depending on compiler settings access to unaligned -// floating point values will result in a SIGBUS. Do not use the bare pointers -// in this case. -#if PROTOZERO_BYTE_ORDER == PROTOZERO_LITTLE_ENDIAN -# if !defined(__arm__) && !defined(_M_ARM) -# ifndef PROTOZERO_DO_NOT_USE_BARE_POINTER -# define PROTOZERO_USE_BARE_POINTER_FOR_PACKED_FIXED -# endif -# endif -#endif - // Check whether __builtin_bswap is available #if defined(__GNUC__) || defined(__clang__) # define PROTOZERO_USE_BUILTIN_BSWAP diff --git a/protozero/exception.hpp b/protozero/exception.hpp index 5c7ab54..ca4340e 100644 --- a/protozero/exception.hpp +++ b/protozero/exception.hpp @@ -29,7 +29,7 @@ namespace protozero { */ struct exception : std::exception { /// Returns the explanatory string. - const char *what() const noexcept override { return "pbf exception"; } + const char* what() const noexcept override { return "pbf exception"; } }; /** @@ -38,7 +38,7 @@ struct exception : std::exception { */ struct varint_too_long_exception : exception { /// Returns the explanatory string. - const char *what() const noexcept override { return "varint too long exception"; } + const char* what() const noexcept override { return "varint too long exception"; } }; /** @@ -47,7 +47,7 @@ struct varint_too_long_exception : exception { */ struct unknown_pbf_wire_type_exception : exception { /// Returns the explanatory string. - const char *what() const noexcept override { return "unknown pbf field type exception"; } + const char* what() const noexcept override { return "unknown pbf field type exception"; } }; /** @@ -60,7 +60,7 @@ struct unknown_pbf_wire_type_exception : exception { */ struct end_of_buffer_exception : exception { /// Returns the explanatory string. - const char *what() const noexcept override { return "end of buffer exception"; } + const char* what() const noexcept override { return "end of buffer exception"; } }; } // end namespace protozero diff --git a/protozero/iterators.hpp b/protozero/iterators.hpp new file mode 100644 index 0000000..a19f202 --- /dev/null +++ b/protozero/iterators.hpp @@ -0,0 +1,328 @@ +#ifndef PROTOZERO_ITERATORS_HPP +#define PROTOZERO_ITERATORS_HPP + +/***************************************************************************** + +protozero - Minimalistic protocol buffer decoder and encoder in C++. + +This file is from https://github.com/mapbox/protozero where you can find more +documentation. + +*****************************************************************************/ + +/** + * @file iterators.hpp + * + * @brief Contains the iterators for access to packed repeated fields. + */ + +#include +#include +#include + +#include +#include + +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN +# include +#endif + +namespace protozero { + +/** + * A range of iterators based on std::pair. Created from beginning and + * end iterators. Used as a return type from some pbf_reader methods + * that is easy to use with range-based for loops. + */ +template > +class iterator_range : +#ifdef PROTOZERO_STRICT_API + protected +#else + public +#endif + P { + +public: + + /// The type of the iterators in this range. + using iterator = T; + + /// The value type of the underlying iterator. + using value_type = typename std::iterator_traits::value_type; + + /** + * Default constructor. Create empty iterator_range. + */ + constexpr iterator_range() : + P(iterator{}, iterator{}) { + } + + /** + * Create iterator range from two iterators. + * + * @param first_iterator Iterator to beginning or range. + * @param last_iterator Iterator to end or range. + */ + constexpr iterator_range(iterator&& first_iterator, iterator&& last_iterator) : + P(std::forward(first_iterator), + std::forward(last_iterator)) { + } + + /// Return iterator to beginning of range. + constexpr iterator begin() const noexcept { + return this->first; + } + + /// Return iterator to end of range. + constexpr iterator end() const noexcept { + return this->second; + } + + /// Return iterator to beginning of range. + constexpr iterator cbegin() const noexcept { + return this->first; + } + + /// Return iterator to end of range. + constexpr iterator cend() const noexcept { + return this->second; + } + + /// Return true if this range is empty. + constexpr std::size_t empty() const noexcept { + return begin() == end(); + } + + /** + * Get element at the beginning of the range. + * + * @pre Range must not be empty. + */ + value_type front() const { + protozero_assert(!empty()); + return *(this->first); + } + + /** + * Advance beginning of range by one. + * + * @pre Range must not be empty. + */ + void drop_front() { + protozero_assert(!empty()); + ++this->first; + } + + /** + * Swap the contents of this range with the other. + * + * @param other Other range to swap data with. + */ + void swap(iterator_range& other) noexcept { + using std::swap; + swap(this->first, other.first); + swap(this->second, other.second); + } + +}; // struct iterator_range + +/** + * Swap two iterator_ranges. + * + * @param lhs First range. + * @param rhs Second range. + */ +template +inline void swap(iterator_range& lhs, iterator_range& rhs) noexcept { + lhs.swap(rhs); +} + +/** + * A forward iterator used for accessing packed repeated fields of fixed + * length (fixed32, sfixed32, float, double). + */ +template +class const_fixed_iterator { + + /// Pointer to current iterator position + const char* m_data; + + /// Pointer to end iterator position + const char* m_end; + +public: + + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + const_fixed_iterator() noexcept : + m_data(nullptr), + m_end(nullptr) { + } + + const_fixed_iterator(const char* data, const char* end) noexcept : + m_data(data), + m_end(end) { + } + + const_fixed_iterator(const const_fixed_iterator&) noexcept = default; + const_fixed_iterator(const_fixed_iterator&&) noexcept = default; + + const_fixed_iterator& operator=(const const_fixed_iterator&) noexcept = default; + const_fixed_iterator& operator=(const_fixed_iterator&&) noexcept = default; + + ~const_fixed_iterator() noexcept = default; + + value_type operator*() const { + value_type result; + std::memcpy(&result, m_data, sizeof(value_type)); +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN + detail::byteswap_inplace(&result); +#endif + return result; + } + + const_fixed_iterator& operator++() { + m_data += sizeof(value_type); + return *this; + } + + const_fixed_iterator operator++(int) { + const const_fixed_iterator tmp(*this); + ++(*this); + return tmp; + } + + bool operator==(const const_fixed_iterator& rhs) const noexcept { + return m_data == rhs.m_data && m_end == rhs.m_end; + } + + bool operator!=(const const_fixed_iterator& rhs) const noexcept { + return !(*this == rhs); + } + +}; // class const_fixed_iterator + +/** + * A forward iterator used for accessing packed repeated varint fields + * (int32, uint32, int64, uint64, bool, enum). + */ +template +class const_varint_iterator { + +protected: + + /// Pointer to current iterator position + const char* m_data; + + /// Pointer to end iterator position + const char* m_end; + +public: + + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + const_varint_iterator() noexcept : + m_data(nullptr), + m_end(nullptr) { + } + + const_varint_iterator(const char* data, const char* end) noexcept : + m_data(data), + m_end(end) { + } + + const_varint_iterator(const const_varint_iterator&) noexcept = default; + const_varint_iterator(const_varint_iterator&&) noexcept = default; + + const_varint_iterator& operator=(const const_varint_iterator&) noexcept = default; + const_varint_iterator& operator=(const_varint_iterator&&) noexcept = default; + + ~const_varint_iterator() noexcept = default; + + value_type operator*() const { + const char* d = m_data; // will be thrown away + return static_cast(decode_varint(&d, m_end)); + } + + const_varint_iterator& operator++() { + skip_varint(&m_data, m_end); + return *this; + } + + const_varint_iterator operator++(int) { + const const_varint_iterator tmp(*this); + ++(*this); + return tmp; + } + + bool operator==(const const_varint_iterator& rhs) const noexcept { + return m_data == rhs.m_data && m_end == rhs.m_end; + } + + bool operator!=(const const_varint_iterator& rhs) const noexcept { + return !(*this == rhs); + } + +}; // class const_varint_iterator + +/** + * A forward iterator used for accessing packed repeated svarint fields + * (sint32, sint64). + */ +template +class const_svarint_iterator : public const_varint_iterator { + +public: + + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + const_svarint_iterator() noexcept : + const_varint_iterator() { + } + + const_svarint_iterator(const char* data, const char* end) noexcept : + const_varint_iterator(data, end) { + } + + const_svarint_iterator(const const_svarint_iterator&) = default; + const_svarint_iterator(const_svarint_iterator&&) = default; + + const_svarint_iterator& operator=(const const_svarint_iterator&) = default; + const_svarint_iterator& operator=(const_svarint_iterator&&) = default; + + ~const_svarint_iterator() = default; + + value_type operator*() const { + const char* d = this->m_data; // will be thrown away + return static_cast(decode_zigzag64(decode_varint(&d, this->m_end))); + } + + const_svarint_iterator& operator++() { + skip_varint(&this->m_data, this->m_end); + return *this; + } + + const_svarint_iterator operator++(int) { + const const_svarint_iterator tmp(*this); + ++(*this); + return tmp; + } + +}; // class const_svarint_iterator + +} // end namespace protozero + +#endif // PROTOZERO_ITERATORS_HPP diff --git a/protozero/pbf_builder.hpp b/protozero/pbf_builder.hpp index 548f4ce..8197395 100644 --- a/protozero/pbf_builder.hpp +++ b/protozero/pbf_builder.hpp @@ -18,8 +18,8 @@ documentation. #include -#include #include +#include namespace protozero { @@ -46,7 +46,7 @@ public: using enum_type = T; - pbf_builder(std::string& data) noexcept : + explicit pbf_builder(std::string& data) noexcept : pbf_writer(data) { } @@ -57,7 +57,7 @@ public: /// @cond INTERNAL #define PROTOZERO_WRITER_WRAP_ADD_SCALAR(name, type) \ - inline void add_##name(T tag, type value) { \ + void add_##name(T tag, type value) { \ pbf_writer::add_##name(pbf_tag_type(tag), value); \ } @@ -79,38 +79,59 @@ public: #undef PROTOZERO_WRITER_WRAP_ADD_SCALAR /// @endcond - inline void add_bytes(T tag, const char* value, std::size_t size) { + void add_bytes(T tag, const char* value, std::size_t size) { pbf_writer::add_bytes(pbf_tag_type(tag), value, size); } - inline void add_bytes(T tag, const std::string& value) { + void add_bytes(T tag, const data_view& value) { pbf_writer::add_bytes(pbf_tag_type(tag), value); } - inline void add_string(T tag, const char* value, std::size_t size) { + void add_bytes(T tag, const std::string& value) { + pbf_writer::add_bytes(pbf_tag_type(tag), value); + } + + void add_bytes(T tag, const char* value) { + pbf_writer::add_bytes(pbf_tag_type(tag), value); + } + + template + void add_bytes_vectored(T tag, Ts&&... values) { + pbf_writer::add_bytes_vectored(pbf_tag_type(tag), std::forward(values)...); + } + + void add_string(T tag, const char* value, std::size_t size) { pbf_writer::add_string(pbf_tag_type(tag), value, size); } - inline void add_string(T tag, const std::string& value) { + void add_string(T tag, const data_view& value) { pbf_writer::add_string(pbf_tag_type(tag), value); } - inline void add_string(T tag, const char* value) { + void add_string(T tag, const std::string& value) { pbf_writer::add_string(pbf_tag_type(tag), value); } - inline void add_message(T tag, const char* value, std::size_t size) { + void add_string(T tag, const char* value) { + pbf_writer::add_string(pbf_tag_type(tag), value); + } + + void add_message(T tag, const char* value, std::size_t size) { pbf_writer::add_message(pbf_tag_type(tag), value, size); } - inline void add_message(T tag, const std::string& value) { + void add_message(T tag, const data_view& value) { + pbf_writer::add_message(pbf_tag_type(tag), value); + } + + void add_message(T tag, const std::string& value) { pbf_writer::add_message(pbf_tag_type(tag), value); } /// @cond INTERNAL #define PROTOZERO_WRITER_WRAP_ADD_PACKED(name) \ template \ - inline void add_packed_##name(T tag, InputIterator first, InputIterator last) { \ + void add_packed_##name(T tag, InputIterator first, InputIterator last) { \ pbf_writer::add_packed_##name(pbf_tag_type(tag), first, last); \ } @@ -132,7 +153,7 @@ public: #undef PROTOZERO_WRITER_WRAP_ADD_PACKED /// @endcond -}; +}; // class pbf_builder } // end namespace protozero diff --git a/protozero/pbf_message.hpp b/protozero/pbf_message.hpp index 45f01c1..c599cf1 100644 --- a/protozero/pbf_message.hpp +++ b/protozero/pbf_message.hpp @@ -13,7 +13,7 @@ documentation. /** * @file pbf_message.hpp * - * @brief Contains the pbf_message class. + * @brief Contains the pbf_message template class. */ #include @@ -75,19 +75,23 @@ public: pbf_reader(std::forward(args)...) { } - inline bool next() { + bool next() { return pbf_reader::next(); } - inline bool next(T tag) { - return pbf_reader::next(pbf_tag_type(tag)); + bool next(T next_tag) { + return pbf_reader::next(pbf_tag_type(next_tag)); } - inline T tag() const noexcept { + bool next(T next_tag, pbf_wire_type type) { + return pbf_reader::next(pbf_tag_type(next_tag), type); + } + + T tag() const noexcept { return T(pbf_reader::tag()); } -}; +}; // class pbf_message } // end namespace protozero diff --git a/protozero/pbf_reader.hpp b/protozero/pbf_reader.hpp index 58b3884..905ca0a 100644 --- a/protozero/pbf_reader.hpp +++ b/protozero/pbf_reader.hpp @@ -18,13 +18,12 @@ documentation. #include #include -#include -#include #include #include #include #include +#include #include #include @@ -55,16 +54,16 @@ namespace protozero { * * All methods of the pbf_reader class except get_bytes() and get_string() * provide the strong exception guarantee, ie they either succeed or do not - * change the pbf_reader object they are called on. Use the get_data() method + * change the pbf_reader object they are called on. Use the get_view() method * instead of get_bytes() or get_string(), if you need this guarantee. */ class pbf_reader { // A pointer to the next unread data. - const char *m_data = nullptr; + const char* m_data = nullptr; // A pointer to one past the end of data. - const char *m_end = nullptr; + const char* m_end = nullptr; // The wire type of the current field. pbf_wire_type m_wire_type = pbf_wire_type::unknown; @@ -72,177 +71,179 @@ class pbf_reader { // The tag of the current field. pbf_tag_type m_tag = 0; - // Copy N bytes from src to dest on little endian machines, on big endian - // swap the bytes in the process. - template - static void copy_or_byteswap(const char* src, void* dest) noexcept { -#if PROTOZERO_BYTE_ORDER == PROTOZERO_LITTLE_ENDIAN - memcpy(dest, src, N); -#else - byteswap(src, reinterpret_cast(dest)); -#endif - } - template - inline T get_fixed() { + T get_fixed() { T result; skip_bytes(sizeof(T)); - copy_or_byteswap(m_data - sizeof(T), &result); + std::memcpy(&result, m_data - sizeof(T), sizeof(T)); +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN + detail::byteswap_inplace(&result); +#endif return result; } -#ifdef PROTOZERO_USE_BARE_POINTER_FOR_PACKED_FIXED - template - using const_fixed_iterator = const T*; - - template - inline std::pair, const_fixed_iterator> create_fixed_iterator_pair(const char* first, const char* last) { - return std::make_pair(reinterpret_cast(first), - reinterpret_cast(last)); - } - -#else - - template - class const_fixed_iterator : public std::iterator { - - const char* m_data; - const char* m_end; - - public: - - const_fixed_iterator() noexcept : - m_data(nullptr), - m_end(nullptr) { - } - - const_fixed_iterator(const char *data, const char* end) noexcept : - m_data(data), - m_end(end) { - } - - const_fixed_iterator(const const_fixed_iterator&) noexcept = default; - const_fixed_iterator(const_fixed_iterator&&) noexcept = default; - - const_fixed_iterator& operator=(const const_fixed_iterator&) noexcept = default; - const_fixed_iterator& operator=(const_fixed_iterator&&) noexcept = default; - - ~const_fixed_iterator() noexcept = default; - - T operator*() { - T result; - copy_or_byteswap(m_data , &result); - return result; - } - - const_fixed_iterator& operator++() { - m_data += sizeof(T); - return *this; - } - - const_fixed_iterator operator++(int) { - const const_fixed_iterator tmp(*this); - ++(*this); - return tmp; - } - - bool operator==(const const_fixed_iterator& rhs) const noexcept { - return m_data == rhs.m_data && m_end == rhs.m_end; - } - - bool operator!=(const const_fixed_iterator& rhs) const noexcept { - return !(*this == rhs); - } - - }; // class const_fixed_iterator - - template - inline std::pair, const_fixed_iterator> create_fixed_iterator_pair(const char* first, const char* last) { - return std::make_pair(const_fixed_iterator(first, last), - const_fixed_iterator(last, last)); - } - -#endif - - template - inline std::pair, const_fixed_iterator> packed_fixed() { + iterator_range> packed_fixed() { protozero_assert(tag() != 0 && "call next() before accessing field value"); - auto len = get_len_and_skip(); + const auto len = get_len_and_skip(); protozero_assert(len % sizeof(T) == 0); - return create_fixed_iterator_pair(m_data-len, m_data); + return iterator_range>{const_fixed_iterator(m_data - len, m_data), + const_fixed_iterator(m_data, m_data)}; } - template inline T get_varint(); - template inline T get_svarint(); + template + T get_varint() { + return static_cast(decode_varint(&m_data, m_end)); + } - inline pbf_length_type get_length() { return get_varint(); } + template + T get_svarint() { + protozero_assert((has_wire_type(pbf_wire_type::varint) || has_wire_type(pbf_wire_type::length_delimited)) && "not a varint"); + return static_cast(decode_zigzag64(decode_varint(&m_data, m_end))); + } - inline void skip_bytes(pbf_length_type len); + pbf_length_type get_length() { + return get_varint(); + } - inline pbf_length_type get_len_and_skip(); + void skip_bytes(pbf_length_type len) { + if (m_data + len > m_end) { + throw end_of_buffer_exception(); + } + m_data += len; + + // In debug builds reset the tag to zero so that we can detect (some) + // wrong code. +#ifndef NDEBUG + m_tag = 0; +#endif + } + + pbf_length_type get_len_and_skip() { + const auto len = get_length(); + skip_bytes(len); + return len; + } + + template + iterator_range get_packed() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + const auto len = get_len_and_skip(); + return iterator_range{T{m_data - len, m_data}, + T{m_data, m_data}}; + } public: /** - * Construct a pbf_reader message from a data pointer and a length. The pointer - * will be stored inside the pbf_reader object, no data is copied. So you must - * make sure the buffer stays valid as long as the pbf_reader object is used. + * Construct a pbf_reader message from a data_view. The pointer from the + * data_view will be stored inside the pbf_reader object, no data is + * copied. So you must make sure the view stays valid as long as the + * pbf_reader object is used. * * The buffer must contain a complete protobuf message. * * @post There is no current field. */ - inline pbf_reader(const char *data, std::size_t length) noexcept; + explicit pbf_reader(const data_view& view) noexcept + : m_data(view.data()), + m_end(view.data() + view.size()), + m_wire_type(pbf_wire_type::unknown), + m_tag(0) { + } /** - * Construct a pbf_reader message from a data pointer and a length. The pointer - * will be stored inside the pbf_reader object, no data is copied. So you must - * make sure the buffer stays valid as long as the pbf_reader object is used. - * - * The buffer must contain a complete protobuf message. - * - * @post There is no current field. - */ - inline pbf_reader(std::pair data) noexcept; - - /** - * Construct a pbf_reader message from a std::string. A pointer to the string - * internals will be stored inside the pbf_reader object, no data is copied. - * So you must make sure the string is unchanged as long as the pbf_reader + * Construct a pbf_reader message from a data pointer and a length. The + * pointer will be stored inside the pbf_reader object, no data is copied. + * So you must make sure the buffer stays valid as long as the pbf_reader * object is used. * + * The buffer must contain a complete protobuf message. + * + * @post There is no current field. + */ + pbf_reader(const char* data, std::size_t size) noexcept + : m_data(data), + m_end(data + size), + m_wire_type(pbf_wire_type::unknown), + m_tag(0) { + } + + /** + * Construct a pbf_reader message from a data pointer and a length. The + * pointer will be stored inside the pbf_reader object, no data is copied. + * So you must make sure the buffer stays valid as long as the pbf_reader + * object is used. + * + * The buffer must contain a complete protobuf message. + * + * @post There is no current field. + */ + explicit pbf_reader(const std::pair& data) noexcept + : m_data(data.first), + m_end(data.first + data.second), + m_wire_type(pbf_wire_type::unknown), + m_tag(0) { + } + + /** + * Construct a pbf_reader message from a std::string. A pointer to the + * string internals will be stored inside the pbf_reader object, no data + * is copied. So you must make sure the string is unchanged as long as the + * pbf_reader object is used. + * * The string must contain a complete protobuf message. * * @post There is no current field. */ - inline pbf_reader(const std::string& data) noexcept; + explicit pbf_reader(const std::string& data) noexcept + : m_data(data.data()), + m_end(data.data() + data.size()), + m_wire_type(pbf_wire_type::unknown), + m_tag(0) { + } /** * pbf_reader can be default constructed and behaves like it has an empty * buffer. */ - inline pbf_reader() noexcept = default; + pbf_reader() noexcept = default; /// pbf_reader messages can be copied trivially. - inline pbf_reader(const pbf_reader&) noexcept = default; + pbf_reader(const pbf_reader&) noexcept = default; /// pbf_reader messages can be moved trivially. - inline pbf_reader(pbf_reader&&) noexcept = default; + pbf_reader(pbf_reader&&) noexcept = default; /// pbf_reader messages can be copied trivially. - inline pbf_reader& operator=(const pbf_reader& other) noexcept = default; + pbf_reader& operator=(const pbf_reader& other) noexcept = default; /// pbf_reader messages can be moved trivially. - inline pbf_reader& operator=(pbf_reader&& other) noexcept = default; + pbf_reader& operator=(pbf_reader&& other) noexcept = default; - inline ~pbf_reader() = default; + ~pbf_reader() = default; /** - * In a boolean context the pbf_reader class evaluates to `true` if there are - * still fields available and to `false` if the last field has been read. + * Swap the contents of this object with the other. + * + * @param other Other object to swap data with. */ - inline operator bool() const noexcept; + void swap(pbf_reader& other) noexcept { + using std::swap; + swap(m_data, other.m_data); + swap(m_end, other.m_end); + swap(m_wire_type, other.m_wire_type); + swap(m_tag, other.m_tag); + } + + /** + * In a boolean context the pbf_reader class evaluates to `true` if there + * are still fields available and to `false` if the last field has been + * read. + */ + operator bool() const noexcept { + return m_data < m_end; + } /** * Return the length in bytes of the current message. If you have @@ -272,7 +273,32 @@ public: * @pre There must be no current field. * @post If it returns `true` there is a current field now. */ - inline bool next(); + bool next() { + if (m_data == m_end) { + return false; + } + + const auto value = get_varint(); + m_tag = pbf_tag_type(value >> 3); + + // tags 0 and 19000 to 19999 are not allowed as per + // https://developers.google.com/protocol-buffers/docs/proto + protozero_assert(((m_tag > 0 && m_tag < 19000) || + (m_tag > 19999 && m_tag <= ((1 << 29) - 1))) && "tag out of range"); + + m_wire_type = pbf_wire_type(value & 0x07); + switch (m_wire_type) { + case pbf_wire_type::varint: + case pbf_wire_type::fixed64: + case pbf_wire_type::length_delimited: + case pbf_wire_type::fixed32: + break; + default: + throw unknown_pbf_wire_type_exception(); + } + + return true; + } /** * Set next field with given tag in the message as the current field. @@ -295,11 +321,62 @@ public: * } * @endcode * + * Note that this will not check the wire type. The two-argument version + * of this function will also check the wire type. + * * @returns `true` if there is a next field with this tag. * @pre There must be no current field. * @post If it returns `true` there is a current field now with the given tag. */ - inline bool next(pbf_tag_type tag); + bool next(pbf_tag_type next_tag) { + while (next()) { + if (m_tag == next_tag) { + return true; + } else { + skip(); + } + } + return false; + } + + /** + * Set next field with given tag and wire type in the message as the + * current field. Fields with other tags are skipped. This is usually + * called in a while loop for repeated fields: + * + * @code + * pbf_reader message(...); + * while (message.next(17, pbf_wire_type::varint)) { + * // handle field + * } + * @endcode + * + * or you can call it just once to get the one field with this tag: + * + * @code + * pbf_reader message(...); + * if (message.next(17, pbf_wire_type::varint)) { + * // handle field + * } + * @endcode + * + * Note that this will also check the wire type. The one-argument version + * of this function will not check the wire type. + * + * @returns `true` if there is a next field with this tag. + * @pre There must be no current field. + * @post If it returns `true` there is a current field now with the given tag. + */ + bool next(pbf_tag_type next_tag, pbf_wire_type type) { + while (next()) { + if (m_tag == next_tag && m_wire_type == type) { + return true; + } else { + skip(); + } + } + return false; + } /** * The tag of the current field. The tag is the field number from the @@ -310,7 +387,9 @@ public: * @returns tag of the current field. * @pre There must be a current field (ie. next() must have returned `true`). */ - inline pbf_tag_type tag() const noexcept; + pbf_tag_type tag() const noexcept { + return m_tag; + } /** * Get the wire type of the current field. The wire types are: @@ -327,7 +406,35 @@ public: * @returns wire type of the current field. * @pre There must be a current field (ie. next() must have returned `true`). */ - inline pbf_wire_type wire_type() const noexcept; + pbf_wire_type wire_type() const noexcept { + return m_wire_type; + } + + /** + * Get the tag and wire type of the current field in one integer suitable + * for comparison with a switch statement. + * + * Use it like this: + * + * @code + * pbf_reader message(...); + * while (message.next()) { + * switch (message.tag_and_type()) { + * case tag_and_type(17, pbf_wire_type::length_delimited): + * .... + * break; + * case tag_and_type(21, pbf_wire_type::varint): + * .... + * break; + * default: + * message.skip(); + * } + * } + * @endcode + */ + uint32_t tag_and_type() const noexcept { + return protozero::tag_and_type(tag(), wire_type()); + } /** * Check the wire type of the current field. @@ -335,7 +442,9 @@ public: * @returns `true` if the current field has the given wire type. * @pre There must be a current field (ie. next() must have returned `true`). */ - inline bool has_wire_type(pbf_wire_type type) const noexcept; + bool has_wire_type(pbf_wire_type type) const noexcept { + return wire_type() == type; + } /** * Consume the current field. @@ -343,7 +452,25 @@ public: * @pre There must be a current field (ie. next() must have returned `true`). * @post The current field was consumed and there is no current field now. */ - inline void skip(); + void skip() { + protozero_assert(tag() != 0 && "call next() before calling skip()"); + switch (wire_type()) { + case pbf_wire_type::varint: + skip_varint(&m_data, m_end); + break; + case pbf_wire_type::fixed64: + skip_bytes(8); + break; + case pbf_wire_type::length_delimited: + skip_bytes(get_length()); + break; + case pbf_wire_type::fixed32: + skip_bytes(4); + break; + default: + protozero_assert(false && "can not be here because next() should have thrown already"); + } + } ///@{ /** @@ -357,7 +484,13 @@ public: * @pre The current field must be of type "bool". * @post The current field was consumed and there is no current field now. */ - inline bool get_bool(); + bool get_bool() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); + protozero_assert((*m_data & 0x80) == 0 && "not a 1 byte varint"); + skip_bytes(1); + return m_data[-1] != 0; // -1 okay because we incremented m_data the line before + } /** * Consume and return value of current "enum" field. @@ -366,7 +499,7 @@ public: * @pre The current field must be of type "enum". * @post The current field was consumed and there is no current field now. */ - inline int32_t get_enum() { + int32_t get_enum() { protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); return get_varint(); } @@ -378,7 +511,7 @@ public: * @pre The current field must be of type "int32". * @post The current field was consumed and there is no current field now. */ - inline int32_t get_int32() { + int32_t get_int32() { protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); return get_varint(); } @@ -390,7 +523,7 @@ public: * @pre The current field must be of type "sint32". * @post The current field was consumed and there is no current field now. */ - inline int32_t get_sint32() { + int32_t get_sint32() { protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); return get_svarint(); } @@ -402,7 +535,7 @@ public: * @pre The current field must be of type "uint32". * @post The current field was consumed and there is no current field now. */ - inline uint32_t get_uint32() { + uint32_t get_uint32() { protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); return get_varint(); } @@ -414,7 +547,7 @@ public: * @pre The current field must be of type "int64". * @post The current field was consumed and there is no current field now. */ - inline int64_t get_int64() { + int64_t get_int64() { protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); return get_varint(); } @@ -426,7 +559,7 @@ public: * @pre The current field must be of type "sint64". * @post The current field was consumed and there is no current field now. */ - inline int64_t get_sint64() { + int64_t get_sint64() { protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); return get_svarint(); } @@ -438,7 +571,7 @@ public: * @pre The current field must be of type "uint64". * @post The current field was consumed and there is no current field now. */ - inline uint64_t get_uint64() { + uint64_t get_uint64() { protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); return get_varint(); } @@ -450,7 +583,11 @@ public: * @pre The current field must be of type "fixed32". * @post The current field was consumed and there is no current field now. */ - inline uint32_t get_fixed32(); + uint32_t get_fixed32() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed"); + return get_fixed(); + } /** * Consume and return value of current "sfixed32" field. @@ -459,7 +596,11 @@ public: * @pre The current field must be of type "sfixed32". * @post The current field was consumed and there is no current field now. */ - inline int32_t get_sfixed32(); + int32_t get_sfixed32() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed"); + return get_fixed(); + } /** * Consume and return value of current "fixed64" field. @@ -468,7 +609,11 @@ public: * @pre The current field must be of type "fixed64". * @post The current field was consumed and there is no current field now. */ - inline uint64_t get_fixed64(); + uint64_t get_fixed64() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed"); + return get_fixed(); + } /** * Consume and return value of current "sfixed64" field. @@ -477,7 +622,11 @@ public: * @pre The current field must be of type "sfixed64". * @post The current field was consumed and there is no current field now. */ - inline int64_t get_sfixed64(); + int64_t get_sfixed64() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed"); + return get_fixed(); + } /** * Consume and return value of current "float" field. @@ -486,7 +635,11 @@ public: * @pre The current field must be of type "float". * @post The current field was consumed and there is no current field now. */ - inline float get_float(); + float get_float() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed"); + return get_fixed(); + } /** * Consume and return value of current "double" field. @@ -495,8 +648,29 @@ public: * @pre The current field must be of type "double". * @post The current field was consumed and there is no current field now. */ - inline double get_double(); + double get_double() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed"); + return get_fixed(); + } + /** + * Consume and return value of current "bytes", "string", or "message" + * field. + * + * @returns A data_view object. + * @pre There must be a current field (ie. next() must have returned `true`). + * @pre The current field must be of type "bytes", "string", or "message". + * @post The current field was consumed and there is no current field now. + */ + data_view get_view() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message"); + const auto len = get_len_and_skip(); + return data_view{m_data - len, len}; + } + +#ifndef PROTOZERO_STRICT_API /** * Consume and return value of current "bytes" or "string" field. * @@ -505,7 +679,13 @@ public: * @pre The current field must be of type "bytes" or "string". * @post The current field was consumed and there is no current field now. */ - inline std::pair get_data(); + std::pair get_data() { + protozero_assert(tag() != 0 && "call next() before accessing field value"); + protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message"); + const auto len = get_len_and_skip(); + return std::make_pair(m_data - len, len); + } +#endif /** * Consume and return value of current "bytes" field. @@ -514,7 +694,9 @@ public: * @pre The current field must be of type "bytes". * @post The current field was consumed and there is no current field now. */ - inline std::string get_bytes(); + std::string get_bytes() { + return std::string(get_view()); + } /** * Consume and return value of current "string" field. @@ -523,7 +705,9 @@ public: * @pre The current field must be of type "string". * @post The current field was consumed and there is no current field now. */ - inline std::string get_string(); + std::string get_string() { + return std::string(get_view()); + } /** * Consume and return value of current "message" field. @@ -532,136 +716,35 @@ public: * @pre The current field must be of type "message". * @post The current field was consumed and there is no current field now. */ - inline pbf_reader get_message() { - return pbf_reader(get_data()); + pbf_reader get_message() { + return pbf_reader(get_view()); } ///@} -private: - - template - class const_varint_iterator : public std::iterator { - - protected: - - const char* m_data; - const char* m_end; - - public: - - const_varint_iterator() noexcept : - m_data(nullptr), - m_end(nullptr) { - } - - const_varint_iterator(const char *data, const char* end) noexcept : - m_data(data), - m_end(end) { - } - - const_varint_iterator(const const_varint_iterator&) noexcept = default; - const_varint_iterator(const_varint_iterator&&) noexcept = default; - - const_varint_iterator& operator=(const const_varint_iterator&) noexcept = default; - const_varint_iterator& operator=(const_varint_iterator&&) noexcept = default; - - ~const_varint_iterator() noexcept = default; - - T operator*() { - const char* d = m_data; // will be thrown away - return static_cast(decode_varint(&d, m_end)); - } - - const_varint_iterator& operator++() { - // Ignore the result, we call decode_varint() just for the - // side-effect of updating m_data. - decode_varint(&m_data, m_end); - return *this; - } - - const_varint_iterator operator++(int) { - const const_varint_iterator tmp(*this); - ++(*this); - return tmp; - } - - bool operator==(const const_varint_iterator& rhs) const noexcept { - return m_data == rhs.m_data && m_end == rhs.m_end; - } - - bool operator!=(const const_varint_iterator& rhs) const noexcept { - return !(*this == rhs); - } - - }; // class const_varint_iterator - - template - class const_svarint_iterator : public const_varint_iterator { - - public: - - const_svarint_iterator() noexcept : - const_varint_iterator() { - } - - const_svarint_iterator(const char *data, const char* end) noexcept : - const_varint_iterator(data, end) { - } - - const_svarint_iterator(const const_svarint_iterator&) = default; - const_svarint_iterator(const_svarint_iterator&&) = default; - - const_svarint_iterator& operator=(const const_svarint_iterator&) = default; - const_svarint_iterator& operator=(const_svarint_iterator&&) = default; - - ~const_svarint_iterator() = default; - - T operator*() { - const char* d = this->m_data; // will be thrown away - return static_cast(decode_zigzag64(decode_varint(&d, this->m_end))); - } - - const_svarint_iterator& operator++() { - // Ignore the result, we call decode_varint() just for the - // side-effect of updating m_data. - decode_varint(&this->m_data, this->m_end); - return *this; - } - - const_svarint_iterator operator++(int) { - const const_svarint_iterator tmp(*this); - ++(*this); - return tmp; - } - - }; // class const_svarint_iterator - -public: - /// Forward iterator for iterating over bool (int32 varint) values. - typedef const_varint_iterator< int32_t> const_bool_iterator; + using const_bool_iterator = const_varint_iterator< int32_t>; /// Forward iterator for iterating over enum (int32 varint) values. - typedef const_varint_iterator< int32_t> const_enum_iterator; + using const_enum_iterator = const_varint_iterator< int32_t>; /// Forward iterator for iterating over int32 (varint) values. - typedef const_varint_iterator< int32_t> const_int32_iterator; + using const_int32_iterator = const_varint_iterator< int32_t>; /// Forward iterator for iterating over sint32 (varint) values. - typedef const_svarint_iterator const_sint32_iterator; + using const_sint32_iterator = const_svarint_iterator; /// Forward iterator for iterating over uint32 (varint) values. - typedef const_varint_iterator const_uint32_iterator; + using const_uint32_iterator = const_varint_iterator; /// Forward iterator for iterating over int64 (varint) values. - typedef const_varint_iterator< int64_t> const_int64_iterator; + using const_int64_iterator = const_varint_iterator< int64_t>; /// Forward iterator for iterating over sint64 (varint) values. - typedef const_svarint_iterator const_sint64_iterator; + using const_sint64_iterator = const_svarint_iterator; /// Forward iterator for iterating over uint64 (varint) values. - typedef const_varint_iterator const_uint64_iterator; + using const_uint64_iterator = const_varint_iterator; ///@{ /** @@ -677,7 +760,9 @@ public: * @pre The current field must be of type "repeated packed bool". * @post The current field was consumed and there is no current field now. */ - inline std::pair get_packed_bool(); + iterator_range get_packed_bool() { + return get_packed(); + } /** * Consume current "repeated packed enum" field. @@ -688,7 +773,9 @@ public: * @pre The current field must be of type "repeated packed enum". * @post The current field was consumed and there is no current field now. */ - inline std::pair get_packed_enum(); + iterator_range get_packed_enum() { + return get_packed(); + } /** * Consume current "repeated packed int32" field. @@ -699,7 +786,9 @@ public: * @pre The current field must be of type "repeated packed int32". * @post The current field was consumed and there is no current field now. */ - inline std::pair get_packed_int32(); + iterator_range get_packed_int32() { + return get_packed(); + } /** * Consume current "repeated packed sint32" field. @@ -710,7 +799,9 @@ public: * @pre The current field must be of type "repeated packed sint32". * @post The current field was consumed and there is no current field now. */ - inline std::pair get_packed_sint32(); + iterator_range get_packed_sint32() { + return get_packed(); + } /** * Consume current "repeated packed uint32" field. @@ -721,7 +812,9 @@ public: * @pre The current field must be of type "repeated packed uint32". * @post The current field was consumed and there is no current field now. */ - inline std::pair get_packed_uint32(); + iterator_range get_packed_uint32() { + return get_packed(); + } /** * Consume current "repeated packed int64" field. @@ -732,7 +825,9 @@ public: * @pre The current field must be of type "repeated packed int64". * @post The current field was consumed and there is no current field now. */ - inline std::pair get_packed_int64(); + iterator_range get_packed_int64() { + return get_packed(); + } /** * Consume current "repeated packed sint64" field. @@ -743,7 +838,9 @@ public: * @pre The current field must be of type "repeated packed sint64". * @post The current field was consumed and there is no current field now. */ - inline std::pair get_packed_sint64(); + iterator_range get_packed_sint64() { + return get_packed(); + } /** * Consume current "repeated packed uint64" field. @@ -754,7 +851,9 @@ public: * @pre The current field must be of type "repeated packed uint64". * @post The current field was consumed and there is no current field now. */ - inline std::pair get_packed_uint64(); + iterator_range get_packed_uint64() { + return get_packed(); + } /** * Consume current "repeated packed fixed32" field. @@ -765,7 +864,7 @@ public: * @pre The current field must be of type "repeated packed fixed32". * @post The current field was consumed and there is no current field now. */ - inline auto get_packed_fixed32() -> decltype(packed_fixed()) { + auto get_packed_fixed32() -> decltype(packed_fixed()) { return packed_fixed(); } @@ -778,7 +877,7 @@ public: * @pre The current field must be of type "repeated packed sfixed32". * @post The current field was consumed and there is no current field now. */ - inline auto get_packed_sfixed32() -> decltype(packed_fixed()) { + auto get_packed_sfixed32() -> decltype(packed_fixed()) { return packed_fixed(); } @@ -791,7 +890,7 @@ public: * @pre The current field must be of type "repeated packed fixed64". * @post The current field was consumed and there is no current field now. */ - inline auto get_packed_fixed64() -> decltype(packed_fixed()) { + auto get_packed_fixed64() -> decltype(packed_fixed()) { return packed_fixed(); } @@ -804,7 +903,7 @@ public: * @pre The current field must be of type "repeated packed sfixed64". * @post The current field was consumed and there is no current field now. */ - inline auto get_packed_sfixed64() -> decltype(packed_fixed()) { + auto get_packed_sfixed64() -> decltype(packed_fixed()) { return packed_fixed(); } @@ -817,7 +916,7 @@ public: * @pre The current field must be of type "repeated packed float". * @post The current field was consumed and there is no current field now. */ - inline auto get_packed_float() -> decltype(packed_fixed()) { + auto get_packed_float() -> decltype(packed_fixed()) { return packed_fixed(); } @@ -830,7 +929,7 @@ public: * @pre The current field must be of type "repeated packed double". * @post The current field was consumed and there is no current field now. */ - inline auto get_packed_double() -> decltype(packed_fixed()) { + auto get_packed_double() -> decltype(packed_fixed()) { return packed_fixed(); } @@ -838,238 +937,14 @@ public: }; // class pbf_reader -pbf_reader::pbf_reader(const char *data, std::size_t length) noexcept - : m_data(data), - m_end(data + length), - m_wire_type(pbf_wire_type::unknown), - m_tag(0) { -} - -pbf_reader::pbf_reader(std::pair data) noexcept - : m_data(data.first), - m_end(data.first + data.second), - m_wire_type(pbf_wire_type::unknown), - m_tag(0) { -} - -pbf_reader::pbf_reader(const std::string& data) noexcept - : m_data(data.data()), - m_end(data.data() + data.size()), - m_wire_type(pbf_wire_type::unknown), - m_tag(0) { -} - -pbf_reader::operator bool() const noexcept { - return m_data < m_end; -} - -bool pbf_reader::next() { - if (m_data == m_end) { - return false; - } - - auto value = get_varint(); - m_tag = value >> 3; - - // tags 0 and 19000 to 19999 are not allowed as per - // https://developers.google.com/protocol-buffers/docs/proto - protozero_assert(((m_tag > 0 && m_tag < 19000) || (m_tag > 19999 && m_tag <= ((1 << 29) - 1))) && "tag out of range"); - - m_wire_type = pbf_wire_type(value & 0x07); - switch (m_wire_type) { - case pbf_wire_type::varint: - case pbf_wire_type::fixed64: - case pbf_wire_type::length_delimited: - case pbf_wire_type::fixed32: - break; - default: - throw unknown_pbf_wire_type_exception(); - } - - return true; -} - -bool pbf_reader::next(pbf_tag_type requested_tag) { - while (next()) { - if (m_tag == requested_tag) { - return true; - } else { - skip(); - } - } - return false; -} - -pbf_tag_type pbf_reader::tag() const noexcept { - return m_tag; -} - -pbf_wire_type pbf_reader::wire_type() const noexcept { - return m_wire_type; -} - -bool pbf_reader::has_wire_type(pbf_wire_type type) const noexcept { - return wire_type() == type; -} - -void pbf_reader::skip_bytes(pbf_length_type len) { - if (m_data + len > m_end) { - throw end_of_buffer_exception(); - } - m_data += len; - -// In debug builds reset the tag to zero so that we can detect (some) -// wrong code. -#ifndef NDEBUG - m_tag = 0; -#endif -} - -void pbf_reader::skip() { - protozero_assert(tag() != 0 && "call next() before calling skip()"); - switch (wire_type()) { - case pbf_wire_type::varint: - (void)get_uint32(); // called for the side-effect of skipping value - break; - case pbf_wire_type::fixed64: - skip_bytes(8); - break; - case pbf_wire_type::length_delimited: - skip_bytes(get_length()); - break; - case pbf_wire_type::fixed32: - skip_bytes(4); - break; - default: - protozero_assert(false && "can not be here because next() should have thrown already"); - } -} - -pbf_length_type pbf_reader::get_len_and_skip() { - auto len = get_length(); - skip_bytes(len); - return len; -} - -template -T pbf_reader::get_varint() { - return static_cast(decode_varint(&m_data, m_end)); -} - -template -T pbf_reader::get_svarint() { - protozero_assert((has_wire_type(pbf_wire_type::varint) || has_wire_type(pbf_wire_type::length_delimited)) && "not a varint"); - return static_cast(decode_zigzag64(decode_varint(&m_data, m_end))); -} - -uint32_t pbf_reader::get_fixed32() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed"); - return get_fixed(); -} - -int32_t pbf_reader::get_sfixed32() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed"); - return get_fixed(); -} - -uint64_t pbf_reader::get_fixed64() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed"); - return get_fixed(); -} - -int64_t pbf_reader::get_sfixed64() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed"); - return get_fixed(); -} - -float pbf_reader::get_float() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed"); - return get_fixed(); -} - -double pbf_reader::get_double() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed"); - return get_fixed(); -} - -bool pbf_reader::get_bool() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint"); - protozero_assert((*m_data & 0x80) == 0 && "not a 1 byte varint"); - skip_bytes(1); - return m_data[-1] != 0; // -1 okay because we incremented m_data the line before -} - -std::pair pbf_reader::get_data() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message"); - auto len = get_len_and_skip(); - return std::make_pair(m_data-len, len); -} - -std::string pbf_reader::get_bytes() { - auto d = get_data(); - return std::string(d.first, d.second); -} - -std::string pbf_reader::get_string() { - return get_bytes(); -} - -std::pair pbf_reader::get_packed_bool() { - return get_packed_int32(); -} - -std::pair pbf_reader::get_packed_enum() { - return get_packed_int32(); -} - -std::pair pbf_reader::get_packed_int32() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - auto len = get_len_and_skip(); - return std::make_pair(pbf_reader::const_int32_iterator(m_data-len, m_data), - pbf_reader::const_int32_iterator(m_data, m_data)); -} - -std::pair pbf_reader::get_packed_uint32() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - auto len = get_len_and_skip(); - return std::make_pair(pbf_reader::const_uint32_iterator(m_data-len, m_data), - pbf_reader::const_uint32_iterator(m_data, m_data)); -} - -std::pair pbf_reader::get_packed_sint32() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - auto len = get_len_and_skip(); - return std::make_pair(pbf_reader::const_sint32_iterator(m_data-len, m_data), - pbf_reader::const_sint32_iterator(m_data, m_data)); -} - -std::pair pbf_reader::get_packed_int64() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - auto len = get_len_and_skip(); - return std::make_pair(pbf_reader::const_int64_iterator(m_data-len, m_data), - pbf_reader::const_int64_iterator(m_data, m_data)); -} - -std::pair pbf_reader::get_packed_uint64() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - auto len = get_len_and_skip(); - return std::make_pair(pbf_reader::const_uint64_iterator(m_data-len, m_data), - pbf_reader::const_uint64_iterator(m_data, m_data)); -} - -std::pair pbf_reader::get_packed_sint64() { - protozero_assert(tag() != 0 && "call next() before accessing field value"); - auto len = get_len_and_skip(); - return std::make_pair(pbf_reader::const_sint64_iterator(m_data-len, m_data), - pbf_reader::const_sint64_iterator(m_data, m_data)); +/** + * Swap two pbf_reader objects. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline void swap(pbf_reader& lhs, pbf_reader& rhs) noexcept { + lhs.swap(rhs); } } // end namespace protozero diff --git a/protozero/pbf_writer.hpp b/protozero/pbf_writer.hpp index 422e147..af626bd 100644 --- a/protozero/pbf_writer.hpp +++ b/protozero/pbf_writer.hpp @@ -22,6 +22,7 @@ documentation. #include #include #include +#include #include #include @@ -68,38 +69,35 @@ class pbf_writer { // parent to the position where the data of the submessage is written to. std::size_t m_pos = 0; - inline void add_varint(uint64_t value) { + void add_varint(uint64_t value) { protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage"); protozero_assert(m_data); write_varint(std::back_inserter(*m_data), value); } - inline void add_field(pbf_tag_type tag, pbf_wire_type type) { + void add_field(pbf_tag_type tag, pbf_wire_type type) { protozero_assert(((tag > 0 && tag < 19000) || (tag > 19999 && tag <= ((1 << 29) - 1))) && "tag out of range"); - uint32_t b = (tag << 3) | uint32_t(type); + const uint32_t b = (tag << 3) | uint32_t(type); add_varint(b); } - inline void add_tagged_varint(pbf_tag_type tag, uint64_t value) { + void add_tagged_varint(pbf_tag_type tag, uint64_t value) { add_field(tag, pbf_wire_type::varint); add_varint(value); } template - inline void add_fixed(T value) { + void add_fixed(T value) { protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage"); protozero_assert(m_data); -#if PROTOZERO_BYTE_ORDER == PROTOZERO_LITTLE_ENDIAN - m_data->append(reinterpret_cast(&value), sizeof(T)); -#else - auto size = m_data->size(); - m_data->resize(size + sizeof(T)); - byteswap(reinterpret_cast(&value), const_cast(m_data->data() + size)); +#if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN + detail::byteswap_inplace(&value); #endif + m_data->append(reinterpret_cast(&value), sizeof(T)); } template - inline void add_packed_fixed(pbf_tag_type tag, It first, It last, std::input_iterator_tag) { + void add_packed_fixed(pbf_tag_type tag, It first, It last, std::input_iterator_tag) { if (first == last) { return; } @@ -112,12 +110,12 @@ class pbf_writer { } template - inline void add_packed_fixed(pbf_tag_type tag, It first, It last, std::forward_iterator_tag) { + void add_packed_fixed(pbf_tag_type tag, It first, It last, std::forward_iterator_tag) { if (first == last) { return; } - auto length = std::distance(first, last); + const auto length = std::distance(first, last); add_length_varint(tag, sizeof(T) * pbf_length_type(length)); reserve(sizeof(T) * std::size_t(length)); @@ -127,7 +125,7 @@ class pbf_writer { } template - inline void add_packed_varint(pbf_tag_type tag, It first, It last) { + void add_packed_varint(pbf_tag_type tag, It first, It last) { if (first == last) { return; } @@ -140,7 +138,7 @@ class pbf_writer { } template - inline void add_packed_svarint(pbf_tag_type tag, It first, It last) { + void add_packed_svarint(pbf_tag_type tag, It first, It last) { if (first == last) { return; } @@ -155,14 +153,18 @@ class pbf_writer { // The number of bytes to reserve for the varint holding the length of // a length-delimited field. The length has to fit into pbf_length_type, // and a varint needs 8 bit for every 7 bit. - static const int reserve_bytes = sizeof(pbf_length_type) * 8 / 7 + 1; + enum constant_reserve_bytes : int { + reserve_bytes = sizeof(pbf_length_type) * 8 / 7 + 1 + }; // If m_rollpack_pos is set to this special value, it means that when // the submessage is closed, nothing needs to be done, because the length // of the submessage has already been written correctly. - static const std::size_t size_is_known = std::numeric_limits::max(); + enum constant_size_is_known : std::size_t { + size_is_known = std::numeric_limits::max() + }; - inline void open_submessage(pbf_tag_type tag, std::size_t size) { + void open_submessage(pbf_tag_type tag, std::size_t size) { protozero_assert(m_pos == 0); protozero_assert(m_data); if (size == 0) { @@ -177,7 +179,7 @@ class pbf_writer { m_pos = m_data->size(); } - inline void rollback_submessage() { + void rollback_submessage() { protozero_assert(m_pos != 0); protozero_assert(m_rollback_pos != size_is_known); protozero_assert(m_data); @@ -185,20 +187,20 @@ class pbf_writer { m_pos = 0; } - inline void commit_submessage() { + void commit_submessage() { protozero_assert(m_pos != 0); protozero_assert(m_rollback_pos != size_is_known); protozero_assert(m_data); - auto length = pbf_length_type(m_data->size() - m_pos); + const auto length = pbf_length_type(m_data->size() - m_pos); protozero_assert(m_data->size() >= m_pos - reserve_bytes); - auto n = write_varint(m_data->begin() + long(m_pos) - reserve_bytes, length); + const auto n = write_varint(m_data->begin() + long(m_pos) - reserve_bytes, length); m_data->erase(m_data->begin() + long(m_pos) - reserve_bytes + n, m_data->begin() + long(m_pos)); m_pos = 0; } - inline void close_submessage() { + void close_submessage() { protozero_assert(m_data); if (m_pos == 0 || m_rollback_pos == size_is_known) { return; @@ -210,7 +212,7 @@ class pbf_writer { } } - inline void add_length_varint(pbf_tag_type tag, pbf_length_type length) { + void add_length_varint(pbf_tag_type tag, pbf_length_type length) { add_field(tag, pbf_wire_type::length_delimited); add_varint(length); } @@ -222,20 +224,18 @@ public: * stores a reference to that string and adds all data to it. The string * doesn't have to be empty. The pbf_writer will just append data. */ - inline explicit pbf_writer(std::string& data) noexcept : + explicit pbf_writer(std::string& data) noexcept : m_data(&data), - m_parent_writer(nullptr), - m_pos(0) { + m_parent_writer(nullptr) { } /** * Create a writer without a data store. In this form the writer can not * be used! */ - inline pbf_writer() noexcept : + pbf_writer() noexcept : m_data(nullptr), - m_parent_writer(nullptr), - m_pos(0) { + m_parent_writer(nullptr) { } /** @@ -248,10 +248,9 @@ public: * Setting this allows some optimizations but is only possible in * a few very specific cases. */ - inline pbf_writer(pbf_writer& parent_writer, pbf_tag_type tag, std::size_t size=0) : + pbf_writer(pbf_writer& parent_writer, pbf_tag_type tag, std::size_t size=0) : m_data(parent_writer.m_data), - m_parent_writer(&parent_writer), - m_pos(0) { + m_parent_writer(&parent_writer) { m_parent_writer->open_submessage(tag, size); } @@ -262,17 +261,30 @@ public: pbf_writer& operator=(const pbf_writer&) noexcept = default; /// A pbf_writer object can be moved - inline pbf_writer(pbf_writer&&) noexcept = default; + pbf_writer(pbf_writer&&) noexcept = default; /// A pbf_writer object can be moved - inline pbf_writer& operator=(pbf_writer&&) noexcept = default; + pbf_writer& operator=(pbf_writer&&) noexcept = default; - inline ~pbf_writer() { + ~pbf_writer() { if (m_parent_writer) { m_parent_writer->close_submessage(); } } + /** + * Swap the contents of this object with the other. + * + * @param other Other object to swap data with. + */ + void swap(pbf_writer& other) noexcept { + using std::swap; + swap(m_data, other.m_data); + swap(m_parent_writer, other.m_parent_writer); + swap(m_rollback_pos, other.m_rollback_pos); + swap(m_pos, other.m_pos); + } + /** * Reserve size bytes in the underlying message store in addition to * whatever the message store already holds. So unlike @@ -286,7 +298,14 @@ public: m_data->reserve(m_data->size() + size); } - inline void rollback() { + /** + * Cancel writing of this submessage. The complete submessage will be + * removed as if it was never created and no fields were added. + * + * @pre Must be a pbf_writer of a submessage, ie one opened with the + * pbf_writer constructor taking a parent message. + */ + void rollback() { protozero_assert(m_parent_writer && "you can't call rollback() on a pbf_writer without a parent"); protozero_assert(m_pos == 0 && "you can't call rollback() on a pbf_writer that has an open nested submessage"); m_parent_writer->rollback_submessage(); @@ -304,7 +323,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_bool(pbf_tag_type tag, bool value) { + void add_bool(pbf_tag_type tag, bool value) { add_field(tag, pbf_wire_type::varint); protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage"); protozero_assert(m_data); @@ -317,7 +336,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_enum(pbf_tag_type tag, int32_t value) { + void add_enum(pbf_tag_type tag, int32_t value) { add_tagged_varint(tag, uint64_t(value)); } @@ -327,7 +346,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_int32(pbf_tag_type tag, int32_t value) { + void add_int32(pbf_tag_type tag, int32_t value) { add_tagged_varint(tag, uint64_t(value)); } @@ -337,7 +356,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_sint32(pbf_tag_type tag, int32_t value) { + void add_sint32(pbf_tag_type tag, int32_t value) { add_tagged_varint(tag, encode_zigzag32(value)); } @@ -347,7 +366,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_uint32(pbf_tag_type tag, uint32_t value) { + void add_uint32(pbf_tag_type tag, uint32_t value) { add_tagged_varint(tag, value); } @@ -357,7 +376,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_int64(pbf_tag_type tag, int64_t value) { + void add_int64(pbf_tag_type tag, int64_t value) { add_tagged_varint(tag, uint64_t(value)); } @@ -367,7 +386,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_sint64(pbf_tag_type tag, int64_t value) { + void add_sint64(pbf_tag_type tag, int64_t value) { add_tagged_varint(tag, encode_zigzag64(value)); } @@ -377,7 +396,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_uint64(pbf_tag_type tag, uint64_t value) { + void add_uint64(pbf_tag_type tag, uint64_t value) { add_tagged_varint(tag, value); } @@ -387,7 +406,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_fixed32(pbf_tag_type tag, uint32_t value) { + void add_fixed32(pbf_tag_type tag, uint32_t value) { add_field(tag, pbf_wire_type::fixed32); add_fixed(value); } @@ -398,7 +417,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_sfixed32(pbf_tag_type tag, int32_t value) { + void add_sfixed32(pbf_tag_type tag, int32_t value) { add_field(tag, pbf_wire_type::fixed32); add_fixed(value); } @@ -409,7 +428,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_fixed64(pbf_tag_type tag, uint64_t value) { + void add_fixed64(pbf_tag_type tag, uint64_t value) { add_field(tag, pbf_wire_type::fixed64); add_fixed(value); } @@ -420,7 +439,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_sfixed64(pbf_tag_type tag, int64_t value) { + void add_sfixed64(pbf_tag_type tag, int64_t value) { add_field(tag, pbf_wire_type::fixed64); add_fixed(value); } @@ -431,7 +450,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_float(pbf_tag_type tag, float value) { + void add_float(pbf_tag_type tag, float value) { add_field(tag, pbf_wire_type::fixed32); add_fixed(value); } @@ -442,7 +461,7 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_double(pbf_tag_type tag, double value) { + void add_double(pbf_tag_type tag, double value) { add_field(tag, pbf_wire_type::fixed64); add_fixed(value); } @@ -454,7 +473,7 @@ public: * @param value Pointer to value to be written * @param size Number of bytes to be written */ - inline void add_bytes(pbf_tag_type tag, const char* value, std::size_t size) { + void add_bytes(pbf_tag_type tag, const char* value, std::size_t size) { protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage"); protozero_assert(m_data); protozero_assert(size <= std::numeric_limits::max()); @@ -468,10 +487,62 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_bytes(pbf_tag_type tag, const std::string& value) { + void add_bytes(pbf_tag_type tag, const data_view& value) { add_bytes(tag, value.data(), value.size()); } + /** + * Add "bytes" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_bytes(pbf_tag_type tag, const std::string& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "bytes" field to data. Bytes from the value are written until + * a null byte is encountered. The null byte is not added. + * + * @param tag Tag (field number) of the field + * @param value Pointer to zero-delimited value to be written + */ + void add_bytes(pbf_tag_type tag, const char* value) { + add_bytes(tag, value, std::strlen(value)); + } + + /** + * Add "bytes" field to data using vectored input. All the data in the + * 2nd and further arguments is "concatenated" with only a single copy + * into the final buffer. + * + * This will work with objects of any type supporting the data() and + * size() methods like std::string or protozero::data_view. + * + * Example: + * @code + * std::string data1 = "abc"; + * std::string data2 = "xyz"; + * writer.add_bytes_vectored(1, data1, data2); + * @endcode + * + * @tparam Ts List of types supporting data() and size() methods. + * @param tag Tag (field number) of the field + * @param values List of objects of types Ts with data to be appended. + */ + template + void add_bytes_vectored(pbf_tag_type tag, Ts&&... values) { + protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage"); + protozero_assert(m_data); + size_t sum_size = 0; + (void)std::initializer_list{sum_size += values.size()...}; + protozero_assert(sum_size <= std::numeric_limits::max()); + add_length_varint(tag, pbf_length_type(sum_size)); + m_data->reserve(m_data->size() + sum_size); + (void)std::initializer_list{(m_data->append(values.data(), values.size()), 0)...}; + } + /** * Add "string" field to data. * @@ -479,7 +550,7 @@ public: * @param value Pointer to value to be written * @param size Number of bytes to be written */ - inline void add_string(pbf_tag_type tag, const char* value, std::size_t size) { + void add_string(pbf_tag_type tag, const char* value, std::size_t size) { add_bytes(tag, value, size); } @@ -489,7 +560,17 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written */ - inline void add_string(pbf_tag_type tag, const std::string& value) { + void add_string(pbf_tag_type tag, const data_view& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "string" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written + */ + void add_string(pbf_tag_type tag, const std::string& value) { add_bytes(tag, value.data(), value.size()); } @@ -500,7 +581,7 @@ public: * @param tag Tag (field number) of the field * @param value Pointer to value to be written */ - inline void add_string(pbf_tag_type tag, const char* value) { + void add_string(pbf_tag_type tag, const char* value) { add_bytes(tag, value, std::strlen(value)); } @@ -511,7 +592,7 @@ public: * @param value Pointer to message to be written * @param size Length of the message */ - inline void add_message(pbf_tag_type tag, const char* value, std::size_t size) { + void add_message(pbf_tag_type tag, const char* value, std::size_t size) { add_bytes(tag, value, size); } @@ -521,7 +602,17 @@ public: * @param tag Tag (field number) of the field * @param value Value to be written. The value must be a complete message. */ - inline void add_message(pbf_tag_type tag, const std::string& value) { + void add_message(pbf_tag_type tag, const data_view& value) { + add_bytes(tag, value.data(), value.size()); + } + + /** + * Add "message" field to data. + * + * @param tag Tag (field number) of the field + * @param value Value to be written. The value must be a complete message. + */ + void add_message(pbf_tag_type tag, const std::string& value) { add_bytes(tag, value.data(), value.size()); } @@ -535,126 +626,126 @@ public: /** * Add "repeated packed bool" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to bool. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_bool(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_bool(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_varint(tag, first, last); } /** * Add "repeated packed enum" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to int32_t. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_enum(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_enum(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_varint(tag, first, last); } /** * Add "repeated packed int32" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to int32_t. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_int32(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_int32(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_varint(tag, first, last); } /** * Add "repeated packed sint32" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to int32_t. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_sint32(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_sint32(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_svarint(tag, first, last); } /** * Add "repeated packed uint32" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to uint32_t. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_uint32(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_uint32(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_varint(tag, first, last); } /** * Add "repeated packed int64" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to int64_t. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_int64(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_int64(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_varint(tag, first, last); } /** * Add "repeated packed sint64" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to int64_t. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_sint64(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_sint64(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_svarint(tag, first, last); } /** * Add "repeated packed uint64" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to uint64_t. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_uint64(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_uint64(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_varint(tag, first, last); } /** * Add "repeated packed fixed32" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to uint32_t. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_fixed32(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_fixed32(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_fixed(tag, first, last, typename std::iterator_traits::iterator_category()); } @@ -662,14 +753,14 @@ public: /** * Add "repeated packed sfixed32" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to int32_t. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_sfixed32(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_sfixed32(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_fixed(tag, first, last, typename std::iterator_traits::iterator_category()); } @@ -677,14 +768,14 @@ public: /** * Add "repeated packed fixed64" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to uint64_t. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_fixed64(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_fixed64(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_fixed(tag, first, last, typename std::iterator_traits::iterator_category()); } @@ -692,14 +783,14 @@ public: /** * Add "repeated packed sfixed64" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to int64_t. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_sfixed64(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_sfixed64(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_fixed(tag, first, last, typename std::iterator_traits::iterator_category()); } @@ -707,14 +798,14 @@ public: /** * Add "repeated packed float" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to float. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_float(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_float(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_fixed(tag, first, last, typename std::iterator_traits::iterator_category()); } @@ -722,14 +813,14 @@ public: /** * Add "repeated packed double" field to data. * - * @tparam InputIterator An type satisfying the InputIterator concept. + * @tparam InputIterator A type satisfying the InputIterator concept. * Dereferencing the iterator must yield a type assignable to double. * @param tag Tag (field number) of the field * @param first Iterator pointing to the beginning of the data * @param last Iterator pointing one past the end of data */ template - inline void add_packed_double(pbf_tag_type tag, InputIterator first, InputIterator last) { + void add_packed_double(pbf_tag_type tag, InputIterator first, InputIterator last) { add_packed_fixed(tag, first, last, typename std::iterator_traits::iterator_category()); } @@ -742,6 +833,16 @@ public: }; // class pbf_writer +/** + * Swap two pbf_writer objects. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline void swap(pbf_writer& lhs, pbf_writer& rhs) noexcept { + lhs.swap(rhs); +} + namespace detail { class packed_field { @@ -752,6 +853,12 @@ namespace detail { public: + packed_field(const packed_field&) = delete; + packed_field& operator=(const packed_field&) = delete; + + packed_field(packed_field&&) = default; + packed_field& operator=(packed_field&&) = default; + packed_field(pbf_writer& parent_writer, pbf_tag_type tag) : m_writer(parent_writer, tag) { } @@ -771,12 +878,14 @@ namespace detail { public: - packed_field_fixed(pbf_writer& parent_writer, pbf_tag_type tag) : - packed_field(parent_writer, tag) { + template + packed_field_fixed(pbf_writer& parent_writer, P tag) : + packed_field(parent_writer, static_cast(tag)) { } - packed_field_fixed(pbf_writer& parent_writer, pbf_tag_type tag, std::size_t size) : - packed_field(parent_writer, tag, size * sizeof(T)) { + template + packed_field_fixed(pbf_writer& parent_writer, P tag, std::size_t size) : + packed_field(parent_writer, static_cast(tag), size * sizeof(T)) { } void add_element(T value) { @@ -790,8 +899,9 @@ namespace detail { public: - packed_field_varint(pbf_writer& parent_writer, pbf_tag_type tag) : - packed_field(parent_writer, tag) { + template + packed_field_varint(pbf_writer& parent_writer, P tag) : + packed_field(parent_writer, static_cast(tag)) { } void add_element(T value) { @@ -805,8 +915,9 @@ namespace detail { public: - packed_field_svarint(pbf_writer& parent_writer, pbf_tag_type tag) : - packed_field(parent_writer, tag) { + template + packed_field_svarint(pbf_writer& parent_writer, P tag) : + packed_field(parent_writer, static_cast(tag)) { } void add_element(T value) { @@ -817,19 +928,46 @@ namespace detail { } // end namespace detail +/// Class for generating packed repeated bool fields. using packed_field_bool = detail::packed_field_varint; + +/// Class for generating packed repeated enum fields. using packed_field_enum = detail::packed_field_varint; + +/// Class for generating packed repeated int32 fields. using packed_field_int32 = detail::packed_field_varint; + +/// Class for generating packed repeated sint32 fields. using packed_field_sint32 = detail::packed_field_svarint; + +/// Class for generating packed repeated uint32 fields. using packed_field_uint32 = detail::packed_field_varint; + +/// Class for generating packed repeated int64 fields. using packed_field_int64 = detail::packed_field_varint; + +/// Class for generating packed repeated sint64 fields. using packed_field_sint64 = detail::packed_field_svarint; + +/// Class for generating packed repeated uint64 fields. using packed_field_uint64 = detail::packed_field_varint; + +/// Class for generating packed repeated fixed32 fields. using packed_field_fixed32 = detail::packed_field_fixed; + +/// Class for generating packed repeated sfixed32 fields. using packed_field_sfixed32 = detail::packed_field_fixed; + +/// Class for generating packed repeated fixed64 fields. using packed_field_fixed64 = detail::packed_field_fixed; + +/// Class for generating packed repeated sfixed64 fields. using packed_field_sfixed64 = detail::packed_field_fixed; + +/// Class for generating packed repeated float fields. using packed_field_float = detail::packed_field_fixed; + +/// Class for generating packed repeated double fields. using packed_field_double = detail::packed_field_fixed; } // end namespace protozero diff --git a/protozero/types.hpp b/protozero/types.hpp index 6856b3d..3dbdaf1 100644 --- a/protozero/types.hpp +++ b/protozero/types.hpp @@ -16,33 +16,190 @@ documentation. * @brief Contains the declaration of low-level types used in the pbf format. */ +#include +#include #include +#include +#include +#include + +#include namespace protozero { - /** - * The type used for field tags (field numbers). - */ - typedef uint32_t pbf_tag_type; +/** + * The type used for field tags (field numbers). + */ +using pbf_tag_type = uint32_t; + +/** + * The type used to encode type information. + * See the table on + * https://developers.google.com/protocol-buffers/docs/encoding + */ +enum class pbf_wire_type : uint32_t { + varint = 0, // int32/64, uint32/64, sint32/64, bool, enum + fixed64 = 1, // fixed64, sfixed64, double + length_delimited = 2, // string, bytes, embedded messages, + // packed repeated fields + fixed32 = 5, // fixed32, sfixed32, float + unknown = 99 // used for default setting in this library +}; + +/** + * Get the tag and wire type of the current field in one integer suitable + * for comparison with a switch statement. + * + * See pbf_reader.tag_and_type() for an example how to use this. + */ +template +constexpr inline uint32_t tag_and_type(T tag, pbf_wire_type wire_type) noexcept { + return (static_cast(static_cast(tag)) << 3) | static_cast(wire_type); +} + +/** + * The type used for length values, such as the length of a field. + */ +using pbf_length_type = uint32_t; + +#ifdef PROTOZERO_USE_VIEW +using data_view = PROTOZERO_USE_VIEW; +#else + +/** + * Holds a pointer to some data and a length. + * + * This class is supposed to be compatible with the std::string_view + * that will be available in C++17. + */ +class data_view { + + const char* m_data; + std::size_t m_size; + +public: /** - * The type used to encode type information. - * See the table on - * https://developers.google.com/protocol-buffers/docs/encoding + * Default constructor. Construct an empty data_view. */ - enum class pbf_wire_type : uint32_t { - varint = 0, // int32/64, uint32/64, sint32/64, bool, enum - fixed64 = 1, // fixed64, sfixed64, double - length_delimited = 2, // string, bytes, embedded messages, - // packed repeated fields - fixed32 = 5, // fixed32, sfixed32, float - unknown = 99 // used for default setting in this library - }; + constexpr data_view() noexcept + : m_data(nullptr), + m_size(0) { + } /** - * The type used for length values, such as the length of a field. + * Create data_view from pointer and size. + * + * @param ptr Pointer to the data. + * @param length Length of the data. */ - typedef uint32_t pbf_length_type; + constexpr data_view(const char* ptr, std::size_t length) noexcept + : m_data(ptr), + m_size(length) { + } + + /** + * Create data_view from string. + * + * @param str String with the data. + */ + data_view(const std::string& str) noexcept + : m_data(str.data()), + m_size(str.size()) { + } + + /** + * Create data_view from zero-terminated string. + * + * @param ptr Pointer to the data. + */ + data_view(const char* ptr) noexcept + : m_data(ptr), + m_size(std::strlen(ptr)) { + } + + /** + * Swap the contents of this object with the other. + * + * @param other Other object to swap data with. + */ + void swap(data_view& other) noexcept { + using std::swap; + swap(m_data, other.m_data); + swap(m_size, other.m_size); + } + + /// Return pointer to data. + constexpr const char* data() const noexcept { + return m_data; + } + + /// Return length of data in bytes. + constexpr std::size_t size() const noexcept { + return m_size; + } + + /// Returns true if size is 0. + constexpr bool empty() const noexcept { + return m_size == 0; + } + + /** + * Convert data view to string. + * + * @pre Must not be default constructed data_view. + */ + std::string to_string() const { + protozero_assert(m_data); + return std::string{m_data, m_size}; + } + + /** + * Convert data view to string. + * + * @pre Must not be default constructed data_view. + */ + explicit operator std::string() const { + protozero_assert(m_data); + return std::string{m_data, m_size}; + } + +}; // class data_view + +/** + * Swap two data_view objects. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline void swap(data_view& lhs, data_view& rhs) noexcept { + lhs.swap(rhs); +} + +/** + * Two data_view instances are equal if they have the same size and the + * same content. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline bool operator==(const data_view& lhs, const data_view& rhs) noexcept { + return lhs.size() == rhs.size() && std::equal(lhs.data(), lhs.data() + lhs.size(), rhs.data()); +} + +/** + * Two data_view instances are not equal if they have different sizes or the + * content differs. + * + * @param lhs First object. + * @param rhs Second object. + */ +inline bool operator!=(const data_view& lhs, const data_view& rhs) noexcept { + return !(lhs == rhs); +} + +#endif + } // end namespace protozero diff --git a/protozero/varint.hpp b/protozero/varint.hpp index 4242df9..d115d5f 100644 --- a/protozero/varint.hpp +++ b/protozero/varint.hpp @@ -23,13 +23,54 @@ documentation. namespace protozero { /** - * The maximum length of a 64bit varint. + * The maximum length of a 64 bit varint. */ constexpr const int8_t max_varint_length = sizeof(uint64_t) * 8 / 7 + 1; -// from https://github.com/facebook/folly/blob/master/folly/Varint.h +namespace detail { + + // from https://github.com/facebook/folly/blob/master/folly/Varint.h + inline uint64_t decode_varint_impl(const char** data, const char* end) { + const int8_t* begin = reinterpret_cast(*data); + const int8_t* iend = reinterpret_cast(end); + const int8_t* p = begin; + uint64_t val = 0; + + if (iend - begin >= max_varint_length) { // fast path + do { + int64_t b; + b = *p++; val = uint64_t((b & 0x7f) ); if (b >= 0) break; + b = *p++; val |= uint64_t((b & 0x7f) << 7); if (b >= 0) break; + b = *p++; val |= uint64_t((b & 0x7f) << 14); if (b >= 0) break; + b = *p++; val |= uint64_t((b & 0x7f) << 21); if (b >= 0) break; + b = *p++; val |= uint64_t((b & 0x7f) << 28); if (b >= 0) break; + b = *p++; val |= uint64_t((b & 0x7f) << 35); if (b >= 0) break; + b = *p++; val |= uint64_t((b & 0x7f) << 42); if (b >= 0) break; + b = *p++; val |= uint64_t((b & 0x7f) << 49); if (b >= 0) break; + b = *p++; val |= uint64_t((b & 0x7f) << 56); if (b >= 0) break; + b = *p++; val |= uint64_t((b & 0x7f) << 63); if (b >= 0) break; + throw varint_too_long_exception(); + } while (false); + } else { + int shift = 0; + while (p != iend && *p < 0) { + val |= uint64_t(*p++ & 0x7f) << shift; + shift += 7; + } + if (p == iend) { + throw end_of_buffer_exception(); + } + val |= uint64_t(*p++) << shift; + } + + *data = reinterpret_cast(p); + return val; + } + +} // end namespace detail + /** - * Decode a 64bit varint. + * Decode a 64 bit varint. * * Strong exception guarantee: if there is an exception the data pointer will * not be changed. @@ -39,55 +80,69 @@ constexpr const int8_t max_varint_length = sizeof(uint64_t) * 8 / 7 + 1; * @param[in] end Pointer one past the end of the input data. * @returns The decoded integer * @throws varint_too_long_exception if the varint is longer then the maximum - * length that would fit in a 64bit int. Usually this means your data + * length that would fit in a 64 bit int. Usually this means your data * is corrupted or you are trying to read something as a varint that * isn't. * @throws end_of_buffer_exception if the *end* of the buffer was reached * before the end of the varint. */ inline uint64_t decode_varint(const char** data, const char* end) { - const int8_t* begin = reinterpret_cast(*data); - const int8_t* iend = reinterpret_cast(end); - const int8_t* p = begin; - uint64_t val = 0; - - if (iend - begin >= max_varint_length) { // fast path - do { - int64_t b; - b = *p++; val = uint64_t((b & 0x7f) ); if (b >= 0) break; - b = *p++; val |= uint64_t((b & 0x7f) << 7); if (b >= 0) break; - b = *p++; val |= uint64_t((b & 0x7f) << 14); if (b >= 0) break; - b = *p++; val |= uint64_t((b & 0x7f) << 21); if (b >= 0) break; - b = *p++; val |= uint64_t((b & 0x7f) << 28); if (b >= 0) break; - b = *p++; val |= uint64_t((b & 0x7f) << 35); if (b >= 0) break; - b = *p++; val |= uint64_t((b & 0x7f) << 42); if (b >= 0) break; - b = *p++; val |= uint64_t((b & 0x7f) << 49); if (b >= 0) break; - b = *p++; val |= uint64_t((b & 0x7f) << 56); if (b >= 0) break; - b = *p++; val |= uint64_t((b & 0x7f) << 63); if (b >= 0) break; - throw varint_too_long_exception(); - } while (false); - } else { - int shift = 0; - while (p != iend && *p < 0) { - val |= uint64_t(*p++ & 0x7f) << shift; - shift += 7; - } - if (p == iend) { - throw end_of_buffer_exception(); - } - val |= uint64_t(*p++) << shift; + // If this is a one-byte varint, decode it here. + if (end != *data && ((**data & 0x80) == 0)) { + uint64_t val = uint64_t(**data); + ++(*data); + return val; } - - *data = reinterpret_cast(p); - return val; + // If this varint is more than one byte, defer to complete implementation. + return detail::decode_varint_impl(data, end); } /** - * Varint-encode a 64bit integer. + * Skip over a varint. + * + * Strong exception guarantee: if there is an exception the data pointer will + * not be changed. + * + * @param[in,out] data Pointer to pointer to the input data. After the function + * returns this will point to the next data to be read. + * @param[in] end Pointer one past the end of the input data. + * @throws end_of_buffer_exception if the *end* of the buffer was reached + * before the end of the varint. */ -template -inline int write_varint(OutputIterator data, uint64_t value) { - int n=1; +inline void skip_varint(const char** data, const char* end) { + const int8_t* begin = reinterpret_cast(*data); + const int8_t* iend = reinterpret_cast(end); + const int8_t* p = begin; + + while (p != iend && *p < 0) { + ++p; + } + + if (p >= begin + max_varint_length) { + throw varint_too_long_exception(); + } + + if (p == iend) { + throw end_of_buffer_exception(); + } + + ++p; + + *data = reinterpret_cast(p); +} + +/** + * Varint encode a 64 bit integer. + * + * @tparam T An output iterator type. + * @param data Output iterator the varint encoded value will be written to + * byte by byte. + * @param value The integer that will be encoded. + * @throws Any exception thrown by increment or dereference operator on data. + */ +template +inline int write_varint(T data, uint64_t value) { + int n = 1; while (value >= 0x80) { *data++ = char((value & 0x7f) | 0x80); @@ -102,29 +157,29 @@ inline int write_varint(OutputIterator data, uint64_t value) { /** * ZigZag encodes a 32 bit integer. */ -inline uint32_t encode_zigzag32(int32_t value) noexcept { +inline constexpr uint32_t encode_zigzag32(int32_t value) noexcept { return (static_cast(value) << 1) ^ (static_cast(value >> 31)); } /** * ZigZag encodes a 64 bit integer. */ -inline uint64_t encode_zigzag64(int64_t value) noexcept { +inline constexpr uint64_t encode_zigzag64(int64_t value) noexcept { return (static_cast(value) << 1) ^ (static_cast(value >> 63)); } /** * Decodes a 32 bit ZigZag-encoded integer. */ -inline int32_t decode_zigzag32(uint32_t value) noexcept { - return int32_t(value >> 1) ^ -int32_t(value & 1); +inline constexpr int32_t decode_zigzag32(uint32_t value) noexcept { + return static_cast(value >> 1) ^ -static_cast(value & 1); } /** * Decodes a 64 bit ZigZag-encoded integer. */ -inline int64_t decode_zigzag64(uint64_t value) noexcept { - return int64_t(value >> 1) ^ -int64_t(value & 1); +inline constexpr int64_t decode_zigzag64(uint64_t value) noexcept { + return static_cast(value >> 1) ^ -static_cast(value & 1); } } // end namespace protozero diff --git a/protozero/version.hpp b/protozero/version.hpp index 7b60e2e..6d82823 100644 --- a/protozero/version.hpp +++ b/protozero/version.hpp @@ -10,13 +10,25 @@ documentation. *****************************************************************************/ -#define PROTOZERO_VERSION_MAJOR 1 -#define PROTOZERO_VERSION_MINOR 3 -#define PROTOZERO_VERSION_PATCH 0 +/** + * @file version.hpp + * + * @brief Contains macros defining the protozero version. + */ +/// The major version number +#define PROTOZERO_VERSION_MAJOR 1 + +/// The minor version number +#define PROTOZERO_VERSION_MINOR 5 + +/// The patch number +#define PROTOZERO_VERSION_PATCH 2 + +/// The complete version number #define PROTOZERO_VERSION_CODE (PROTOZERO_VERSION_MAJOR * 10000 + PROTOZERO_VERSION_MINOR * 100 + PROTOZERO_VERSION_PATCH) -#define PROTOZERO_VERSION_STRING "1.3.0" - +/// Version number as string +#define PROTOZERO_VERSION_STRING "1.5.2" #endif // PROTOZERO_VERSION_HPP diff --git a/version.hpp b/version.hpp index f6a497f..b8ccbdf 100644 --- a/version.hpp +++ b/version.hpp @@ -1 +1 @@ -#define VERSION "tippecanoe v1.19.2\n" +#define VERSION "tippecanoe v1.19.3\n"