trace_buffer: revise trace buffer implementation

This commit revises the current implementation by overloading the
length field with a padding indicator in addition to the zero-length
head entry. This simplifies the iteration semantics, as it eliminates
the need to determine whether a zero-length entry is the actual
head of the buffer or padding at the buffer end.

genodelabs/genode#4434
Johannes Schlatow 2022-02-04 16:56:21 +01:00 committed by Norman Feske
parent d24552f5e2
commit edc46d15f8
3 changed files with 149 additions and 93 deletions


@@ -1,11 +1,95 @@
/*
* \brief Event tracing infrastructure
* \brief Event tracing buffer
* \author Norman Feske
* \author Johannes Schlatow
* \date 2013-08-09
*
* The trace buffer is shared between the traced component (producer) and
* the trace monitor (consumer). It is essentially a lock-free/wait-free
* single-producer single-consumer (spsc) ring buffer. There are a couple of
* differences to a standard spsc ring buffer:
*
* - If the buffer is full, we want to overwrite the oldest data.
* - The producer does not care about the consumer, which might not even
*   exist. Hence, the tail pointer is managed locally by the consumer.
* - The buffer entries have variable length.
*
* As a consequence of the variable length, the entry length needs to be stored
* in each entry. A zero-length entry marks the head of the buffer (the entry
* that is written next). Moreover, we may need some padding at the end of the
* buffer if the next entry does not fit into the remaining space. To
* distinguish the padding from the buffer head, the padding is marked by a
* length field set to the maximum unsigned value.
*
* Let's have a look at the layout of a non-full buffer. The zero-length field
* marks the head. The consumer can stop reading when it sees this.
*
* +------------------------+------------+-------------+-----+-----------------+
* | len1 data1 | len2 data2 | len3 data3 | 0 | empty |
* +------------------------+------------+-------------+-----+-----------------+
*
* Now, when the next entry does not fit into the remaining buffer space, it
* wraps around and starts at the beginning. The unused space at the end is
* padded:
*
* +------------------------+------------+-------------+-----+-----------------+
* | len4 data4 | 0 | empty | len2 data2 | len3 data3 | MAX | padding |
* +------------------------+------------+-------------+-----+-----------------+
*
* If the consumer detects the padding, it skips it and continues at the
* beginning. Note that the padding is not present if there is less than a
* length field left at the end of the buffer.
*
* A potential consumer is supposed to read new buffer entries fast enough
* as, otherwise, it will miss some entries. We count the buffer wrap-arounds
* to detect this. (A consumer-side sketch of these reading rules follows
* this header comment.)
*
* Unfortunately, we cannot easily ensure that the producer does not overwrite
* entries that are currently read out and, even worse, invalidate the
* consumer's tail pointer in the process. Nor can this be detected implicitly
* by looking at the wrap count. Imagine the consumer stopped in the middle of
* the buffer because there were no more entries and resumes reading after the
* producer has wrapped once and almost caught up with the consumer's position.
* The consumer sees that the buffer wrapped only once but can still be
* corrupted by the producer.
*
* In order to prevent this, we need a way to determine on the producer side
* where the consumer currently resides in the buffer, so that we can
* a) prevent the producer from overwriting these parts of the buffer and
* b) still be able to write the most recent buffer entry somewhere.
*
* One way to approach this could be to store the tail pointer in the buffer
* and ensure that the head never overtakes the tail. The producer could
* continue writing from the start of the buffer instead. A critical corner
* case, however, occurs when the tail pointer resides at the very beginning
* of the buffer. In this case, newly produced entries must be dropped.
*
* XXX: What follows is only a sketch of ideas that have not been implemented.
*
* Another approach is to split the buffer into two equal
* partitions. The foreground partition is the one currently written so that
* the background partition can be read without memory corruption. When the
* foreground partition is full, the producer switches the partitions and starts
* overwriting old entries in the former background partition. By locking the
* background partition, the consumer makes sure that the producer does not
* switch partitions. This way we assure that the head pointer never overtakes
* the tail pointer. In case the background partition is locked when the
* producer wants to switch partitions, it starts overwriting the foreground
* partition. The producer increments a counter for each partition whenever it
* overwrites the very first entry. This way the consumer is able to detect if
* it lost some events.
*
* XXX
* The consumer is also able to lock the foreground partition so that it does
* not need to wait for the producer to fill it and switch partitions. Yet,
* it must never lock both partitions as this would stall the producer. We
* ensure this by making the unlock-background-lock-foreground operation
* atomic. In case the consumer crashes while holding a lock, the producer
* is still able to use half of the buffer.
*/
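For illustration, here is a minimal, self-contained sketch of the consumer-side
reading rules described above. It assumes a quiescent producer, a buffer larger
than one length field, and an always-present head marker, and it ignores the
wrap counting; all names (entry_header, HEAD, PADDING, read_entries, handle)
are illustrative only, not part of the actual interface.

#include <cstddef>
#include <cstring>

struct entry_header { std::size_t len; };  /* 0 = head marker, max value = padding */

static constexpr std::size_t HEAD    = 0;
static constexpr std::size_t PADDING = ~(std::size_t)0;

void read_entries(char const *buf, std::size_t buf_size,
                  void (*handle)(char const *data, std::size_t len))
{
    std::size_t offset = 0;
    for (;;) {

        /* less than a length field left at the end: wrap without a padding marker */
        if (offset + sizeof(entry_header) > buf_size) {
            offset = 0;
            continue;
        }

        entry_header h;
        std::memcpy(&h, buf + offset, sizeof(h));

        if (h.len == HEAD)       /* reached the write position, stop */
            break;

        if (h.len == PADDING) {  /* unused space at the buffer end, wrap */
            offset = 0;
            continue;
        }

        handle(buf + offset + sizeof(h), h.len);
        offset += sizeof(h) + h.len;
    }
}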
/*
* Copyright (C) 2013-2017 Genode Labs GmbH
* Copyright (C) 2013-2022 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
@@ -27,14 +111,25 @@ class Genode::Trace::Buffer
{
private:
size_t volatile _head_offset; /* in bytes, relative to 'entries' */
size_t volatile _size; /* in bytes */
unsigned volatile _wrapped; /* count of buffer wraps */
size_t _head_offset; /* in bytes, relative to 'entries' */
size_t _size; /* in bytes */
struct _Entry
{
enum Type : size_t {
HEAD = 0,
PADDING = ~(size_t)0
};
size_t len;
char data[0];
void mark(Type t) { len = t; }
bool head() const { return len == HEAD; }
bool padding() const { return len == PADDING; }
};
_Entry _entries[0];
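As a side note, a hedged sketch of how the overloaded length field encodes the
three possible states of an entry; the 'Entry_state' enum and the 'classify'
helper are hypothetical and only mirror the semantics of '_Entry' above:

enum class Entry_state { HEAD, PADDING, DATA };

/* hypothetical helper, for illustration only */
static Entry_state classify(size_t len)
{
    if (len == 0)           return Entry_state::HEAD;     /* entry to be written next */
    if (len == ~(size_t)0)  return Entry_state::PADDING;  /* skip to the buffer start */
    return Entry_state::DATA;                             /* 'len' payload bytes follow */
}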
@@ -46,8 +141,8 @@ class Genode::Trace::Buffer
_head_offset = 0;
_wrapped++;
/* mark first entry with len 0 */
_head_entry()->len = 0;
/* mark first entry as head */
_head_entry()->mark(_Entry::HEAD);
}
/*
@@ -78,9 +173,9 @@ class Genode::Trace::Buffer
if (_head_offset + sizeof(_Entry) + len <= _size)
return _head_entry()->data;
/* mark last entry with len 0 and wrap */
/* mark unused space as padding */
if (_head_offset + sizeof(_Entry) <= _size)
_head_entry()->len = 0;
_head_entry()->mark(_Entry::PADDING);
_buffer_wrapped();
@@ -104,9 +199,9 @@ class Genode::Trace::Buffer
if (_head_offset + sizeof(_Entry) > _size)
_buffer_wrapped();
/* mark entry next to new entry with len 0 */
/* mark entry next to new entry as head */
else if (_head_offset + sizeof(_Entry) <= _size)
_head_entry()->len = 0;
_head_entry()->mark(_Entry::HEAD);
*old_head_len = len;
}
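To make the control flow of the two hunks above easier to follow, here is a
condensed sketch of the producer-side reserve/commit path. The member names are
taken from the hunks, but the exact signatures are assumptions, not the
verbatim patch:

char *reserve(size_t len)
{
    /* the entry fits behind the current head marker: write it in place */
    if (_head_offset + sizeof(_Entry) + len <= _size)
        return _head_entry()->data;

    /* mark the unused tail as padding (only if a length field still fits) */
    if (_head_offset + sizeof(_Entry) <= _size)
        _head_entry()->mark(_Entry::PADDING);

    _buffer_wrapped();  /* reset the head to the buffer start and count the wrap */
    return _head_entry()->data;
}

void commit(size_t len)
{
    size_t *old_head_len = &_head_entry()->len;

    /* advance the head and place the new head marker (or wrap if none fits) */
    _head_offset += sizeof(_Entry) + len;
    if (_head_offset + sizeof(_Entry) > _size)
        _buffer_wrapped();
    else
        _head_entry()->mark(_Entry::HEAD);

    /* publish the new entry last so a consumer never sees it partially written */
    *old_head_len = len;
}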
@@ -121,62 +216,48 @@ class Genode::Trace::Buffer
class Entry
{
private:
_Entry const *_entry;
friend class Buffer;
Entry(_Entry const *entry) : _entry(entry) { }
public:
/* entry is padding */
bool _padding() const { return _entry->padding(); }
Entry() : _entry(0) { };
public:
size_t length() const { return _entry->len; }
char const *data() const { return _entry->data; }
/*
* XXX The meaning of this method is irritating.
*
* If it returns 'true', it means either that the entry marks
* the empty padding after the entry with the highest memory
* address or that it actually marks the end of the buffer
* according to commit order. This is an example state of the
* buffer with the two types of "last" entry:
*
* +-------------+------------+---+---------+-------------+------------+---+-------+
* | len3 data3 | len4 data4 | 0 | empty | len1 data1 | len2 data2 | 0 | empty |
* +-------------+------------+---+---------+-------------+------------+---+-------+
*
* If the entry with the highest memory address fits
* perfectly, the first type of "last" entry is not needed:
*
* +------------+--------------------+---+-------+-------------+-------------------+
* | len3 data3 | len4 data4 | 0 | empty | len1 data1 | len2 data2 |
* +------------+--------------------+---+-------+-------------+-------------------+
*
* If the buffer didn't wrap so far, there is only one "last"
* entry that has both meanings:
*
* +--------------------------+------------+-------------+---+---------------------+
* | len1 data1 | len2 data2 | len3 data3 | 0 | empty |
* +--------------------------+------------+-------------+---+---------------------+
*/
bool last() const { return _entry == 0; }
/* return whether the entry is invalid, i.e. no length field is present */
bool last() const { return _entry == 0; }
/* return whether data field is invalid */
bool empty() const { return last() || _padding() || _entry->head(); }
/* entry is head (zero length) */
bool head() const { return !last() && _entry->head(); }
};
Entry first() const
{
return _entries->len ? Entry(_entries) : Entry(0);
}
/* Return the very first entry at the start of the buffer. */
Entry first() const { return Entry(_entries); }
/**
* Return the entry that follows the given entry.
* Return an invalid entry if the end of the (used) buffer was reached.
* Stop at the head of the buffer.
*
* Before consuming a valid entry, the reader must check whether it is
* the head of the buffer (i.e. not yet written).
*/
Entry next(Entry entry) const
{
if (entry.last())
if (entry.last() || entry._padding())
return Entry(0);
if (entry.length() == 0)
return Entry(0);
if (entry.head())
return entry;
addr_t const offset = (addr_t)entry.data() - (addr_t)_entries;
if (offset + entry.length() + sizeof(_Entry) > _size)
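Assuming the 'first()'/'next()' accessors shown above are reachable by a
monitor, a one-shot dump of the committed entries from the buffer start up to
the head could look as follows ('dump_entries' is illustrative only; entries
that lie behind the head from a previous wrap are not visited):

/* assumed includes: base/log.h and the trace-buffer header shown above */
void dump_entries(Genode::Trace::Buffer const &buffer)
{
    for (auto entry = buffer.first(); !entry.last(); entry = buffer.next(entry)) {

        if (entry.head())   /* reached the write position, nothing newer yet */
            break;

        if (entry.empty())  /* head or padding marker, no payload to process */
            continue;

        Genode::log("entry of ", entry.length(), " bytes");
    }
}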


@@ -46,62 +46,37 @@ class Trace_buffer
if ((_buffer.wrapped() - 1) != _wrapped_count) {
warning("buffer wrapped multiple times; you might want to raise buffer size; curr_count=",
_buffer.wrapped(), " last_count=", _wrapped_count);
_curr = _buffer.first();
}
_wrapped_count = (unsigned)_buffer.wrapped();
}
Trace::Buffer::Entry new_curr { _curr };
Trace::Buffer::Entry entry { _curr };
/**
* If '_curr' is marked 'last' (i.e., the entry pointer it contains
* is invalid), either this method wasn't called before on this
* buffer or the buffer was empty on all previous calls (note that
* '_curr' is only updated with valid entry pointers by this
* method). In this case, we start with the first entry (if any).
* Iterate over all entries that were not processed yet.
*
* If '_curr' is not marked 'last' it points to the last processed
* entry and we proceed with the so far unprocessed entry next to
* it (if any). Note that in this case, the entry behind '_curr'
* might have been overwritten since the last call to this method
* because the buffer wrapped and outstripped the entry consumer.
* This problem is well known and should be avoided by choosing a
* large enough buffer.
* A note on terminology: The head of the buffer marks the write
* position. The first entry is the one that starts at the lowest
* memory address. 'next()' returns an invalid entry, called 'last',
* if the end of the buffer (highest address) was reached.
*/
if (entry.last())
entry = _buffer.first();
else
entry = _buffer.next(entry);
/* iterate over all entries that were not processed yet */
for (; wrapped || !entry.last(); entry = _buffer.next(entry)) {
/* if buffer wrapped, we pass the last entry once and continue at first entry */
bool applied_wrap = false;
if (wrapped && entry.last()) {
wrapped = false;
applied_wrap = true;
for (; !entry.head(); entry = _buffer.next(entry)) {
/* continue at first entry if we hit the end of the buffer */
if (entry.last())
entry = _buffer.first();
if (entry.last()) {
new_curr = entry;
break;
}
}
if (!entry.length()) {
/* skip empty entries */
if (entry.empty())
continue;
}
if (!functor(entry)) {
if (applied_wrap)
new_curr = Trace::Buffer::Entry();
/* functor may return false to continue processing later on */
if (!functor(entry))
break;
}
new_curr = entry;
}
/* remember the last processed entry in _curr */
if (update) _curr = new_curr;
/* remember the next to be processed entry in _curr */
if (update) _curr = entry;
}
void * address() const { return &_buffer; }
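For reference, a hedged usage sketch of the consumer loop shown above. The
method name 'for_each_new_entry' and the 'trace_buffer' object are assumptions
based on the Trace_buffer utility, not part of this hunk:

/* 'trace_buffer' is assumed to wrap the dataspace shared with the producer */
trace_buffer.for_each_new_entry([&] (Genode::Trace::Buffer::Entry const &entry) {

    /* process entry.data() / entry.length() here */
    Genode::log("new trace entry of ", entry.length(), " bytes");

    /* return true to keep consuming; on false, the same entry is presented again next time */
    return true;
});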


@@ -105,17 +105,17 @@ class Trace_buffer_monitor
log("overflows: ", _buffer->wrapped());
log("read all remaining events");
for (; !_curr_entry.last(); _curr_entry = _buffer->next(_curr_entry)) {
for (; !_curr_entry.head(); _curr_entry = _buffer->next(_curr_entry)) {
if (_curr_entry.last())
_curr_entry = _buffer->first();
/* omit empty entries */
if (_curr_entry.length() == 0)
if (_curr_entry.empty())
continue;
char const * const data = _terminate_entry(_curr_entry);
if (data) { log(data); }
}
/* reset after we read all available entries */
_curr_entry = _buffer->first();
}
};