Blame - aos/ipc_lib/lockless_queue.cc - RealtimeRoboticsGroup/test

blob: c2b0254cac40664452889335b5cbd7b91c6c28f9 [file] [log] [blame]

Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	1	#include "aos/ipc_lib/lockless_queue.h"
				2
				3	#include <linux/futex.h>
				4	#include <sys/types.h>
				5	#include <syscall.h>
				6	#include <unistd.h>
				7	#include <algorithm>
				8	#include <iomanip>
				9	#include <iostream>
				10	#include <sstream>
				11
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	12	#include "aos/ipc_lib/lockless_queue_memory.h"
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	13	#include "aos/realtime.h"
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	14	#include "aos/util/compiler_memory_barrier.h"
Austin Schuh	f257f3c	2019-10-27 21:00:43 -0700	[diff] [blame]	15	#include "glog/logging.h"
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	16
				17	namespace aos {
				18	namespace ipc_lib {
				19
				20	namespace {
				21
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	22	void GrabQueueSetupLockOrDie(LocklessQueueMemory *memory) {
				23	const int result = mutex_grab(&(memory->queue_setup_lock));
				24	CHECK(result == 0 \|\| result == 1);
				25	}
				26
				27	// This must be called under the queue_setup_lock.
				28	void Cleanup(LocklessQueueMemory *memory) {
				29	const size_t num_senders = memory->num_senders();
				30	const size_t queue_size = memory->queue_size();
				31	const size_t num_messages = memory->num_messages();
				32
				33	// There are a large number of crazy cases here for how things can go wrong
				34	// and how we have to recover. They either require us to keep extra track of
				35	// what is going on, slowing down the send path, or require a large number of
				36	// cases.
				37	//
				38	// The solution here is to not over-think it. This is running while not real
				39	// time during construction. It is allowed to be slow. It will also very
				40	// rarely trigger. There is a small uS window where process death is
				41	// ambiguous.
				42	//
				43	// So, build up a list N long, where N is the number of messages. Search
				44	// through the entire queue and the sender list (ignoring any dead senders),
				45	// and mark down which ones we have seen. Once we have seen all the messages
				46	// except the N dead senders, we know which messages are dead. Because the
				47	// queue is active while we do this, it may take a couple of go arounds to see
				48	// everything.
				49
				50	// Do the easy case. Find all senders who have died. See if they are either
				51	// consistent already, or if they have copied over to_replace to the scratch
				52	// index, but haven't cleared to_replace. Count them.
				53	size_t valid_senders = 0;
				54	for (size_t i = 0; i < num_senders; ++i) {
				55	Sender *sender = memory->GetSender(i);
				56	const uint32_t tid =
				57	__atomic_load_n(&(sender->tid.futex), __ATOMIC_RELAXED);
				58	if (tid & FUTEX_OWNER_DIED) {
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	59	VLOG(3) << "Found an easy death for sender " << i;
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	60	const Index to_replace = sender->to_replace.RelaxedLoad();
				61	const Index scratch_index = sender->scratch_index.Load();
				62
				63	// I find it easiest to think about this in terms of the set of observable
				64	// states. The main code follows the following states:
				65
				66	// 1) scratch_index = xxx
				67	// to_replace = invalid
				68	// This is unambiguous. Already good.
				69
				70	// 2) scratch_index = xxx
				71	// to_replace = yyy
				72	// Very ambiguous. Is xxx or yyy the correct one? Need to either roll
				73	// this forwards or backwards.
				74
				75	// 3) scratch_index = yyy
				76	// to_replace = yyy
				77	// We are in the act of moving to_replace to scratch_index, but didn't
				78	// finish. Easy.
				79
				80	// 4) scratch_index = yyy
				81	// to_replace = invalid
				82	// Finished, but died. Looks like 1)
				83
				84	// Any cleanup code needs to follow the same set of states to be robust to
				85	// death, so death can be restarted.
				86
				87	// Could be 2) or 3).
				88	if (to_replace.valid()) {
				89	// 3)
				90	if (to_replace == scratch_index) {
				91	// Just need to invalidate to_replace to finish.
				92	sender->to_replace.Invalidate();
				93
				94	// And mark that we succeeded.
				95	__atomic_store_n(&(sender->tid.futex), 0, __ATOMIC_SEQ_CST);
				96	++valid_senders;
				97	}
				98	} else {
				99	// 1) or 4). Make sure we aren't corrupted and declare victory.
				100	CHECK(scratch_index.valid());
				101
				102	__atomic_store_n(&(sender->tid.futex), 0, __ATOMIC_SEQ_CST);
				103	++valid_senders;
				104	}
				105	} else {
				106	// Not dead.
				107	++valid_senders;
				108	}
				109	}
				110
				111	// If all the senders are (or were made) good, there is no need to do the hard
				112	// case.
				113	if (valid_senders == num_senders) {
				114	return;
				115	}
				116
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	117	VLOG(3) << "Starting hard cleanup";
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	118
				119	size_t num_accounted_for = 0;
				120	size_t num_missing = 0;
				121	::std::vector<bool> accounted_for(num_messages, false);
				122
				123	while ((num_accounted_for + num_missing) != num_messages) {
				124	num_missing = 0;
				125	for (size_t i = 0; i < num_senders; ++i) {
				126	Sender *sender = memory->GetSender(i);
				127	const uint32_t tid =
				128	__atomic_load_n(&(sender->tid.futex), __ATOMIC_RELAXED);
				129	if (tid & FUTEX_OWNER_DIED) {
				130	++num_missing;
				131	} else {
				132	const Index scratch_index = sender->scratch_index.RelaxedLoad();
				133	if (!accounted_for[scratch_index.message_index()]) {
				134	++num_accounted_for;
				135	}
				136	accounted_for[scratch_index.message_index()] = true;
				137	}
				138	}
				139
				140	for (size_t i = 0; i < queue_size; ++i) {
				141	const Index index = memory->GetQueue(i)->RelaxedLoad();
				142	if (!accounted_for[index.message_index()]) {
				143	++num_accounted_for;
				144	}
				145	accounted_for[index.message_index()] = true;
				146	}
				147	}
				148
				149	while (num_missing != 0) {
				150	const size_t starting_num_missing = num_missing;
				151	for (size_t i = 0; i < num_senders; ++i) {
				152	Sender *sender = memory->GetSender(i);
				153	const uint32_t tid =
				154	__atomic_load_n(&(sender->tid.futex), __ATOMIC_RELAXED);
				155	if (tid & FUTEX_OWNER_DIED) {
				156	const Index scratch_index = sender->scratch_index.RelaxedLoad();
				157	const Index to_replace = sender->to_replace.RelaxedLoad();
				158
				159	// Candidate.
				160	CHECK_LE(to_replace.message_index(), accounted_for.size());
				161	if (accounted_for[to_replace.message_index()]) {
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	162	VLOG(3) << "Sender " << i
				163	<< " died, to_replace is already accounted for";
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	164	// If both are accounted for, we are corrupt...
				165	CHECK(!accounted_for[scratch_index.message_index()]);
				166
				167	// to_replace is already accounted for. This means that we didn't
				168	// atomically insert scratch_index into the queue yet. So
				169	// invalidate to_replace.
				170	sender->to_replace.Invalidate();
				171
				172	// And then mark this sender clean.
				173	__atomic_store_n(&(sender->tid.futex), 0, __ATOMIC_SEQ_CST);
				174
				175	// And account for scratch_index.
				176	accounted_for[scratch_index.message_index()] = true;
				177	--num_missing;
				178	++num_accounted_for;
				179	} else if (accounted_for[scratch_index.message_index()]) {
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	180	VLOG(3) << "Sender " << i
				181	<< " died, scratch_index is already accounted for";
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	182	// scratch_index is accounted for. That means we did the insert,
				183	// but didn't record it.
				184	CHECK(to_replace.valid());
				185	// Finish the transaction. Copy to_replace, then clear it.
				186
				187	sender->scratch_index.Store(to_replace);
				188	sender->to_replace.Invalidate();
				189
				190	// And then mark this sender clean.
				191	__atomic_store_n(&(sender->tid.futex), 0, __ATOMIC_SEQ_CST);
				192
				193	// And account for to_replace.
				194	accounted_for[to_replace.message_index()] = true;
				195	--num_missing;
				196	++num_accounted_for;
				197	} else {
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	198	VLOG(3) << "Sender " << i << " died, neither is accounted for";
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	199	// Ambiguous. There will be an unambiguous one somewhere that we
				200	// can do first.
				201	}
				202	}
				203	}
				204	// CHECK that we are making progress.
				205	CHECK_NE(num_missing, starting_num_missing);
				206	}
				207	}
				208
				209	// Exposes rt_tgsigqueueinfo so we can send the signal just to the target
				210	// thread.
				211	int rt_tgsigqueueinfo(pid_t tgid, pid_t tid, int sig, siginfo_t *si) {
				212	return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, si);
				213	}
				214
				215	} // namespace
				216
				217	size_t LocklessQueueMemorySize(LocklessQueueConfiguration config) {
				218	// Round up the message size so following data is double aligned. That should
				219	// be overkill for most platforms. And the checks below confirms it.
				220	config.message_data_size = (config.message_data_size + 7) & ~0x7;
				221
				222	// As we build up the size, confirm that everything is aligned to the
				223	// alignment requirements of the type.
				224	size_t size = sizeof(LocklessQueueMemory);
				225	CHECK_EQ(size & (alignof(LocklessQueueMemory) - 1), 0u);
				226
				227	CHECK_EQ(size & (alignof(AtomicIndex) - 1), 0u);
				228	size += LocklessQueueMemory::SizeOfQueue(config);
				229
				230	CHECK_EQ(size & (alignof(Message) - 1), 0u);
				231	size += LocklessQueueMemory::SizeOfMessages(config);
				232
				233	CHECK_EQ(size & (alignof(Watcher) - 1), 0u);
				234	size += LocklessQueueMemory::SizeOfWatchers(config);
				235
				236	CHECK_EQ(size & (alignof(Sender) - 1), 0u);
				237	size += LocklessQueueMemory::SizeOfSenders(config);
				238
				239	return size;
				240	}
				241
				242	LocklessQueueMemory *InitializeLocklessQueueMemory(
				243	LocklessQueueMemory *memory, LocklessQueueConfiguration config) {
				244	// Everything should be zero initialized already. So we just need to fill
				245	// everything out properly.
				246
				247	// Grab the mutex. We don't care if the previous reader died. We are going
				248	// to check everything anyways.
				249	GrabQueueSetupLockOrDie(memory);
				250
				251	if (!memory->initialized) {
				252	// TODO(austin): Check these for out of bounds.
				253	memory->config.num_watchers = config.num_watchers;
				254	memory->config.num_senders = config.num_senders;
				255	memory->config.queue_size = config.queue_size;
				256	// Round up to the nearest double word bytes.
				257	memory->config.message_data_size = (config.message_data_size + 7) & ~0x7;
				258
				259	const size_t num_messages = memory->num_messages();
				260	// There need to be at most MaxMessages() messages allocated.
				261	CHECK_LE(num_messages, Index::MaxMessages());
				262
				263	for (size_t i = 0; i < num_messages; ++i) {
				264	memory->GetMessage(Index(QueueIndex::Zero(memory->queue_size()), i))
				265	->header.queue_index.Invalidate();
				266	}
				267
				268	for (size_t i = 0; i < memory->queue_size(); ++i) {
				269	// Make the initial counter be the furthest away number. That means that
				270	// index 0 should be 0xffff, 1 should be 0, etc.
				271	memory->GetQueue(i)->Store(Index(QueueIndex::Zero(memory->queue_size())
				272	.IncrementBy(i)
				273	.DecrementBy(memory->queue_size()),
				274	i));
				275	}
				276
				277	memory->next_queue_index.Invalidate();
				278
				279	for (size_t i = 0; i < memory->num_senders(); ++i) {
				280	::aos::ipc_lib::Sender *s = memory->GetSender(i);
				281	s->scratch_index.Store(Index(0xffff, i + memory->queue_size()));
				282	s->to_replace.RelaxedInvalidate();
				283	}
				284
				285	// Signal everything is done. This needs to be done last, so if we die, we
				286	// redo initialization.
				287	// This is a full atomic (probably overkill), but this is at initialization
				288	// time, so it is cheap.
				289	memory->initialized.store(true);
				290	}
				291
				292	mutex_unlock(&(memory->queue_setup_lock));
				293	return memory;
				294	}
				295
				296	LocklessQueue::LocklessQueue(LocklessQueueMemory *memory,
				297	LocklessQueueConfiguration config)
				298	: memory_(InitializeLocklessQueueMemory(memory, config)),
				299	watcher_copy_(memory_->num_watchers()),
				300	pid_(getpid()),
				301	uid_(getuid()) {}
				302
				303	LocklessQueue::~LocklessQueue() {
				304	CHECK_EQ(watcher_index_, -1);
				305
				306	GrabQueueSetupLockOrDie(memory_);
				307	const int num_watchers = memory_->num_watchers();
				308	// Cleanup is cheap. Go for it anyways.
				309
				310	// And confirm that nothing is owned by us.
				311	for (int i = 0; i < num_watchers; ++i) {
				312	CHECK(!mutex_islocked(&(memory_->GetWatcher(i)->tid)));
				313	}
				314	mutex_unlock(&(memory_->queue_setup_lock));
				315	}
				316
				317	size_t LocklessQueue::QueueSize() const { return memory_->queue_size(); }
				318
				319	bool LocklessQueue::RegisterWakeup(int priority) {
				320	// TODO(austin): Make sure signal coalescing is turned on. We don't need
				321	// duplicates. That will improve performance under high load.
				322
				323	// Since everything is self consistent, all we need to do is make sure nobody
				324	// else is running. Someone dying will get caught in the generic consistency
				325	// check.
				326	GrabQueueSetupLockOrDie(memory_);
				327	const int num_watchers = memory_->num_watchers();
				328
				329	// Now, find the first empty watcher and grab it.
				330	CHECK_EQ(watcher_index_, -1);
				331	for (int i = 0; i < num_watchers; ++i) {
				332	const uint32_t tid =
				333	__atomic_load_n(&(memory_->GetWatcher(i)->tid.futex), __ATOMIC_RELAXED);
				334	if (tid == 0 \|\| tid & FUTEX_OWNER_DIED) {
				335	watcher_index_ = i;
				336	break;
				337	}
				338	}
				339
				340	// Bail if we failed to find an open slot.
				341	if (watcher_index_ == -1) {
				342	mutex_unlock(&(memory_->queue_setup_lock));
				343	return false;
				344	}
				345
				346	Watcher *w = memory_->GetWatcher(watcher_index_);
				347
				348	w->pid = getpid();
				349	w->priority = priority;
				350
				351	// Grabbing a mutex is a compiler and memory barrier, so nothing before will
				352	// get rearranged afterwords.
				353	//
				354	// Since everything is done under the queue_setup_lock, this should always
				355	// return immediately.
				356	const int result = mutex_grab(&(w->tid));
				357
				358	mutex_unlock(&(memory_->queue_setup_lock));
				359
				360	// We should either get the lock, or the previous owner should have died.
				361	// Anything else is a pretty serious error.
				362	return result == 0 \|\| result == 1;
				363	}
				364
				365	void LocklessQueue::UnregisterWakeup() {
				366	// Since everything is self consistent, all we need to do is make sure nobody
				367	// else is running. Someone dying will get caught in the generic consistency
				368	// check.
				369	GrabQueueSetupLockOrDie(memory_);
				370
				371	// Make sure we are registered.
				372	CHECK_NE(watcher_index_, -1);
				373
				374	// Make sure we still own the slot we are supposed to.
				375	CHECK(mutex_islocked(&(memory_->GetWatcher(watcher_index_)->tid)));
				376
				377	// The act of unlocking invalidates the entry. Invalidate it.
				378	mutex_unlock(&(memory_->GetWatcher(watcher_index_)->tid));
				379	// And internally forget the slot.
				380	watcher_index_ = -1;
				381
				382	mutex_unlock(&(memory_->queue_setup_lock));
				383	}
				384
				385	int LocklessQueue::Wakeup(const int current_priority) {
				386	const size_t num_watchers = memory_->num_watchers();
				387
				388	CHECK_EQ(watcher_copy_.size(), num_watchers);
				389
				390	// Grab a copy so it won't change out from underneath us, and we can sort it
				391	// nicely in C++.
				392	// Do note that there is still a window where the process can die after we
				393	// read everything. We will still PI boost and send a signal to the thread in
				394	// question. There is no way without pidfd's to close this window, and
				395	// creating a pidfd is likely not RT.
				396	for (size_t i = 0; i < num_watchers; ++i) {
				397	Watcher *w = memory_->GetWatcher(i);
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	398	// Start by reading the tid. This needs to be atomic to force it to come
				399	// first.
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	400	watcher_copy_[i].tid = __atomic_load_n(&(w->tid.futex), __ATOMIC_SEQ_CST);
				401	watcher_copy_[i].pid = w->pid;
				402	watcher_copy_[i].priority = w->priority;
				403
				404	// Use a priority of -1 to mean an invalid entry to make sorting easier.
				405	if (watcher_copy_[i].tid & FUTEX_OWNER_DIED \|\| watcher_copy_[i].tid == 0) {
				406	watcher_copy_[i].priority = -1;
				407	} else if (watcher_copy_[i].tid !=
				408	static_cast<pid_t>(
				409	__atomic_load_n(&(w->tid.futex), __ATOMIC_SEQ_CST))) {
				410	// Confirm that the watcher hasn't been re-used and modified while we read
				411	// it. If it has, mark it invalid again.
				412	watcher_copy_[i].priority = -1;
				413	watcher_copy_[i].tid = 0;
				414	}
				415	}
				416
				417	// Now sort.
				418	::std::sort(watcher_copy_.begin(), watcher_copy_.end(),
				419	[](const WatcherCopy &a, const WatcherCopy &b) {
				420	return a.priority > b.priority;
				421	});
				422
				423	int count = 0;
				424	if (watcher_copy_[0].priority != -1) {
				425	const int max_priority =
				426	::std::max(current_priority, watcher_copy_[0].priority);
				427	// Boost if we are RT and there is a higher priority sender out there.
				428	// Otherwise we might run into priority inversions.
				429	if (max_priority > current_priority && current_priority > 0) {
				430	SetCurrentThreadRealtimePriority(max_priority);
				431	}
				432
				433	// Build up the siginfo to send.
				434	siginfo_t uinfo;
				435	memset(&uinfo, 0, sizeof(uinfo));
				436
				437	uinfo.si_code = SI_QUEUE;
				438	uinfo.si_pid = pid_;
				439	uinfo.si_uid = uid_;
				440	uinfo.si_value.sival_int = 0;
				441
				442	for (const WatcherCopy &watcher_copy : watcher_copy_) {
				443	// The first -1 priority means we are at the end of the valid list.
				444	if (watcher_copy.priority == -1) {
				445	break;
				446	}
				447
				448	// Send the signal. Target just the thread that sent it so that we can
				449	// support multiple watchers in a process (when someone creates multiple
				450	// event loops in different threads).
				451	rt_tgsigqueueinfo(watcher_copy.pid, watcher_copy.tid, kWakeupSignal,
				452	&uinfo);
				453
				454	++count;
				455	}
				456
				457	// Drop back down if we were boosted.
				458	if (max_priority > current_priority && current_priority > 0) {
				459	SetCurrentThreadRealtimePriority(current_priority);
				460	}
				461	}
				462
				463	return count;
				464	}
				465
				466	LocklessQueue::Sender::Sender(LocklessQueueMemory *memory) : memory_(memory) {
				467	GrabQueueSetupLockOrDie(memory_);
				468
				469	// Since we already have the lock, go ahead and try cleaning up.
				470	Cleanup(memory_);
				471
				472	const int num_senders = memory_->num_senders();
				473
				474	for (int i = 0; i < num_senders; ++i) {
				475	::aos::ipc_lib::Sender *s = memory->GetSender(i);
				476	const uint32_t tid = __atomic_load_n(&(s->tid.futex), __ATOMIC_RELAXED);
				477	if (tid == 0) {
				478	sender_index_ = i;
				479	break;
				480	}
				481	}
				482
				483	if (sender_index_ == -1) {
Austin Schuh	f257f3c	2019-10-27 21:00:43 -0700	[diff] [blame]	484	LOG(FATAL) << "Too many senders";
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	485	}
				486
				487	::aos::ipc_lib::Sender *s = memory_->GetSender(sender_index_);
				488
				489	// Atomically grab the mutex. This signals that we are alive. If the
				490	// previous owner died, we don't care, and want to grab the mutex anyways.
				491	const int result = mutex_grab(&(s->tid));
				492	CHECK(result == 0 \|\| result == 1);
				493
				494	mutex_unlock(&(memory->queue_setup_lock));
				495	}
				496
				497	LocklessQueue::Sender::~Sender() {
				498	if (memory_ != nullptr) {
				499	mutex_unlock(&(memory_->GetSender(sender_index_)->tid));
				500	}
				501	}
				502
				503	LocklessQueue::Sender LocklessQueue::MakeSender() {
				504	return LocklessQueue::Sender(memory_);
				505	}
				506
				507	QueueIndex ZeroOrValid(QueueIndex index) {
				508	if (!index.valid()) {
				509	return index.Clear();
				510	}
				511	return index;
				512	}
				513
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	514	size_t LocklessQueue::Sender::size() { return memory_->message_data_size(); }
				515
				516	void *LocklessQueue::Sender::Data() {
				517	::aos::ipc_lib::Sender *sender = memory_->GetSender(sender_index_);
				518	Index scratch_index = sender->scratch_index.RelaxedLoad();
				519	Message *message = memory_->GetMessage(scratch_index);
				520	message->header.queue_index.Invalidate();
				521
				522	return &message->data[0];
				523	}
				524
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	525	void LocklessQueue::Sender::Send(const char *data, size_t length) {
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	526	CHECK_LE(length, size());
				527	memcpy(Data(), data, length);
				528	Send(length);
				529	}
				530
				531	void LocklessQueue::Sender::Send(size_t length) {
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	532	const size_t queue_size = memory_->queue_size();
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	533	CHECK_LE(length, size());
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	534
				535	::aos::ipc_lib::Sender *sender = memory_->GetSender(sender_index_);
				536	Index scratch_index = sender->scratch_index.RelaxedLoad();
				537	Message *message = memory_->GetMessage(scratch_index);
				538
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	539	message->header.length = length;
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	540
				541	while (true) {
				542	const QueueIndex actual_next_queue_index =
				543	memory_->next_queue_index.Load(queue_size);
				544	const QueueIndex next_queue_index = ZeroOrValid(actual_next_queue_index);
				545
				546	const QueueIndex incremented_queue_index = next_queue_index.Increment();
				547
				548	// TODO(austin): I think we can drop the barrier off this.
				549	const Index to_replace = memory_->LoadIndex(next_queue_index);
				550
				551	const QueueIndex decremented_queue_index =
				552	next_queue_index.DecrementBy(queue_size);
				553
				554	// See if we got beat. If we did, try to atomically update
				555	// next_queue_index in case the previous writer failed and retry.
				556	if (!to_replace.IsPlausible(decremented_queue_index)) {
				557	// We don't care about the result. It will either succeed, or we got
				558	// beat in fixing it and just need to give up and try again. If we got
				559	// beat multiple times, the only way progress can be made is if the queue
				560	// is updated as well. This means that if we retry reading
				561	// next_queue_index, we will be at most off by one and can retry.
				562	//
				563	// Both require no further action from us.
				564	//
				565	// TODO(austin): If we are having fairness issues under contention, we
				566	// could have a mode bit in next_queue_index, and could use a lock or some
				567	// other form of PI boosting to let the higher priority task win.
				568	memory_->next_queue_index.CompareAndExchangeStrong(
				569	actual_next_queue_index, incremented_queue_index);
				570
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	571	VLOG(3) << "We were beat. Try again. Was " << std::hex
				572	<< to_replace.get() << ", is " << decremented_queue_index.index();
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	573	continue;
				574	}
				575
				576	// Confirm that the message is what it should be.
				577	{
				578	// We just need this to be atomic and after the index has been calculated
				579	// and before we exchange the index back in. Both of those will be strong
				580	// barriers, so this is fine.
				581	const QueueIndex previous_index =
				582	memory_->GetMessage(to_replace)
				583	->header.queue_index.RelaxedLoad(queue_size);
				584	if (previous_index != decremented_queue_index && previous_index.valid()) {
				585	// Retry.
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	586	VLOG(3) << "Something fishy happened, queue index doesn't match. "
				587	"Retrying. Previous index was "
				588	<< std::hex << previous_index.index() << ", should be "
				589	<< decremented_queue_index.index();
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	590	continue;
				591	}
				592	}
				593
				594	message->header.monotonic_sent_time = ::aos::monotonic_clock::now();
				595	message->header.realtime_sent_time = ::aos::realtime_clock::now();
				596
				597	// Before we are fully done filling out the message, update the Sender state
				598	// with the new index to write. This re-uses the barrier for the
				599	// queue_index store.
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	600	const Index index_to_write(next_queue_index, scratch_index.message_index());
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	601
				602	sender->scratch_index.RelaxedStore(index_to_write);
				603
				604	message->header.queue_index.Store(next_queue_index);
				605
				606	// The message is now filled out, and we have a confirmed slot to store
				607	// into.
				608	//
				609	// Start by writing down what we are going to pull out of the queue. This
				610	// was Invalid before now.
				611	sender->to_replace.RelaxedStore(to_replace);
				612
				613	// Then exchange the next index into the queue.
				614	if (!memory_->GetQueue(next_queue_index.Wrapped())
				615	->CompareAndExchangeStrong(to_replace, index_to_write)) {
				616	// Aw, didn't succeed. Retry.
				617	sender->to_replace.RelaxedInvalidate();
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	618	VLOG(3) << "Failed to wrap into queue";
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	619	continue;
				620	}
				621
				622	// Then update next_queue_index to save the next user some computation time.
				623	memory_->next_queue_index.CompareAndExchangeStrong(actual_next_queue_index,
				624	incremented_queue_index);
				625
				626	// Now update the scratch space and record that we succeeded.
				627	sender->scratch_index.Store(to_replace);
				628	// And then clear out the entry used to replace. This just needs to be
				629	// atomic. It can't be moved above the store because that is a full
				630	// barrier, but delaying it until later will only affect things if something
				631	// died.
				632	sender->to_replace.RelaxedInvalidate();
				633	break;
				634	}
				635	}
				636
				637	LocklessQueue::ReadResult LocklessQueue::Read(
				638	uint32_t uint32_queue_index,
				639	::aos::monotonic_clock::time_point *monotonic_sent_time,
				640	::aos::realtime_clock::time_point realtime_sent_time, size_t length,
				641	char *data) {
				642	const size_t queue_size = memory_->queue_size();
				643
				644	// Build up the QueueIndex.
				645	const QueueIndex queue_index =
				646	QueueIndex::Zero(queue_size).IncrementBy(uint32_queue_index);
				647
				648	// Read the message stored at the requested location.
				649	Index mi = memory_->LoadIndex(queue_index);
				650	Message *m = memory_->GetMessage(mi);
				651
				652	while (true) {
				653	// We need to confirm that the data doesn't change while we are reading it.
				654	// Do that by first confirming that the message points to the queue index we
				655	// want.
				656	const QueueIndex starting_queue_index =
				657	m->header.queue_index.Load(queue_size);
				658	if (starting_queue_index != queue_index) {
				659	// If we found a message that is exactly 1 loop old, we just wrapped.
				660	if (starting_queue_index == queue_index.DecrementBy(queue_size)) {
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	661	VLOG(3) << "Matches: " << std::hex << starting_queue_index.index()
				662	<< ", " << queue_index.DecrementBy(queue_size).index();
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	663	return ReadResult::NOTHING_NEW;
				664	} else {
				665	// Someone has re-used this message between when we pulled it out of the
				666	// queue and when we grabbed its index. It is pretty hard to deduce
				667	// what happened. Just try again.
				668	Message *new_m = memory_->GetMessage(queue_index);
				669	if (m != new_m) {
				670	m = new_m;
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	671	VLOG(3) << "Retrying, m doesn't match";
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	672	continue;
				673	}
				674
				675	// We have confirmed that message still points to the same message. This
				676	// means that the message didn't get swapped out from under us, so
				677	// starting_queue_index is correct.
				678	//
				679	// Either we got too far behind (signaled by this being a valid
				680	// message), or this is one of the initial messages which are invalid.
				681	if (starting_queue_index.valid()) {
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	682	VLOG(3) << "Too old. Tried for " << std::hex << queue_index.index()
				683	<< ", got " << starting_queue_index.index() << ", behind by "
				684	<< std::dec
				685	<< (starting_queue_index.index() - queue_index.index());
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	686	return ReadResult::TOO_OLD;
				687	}
				688
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	689	VLOG(3) << "Initial";
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	690
				691	// There isn't a valid message at this location.
				692	//
				693	// If someone asks for one of the messages within the first go around,
				694	// then they need to wait. They got ahead. Otherwise, they are
				695	// asking for something crazy, like something before the beginning of
				696	// the queue. Tell them that they are behind.
				697	if (uint32_queue_index < memory_->queue_size()) {
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	698	VLOG(3) << "Near zero, " << std::hex << uint32_queue_index;
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	699	return ReadResult::NOTHING_NEW;
				700	} else {
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	701	VLOG(3) << "not near zero, " << std::hex << uint32_queue_index;
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	702	return ReadResult::TOO_OLD;
				703	}
				704	}
				705	}
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	706	VLOG(3) << "Eq: " << std::hex << starting_queue_index.index() << ", "
				707	<< queue_index.index();
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	708	break;
				709	}
				710
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	711	// Then read the data out. Copy it all out to be deterministic and so we can
				712	// make length be from either end.
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	713	*monotonic_sent_time = m->header.monotonic_sent_time;
				714	*realtime_sent_time = m->header.realtime_sent_time;
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	715	memcpy(data, &m->data[0], message_data_size());
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	716	*length = m->header.length;
				717
				718	// And finally, confirm that the message still points to the queue index we
				719	// want. This means it didn't change out from under us.
				720	// If something changed out from under us, we were reading it much too late in
				721	// its lifetime.
				722	const QueueIndex final_queue_index = m->header.queue_index.Load(queue_size);
				723	if (final_queue_index != queue_index) {
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	724	VLOG(3) << "Changed out from under us. Reading " << std::hex
				725	<< queue_index.index() << ", finished with "
				726	<< final_queue_index.index() << ", delta: " << std::dec
				727	<< (final_queue_index.index() - queue_index.index());
				728	return ReadResult::OVERWROTE;
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	729	}
				730
				731	return ReadResult::GOOD;
				732	}
				733
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	734	size_t LocklessQueue::queue_size() const { return memory_->queue_size(); }
				735	size_t LocklessQueue::message_data_size() const {
				736	return memory_->message_data_size();
				737	}
				738
				739	QueueIndex LocklessQueue::LatestQueueIndex() {
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	740	const size_t queue_size = memory_->queue_size();
				741
				742	// There is only one interesting case. We need to know if the queue is empty.
				743	// That is done with a sentinel value. At worst, this will be off by one.
				744	const QueueIndex next_queue_index =
				745	memory_->next_queue_index.Load(queue_size);
				746	if (next_queue_index.valid()) {
				747	const QueueIndex current_queue_index = next_queue_index.DecrementBy(1u);
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	748	return current_queue_index;
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	749	} else {
				750	return empty_queue_index();
				751	}
				752	}
				753
				754	namespace {
				755
				756	// Prints out the mutex state. Not safe to use while the mutex is being
				757	// changed.
				758	::std::string PrintMutex(aos_mutex *mutex) {
				759	::std::stringstream s;
				760	s << "aos_mutex(" << ::std::hex << mutex->futex;
				761
				762	if (mutex->futex != 0) {
				763	s << ":";
				764	if (mutex->futex & FUTEX_OWNER_DIED) {
				765	s << "FUTEX_OWNER_DIED\|";
				766	}
				767	s << "tid=" << (mutex->futex & FUTEX_TID_MASK);
				768	}
				769
				770	s << ")";
				771	return s.str();
				772	}
				773
				774	} // namespace
				775
				776	void PrintLocklessQueueMemory(LocklessQueueMemory *memory) {
				777	const size_t queue_size = memory->queue_size();
				778	::std::cout << "LocklessQueueMemory (" << memory << ") {" << ::std::endl;
				779	::std::cout << " aos_mutex queue_setup_lock = "
				780	<< PrintMutex(&memory->queue_setup_lock) << ::std::endl;
				781	::std::cout << " ::std::atomic<bool> initialized = " << memory->initialized
				782	<< ::std::endl;
				783	::std::cout << " config {" << ::std::endl;
				784	::std::cout << " size_t num_watchers = " << memory->config.num_watchers
				785	<< ::std::endl;
				786	::std::cout << " size_t num_senders = " << memory->config.num_senders
				787	<< ::std::endl;
				788	::std::cout << " size_t queue_size = " << memory->config.queue_size
				789	<< ::std::endl;
				790	::std::cout << " size_t message_data_size = "
				791	<< memory->config.message_data_size << ::std::endl;
				792
				793	::std::cout << " AtomicQueueIndex next_queue_index = "
				794	<< memory->next_queue_index.Load(queue_size).DebugString()
				795	<< ::std::endl;
				796
				797	::std::cout << " }" << ::std::endl;
				798	::std::cout << " AtomicIndex queue[" << queue_size << "] {" << ::std::endl;
				799	for (size_t i = 0; i < queue_size; ++i) {
				800	::std::cout << " [" << i << "] -> "
				801	<< memory->GetQueue(i)->Load().DebugString() << ::std::endl;
				802	}
				803	::std::cout << " }" << ::std::endl;
				804	::std::cout << " Message messages[" << memory->num_messages() << "] {"
				805	<< ::std::endl;
				806	for (size_t i = 0; i < memory->num_messages(); ++i) {
				807	Message *m = memory->GetMessage(Index(i, i));
				808	::std::cout << " [" << i << "] -> Message {" << ::std::endl;
				809	::std::cout << " Header {" << ::std::endl;
				810	::std::cout << " AtomicQueueIndex queue_index = "
				811	<< m->header.queue_index.Load(queue_size).DebugString()
				812	<< ::std::endl;
				813	::std::cout << " size_t length = " << m->header.length
				814	<< ::std::endl;
				815	::std::cout << " }" << ::std::endl;
				816	::std::cout << " data: {";
				817
				818	for (size_t j = 0; j < m->header.length; ++j) {
				819	char data = m->data[j];
				820	if (j != 0) {
				821	::std::cout << " ";
				822	}
				823	if (::std::isprint(data)) {
				824	::std::cout << ::std::setfill(' ') << ::std::setw(2) << ::std::hex
				825	<< data;
				826	} else {
				827	::std::cout << "0x" << ::std::setfill('0') << ::std::setw(2)
				828	<< ::std::hex << (static_cast<unsigned>(data) & 0xff);
				829	}
				830	}
				831	::std::cout << ::std::setfill(' ') << ::std::dec << "}" << ::std::endl;
				832	::std::cout << " }," << ::std::endl;
				833	}
				834	::std::cout << " }" << ::std::endl;
				835
Alex Perry	cb7da4b	2019-08-28 19:35:56 -0700	[diff] [blame^]	836	::std::cout << " Sender senders[" << memory->num_senders() << "] {"
				837	<< ::std::endl;
Austin Schuh	20b2b08	2019-09-11 20:42:56 -0700	[diff] [blame]	838	for (size_t i = 0; i < memory->num_senders(); ++i) {
				839	Sender *s = memory->GetSender(i);
				840	::std::cout << " [" << i << "] -> Sender {" << ::std::endl;
				841	::std::cout << " aos_mutex tid = " << PrintMutex(&s->tid)
				842	<< ::std::endl;
				843	::std::cout << " AtomicIndex scratch_index = "
				844	<< s->scratch_index.Load().DebugString() << ::std::endl;
				845	::std::cout << " AtomicIndex to_replace = "
				846	<< s->to_replace.Load().DebugString() << ::std::endl;
				847	::std::cout << " }" << ::std::endl;
				848	}
				849	::std::cout << " }" << ::std::endl;
				850
				851	::std::cout << " Watcher watchers[" << memory->num_watchers() << "] {"
				852	<< ::std::endl;
				853	for (size_t i = 0; i < memory->num_watchers(); ++i) {
				854	Watcher *w = memory->GetWatcher(i);
				855	::std::cout << " [" << i << "] -> Watcher {" << ::std::endl;
				856	::std::cout << " aos_mutex tid = " << PrintMutex(&w->tid)
				857	<< ::std::endl;
				858	::std::cout << " pid_t pid = " << w->pid << ::std::endl;
				859	::std::cout << " int priority = " << w->priority << ::std::endl;
				860	::std::cout << " }" << ::std::endl;
				861	}
				862	::std::cout << " }" << ::std::endl;
				863
				864	::std::cout << "}" << ::std::endl;
				865	}
				866
				867	} // namespace ipc_lib
				868	} // namespace aos