libc: fix deadlock in pthread_cond_timedwait/signal()

The deadlock occured with three concurrently running threads: two
waiters calling pthread_cond_timedwait() and one signaller calling
pthread_cond_signal().

If waiter W1 hits its timeout, the signaller may have called
pthread_cond_signal(), detected this waiter and posted the internal
'signal_sem' concurrently. Then, the signaller waits for 'handshake_sem'
to ensure the waiter got woken up.

Waiter W1 can't consume the 'signal_sem' post by
'sem_wait(&c->signal_sem)' because another waiter W2 may have consumed
the post already above in sem_wait/timedwait(). Waiting for a post on
'signal_sem' would block the waiter W1 in perfect deadlock with
signaller on 'handshake_sem'. As W1 also owns 'counter_mutex' in this
situation, waiter W2 would block when trying to aquire 'counter_mutex'
and can't resolve the situation.

So, W1 does nothing in this case and we accept the spurious wakeup on
next pthread_cond_wait/timedwait().
This commit is contained in:
Christian Helmuth 2020-10-02 11:40:56 +02:00
parent 7feea78991
commit abefca500b
2 changed files with 34 additions and 10 deletions
repos/libports/src
lib/libc
test/pthread

@ -1076,8 +1076,18 @@ extern "C" {
pthread_mutex_lock(&c->counter_mutex);
if (c->num_signallers > 0) {
if (result == ETIMEDOUT) /* timeout occured */
sem_wait(&c->signal_sem);
if (result == ETIMEDOUT) {
/*
* Another thread may have called pthread_cond_signal(),
* detected this waiter and posted 'signal_sem' concurrently.
*
* We can't consume this post by 'sem_wait(&c->signal_sem)'
* because a third thread may have consumed the post already
* above in sem_wait/timedwait(). So, we just do nothing here
* and accept the spurious wakeup on next
* pthread_cond_wait/timedwait().
*/
}
sem_post(&c->handshake_sem);
--c->num_signallers;
}

@ -815,7 +815,7 @@ struct Test_cond_timed
enum class State { RUN, END } _shared_state { State::RUN };
enum { ROUNDS = 10 };
enum { ROUNDS = 50, TIMEOUT_MS = 50 };
static void *signaller_fn(void *arg)
{
@ -829,18 +829,22 @@ struct Test_cond_timed
bool loop = true;
for (unsigned i = 1; loop; ++i) {
usleep(249*1000);
usleep((TIMEOUT_MS-1)*1000);
pthread_mutex_lock(_mutex.mutex());
if (i == ROUNDS) {
_shared_state = State::END;
loop = false;
}
pthread_cond_broadcast(_cond.cond());
pthread_cond_signal(_cond.cond());
pthread_mutex_unlock(_mutex.mutex());
}
pthread_mutex_lock(_mutex.mutex());
pthread_cond_broadcast(_cond.cond());
pthread_mutex_unlock(_mutex.mutex());
printf("signaller: finished\n");
}
@ -856,6 +860,9 @@ struct Test_cond_timed
printf("waiter%s: started\n", note);
bool const timedwait = !main_thread;
bool timed_out = false;
for (bool loop = true; loop; ) {
pthread_mutex_lock(_mutex.mutex());
@ -869,17 +876,24 @@ struct Test_cond_timed
break;
}
ts = add_timespec_ms(ts, 250);
if (timedwait) {
ts = add_timespec_ms(ts, TIMEOUT_MS);
ret = pthread_cond_timedwait(_cond.cond(), _mutex.mutex(), &ts);
} else {
ret = pthread_cond_wait(_cond.cond(), _mutex.mutex());
}
ret = pthread_cond_timedwait(_cond.cond(), _mutex.mutex(), &ts);
if (ret)
printf("waiter%s: pthread_cond_timedwait: %s\n", note, strerror(ret));
timed_out |= (ret == ETIMEDOUT);
} while (ret != 0);
pthread_mutex_unlock(_mutex.mutex());
}
if (timedwait && !timed_out) {
printf("waiter%s: pthread_cond_timedwait did not time-out!\n");
exit(-1);
}
printf("waiter%s: finished\n", note);
}