Add memory ordering constraint, convert to C11 atomics
Add an ordering constraint/fence to __system_property_serial.
This slows down a read on a Nexus 5 from about 50 to about 70 ns,
but avoids the possibility of seeing an inconsistent property value.
Use C11 atomic operations where easy and appropriate.
This code remains not fully C++11 memory model conformant, but
I would now expect the generated code to now be correct with current compilers.
Bug:14970171
Change-Id: I0891ff1d0f914ae5c3857e3d76b6a7c8a4a07d83
diff --git a/libc/bionic/system_properties.cpp b/libc/bionic/system_properties.cpp
index a564c39..30081a5 100644
--- a/libc/bionic/system_properties.cpp
+++ b/libc/bionic/system_properties.cpp
@@ -26,6 +26,7 @@
* SUCH DAMAGE.
*/
#include <new>
+#include <stdatomic.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
@@ -45,7 +46,6 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <netinet/in.h>
-#include <unistd.h>
#define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
#include <sys/_system_properties.h>
@@ -80,6 +80,16 @@
uint8_t namelen;
uint8_t reserved[3];
+ // TODO: The following fields should be declared as atomic_uint32_t.
+ // They should be assigned to with release semantics, instead of using
+ // explicit fences. Unfortunately, the read accesses are generally
+ // followed by more dependent read accesses, and the dependence
+ // is assumed to enforce memory ordering. Which it does on supported
+ // hardware. This technically should use memory_order_consume, if
+ // that worked as intended.
+ // We should also avoid rereading these fields redundantly, since not
+ // all processor implementations ensure that multiple loads from the
+ // same field are carried out in the right order.
volatile uint32_t prop;
volatile uint32_t left;
@@ -93,7 +103,8 @@
this->namelen = name_length;
memcpy(this->name, name, name_length);
this->name[name_length] = '\0';
- ANDROID_MEMBAR_FULL();
+ ANDROID_MEMBAR_FULL(); // TODO: Instead use a release store
+ // for subsequent pointer assignment.
}
private:
@@ -102,14 +113,15 @@
struct prop_area {
uint32_t bytes_used;
- volatile uint32_t serial;
+ atomic_uint_least32_t serial;
uint32_t magic;
uint32_t version;
uint32_t reserved[28];
char data[0];
prop_area(const uint32_t magic, const uint32_t version) :
- serial(0), magic(magic), version(version) {
+ magic(magic), version(version) {
+ atomic_init(&serial, 0);
memset(reserved, 0, sizeof(reserved));
// Allocate enough space for the root node.
bytes_used = sizeof(prop_bt);
@@ -120,7 +132,7 @@
};
struct prop_info {
- volatile uint32_t serial;
+ atomic_uint_least32_t serial;
char value[PROP_VALUE_MAX];
char name[0];
@@ -128,10 +140,11 @@
const uint8_t valuelen) {
memcpy(this->name, name, namelen);
this->name[namelen] = '\0';
- this->serial = (valuelen << 24);
+ atomic_init(&this->serial, valuelen << 24);
memcpy(this->value, value, valuelen);
this->value[valuelen] = '\0';
- ANDROID_MEMBAR_FULL();
+ ANDROID_MEMBAR_FULL(); // TODO: Instead use a release store
+ // for subsequent point assignment.
}
private:
DISALLOW_COPY_AND_ASSIGN(prop_info);
@@ -605,11 +618,20 @@
}
while (true) {
- uint32_t serial = __system_property_serial(pi);
+ uint32_t serial = __system_property_serial(pi); // acquire semantics
size_t len = SERIAL_VALUE_LEN(serial);
memcpy(value, pi->value, len + 1);
- ANDROID_MEMBAR_FULL();
- if (serial == pi->serial) {
+ // TODO: Fix the synchronization scheme here.
+ // There is no fully supported way to implement this kind
+ // of synchronization in C++11, since the memcpy races with
+ // updates to pi, and the data being accessed is not atomic.
+ // The following fence is unintuitive, but would be the
+ // correct one if memcpy used memory_order_relaxed atomic accesses.
+ // In practice it seems unlikely that the generated code would
+ // would be any different, so this should be OK.
+ atomic_thread_fence(memory_order_acquire);
+ if (serial ==
+ atomic_load_explicit(&(pi->serial), memory_order_relaxed)) {
if (name != 0) {
strcpy(name, pi->name);
}
@@ -658,14 +680,24 @@
if (len >= PROP_VALUE_MAX)
return -1;
- pi->serial = pi->serial | 1;
- ANDROID_MEMBAR_FULL();
+ uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_relaxed);
+ serial |= 1;
+ atomic_store_explicit(&pi->serial, serial, memory_order_relaxed);
+ // The memcpy call here also races. Again pretend it
+ // used memory_order_relaxed atomics, and use the analogous
+ // counterintuitive fence.
+ atomic_thread_fence(memory_order_release);
memcpy(pi->value, value, len + 1);
- ANDROID_MEMBAR_FULL();
- pi->serial = (len << 24) | ((pi->serial + 1) & 0xffffff);
+ atomic_store_explicit(
+ &pi->serial,
+ (len << 24) | ((serial + 1) & 0xffffff),
+ memory_order_release);
__futex_wake(&pi->serial, INT32_MAX);
- pa->serial++;
+ atomic_store_explicit(
+ &pa->serial,
+ atomic_load_explicit(&pa->serial, memory_order_relaxed) + 1,
+ memory_order_release);
__futex_wake(&pa->serial, INT32_MAX);
return 0;
@@ -688,17 +720,25 @@
if (!pi)
return -1;
- pa->serial++;
+ // There is only a single mutator, but we want to make sure that
+ // updates are visible to a reader waiting for the update.
+ atomic_store_explicit(
+ &pa->serial,
+ atomic_load_explicit(&pa->serial, memory_order_relaxed) + 1,
+ memory_order_release);
__futex_wake(&pa->serial, INT32_MAX);
return 0;
}
+// Wait for non-locked serial, and retrieve it with acquire semantics.
unsigned int __system_property_serial(const prop_info *pi)
{
- uint32_t serial = pi->serial;
+ uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_acquire);
while (SERIAL_DIRTY(serial)) {
- __futex_wait(const_cast<volatile uint32_t*>(&pi->serial), serial, NULL);
- serial = pi->serial;
+ __futex_wait(const_cast<volatile void *>(
+ reinterpret_cast<const void *>(&pi->serial)),
+ serial, NULL);
+ serial = atomic_load_explicit(&pi->serial, memory_order_acquire);
}
return serial;
}
@@ -706,12 +746,14 @@
unsigned int __system_property_wait_any(unsigned int serial)
{
prop_area *pa = __system_property_area__;
+ uint32_t my_serial;
do {
__futex_wait(&pa->serial, serial, NULL);
- } while (pa->serial == serial);
+ my_serial = atomic_load_explicit(&pa->serial, memory_order_acquire);
+ } while (my_serial == serial);
- return pa->serial;
+ return my_serial;
}
const prop_info *__system_property_find_nth(unsigned n)