Merge "Fix native memory leak caused by small HWUI path cache creation"
diff --git a/api/current.txt b/api/current.txt
index 9f7e642..3cee0f5 100644
--- a/api/current.txt
+++ b/api/current.txt
@@ -6999,7 +6999,11 @@
   public class OobData implements android.os.Parcelable {
     ctor public OobData();
     method public int describeContents();
+    method public byte[] getLeSecureConnectionsConfirmation();
+    method public byte[] getLeSecureConnectionsRandom();
     method public byte[] getSecurityManagerTk();
+    method public void setLeSecureConnectionsConfirmation(byte[]);
+    method public void setLeSecureConnectionsRandom(byte[]);
     method public void setSecurityManagerTk(byte[]);
     method public void writeToParcel(android.os.Parcel, int);
     field public static final android.os.Parcelable.Creator<android.bluetooth.OobData> CREATOR;
@@ -33365,6 +33369,7 @@
     method public static int gettid();
     method public static int getuid();
     method public static java.lang.String if_indextoname(int);
+    method public static int if_nametoindex(java.lang.String);
     method public static java.net.InetAddress inet_pton(int, java.lang.String);
     method public static boolean isatty(java.io.FileDescriptor);
     method public static void kill(int, int) throws android.system.ErrnoException;
@@ -33406,6 +33411,7 @@
     method public static void seteuid(int) throws android.system.ErrnoException;
     method public static void setgid(int) throws android.system.ErrnoException;
     method public static int setsid() throws android.system.ErrnoException;
+    method public static void setsockoptInt(java.io.FileDescriptor, int, int, int) throws android.system.ErrnoException;
     method public static void setuid(int) throws android.system.ErrnoException;
     method public static void shutdown(java.io.FileDescriptor, int) throws android.system.ErrnoException;
     method public static java.io.FileDescriptor socket(int, int, int) throws android.system.ErrnoException;
@@ -33816,6 +33822,7 @@
     field public static final int S_IXOTH;
     field public static final int S_IXUSR;
     field public static final int TCP_NODELAY;
+    field public static final int TCP_USER_TIMEOUT;
     field public static final int WCONTINUED;
     field public static final int WEXITED;
     field public static final int WNOHANG;
@@ -48741,11 +48748,41 @@
     method public java.io.File directory();
     method public java.lang.ProcessBuilder directory(java.io.File);
     method public java.util.Map<java.lang.String, java.lang.String> environment();
+    method public java.lang.ProcessBuilder inheritIO();
+    method public java.lang.ProcessBuilder redirectError(java.lang.ProcessBuilder.Redirect);
+    method public java.lang.ProcessBuilder redirectError(java.io.File);
+    method public java.lang.ProcessBuilder.Redirect redirectError();
     method public boolean redirectErrorStream();
     method public java.lang.ProcessBuilder redirectErrorStream(boolean);
+    method public java.lang.ProcessBuilder redirectInput(java.lang.ProcessBuilder.Redirect);
+    method public java.lang.ProcessBuilder redirectInput(java.io.File);
+    method public java.lang.ProcessBuilder.Redirect redirectInput();
+    method public java.lang.ProcessBuilder redirectOutput(java.lang.ProcessBuilder.Redirect);
+    method public java.lang.ProcessBuilder redirectOutput(java.io.File);
+    method public java.lang.ProcessBuilder.Redirect redirectOutput();
     method public java.lang.Process start() throws java.io.IOException;
   }
 
+  public static abstract class ProcessBuilder.Redirect {
+    method public static java.lang.ProcessBuilder.Redirect appendTo(java.io.File);
+    method public java.io.File file();
+    method public static java.lang.ProcessBuilder.Redirect from(java.io.File);
+    method public static java.lang.ProcessBuilder.Redirect to(java.io.File);
+    method public abstract java.lang.ProcessBuilder.Redirect.Type type();
+    field public static final java.lang.ProcessBuilder.Redirect INHERIT;
+    field public static final java.lang.ProcessBuilder.Redirect PIPE;
+  }
+
+  public static final class ProcessBuilder.Redirect.Type extends java.lang.Enum {
+    method public static java.lang.ProcessBuilder.Redirect.Type valueOf(java.lang.String);
+    method public static final java.lang.ProcessBuilder.Redirect.Type[] values();
+    enum_constant public static final java.lang.ProcessBuilder.Redirect.Type APPEND;
+    enum_constant public static final java.lang.ProcessBuilder.Redirect.Type INHERIT;
+    enum_constant public static final java.lang.ProcessBuilder.Redirect.Type PIPE;
+    enum_constant public static final java.lang.ProcessBuilder.Redirect.Type READ;
+    enum_constant public static final java.lang.ProcessBuilder.Redirect.Type WRITE;
+  }
+
   public abstract interface Readable {
     method public abstract int read(java.nio.CharBuffer) throws java.io.IOException;
   }
@@ -49279,6 +49316,7 @@
     method protected T initialValue();
     method public void remove();
     method public void set(T);
+    method public static java.lang.ThreadLocal<S> withInitial(java.util.function.Supplier<? extends S>);
   }
 
   public class Throwable implements java.io.Serializable {
@@ -52603,9 +52641,7 @@
 
   public static class KeyStore.PasswordProtection implements javax.security.auth.Destroyable java.security.KeyStore.ProtectionParameter {
     ctor public KeyStore.PasswordProtection(char[]);
-    method public synchronized void destroy() throws javax.security.auth.DestroyFailedException;
     method public synchronized char[] getPassword();
-    method public synchronized boolean isDestroyed();
   }
 
   public static final class KeyStore.PrivateKeyEntry implements java.security.KeyStore.Entry {
@@ -56389,9 +56425,17 @@
     ctor public HashMap();
     ctor public HashMap(java.util.Map<? extends K, ? extends V>);
     method public java.lang.Object clone();
+    method public V compute(K, java.util.function.BiFunction<? super K, ? super V, ? extends V>);
+    method public V computeIfAbsent(K, java.util.function.Function<? super K, ? extends V>);
+    method public V computeIfPresent(K, java.util.function.BiFunction<? super K, ? super V, ? extends V>);
     method public java.util.Set<java.util.Map.Entry<K, V>> entrySet();
     method public void forEach(java.util.function.BiConsumer<? super K, ? super V>);
+    method public V getOrDefault(java.lang.Object, V);
+    method public V merge(K, V, java.util.function.BiFunction<? super V, ? super V, ? extends V>);
+    method public V putIfAbsent(K, V);
+    method public boolean remove(java.lang.Object, java.lang.Object);
     method public boolean replace(K, V, V);
+    method public V replace(K, V);
     method public void replaceAll(java.util.function.BiFunction<? super K, ? super V, ? extends V>);
   }
 
@@ -62351,8 +62395,8 @@
   }
 
   public abstract interface Destroyable {
-    method public abstract void destroy() throws javax.security.auth.DestroyFailedException;
-    method public abstract boolean isDestroyed();
+    method public default void destroy() throws javax.security.auth.DestroyFailedException;
+    method public default boolean isDestroyed();
   }
 
   public final class PrivateCredentialPermission extends java.security.Permission {
diff --git a/api/system-current.txt b/api/system-current.txt
index a097edb..9c6d6f5 100644
--- a/api/system-current.txt
+++ b/api/system-current.txt
@@ -7220,7 +7220,11 @@
   public class OobData implements android.os.Parcelable {
     ctor public OobData();
     method public int describeContents();
+    method public byte[] getLeSecureConnectionsConfirmation();
+    method public byte[] getLeSecureConnectionsRandom();
     method public byte[] getSecurityManagerTk();
+    method public void setLeSecureConnectionsConfirmation(byte[]);
+    method public void setLeSecureConnectionsRandom(byte[]);
     method public void setSecurityManagerTk(byte[]);
     method public void writeToParcel(android.os.Parcel, int);
     field public static final android.os.Parcelable.Creator<android.bluetooth.OobData> CREATOR;
@@ -35517,6 +35521,7 @@
     method public static int gettid();
     method public static int getuid();
     method public static java.lang.String if_indextoname(int);
+    method public static int if_nametoindex(java.lang.String);
     method public static java.net.InetAddress inet_pton(int, java.lang.String);
     method public static boolean isatty(java.io.FileDescriptor);
     method public static void kill(int, int) throws android.system.ErrnoException;
@@ -35558,6 +35563,7 @@
     method public static void seteuid(int) throws android.system.ErrnoException;
     method public static void setgid(int) throws android.system.ErrnoException;
     method public static int setsid() throws android.system.ErrnoException;
+    method public static void setsockoptInt(java.io.FileDescriptor, int, int, int) throws android.system.ErrnoException;
     method public static void setuid(int) throws android.system.ErrnoException;
     method public static void shutdown(java.io.FileDescriptor, int) throws android.system.ErrnoException;
     method public static java.io.FileDescriptor socket(int, int, int) throws android.system.ErrnoException;
@@ -35968,6 +35974,7 @@
     field public static final int S_IXOTH;
     field public static final int S_IXUSR;
     field public static final int TCP_NODELAY;
+    field public static final int TCP_USER_TIMEOUT;
     field public static final int WCONTINUED;
     field public static final int WEXITED;
     field public static final int WNOHANG;
@@ -51359,11 +51366,41 @@
     method public java.io.File directory();
     method public java.lang.ProcessBuilder directory(java.io.File);
     method public java.util.Map<java.lang.String, java.lang.String> environment();
+    method public java.lang.ProcessBuilder inheritIO();
+    method public java.lang.ProcessBuilder redirectError(java.lang.ProcessBuilder.Redirect);
+    method public java.lang.ProcessBuilder redirectError(java.io.File);
+    method public java.lang.ProcessBuilder.Redirect redirectError();
     method public boolean redirectErrorStream();
     method public java.lang.ProcessBuilder redirectErrorStream(boolean);
+    method public java.lang.ProcessBuilder redirectInput(java.lang.ProcessBuilder.Redirect);
+    method public java.lang.ProcessBuilder redirectInput(java.io.File);
+    method public java.lang.ProcessBuilder.Redirect redirectInput();
+    method public java.lang.ProcessBuilder redirectOutput(java.lang.ProcessBuilder.Redirect);
+    method public java.lang.ProcessBuilder redirectOutput(java.io.File);
+    method public java.lang.ProcessBuilder.Redirect redirectOutput();
     method public java.lang.Process start() throws java.io.IOException;
   }
 
+  public static abstract class ProcessBuilder.Redirect {
+    method public static java.lang.ProcessBuilder.Redirect appendTo(java.io.File);
+    method public java.io.File file();
+    method public static java.lang.ProcessBuilder.Redirect from(java.io.File);
+    method public static java.lang.ProcessBuilder.Redirect to(java.io.File);
+    method public abstract java.lang.ProcessBuilder.Redirect.Type type();
+    field public static final java.lang.ProcessBuilder.Redirect INHERIT;
+    field public static final java.lang.ProcessBuilder.Redirect PIPE;
+  }
+
+  public static final class ProcessBuilder.Redirect.Type extends java.lang.Enum {
+    method public static java.lang.ProcessBuilder.Redirect.Type valueOf(java.lang.String);
+    method public static final java.lang.ProcessBuilder.Redirect.Type[] values();
+    enum_constant public static final java.lang.ProcessBuilder.Redirect.Type APPEND;
+    enum_constant public static final java.lang.ProcessBuilder.Redirect.Type INHERIT;
+    enum_constant public static final java.lang.ProcessBuilder.Redirect.Type PIPE;
+    enum_constant public static final java.lang.ProcessBuilder.Redirect.Type READ;
+    enum_constant public static final java.lang.ProcessBuilder.Redirect.Type WRITE;
+  }
+
   public abstract interface Readable {
     method public abstract int read(java.nio.CharBuffer) throws java.io.IOException;
   }
@@ -51897,6 +51934,7 @@
     method protected T initialValue();
     method public void remove();
     method public void set(T);
+    method public static java.lang.ThreadLocal<S> withInitial(java.util.function.Supplier<? extends S>);
   }
 
   public class Throwable implements java.io.Serializable {
@@ -55221,9 +55259,7 @@
 
   public static class KeyStore.PasswordProtection implements javax.security.auth.Destroyable java.security.KeyStore.ProtectionParameter {
     ctor public KeyStore.PasswordProtection(char[]);
-    method public synchronized void destroy() throws javax.security.auth.DestroyFailedException;
     method public synchronized char[] getPassword();
-    method public synchronized boolean isDestroyed();
   }
 
   public static final class KeyStore.PrivateKeyEntry implements java.security.KeyStore.Entry {
@@ -59007,9 +59043,17 @@
     ctor public HashMap();
     ctor public HashMap(java.util.Map<? extends K, ? extends V>);
     method public java.lang.Object clone();
+    method public V compute(K, java.util.function.BiFunction<? super K, ? super V, ? extends V>);
+    method public V computeIfAbsent(K, java.util.function.Function<? super K, ? extends V>);
+    method public V computeIfPresent(K, java.util.function.BiFunction<? super K, ? super V, ? extends V>);
     method public java.util.Set<java.util.Map.Entry<K, V>> entrySet();
     method public void forEach(java.util.function.BiConsumer<? super K, ? super V>);
+    method public V getOrDefault(java.lang.Object, V);
+    method public V merge(K, V, java.util.function.BiFunction<? super V, ? super V, ? extends V>);
+    method public V putIfAbsent(K, V);
+    method public boolean remove(java.lang.Object, java.lang.Object);
     method public boolean replace(K, V, V);
+    method public V replace(K, V);
     method public void replaceAll(java.util.function.BiFunction<? super K, ? super V, ? extends V>);
   }
 
@@ -64969,8 +65013,8 @@
   }
 
   public abstract interface Destroyable {
-    method public abstract void destroy() throws javax.security.auth.DestroyFailedException;
-    method public abstract boolean isDestroyed();
+    method public default void destroy() throws javax.security.auth.DestroyFailedException;
+    method public default boolean isDestroyed();
   }
 
   public final class PrivateCredentialPermission extends java.security.Permission {
diff --git a/cmds/app_process/Android.mk b/cmds/app_process/Android.mk
index fae0400..e530184 100644
--- a/cmds/app_process/Android.mk
+++ b/cmds/app_process/Android.mk
@@ -12,7 +12,8 @@
 LOCAL_SRC_FILES:= \
     app_main.cpp
 
-LOCAL_LDFLAGS := -Wl,--version-script,art/sigchainlib/version-script.txt -Wl,--export-dynamic
+LOCAL_LDFLAGS_32 := -Wl,--version-script,art/sigchainlib/version-script32.txt -Wl,--export-dynamic
+LOCAL_LDFLAGS_64 := -Wl,--version-script,art/sigchainlib/version-script64.txt -Wl,--export-dynamic
 
 LOCAL_SHARED_LIBRARIES := \
     libdl \
@@ -58,7 +59,9 @@
 
 LOCAL_WHOLE_STATIC_LIBRARIES := libsigchain
 
-LOCAL_LDFLAGS := -ldl -Wl,--version-script,art/sigchainlib/version-script.txt -Wl,--export-dynamic
+LOCAL_LDFLAGS := -ldl
+LOCAL_LDFLAGS_32 := -Wl,--version-script,art/sigchainlib/version-script32.txt -Wl,--export-dynamic
+LOCAL_LDFLAGS_64 := -Wl,--version-script,art/sigchainlib/version-script64.txt -Wl,--export-dynamic
 LOCAL_CPPFLAGS := -std=c++11
 
 LOCAL_MODULE := app_process__asan
diff --git a/cmds/app_process/app_main.cpp b/cmds/app_process/app_main.cpp
index 72a21e3..80af5ea 100644
--- a/cmds/app_process/app_main.cpp
+++ b/cmds/app_process/app_main.cpp
@@ -17,7 +17,6 @@
 #include <binder/ProcessState.h>
 #include <utils/Log.h>
 #include <cutils/memory.h>
-#include <cutils/process_name.h>
 #include <cutils/properties.h>
 #include <cutils/trace.h>
 #include <android_runtime/AndroidRuntime.h>
diff --git a/core/java/android/app/ActivityThread.java b/core/java/android/app/ActivityThread.java
index 0386cff..5cffb78 100644
--- a/core/java/android/app/ActivityThread.java
+++ b/core/java/android/app/ActivityThread.java
@@ -4294,6 +4294,16 @@
         }
     }
 
+    /**
+     * Public entrypoint to stop profiling. This is required to end profiling when the app crashes,
+     * so that profiler data won't be lost.
+     *
+     * @hide
+     */
+    public void stopProfiling() {
+        mProfiler.stopProfiling();
+    }
+
     static final void handleDumpHeap(boolean managed, DumpHeapData dhd) {
         if (managed) {
             try {
diff --git a/core/java/android/bluetooth/BluetoothGatt.java b/core/java/android/bluetooth/BluetoothGatt.java
index 800dd43..9cfe417 100644
--- a/core/java/android/bluetooth/BluetoothGatt.java
+++ b/core/java/android/bluetooth/BluetoothGatt.java
@@ -250,9 +250,6 @@
                 if (VDBG) Log.d(TAG, "onCharacteristicRead() - Device=" + address
                             + " handle=" + handle + " Status=" + status);
 
-                 Log.w(TAG, "onCharacteristicRead() - Device=" + address
-                            + " handle=" + handle + " Status=" + status);
-
                 if (!address.equals(mDevice.getAddress())) {
                     return;
                 }
@@ -422,7 +419,6 @@
                     try {
                         mAuthRetry = true;
                         mService.writeDescriptor(mClientIf, address, handle,
-                            BluetoothGattCharacteristic.WRITE_TYPE_DEFAULT,
                             AUTHENTICATION_MITM, descriptor.getValue());
                         return;
                     } catch (RemoteException e) {
@@ -945,8 +941,7 @@
 
         try {
             mService.writeDescriptor(mClientIf, device.getAddress(), descriptor.getInstanceId(),
-                BluetoothGattCharacteristic.WRITE_TYPE_DEFAULT, AUTHENTICATION_NONE,
-                descriptor.getValue());
+                AUTHENTICATION_NONE, descriptor.getValue());
         } catch (RemoteException e) {
             Log.e(TAG,"",e);
             mDeviceBusy = false;
diff --git a/core/java/android/bluetooth/BluetoothGattCharacteristic.java b/core/java/android/bluetooth/BluetoothGattCharacteristic.java
index 01f82e6..1cc2270 100644
--- a/core/java/android/bluetooth/BluetoothGattCharacteristic.java
+++ b/core/java/android/bluetooth/BluetoothGattCharacteristic.java
@@ -321,10 +321,10 @@
     }
 
     /**
-     * Returns the deisred key size.
+     * Returns the desired key size.
      * @hide
      */
-    /*package*/ int getKeySize() {
+    public int getKeySize() {
         return mKeySize;
     }
 
@@ -393,6 +393,14 @@
     }
 
     /**
+     * Force the instance ID.
+     * @hide
+     */
+    public void setInstanceId(int instanceId) {
+        mInstance = instanceId;
+    }
+
+    /**
      * Returns the properties of this characteristic.
      *
      * <p>The properties contain a bit mask of property flags indicating
diff --git a/core/java/android/bluetooth/BluetoothGattDescriptor.java b/core/java/android/bluetooth/BluetoothGattDescriptor.java
index 28317c4..1a4fa48 100644
--- a/core/java/android/bluetooth/BluetoothGattDescriptor.java
+++ b/core/java/android/bluetooth/BluetoothGattDescriptor.java
@@ -227,6 +227,14 @@
     }
 
     /**
+     * Force the instance ID.
+     * @hide
+     */
+    public void setInstanceId(int instanceId) {
+        mInstance = instanceId;
+    }
+
+    /**
      * Returns the permissions for this descriptor.
      *
      * @return Permissions of this descriptor
diff --git a/core/java/android/bluetooth/BluetoothGattServer.java b/core/java/android/bluetooth/BluetoothGattServer.java
index f451340..c2bcbb2 100644
--- a/core/java/android/bluetooth/BluetoothGattServer.java
+++ b/core/java/android/bluetooth/BluetoothGattServer.java
@@ -52,6 +52,7 @@
     private Object mServerIfLock = new Object();
     private int mServerIf;
     private int mTransport;
+    private BluetoothGattService mPendingService;
     private List<BluetoothGattService> mServices;
 
     private static final int CALLBACK_REG_TIMEOUT = 10000;
@@ -109,17 +110,37 @@
              * Service has been added
              * @hide
              */
-            public void onServiceAdded(int status, int srvcType,
-                                       int srvcInstId, ParcelUuid srvcId) {
-                UUID srvcUuid = srvcId.getUuid();
-                if (DBG) Log.d(TAG, "onServiceAdded() - service=" + srvcUuid
-                    + "status=" + status);
+            public void onServiceAdded(int status, BluetoothGattService service) {
+                if (DBG) Log.d(TAG, "onServiceAdded() - handle=" + service.getInstanceId()
+                    + " uuid=" + service.getUuid() + " status=" + status);
 
-                BluetoothGattService service = getService(srvcUuid, srvcInstId, srvcType);
-                if (service == null) return;
+                if (mPendingService == null)
+                    return;
+
+                BluetoothGattService tmp = mPendingService;
+                mPendingService = null;
+
+                // Rewrite newly assigned handles to existing service.
+                tmp.setInstanceId(service.getInstanceId());
+                List<BluetoothGattCharacteristic> temp_chars = tmp.getCharacteristics();
+                List<BluetoothGattCharacteristic> svc_chars = service.getCharacteristics();
+                for (int i=0; i<svc_chars.size(); i++) {
+                    BluetoothGattCharacteristic temp_char = temp_chars.get(i);
+                    BluetoothGattCharacteristic svc_char = svc_chars.get(i);
+
+                    temp_char.setInstanceId(svc_char.getInstanceId());
+
+                    List<BluetoothGattDescriptor> temp_descs = temp_char.getDescriptors();
+                    List<BluetoothGattDescriptor> svc_descs = svc_char.getDescriptors();
+                    for (int j=0; j<svc_descs.size(); j++) {
+                        temp_descs.get(j).setInstanceId(svc_descs.get(j).getInstanceId());
+                    }
+                }
+
+                mServices.add(tmp);
 
                 try {
-                    mCallback.onServiceAdded((int)status, service);
+                    mCallback.onServiceAdded((int)status, tmp);
                 } catch (Exception ex) {
                     Log.w(TAG, "Unhandled exception in callback", ex);
                 }
@@ -130,19 +151,15 @@
              * @hide
              */
             public void onCharacteristicReadRequest(String address, int transId,
-                            int offset, boolean isLong, int srvcType, int srvcInstId,
-                            ParcelUuid srvcId, int charInstId, ParcelUuid charId) {
-                UUID srvcUuid = srvcId.getUuid();
-                UUID charUuid = charId.getUuid();
-                if (VDBG) Log.d(TAG, "onCharacteristicReadRequest() - "
-                    + "service=" + srvcUuid + ", characteristic=" + charUuid);
+                            int offset, boolean isLong, int handle) {
+                if (VDBG) Log.d(TAG, "onCharacteristicReadRequest() - handle=" + handle);
 
                 BluetoothDevice device = mAdapter.getRemoteDevice(address);
-                BluetoothGattService service = getService(srvcUuid, srvcInstId, srvcType);
-                if (service == null) return;
-
-                BluetoothGattCharacteristic characteristic = service.getCharacteristic(charUuid);
-                if (characteristic == null) return;
+                BluetoothGattCharacteristic characteristic = getCharacteristicByHandle(handle);
+                if (characteristic == null) {
+                    Log.w(TAG, "onCharacteristicReadRequest() no char for handle " + handle);
+                    return;
+                }
 
                 try {
                     mCallback.onCharacteristicReadRequest(device, transId, offset, characteristic);
@@ -156,25 +173,15 @@
              * @hide
              */
             public void onDescriptorReadRequest(String address, int transId,
-                            int offset, boolean isLong, int srvcType, int srvcInstId,
-                            ParcelUuid srvcId, int charInstId, ParcelUuid charId,
-                            ParcelUuid descrId) {
-                UUID srvcUuid = srvcId.getUuid();
-                UUID charUuid = charId.getUuid();
-                UUID descrUuid = descrId.getUuid();
-                if (VDBG) Log.d(TAG, "onCharacteristicReadRequest() - "
-                    + "service=" + srvcUuid + ", characteristic=" + charUuid
-                    + "descriptor=" + descrUuid);
+                            int offset, boolean isLong, int handle) {
+                if (VDBG) Log.d(TAG, "onDescriptorReadRequest() - handle=" + handle);
 
                 BluetoothDevice device = mAdapter.getRemoteDevice(address);
-                BluetoothGattService service = getService(srvcUuid, srvcInstId, srvcType);
-                if (service == null) return;
-
-                BluetoothGattCharacteristic characteristic = service.getCharacteristic(charUuid);
-                if (characteristic == null) return;
-
-                BluetoothGattDescriptor descriptor = characteristic.getDescriptor(descrUuid);
-                if (descriptor == null) return;
+                BluetoothGattDescriptor descriptor = getDescriptorByHandle(handle);
+                if (descriptor == null) {
+                    Log.w(TAG, "onDescriptorReadRequest() no desc for handle " + handle);
+                    return;
+                }
 
                 try {
                     mCallback.onDescriptorReadRequest(device, transId, offset, descriptor);
@@ -189,19 +196,15 @@
              */
             public void onCharacteristicWriteRequest(String address, int transId,
                             int offset, int length, boolean isPrep, boolean needRsp,
-                            int srvcType, int srvcInstId, ParcelUuid srvcId,
-                            int charInstId, ParcelUuid charId, byte[] value) {
-                UUID srvcUuid = srvcId.getUuid();
-                UUID charUuid = charId.getUuid();
-                if (VDBG) Log.d(TAG, "onCharacteristicWriteRequest() - "
-                    + "service=" + srvcUuid + ", characteristic=" + charUuid);
+                            int handle, byte[] value) {
+                if (VDBG) Log.d(TAG, "onCharacteristicWriteRequest() - handle=" + handle);
 
                 BluetoothDevice device = mAdapter.getRemoteDevice(address);
-                BluetoothGattService service = getService(srvcUuid, srvcInstId, srvcType);
-                if (service == null) return;
-
-                BluetoothGattCharacteristic characteristic = service.getCharacteristic(charUuid);
-                if (characteristic == null) return;
+                BluetoothGattCharacteristic characteristic = getCharacteristicByHandle(handle);
+                if (characteristic == null) {
+                    Log.w(TAG, "onCharacteristicWriteRequest() no char for handle " + handle);
+                    return;
+                }
 
                 try {
                     mCallback.onCharacteristicWriteRequest(device, transId, characteristic,
@@ -216,28 +219,16 @@
              * Remote client descriptor write request.
              * @hide
              */
-            public void onDescriptorWriteRequest(String address, int transId,
-                            int offset, int length, boolean isPrep, boolean needRsp,
-                            int srvcType, int srvcInstId, ParcelUuid srvcId,
-                            int charInstId, ParcelUuid charId, ParcelUuid descrId,
-                            byte[] value) {
-                UUID srvcUuid = srvcId.getUuid();
-                UUID charUuid = charId.getUuid();
-                UUID descrUuid = descrId.getUuid();
-                if (VDBG) Log.d(TAG, "onDescriptorWriteRequest() - "
-                    + "service=" + srvcUuid + ", characteristic=" + charUuid
-                    + "descriptor=" + descrUuid);
+            public void onDescriptorWriteRequest(String address, int transId, int offset,
+                            int length, boolean isPrep, boolean needRsp, int handle, byte[] value) {
+                if (VDBG) Log.d(TAG, "onDescriptorWriteRequest() - handle=" + handle);
 
                 BluetoothDevice device = mAdapter.getRemoteDevice(address);
-
-                BluetoothGattService service = getService(srvcUuid, srvcInstId, srvcType);
-                if (service == null) return;
-
-                BluetoothGattCharacteristic characteristic = service.getCharacteristic(charUuid);
-                if (characteristic == null) return;
-
-                BluetoothGattDescriptor descriptor = characteristic.getDescriptor(descrUuid);
-                if (descriptor == null) return;
+                BluetoothGattDescriptor descriptor = getDescriptorByHandle(handle);
+                if (descriptor == null) {
+                    Log.w(TAG, "onDescriptorWriteRequest() no desc for handle " + handle);
+                    return;
+                }
 
                 try {
                     mCallback.onDescriptorWriteRequest(device, transId, descriptor,
@@ -318,6 +309,36 @@
     }
 
     /**
+     * Returns the characteristic with the given handle, or null if none matches.
+     * @hide
+     */
+    /*package*/ BluetoothGattCharacteristic getCharacteristicByHandle(int handle) {
+        for(BluetoothGattService svc : mServices) {
+            for(BluetoothGattCharacteristic charac : svc.getCharacteristics()) {
+                if (charac.getInstanceId() == handle)
+                    return charac;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Returns the descriptor with the given handle, or null if none matches.
+     * @hide
+     */
+    /*package*/ BluetoothGattDescriptor getDescriptorByHandle(int handle) {
+        for(BluetoothGattService svc : mServices) {
+            for(BluetoothGattCharacteristic charac : svc.getCharacteristics()) {
+                for(BluetoothGattDescriptor desc : charac.getDescriptors()) {
+                    if (desc.getInstanceId() == handle)
+                        return desc;
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
      * Close this GATT server instance.
      *
      * Application should call this method as early as possible after it is done with
@@ -537,9 +558,7 @@
 
         try {
             mService.sendNotification(mServerIf, device.getAddress(),
-                    service.getType(), service.getInstanceId(),
-                    new ParcelUuid(service.getUuid()), characteristic.getInstanceId(),
-                    new ParcelUuid(characteristic.getUuid()), confirm,
+                    characteristic.getInstanceId(), confirm,
                     characteristic.getValue());
         } catch (RemoteException e) {
             Log.e(TAG,"",e);
@@ -568,39 +587,10 @@
         if (DBG) Log.d(TAG, "addService() - service: " + service.getUuid());
         if (mService == null || mServerIf == 0) return false;
 
-        mServices.add(service);
+        mPendingService = service;
 
         try {
-            mService.beginServiceDeclaration(mServerIf, service.getType(),
-                service.getInstanceId(), service.getHandles(),
-                new ParcelUuid(service.getUuid()), service.isAdvertisePreferred());
-
-            List<BluetoothGattService> includedServices = service.getIncludedServices();
-            for (BluetoothGattService includedService : includedServices) {
-                mService.addIncludedService(mServerIf,
-                    includedService.getType(),
-                    includedService.getInstanceId(),
-                    new ParcelUuid(includedService.getUuid()));
-            }
-
-            List<BluetoothGattCharacteristic> characteristics = service.getCharacteristics();
-            for (BluetoothGattCharacteristic characteristic : characteristics) {
-                int permission = ((characteristic.getKeySize() - 7) << 12)
-                                    + characteristic.getPermissions();
-                mService.addCharacteristic(mServerIf,
-                    new ParcelUuid(characteristic.getUuid()),
-                    characteristic.getProperties(), permission);
-
-                List<BluetoothGattDescriptor> descriptors = characteristic.getDescriptors();
-                for (BluetoothGattDescriptor descriptor: descriptors) {
-                    permission = ((characteristic.getKeySize() - 7) << 12)
-                                        + descriptor.getPermissions();
-                    mService.addDescriptor(mServerIf,
-                        new ParcelUuid(descriptor.getUuid()), permission);
-                }
-            }
-
-            mService.endServiceDeclaration(mServerIf);
+            mService.addService(mServerIf, service);
         } catch (RemoteException e) {
             Log.e(TAG,"",e);
             return false;
@@ -626,8 +616,7 @@
         if (intService == null) return false;
 
         try {
-            mService.removeService(mServerIf, service.getType(),
-                service.getInstanceId(), new ParcelUuid(service.getUuid()));
+            mService.removeService(mServerIf, service.getInstanceId());
             mServices.remove(intService);
         } catch (RemoteException e) {
             Log.e(TAG,"",e);
diff --git a/core/java/android/bluetooth/BluetoothGattService.java b/core/java/android/bluetooth/BluetoothGattService.java
index a4e1dc0..c888a45 100644
--- a/core/java/android/bluetooth/BluetoothGattService.java
+++ b/core/java/android/bluetooth/BluetoothGattService.java
@@ -250,7 +250,6 @@
 
     /**
      * Force the instance ID.
-     * This is needed for conformance testing only.
      * @hide
      */
     public void setInstanceId(int instanceId) {
diff --git a/core/java/android/bluetooth/BluetoothHeadset.java b/core/java/android/bluetooth/BluetoothHeadset.java
index 09a15de..f46a3b3 100644
--- a/core/java/android/bluetooth/BluetoothHeadset.java
+++ b/core/java/android/bluetooth/BluetoothHeadset.java
@@ -220,6 +220,46 @@
      * {@link #EXTRA_STATE} or {@link #EXTRA_PREVIOUS_STATE} of
      * {@link #ACTION_AUDIO_STATE_CHANGED} intent.
      */
+
+    /**
+     * Intent used to broadcast the headset's indicator status.
+     *
+     * <p>This intent will have 3 extras:
+     * <ul>
+     *   <li> {@link #EXTRA_HF_INDICATORS_IND_ID} - The assigned number of the headset
+     *        indicator which is supported by the headset (as indicated by the AT+BIND
+     *        command in the SLC sequence), or whose value has changed (as indicated
+     *        by the AT+BIEV command). </li>
+     *   <li> {@link #EXTRA_HF_INDICATORS_IND_VALUE} - The updated value of the indicator. </li>
+     *   <li> {@link BluetoothDevice#EXTRA_DEVICE} - The remote device. </li>
+     * </ul>
+     * <p>{@link #EXTRA_HF_INDICATORS_IND_ID} is defined by the Bluetooth SIG; each indicator is
+     * given an assigned number. The assigned numbers of the indicators added so far are:
+     * - Enhanced Safety - 1
+     * <p>Requires {@link android.Manifest.permission#BLUETOOTH} permission to
+     * receive.
+     * @hide
+     */
+    public static final String ACTION_HF_INDICATORS_VALUE_CHANGED =
+            "android.bluetooth.headset.action.HF_INDICATORS_VALUE_CHANGED";
+
+    /**
+     * A String extra field in {@link #ACTION_HF_INDICATORS_VALUE_CHANGED}
+     * intents that contains the UUID of the headset indicator (as defined by Bluetooth SIG)
+     * that is being sent.
+     * @hide
+     */
+    public static final String EXTRA_HF_INDICATORS_IND_ID =
+            "android.bluetooth.headset.extra.HF_INDICATORS_IND_ID";
+
+    /**
+     * An int extra field in {@link #ACTION_HF_INDICATORS_VALUE_CHANGED}
+     * intents that contains the value of the Headset indicator that is being sent.
+     * @hide
+     */
+    public static final String EXTRA_HF_INDICATORS_IND_VALUE =
+            "android.bluetooth.headset.extra.HF_INDICATORS_IND_VALUE";
+
     public static final int STATE_AUDIO_CONNECTED = 12;
 
     private static final int MESSAGE_HEADSET_SERVICE_CONNECTED = 100;
@@ -969,6 +1009,29 @@
         return false;
     }
 
+    /**
+     * Sends the headset the BIND response from the AG, reporting a change in the status
+     * of an HF indicator to the headset.
+     *
+     * @param ind_id Assigned number of the indicator (defined by the Bluetooth SIG)
+     * @param ind_status Whether the indicator is enabled:
+     *        false - indicator is disabled, no value changes shall be sent for this indicator;
+     *        true - indicator is enabled, value changes may be sent for this indicator
+     * @hide
+     */
+    public void bindResponse(int ind_id, boolean ind_status) {
+        if (mService == null || !isEnabled()) {
+            Log.w(TAG, "Proxy not attached to service");
+            if (DBG) Log.d(TAG, Log.getStackTraceString(new Throwable()));
+            return;
+        }
+        try {
+            mService.bindResponse(ind_id, ind_status);
+        } catch (RemoteException remoteFailure) {
+            Log.e(TAG, remoteFailure.toString());
+        }
+    }
+
     private final IBluetoothProfileServiceConnection mConnection
             = new IBluetoothProfileServiceConnection.Stub()  {
         @Override
diff --git a/core/java/android/bluetooth/IBluetoothGatt.aidl b/core/java/android/bluetooth/IBluetoothGatt.aidl
index 45b5122..124d39b 100644
--- a/core/java/android/bluetooth/IBluetoothGatt.aidl
+++ b/core/java/android/bluetooth/IBluetoothGatt.aidl
@@ -17,6 +17,7 @@
 package android.bluetooth;
 
 import android.bluetooth.BluetoothDevice;
+import android.bluetooth.BluetoothGattService;
 import android.bluetooth.le.AdvertiseSettings;
 import android.bluetooth.le.AdvertiseData;
 import android.bluetooth.le.ScanFilter;
@@ -55,7 +56,7 @@
                             in int writeType, in int authReq, in byte[] value);
     void readDescriptor(in int clientIf, in String address, in int handle, in int authReq);
     void writeDescriptor(in int clientIf, in String address, in int handle,
-                            in int writeType, in int authReq, in byte[] value);
+                            in int authReq, in byte[] value);
     void registerForNotification(in int clientIf, in String address, in int handle, in boolean enable);
     void beginReliableWrite(in int clientIf, in String address);
     void endReliableWrite(in int clientIf, in String address, in boolean execute);
@@ -65,26 +66,14 @@
 
     void registerServer(in ParcelUuid appId, in IBluetoothGattServerCallback callback);
     void unregisterServer(in int serverIf);
-    void serverConnect(in int servertIf, in String address, in boolean isDirect, in int transport);
+    void serverConnect(in int serverIf, in String address, in boolean isDirect, in int transport);
     void serverDisconnect(in int serverIf, in String address);
-    void beginServiceDeclaration(in int serverIf, in int srvcType,
-                            in int srvcInstanceId, in int minHandles,
-                            in ParcelUuid srvcId, boolean advertisePreferred);
-    void addIncludedService(in int serverIf, in int srvcType,
-                            in int srvcInstanceId, in ParcelUuid srvcId);
-    void addCharacteristic(in int serverIf, in ParcelUuid charId,
-                            in int properties, in int permissions);
-    void addDescriptor(in int serverIf, in ParcelUuid descId,
-                            in int permissions);
-    void endServiceDeclaration(in int serverIf);
-    void removeService(in int serverIf, in int srvcType,
-                            in int srvcInstanceId, in ParcelUuid srvcId);
+    void addService(in int serverIf, in BluetoothGattService service);
+    void removeService(in int serverIf, in int handle);
     void clearServices(in int serverIf);
     void sendResponse(in int serverIf, in String address, in int requestId,
                             in int status, in int offset, in byte[] value);
-    void sendNotification(in int serverIf, in String address, in int srvcType,
-                            in int srvcInstanceId, in ParcelUuid srvcId,
-                            in int charInstanceId, in ParcelUuid charId,
+    void sendNotification(in int serverIf, in String address, in int handle,
                             in boolean confirm, in byte[] value);
     void disconnectAll();
     void unregAll();
diff --git a/core/java/android/bluetooth/IBluetoothGattServerCallback.aidl b/core/java/android/bluetooth/IBluetoothGattServerCallback.aidl
index 8b202b2..0bcb07b 100644
--- a/core/java/android/bluetooth/IBluetoothGattServerCallback.aidl
+++ b/core/java/android/bluetooth/IBluetoothGattServerCallback.aidl
@@ -15,8 +15,7 @@
  */
 package android.bluetooth;
 
-import android.os.ParcelUuid;
-
+import android.bluetooth.BluetoothGattService;
 
 /**
  * Callback definitions for interacting with BLE / GATT
@@ -27,36 +26,18 @@
     void onScanResult(in String address, in int rssi, in byte[] advData);
     void onServerConnectionState(in int status, in int serverIf,
                                  in boolean connected, in String address);
-    void onServiceAdded(in int status, in int srvcType,
-                        in int srvcInstId, in ParcelUuid srvcId);
-    void onCharacteristicReadRequest(in String address, in int transId,
-                                     in int offset, in boolean isLong,
-                                     in int srvcType,
-                                     in int srvcInstId, in ParcelUuid srvcId,
-                                     in int charInstId, in ParcelUuid charId);
+    void onServiceAdded(in int status, in BluetoothGattService service);
+    void onCharacteristicReadRequest(in String address, in int transId, in int offset,
+                                     in boolean isLong, in int handle);
     void onDescriptorReadRequest(in String address, in int transId,
                                      in int offset, in boolean isLong,
-                                     in int srvcType,
-                                     in int srvcInstId, in ParcelUuid srvcId,
-                                     in int charInstId, in ParcelUuid charId,
-                                     in ParcelUuid descrId);
-    void onCharacteristicWriteRequest(in String address, in int transId,
-                                     in int offset, in int length,
-                                     in boolean isPrep,
-                                     in boolean needRsp,
-                                     in int srvcType,
-                                     in int srvcInstId, in ParcelUuid srvcId,
-                                     in int charInstId, in ParcelUuid charId,
-                                     in byte[] value);
-    void onDescriptorWriteRequest(in String address, in int transId,
-                                     in int offset, in int length,
-                                     in boolean isPrep,
-                                     in boolean needRsp,
-                                     in int srvcType,
-                                     in int srvcInstId, in ParcelUuid srvcId,
-                                     in int charInstId, in ParcelUuid charId,
-                                     in ParcelUuid descrId,
-                                     in byte[] value);
+                                     in int handle);
+    void onCharacteristicWriteRequest(in String address, in int transId, in int offset,
+                                     in int length, in boolean isPrep, in boolean needRsp,
+                                     in int handle, in byte[] value);
+    void onDescriptorWriteRequest(in String address, in int transId, in int offset,
+                                     in int length, in boolean isPrep, in boolean needRsp,
+                                     in int handle, in byte[] value);
     void onExecuteWrite(in String address, in int transId, in boolean execWrite);
     void onNotificationSent(in String address, in int status);
     void onMtuChanged(in String address, in int mtu);
diff --git a/core/java/android/bluetooth/IBluetoothHeadset.aidl b/core/java/android/bluetooth/IBluetoothHeadset.aidl
index 0bb4088..6ad442b 100755
--- a/core/java/android/bluetooth/IBluetoothHeadset.aidl
+++ b/core/java/android/bluetooth/IBluetoothHeadset.aidl
@@ -59,4 +59,5 @@
                       String number, int type);
     boolean enableWBS();
     boolean disableWBS();
+    void bindResponse(int ind_id, boolean ind_status);
 }
diff --git a/core/java/android/bluetooth/OobData.java b/core/java/android/bluetooth/OobData.java
index 01f72ef..53ca974 100644
--- a/core/java/android/bluetooth/OobData.java
+++ b/core/java/android/bluetooth/OobData.java
@@ -26,6 +26,8 @@
  */
 public class OobData implements Parcelable {
     private byte[] securityManagerTk;
+    private byte[] leSecureConnectionsConfirmation;
+    private byte[] leSecureConnectionsRandom;
 
     public byte[] getSecurityManagerTk() {
         return securityManagerTk;
@@ -35,10 +37,28 @@
         this.securityManagerTk = securityManagerTk;
     }
 
+    public byte[] getLeSecureConnectionsConfirmation() {
+        return leSecureConnectionsConfirmation;
+    }
+
+    public void setLeSecureConnectionsConfirmation(byte[] leSecureConnectionsConfirmation) {
+        this.leSecureConnectionsConfirmation = leSecureConnectionsConfirmation;
+    }
+
+    public byte[] getLeSecureConnectionsRandom() {
+        return leSecureConnectionsRandom;
+    }
+
+    public void setLeSecureConnectionsRandom(byte[] leSecureConnectionsRandom) {
+        this.leSecureConnectionsRandom = leSecureConnectionsRandom;
+    }
+
     public OobData() { }
 
     private OobData(Parcel in) {
         securityManagerTk = in.createByteArray();
+        leSecureConnectionsConfirmation = in.createByteArray();
+        leSecureConnectionsRandom = in.createByteArray();
     }
 
     public int describeContents() {
@@ -48,6 +68,8 @@
     @Override
     public void writeToParcel(Parcel out, int flags) {
         out.writeByteArray(securityManagerTk);
+        out.writeByteArray(leSecureConnectionsConfirmation);
+        out.writeByteArray(leSecureConnectionsRandom);
     }
 
     public static final Parcelable.Creator<OobData> CREATOR
diff --git a/core/java/android/content/UriMatcher.java b/core/java/android/content/UriMatcher.java
index 71a035e..444edd0 100644
--- a/core/java/android/content/UriMatcher.java
+++ b/core/java/android/content/UriMatcher.java
@@ -167,7 +167,7 @@
         if (path != null) {
             String newPath = path;
             // Strip leading slash if present.
-            if (path.length() > 0 && path.charAt(0) == '/') {
+            if (path.length() > 1 && path.charAt(0) == '/') {
                 newPath = path.substring(1);
             }
             tokens = newPath.split("/");
diff --git a/core/java/android/security/FrameworkNetworkSecurityPolicy.java b/core/java/android/security/FrameworkNetworkSecurityPolicy.java
index 83f173ec..ee4f871 100644
--- a/core/java/android/security/FrameworkNetworkSecurityPolicy.java
+++ b/core/java/android/security/FrameworkNetworkSecurityPolicy.java
@@ -37,4 +37,9 @@
     public boolean isCleartextTrafficPermitted(String hostname) {
         return isCleartextTrafficPermitted();
     }
+
+    @Override
+    public boolean isCertificateTransparencyVerificationRequired(String hostname) {
+        return false;
+    }
 }
diff --git a/core/java/android/security/net/config/ConfigNetworkSecurityPolicy.java b/core/java/android/security/net/config/ConfigNetworkSecurityPolicy.java
index e7d17c2..a708f5b 100644
--- a/core/java/android/security/net/config/ConfigNetworkSecurityPolicy.java
+++ b/core/java/android/security/net/config/ConfigNetworkSecurityPolicy.java
@@ -37,4 +37,9 @@
     public boolean isCleartextTrafficPermitted(String hostname) {
         return mConfig.isCleartextTrafficPermitted(hostname);
     }
+
+    @Override
+    public boolean isCertificateTransparencyVerificationRequired(String hostname) {
+        return false;
+    }
 }
diff --git a/core/java/android/util/jar/StrictJarFile.java b/core/java/android/util/jar/StrictJarFile.java
index fd57806..2386aa3 100644
--- a/core/java/android/util/jar/StrictJarFile.java
+++ b/core/java/android/util/jar/StrictJarFile.java
@@ -175,6 +175,18 @@
         }
     }
 
+    @Override
+    protected void finalize() throws Throwable {
+        // Last-resort cleanup: let the guard warn if still open, then close anyway.
+        try {
+            if (guard != null) guard.warnIfOpen();
+            close();
+        } finally {
+            // Superclass finalization runs even if the cleanup above throws.
+            super.finalize();
+        }
+    }
+
     private InputStream getZipInputStream(ZipEntry ze) {
         if (ze.getMethod() == ZipEntry.STORED) {
             return new RAFStream(raf, ze.getDataOffset(),
diff --git a/core/java/com/android/internal/midi/MidiDispatcher.java b/core/java/com/android/internal/midi/MidiDispatcher.java
index 1a3c37c..c16628a 100644
--- a/core/java/com/android/internal/midi/MidiDispatcher.java
+++ b/core/java/com/android/internal/midi/MidiDispatcher.java
@@ -26,11 +26,23 @@
  * Utility class for dispatching MIDI data to a list of {@link android.media.midi.MidiReceiver}s.
  * This class subclasses {@link android.media.midi.MidiReceiver} and dispatches any data it receives
  * to its receiver list. Any receivers that throw an exception upon receiving data will
- * be automatically removed from the receiver list, but no IOException will be returned
- * from the dispatcher's {@link android.media.midi.MidiReceiver#onSend} in that case.
+ * be automatically removed from the receiver list. If a MidiReceiverFailureHandler has been
+ * provided to the MidiDispatcher, it will be notified about the failure, but the exception
+ * itself will be swallowed.
  */
 public final class MidiDispatcher extends MidiReceiver {
 
+    // MidiDispatcher's client and MidiReceiver's owner can be different
+    // classes (e.g. MidiDeviceService is a client, but MidiDeviceServer is
+    // the owner), and errors occurring during sending need to be reported
+    // to the owner rather than to the sender.
+    //
+    // Note that the callbacks will be called on the sender's thread.
+    public interface MidiReceiverFailureHandler {
+        void onReceiverFailure(MidiReceiver receiver, IOException failure);
+    }
+
+    private final MidiReceiverFailureHandler mFailureHandler;
     private final CopyOnWriteArrayList<MidiReceiver> mReceivers
             = new CopyOnWriteArrayList<MidiReceiver>();
 
@@ -46,6 +58,14 @@
         }
     };
 
+    public MidiDispatcher() {
+        this(null);  // no failure handler supplied
+    }
+
+    public MidiDispatcher(MidiReceiverFailureHandler failureHandler) {
+        mFailureHandler = failureHandler;  // may be null (the no-arg constructor passes null)
+    }
+
     /**
      * Returns the number of {@link android.media.midi.MidiReceiver}s this dispatcher contains.
      * @return the number of receivers
@@ -70,8 +90,13 @@
             try {
                 receiver.send(msg, offset, count, timestamp);
             } catch (IOException e) {
-                // if the receiver fails we remove the receiver but do not propagate the exception
+                // If the receiver fails we remove the receiver but do not propagate the exception.
+                // Note that this may also happen if the client code stalls, and thus underlying
+                // MidiInputPort.onSend has raised IOException for EAGAIN / EWOULDBLOCK error.
                 mReceivers.remove(receiver);
+                if (mFailureHandler != null) {
+                    mFailureHandler.onReceiverFailure(receiver, e);
+                }
             }
         }
     }
@@ -79,7 +104,15 @@
     @Override
     public void onFlush() throws IOException {
        for (MidiReceiver receiver : mReceivers) {
-            receiver.flush();
+            try {
+                receiver.flush();
+            } catch (IOException e) {
+                // This is just a special case of 'send' thus handle in the same way.
+                mReceivers.remove(receiver);
+                if (mFailureHandler != null) {
+                    mFailureHandler.onReceiverFailure(receiver, e);
+                }
+            }
        }
     }
 }
diff --git a/core/java/com/android/internal/os/RuntimeInit.java b/core/java/com/android/internal/os/RuntimeInit.java
index f81658e..ff81bc6 100644
--- a/core/java/com/android/internal/os/RuntimeInit.java
+++ b/core/java/com/android/internal/os/RuntimeInit.java
@@ -62,28 +62,54 @@
     }
 
     /**
-     * Use this to log a message when a thread exits due to an uncaught
-     * exception.  The framework catches these for the main threads, so
-     * this should only matter for threads created by applications.
+     * Logs a message when a thread encounters an uncaught exception. By
+     * default, {@link KillApplicationHandler} will terminate this process later,
+     * but apps can override that behavior.
      */
-    private static class UncaughtHandler implements Thread.UncaughtExceptionHandler {
+    private static class LoggingHandler implements Thread.UncaughtExceptionHandler {
+        @Override
+        public void uncaughtException(Thread t, Throwable e) {
+            // Nothing to log once KillApplicationHandler has flagged the process as crashing.
+            if (mCrashing) return;
+
+            // The "FATAL EXCEPTION" wording below is kept even though apps can install a
+            // custom UncaughtExceptionHandler that renders uncaught exceptions non-fatal.
+            if (mApplicationObject == null) {
+                Clog_e(TAG, "*** FATAL EXCEPTION IN SYSTEM PROCESS: " + t.getName(), e);
+                return;
+            }
+
+            // Application object present: thread name on the first line, then the
+            // process name (when known) and the PID on the second.
+            StringBuilder report = new StringBuilder();
+            report.append("FATAL EXCEPTION: ").append(t.getName()).append("\n");
+            final String currentProcess = ActivityThread.currentProcessName();
+            if (currentProcess != null) {
+                report.append("Process: ").append(currentProcess).append(", ");
+            }
+            report.append("PID: ").append(Process.myPid());
+            Clog_e(TAG, report.toString(), e);
+        }
+    }
+
+    /**
+     * Handle application death from an uncaught exception.  The framework
+     * catches these for the main threads, so this should only matter for
+     * threads created by applications.  Before this method runs,
+     * {@link LoggingHandler} will already have logged details.
+     */
+    private static class KillApplicationHandler implements Thread.UncaughtExceptionHandler {
         public void uncaughtException(Thread t, Throwable e) {
             try {
                 // Don't re-enter -- avoid infinite loops if crash-reporting crashes.
                 if (mCrashing) return;
                 mCrashing = true;
 
-                if (mApplicationObject == null) {
-                    Clog_e(TAG, "*** FATAL EXCEPTION IN SYSTEM PROCESS: " + t.getName(), e);
-                } else {
-                    StringBuilder message = new StringBuilder();
-                    message.append("FATAL EXCEPTION: ").append(t.getName()).append("\n");
-                    final String processName = ActivityThread.currentProcessName();
-                    if (processName != null) {
-                        message.append("Process: ").append(processName).append(", ");
-                    }
-                    message.append("PID: ").append(Process.myPid());
-                    Clog_e(TAG, message.toString(), e);
+                // Try to end profiling. If a profiler is running at this point, and we kill the
+                // process (below), the in-memory buffer will be lost. So try to stop, which will
+                // flush the buffer. (This makes method trace profiling useful to debug crashes.)
+                if (ActivityThread.currentActivityThread() != null) {
+                    ActivityThread.currentActivityThread().stopProfiling();
                 }
 
                 // Bring up crash dialog, wait for it to be dismissed
@@ -106,8 +132,12 @@
     private static final void commonInit() {
         if (DEBUG) Slog.d(TAG, "Entered RuntimeInit!");
 
-        /* set default handler; this applies to all threads in the VM */
-        Thread.setDefaultUncaughtExceptionHandler(new UncaughtHandler());
+        /*
+         * set handlers; these apply to all threads in the VM. Apps can replace
+         * the default handler, but not the pre handler.
+         */
+        Thread.setUncaughtExceptionPreHandler(new LoggingHandler());
+        Thread.setDefaultUncaughtExceptionHandler(new KillApplicationHandler());
 
         /*
          * Install a TimezoneGetter subclass for ZoneInfo.db
diff --git a/core/java/com/android/internal/os/ZygoteInit.java b/core/java/com/android/internal/os/ZygoteInit.java
index 8c6653d..4ebe6d6 100644
--- a/core/java/com/android/internal/os/ZygoteInit.java
+++ b/core/java/com/android/internal/os/ZygoteInit.java
@@ -595,7 +595,8 @@
             OsConstants.CAP_SYS_NICE,
             OsConstants.CAP_SYS_RESOURCE,
             OsConstants.CAP_SYS_TIME,
-            OsConstants.CAP_SYS_TTY_CONFIG
+            OsConstants.CAP_SYS_TTY_CONFIG,
+            OsConstants.CAP_WAKE_ALARM
         );
         /* Hardcoded command line to start the system server */
         String args[] = {
diff --git a/core/jni/android_util_Process.cpp b/core/jni/android_util_Process.cpp
index 2395ece..1491f90 100644
--- a/core/jni/android_util_Process.cpp
+++ b/core/jni/android_util_Process.cpp
@@ -20,7 +20,6 @@
 #include <utils/Log.h>
 #include <binder/IPCThreadState.h>
 #include <binder/IServiceManager.h>
-#include <cutils/process_name.h>
 #include <cutils/sched_policy.h>
 #include <utils/String8.h>
 #include <utils/Vector.h>
diff --git a/core/res/AndroidManifest.xml b/core/res/AndroidManifest.xml
index 9a64db0..51b3453 100644
--- a/core/res/AndroidManifest.xml
+++ b/core/res/AndroidManifest.xml
@@ -170,8 +170,13 @@
     <protected-broadcast
         android:name="android.bluetooth.map.profile.action.CONNECTION_STATE_CHANGED" />
     <protected-broadcast
+        android:name="com.android.bluetooth.BluetoothMapContentObserver.action.MESSAGE_SENT" />
+    <protected-broadcast
+        android:name="com.android.bluetooth.BluetoothMapContentObserver.action.MESSAGE_DELIVERY" />
+    <protected-broadcast
         android:name="android.bluetooth.pan.profile.action.CONNECTION_STATE_CHANGED" />
     <protected-broadcast android:name="android.bluetooth.pbap.intent.action.PBAP_STATE_CHANGED" />
+    <protected-broadcast android:name="android.bluetooth.sap.profile.action.CONNECTION_STATE_CHANGED" />
     <protected-broadcast android:name="android.btopp.intent.action.INCOMING_FILE_NOTIFICATION" />
     <protected-broadcast android:name="android.btopp.intent.action.USER_CONFIRMATION_TIMEOUT" />
     <protected-broadcast android:name="android.btopp.intent.action.LIST" />
@@ -187,6 +192,8 @@
     <protected-broadcast android:name="com.android.bluetooth.pbap.userconfirmtimeout" />
     <protected-broadcast android:name="com.android.bluetooth.pbap.authresponse" />
     <protected-broadcast android:name="com.android.bluetooth.pbap.authcancelled" />
+    <protected-broadcast android:name="com.android.bluetooth.sap.USER_CONFIRM_TIMEOUT" />
+    <protected-broadcast android:name="com.android.bluetooth.sap.action.DISCONNECT_ACTION" />
 
     <protected-broadcast android:name="android.hardware.display.action.WIFI_DISPLAY_STATUS_CHANGED" />
 
diff --git a/core/tests/coretests/src/android/app/DownloadManagerBaseTest.java b/core/tests/coretests/src/android/app/DownloadManagerBaseTest.java
index af2a944..ab40e0f 100644
--- a/core/tests/coretests/src/android/app/DownloadManagerBaseTest.java
+++ b/core/tests/coretests/src/android/app/DownloadManagerBaseTest.java
@@ -245,6 +245,12 @@
         // Note: callers overriding this should call mServer.play() with the desired port #
     }
 
+    @Override
+    public void tearDown() throws Exception {
+        // Run the base-class teardown even when stopping the mock server throws.
+        try { mServer.shutdown(); } finally { super.tearDown(); }
+    }
+
     /**
      * Helper to build a response from the MockWebServer with no body.
      *
diff --git a/core/tests/coretests/src/android/net/UriMatcherTest.java b/core/tests/coretests/src/android/net/UriMatcherTest.java
index a728d4f..dd46fa3 100644
--- a/core/tests/coretests/src/android/net/UriMatcherTest.java
+++ b/core/tests/coretests/src/android/net/UriMatcherTest.java
@@ -82,9 +82,31 @@
         checkAll(matcher);
     }
 
+    @SmallTest
+    public void testContentUrisWithLeadingSlashAndOnlySlash() {
+        UriMatcher matcher = new UriMatcher(ROOT);
+        matcher.addURI("people", "/", PEOPLE);
+        matcher.addURI("people", "/#", PEOPLE_ID);
+        matcher.addURI("people", "/#/phones", PEOPLE_PHONES);
+        matcher.addURI("people", "/#/phones/blah", PEOPLE_PHONES_ID);
+        matcher.addURI("people", "/#/phones/#", PEOPLE_PHONES_ID);
+        matcher.addURI("people", "/#/addresses", PEOPLE_ADDRESSES);
+        matcher.addURI("people", "/#/addresses/#", PEOPLE_ADDRESSES_ID);
+        matcher.addURI("people", "/#/contact-methods", PEOPLE_CONTACTMETH);
+        matcher.addURI("people", "/#/contact-methods/#", PEOPLE_CONTACTMETH_ID);
+        matcher.addURI("calls", "/", CALLS);
+        matcher.addURI("calls", "/#", CALLS_ID);
+        matcher.addURI("caller-id", "/", CALLERID);
+        matcher.addURI("caller-id", "/*", CALLERID_TEXT);
+        matcher.addURI("filter-recent", null, FILTERRECENT);
+        matcher.addURI("auth", "/another/path/segment", ANOTHER_PATH_SEGMENT);
+        checkAll(matcher);
+    }
+
     private void checkAll(UriMatcher matcher) {
         check("content://asdf", UriMatcher.NO_MATCH, matcher);
         check("content://people", PEOPLE, matcher);
+        check("content://people/", PEOPLE, matcher);
         check("content://people/1", PEOPLE_ID, matcher);
         check("content://people/asdf", UriMatcher.NO_MATCH, matcher);
         check("content://people/2/phones", PEOPLE_PHONES, matcher);
@@ -97,9 +119,11 @@
         check("content://people/2/contact-methods/3", PEOPLE_CONTACTMETH_ID, matcher);
         check("content://people/2/contact-methods/asdf", UriMatcher.NO_MATCH, matcher);
         check("content://calls", CALLS, matcher);
+        check("content://calls/", CALLS, matcher);
         check("content://calls/1", CALLS_ID, matcher);
         check("content://calls/asdf", UriMatcher.NO_MATCH, matcher);
         check("content://caller-id", CALLERID, matcher);
+        check("content://caller-id/", CALLERID, matcher);
         check("content://caller-id/asdf", CALLERID_TEXT, matcher);
         check("content://caller-id/1", CALLERID_TEXT, matcher);
         check("content://filter-recent", FILTERRECENT, matcher);
diff --git a/core/tests/coretests/src/android/net/http/CookiesTest.java b/core/tests/coretests/src/android/net/http/CookiesTest.java
index 29e590f..a53330e 100644
--- a/core/tests/coretests/src/android/net/http/CookiesTest.java
+++ b/core/tests/coretests/src/android/net/http/CookiesTest.java
@@ -36,7 +36,13 @@
 
 public final class CookiesTest extends TestCase {
 
-    private MockWebServer server = new MockWebServer();
+    private MockWebServer server;
+
+    @Override
+    protected void setUp() throws Exception {
+        super.setUp();
+        server = new MockWebServer();
+    }
 
     @Override protected void tearDown() throws Exception {
         server.shutdown();
diff --git a/core/tests/coretests/src/android/net/http/DefaultHttpClientTest.java b/core/tests/coretests/src/android/net/http/DefaultHttpClientTest.java
index cf9e6e6..80c7a4c 100644
--- a/core/tests/coretests/src/android/net/http/DefaultHttpClientTest.java
+++ b/core/tests/coretests/src/android/net/http/DefaultHttpClientTest.java
@@ -41,7 +41,13 @@
  */
 public final class DefaultHttpClientTest extends TestCase {
 
-    private MockWebServer server = new MockWebServer();
+    private MockWebServer server;
+
+    @Override
+    public void setUp() throws Exception {
+        super.setUp();
+        server = new MockWebServer();
+    }
 
     @Override protected void tearDown() throws Exception {
         server.shutdown();
diff --git a/docs/html/training/articles/smp.jd b/docs/html/training/articles/smp.jd
index 0b45987..20d2ee0 100644
--- a/docs/html/training/articles/smp.jd
+++ b/docs/html/training/articles/smp.jd
@@ -11,27 +11,12 @@
   <li><a href="#theory">Theory</a>
     <ol class="nolist">
       <li style="margin: 3px 0 0"><a href="#mem_consistency">Memory consistency models</a>
-        <ol class="nolist">
-          <li style="margin:0"><a href="#proc_consistency">Processor consistency</a></li>
-          <li style="margin:0"><a href="#cpu_cache">CPU cache behavior</a></li>
-          <li style="margin:0"><a href="#observability">Observability</a></li>
-          <li style="margin:0"><a href="#ordering">ARM’s weak ordering</a></li>
-        </ol>
       </li>
-      <li style="margin:3px 0 0"><a href="#datamem_barriers">Data memory barriers</a>
+      <li style="margin:3px 0 0"><a href="#racefree">Data-race-free programming</a>
         <ol class="nolist">
-          <li style="margin:0"><a href="#ss_ll">Store/store and load/load</a></li>
-          <li style="margin:0"><a href="#ls_sl">Load/store and store/load</a></li>
-          <li style="margin:0"><a href="#barrier_inst">Barrier instructions</a></li>
-          <li style="margin:0"><a href="#addr_dep">Address dependencies and causal consistency</a></li>
-          <li style="margin:0"><a href="#membarrier_summry">Memory barrier summary</a></li>
-        </ol>
-      </li>
-      <li style="margin:3px 0 0"><a href="#atomic_ops">Atomic operations</a>
-        <ol class="nolist">
-          <li style="margin:0"><a href="#atomic_essentials">Atomic essentials</a></li>
-          <li style="margin:0"><a href="#atomic_barrierpairing">Atomic + barrier pairing</a></li>
-          <li style="margin:0"><a href="#acq_rel">Acquire and release</a></li>
+          <li style="margin:0"><a href="#dataraces">What's a "data race"?</a></li>
+          <li style="margin:0"><a href="#avoiding">Avoiding data races</a></li>
+          <li style="margin:0"><a href="#reordering">When memory reordering becomes visible</a></li>
         </ol>
       </li>
     </ol>
@@ -51,18 +36,21 @@
         </ol>
       </li>
       <li style="margin:3px 0 0"><a href="#bestpractice">What to do</a>
-        <ol class="nolist">
-          <li style="margin:0"><a href="#advice">General advice</a></li>
-          <li style="margin:0"><a href="#sync_guarantees">Synchronization primitive guarantees</a></li>
-          <li style="margin:0"><a href="#ccpp_changes">Upcoming changes to C/C++</a></li>
-        </ol>
       </li>
     </ol>
   </li>
+  <li><a href="#weak">A little more about weak memory orders</a>
+    <ol class="nolist">
+      <li style="margin:0"><a href="#nonracing">Non-racing accesses</a></li>
+      <li style="margin:0"><a href="#hint_only">Result is not relied upon for correctness</a></li>
+      <li style="margin:0"><a href="#unread">Atomically modified but unread data</a></li>
+      <li style="margin:0"><a href="#flag">Simple flag communication</a></li>
+      <li style="margin:0"><a href="#immutable">Immutable fields</a></li>
+    </ol>
+  </li>
   <li><a href="#closing_notes">Closing Notes</a></li>
   <li><a href="#appendix">Appendix</a>
     <ol class="nolist">
-      <li style="margin:0"><a href="#smp_failure_example">SMP failure example</a></li>
       <li style="margin:0"><a href="#sync_stores">Implementing synchronization stores</a></li>
       <li style="margin:0"><a href="#more">Further reading</a></li>
     </ol>
@@ -73,15 +61,10 @@
 
 <p>Android 3.0 and later platform versions are optimized to support
 multiprocessor architectures. This document introduces issues that
-can arise when writing code for symmetric multiprocessor systems in C, C++, and the Java
+can arise when writing multithreaded code for symmetric multiprocessor systems in C, C++, and the Java
 programming language (hereafter referred to simply as “Java” for the sake of
-brevity). It's intended as a primer for Android app developers, not as a complete 
-discussion on the subject. The focus is on the ARM CPU architecture.</p>
-
-<p>If you’re in a hurry, you can skip the <a href="#theory">Theory</a> section
-and go directly to <a href="#practice">Practice</a> for best practices, but this
-is not recommended.</p>
-
+brevity). It's intended as a primer for Android app developers, not as a complete
+discussion on the subject.</p>
 
 <h2 id="intro">Introduction</h2>
 
@@ -89,35 +72,38 @@
 which two or more identical CPU cores share access to main memory.  Until
 a few years ago, all Android devices were UP (Uni-Processor).</p>
 
-<p>Most &mdash; if not all &mdash; Android devices do have multiple CPUs, but generally one
-of them is used to run applications while others manage various bits of device
-hardware (for example, the radio).  The CPUs may have different architectures, and the
-programs running on them can’t use main memory to communicate with each
+<p>Most &mdash; if not all &mdash; Android devices have always had multiple CPUs, but
+in the past only one of them was used to run applications while others managed various bits of device
+hardware (for example, the radio).  The CPUs may have had different architectures, and the
+programs running on them couldn’t use main memory to communicate with each
 other.</p>
 
 <p>Most Android devices sold today are built around SMP designs,
-making things a bit more complicated for software developers.  The sorts of race
-conditions you might encounter in a multi-threaded program are much worse on SMP
-when two or more of your threads are running simultaneously on different cores.
-What’s more, SMP on ARM is more challenging to work with than SMP on x86.  Code
-that has been thoroughly tested on x86 may break badly on ARM.</p>
+making things a bit more complicated for software developers.  Race conditions
+in a multi-threaded program may not cause visible problems on a uniprocessor,
+but may fail regularly when two or more of your threads
+are running simultaneously on different cores.
+What’s more, code may be more or less prone to failures when run on different
+processor architectures, or even on different implementations of the same
+architecture.  Code that has been thoroughly tested on x86 may break badly on ARM.
+Code may start to fail when recompiled with a more modern compiler.</p>
 
 <p>The rest of this document will explain why, and tell you what you need to do
 to ensure that your code behaves correctly.</p>
 
 
-<h2 id="theory">Theory</h2>
+<h2 id="theory">Memory consistency models: Why SMPs are a bit different</h2>
 
 <p>This is a high-speed, glossy overview of a complex subject.  Some areas will
-be incomplete, but none of it should be misleading or wrong.</p>
+be incomplete, but none of it should be misleading or wrong.  As you
+will see in the next section, the details here are usually not important.</p>
 
 <p>See <a href="#more">Further reading</a> at the end of the document for
 pointers to more thorough treatments of the subject.</p>
 
-<h3 id="mem_consistency">Memory consistency models</h3>
-
 <p>Memory consistency models, or often just “memory models”, describe the
-guarantees the hardware architecture makes about memory accesses.  For example,
+guarantees the programming language or hardware architecture
+makes about memory accesses.  For example,
 if you write a value to address A, and then write a value to address B, the
 model might guarantee that every CPU core sees those writes happen in that
 order.</p>
@@ -129,23 +115,26 @@
 
 <ul>
 <li>All memory operations appear to execute one at a time</li>
-<li>All operations on a single processor appear to execute in the order described
+<li>All operations in a single thread appear to execute in the order described
 by that processor's program.</li>
 </ul>
 
+<p>Let's assume temporarily that we have a very simple compiler or interpreter
+that introduces no surprises: It translates
+assignments in the source code to load and store instructions in exactly the
+corresponding order, one instruction per access.  We'll also assume for
+simplicity that each thread executes on its own processor.</p>
+
 <p>If you look at a bit of code and see that it does some reads and writes from
 memory, on a sequentially-consistent CPU architecture you know that the code
 will do those reads and writes in the expected order.  It’s possible that the
 CPU is actually reordering instructions and delaying reads and writes, but there
 is no way for code running on the device to tell that the CPU is doing anything
-other than execute instructions in a straightforward manner.  (We’re ignoring
-memory-mapped device driver I/O for the moment.)</p>
+other than execute instructions in a straightforward manner.  (We’ll ignore
+memory-mapped device driver I/O.)</p>
 
 <p>To illustrate these points it’s useful to consider small snippets of code,
-commonly referred to as <em>litmus tests</em>.  These are assumed to execute in
-<em>program order</em>, that is, the order in which the instructions appear here is
-the order in which the CPU will execute them.  We don’t want to consider
-instruction reordering performed by compilers just yet.</p>
+commonly referred to as <em>litmus tests</em>.</p>
 
 <p>Here’s a simple example, with code running on two threads:</p>
 
@@ -205,19 +194,80 @@
 the reads or the writes would have to happen out of order.  On a
 sequentially-consistent machine, that can’t happen.</p>
 
-<p>Most uni-processors, including x86 and ARM, are sequentially consistent.
-Most SMP systems, including x86 and ARM, are not.</p>
+<p>Uni-processors, including x86 and ARM, are normally sequentially consistent.
+Threads appear to execute in interleaved fashion, as the OS kernel switches
+between them.  Most SMP systems, including x86 and ARM,
+are not sequentially consistent.  For example, it is common for
+hardware to buffer stores on their way to memory, so that they
+don't immediately reach memory and become visible to other cores.</p>
 
-<h4 id="proc_consistency">Processor consistency</h4>
+<p>The details vary substantially.  For example, x86, though not sequentially
+consistent, still guarantees that reg0 = 5 and reg1 = 0 remains impossible.
+Stores are buffered, but their order is maintained.
+ARM, on the other hand, does not. The order of buffered stores is not
+maintained, and stores may not reach all other cores at the same time.
+These differences are important to assembly programmers.
+However, as we will see below, C, C++, or Java programmers can
+and should program in a way that hides such architectural differences.</p>
 
-<p>x86 SMP provides <em>processor consistency</em>, which is slightly weaker than
-sequential.  While the architecture guarantees that loads are not reordered with
-respect to other loads, and stores are not reordered with respect to other
-stores, it does not guarantee that a store followed by a load will be observed
-in the expected order.</p>
+<p>So far, we've unrealistically assumed that it is only the hardware that
+reorders instructions.  In reality, the compiler also reorders instructions to
+improve performance.  In our example, the compiler might decide that some later
+code in Thread 2 needed the value of reg1 before it needed reg0, and thus load
+reg1 first.  Or some prior code may already have loaded A, and the compiler
+might decide to reuse that value instead of loading A again.  In either case,
+the loads to reg0 and reg1 might be reordered.</p>
 
-<p>Consider the following example, which is a piece of Dekker’s Algorithm for
-mutual exclusion:</p>
+<p>Reordering accesses to different memory locations,
+either in the hardware, or in the compiler, is
+allowed, since it doesn't affect the execution of a single thread, and
+it can significantly improve performance.  As we will see, with a bit of care,
+we can also prevent it from affecting the results of multithreaded programs.</p>
+
+<p>Since compilers can also reorder memory accesses, this problem is actually
+not new to SMPs.  Even on a uniprocessor, a compiler could reorder the loads to
+reg0 and reg1 in our example, and Thread 1 could be scheduled between the
+reordered instructions. But if our compiler happened to not reorder, we might
+never observe this problem.  On most ARM SMPs, even without compiler
+reordering, the reordering will probably be seen, possibly after a very large
+number of successful executions.  Unless you're programming in assembly
+language, SMPs generally just make it more likely you'll see problems that were
+there all along.</p>
+
+<h2 id="racefree">Data-race-free programming</h2>
+
+<p>Fortunately, there is usually an easy way to avoid thinking about any of
+these details.  If you follow some straightforward rules, it's usually safe
+to forget all of the preceding section except the "sequential consistency" part.
+Unfortunately, the other complications may become visible if you
+accidentally violate those rules.</p>
+
+<p>Modern programming languages encourage what's known as a "data-race-free"
+programming style.  So long as you promise not to introduce "data races",
+and avoid a handful of constructs that tell the compiler otherwise, the compiler
+and hardware promise to provide sequentially consistent results.  This doesn't
+really mean they avoid memory access reordering.  It does mean that if you
+follow the rules you won't be able to tell that memory accesses are being
+reordered.  It's a lot like telling you that sausage is a delicious and
+appetizing food, so long as you promise not to visit the
+sausage factory.  Data races are what expose the ugly truth about memory
+reordering.</p>
+
+<h3 id="dataraces">What's a "data race"?</h3>
+
+<p>A <i>data race</i> occurs when at least two threads simultaneously access
+the same ordinary data, and at least one of them modifies it.  By "ordinary
+data" we mean something that's not specifically a synchronization object
+intended for thread communication.  Mutexes, condition variables, Java
+volatiles, or C++ atomic objects are not ordinary data, and their accesses
+are allowed to race.  In fact they are used to prevent data races on other
+objects.</p>
+
+<p>In order to determine whether two threads simultaneously access the same
+memory location, we can ignore the memory-reordering discussion from above, and
+assume sequential consistency.  The following program doesn't have a data race
+if <code>A</code> and <code>B</code> are ordinary boolean variables that are
+initially false:</p>
 
 <table>
 <tr>
@@ -225,28 +275,91 @@
 <th>Thread 2</th>
 </tr>
 <tr>
-<td><code>A = true<br />
-reg1 = B<br />
-if (reg1 == false)<br />
-&nbsp;&nbsp;&nbsp;&nbsp;<em>critical-stuff</em></code></td>
-<td><code>B = true<br />
-reg2 = A<br />
-if (reg2 == false)<br />
-&nbsp;&nbsp;&nbsp;&nbsp;<em>critical-stuff</em></code></td>
+<td><code>if (A) B = true</code></td>
+<td><code>if (B) A = true</code></td>
 </tr>
 </table>
 
-<p>The idea is that thread 1 uses A to indicate that it’s busy, and thread 2
-uses B.  Thread 1 sets A and then checks to see if B is set; if not, it can
-safely assume that it has exclusive access to the critical section.  Thread 2
-does something similar.  (If a thread discovers that both A and B are set, a
-turn-taking algorithm is used to ensure fairness.)</p>
+<p>Since operations are not reordered, both conditions will evaluate to false, and
+neither variable is ever updated.  Thus there cannot be a data race.  There is
+no need to think about what might happen if the load from <code>A</code>
+and store to <code>B</code> in
+Thread 1 were somehow reordered.  The compiler is not allowed to reorder Thread
+1 by rewriting it as "<code>B = true; if (!A) B = false</code>".  That would be
+like making sausage in the middle of town in broad daylight.</p>
 
-<p>On a sequentially-consistent machine, this works correctly.  On x86 and ARM
-SMP, the store to A and the load from B in thread 1 can be “observed” in a
-different order by thread 2.  If that happened, we could actually appear to
-execute this sequence (where blank lines have been inserted to highlight the
-apparent order of operations):</p>
+<p>Data races are officially defined on basic built-in types like integers and
+references or pointers.  Assigning to an <code>int</code> while simultaneously
+reading it in another thread is clearly a data race.  But both the C++
+standard library and
+the Java Collections libraries are written to allow you to also reason about
+data races at the library level.  They promise to not introduce data races
+unless there are concurrent accesses to the same container, at least one of
+which updates it.  Updating a <code>set&lt;T&gt;</code> in one thread while
+simultaneously reading it in another allows the library to introduce a
+data race, and can thus be thought of informally as a "library-level data race".
+Conversely, updating one <code>set&lt;T&gt;</code> in one thread, while reading
+a different one in another, does not result in a data race, because the
+library promises not to introduce a (low-level) data race in that case.</p>
+
+<p>Normally concurrent accesses to different fields in a data structure
+cannot introduce a data race.  However, there is one important exception to
+this rule: Contiguous sequences of bit-fields in C or C++ are treated as
+a single "memory location".  Accessing any bit-field in such a sequence
+is treated as accessing all of them for purposes of determining the
+existence of a data race.  This reflects the inability of common hardware
+to update individual bits without also reading and re-writing adjacent bits.
+Java programmers have no analogous concerns.</p>
+
+<h3 id="avoiding">Avoiding data races</h3>
+
+<p>Modern programming languages provide a number of synchronization
+mechanisms to avoid data races.  The most basic tools are:</p>
+
+<dl>
+<dt>Locks or Mutexes</dt>
+
+<dd>Mutexes (C++11 <code>std::mutex</code>, or <code>pthread_mutex_t</code>), or
+<code>synchronized</code> blocks in Java can be used to ensure that certain
+sections of code do not run concurrently with other sections of code accessing
+the same data.  We'll refer to these and other similar facilities generically
+as "locks."  Consistently acquiring a specific lock before accessing a shared
+data structure and releasing it afterwards, prevents data races when accessing
+the data structure.  It also ensures that updates and accesses are atomic, i.e. no
+other update to the data structure can run in the middle.  This is deservedly
+by far the most common tool for preventing data races. The use of Java
+<code>synchronized</code> blocks or C++ <code>lock_guard</code>
+or <code>unique_lock</code> ensures that locks are properly released in the
+event of an exception.
+</dd>
+
+<dt>Volatile/atomic variables</dt>
+
+<dd>Java provides <code>volatile</code> fields that support concurrent access
+without introducing data races.  Since 2011, C and C++ support
+<code>atomic</code> variables and fields with similar semantics.  These are
+typically more difficult to use than locks, since they only ensure that
+individual accesses to a single variable are atomic.  (In C++ this normally
+extends to simple read-modify-write operations, like increments.  Java
+requires special method calls for that.)
+Unlike locks, <code>volatile</code> or <code>atomic</code> variables can't
+be used directly to prevent other threads from interfering with longer code sequences.</dd>
+
+</dl>
+
+<p>It's important to note that <code>volatile</code> has very different
+meanings in C++ and Java.  In C++, <code>volatile</code> does not prevent data
+races, though older code often uses it as a workaround for the lack of
+<code>atomic</code> objects.  This is no longer recommended; in
+C++, use <code>atomic&lt;T&gt;</code> for variables that can be concurrently
+accessed by multiple threads.  C++ <code>volatile</code> is meant for
+device registers and the like.</p>
+
+<p>C/C++ <code>atomic</code> variables or Java <code>volatile</code> variables
+can be used to prevent data races on other variables.  If <code>flag</code> is
+declared to have type <code>atomic&lt;bool&gt;</code>
+or <code>atomic_bool</code> (C/C++) or <code>volatile boolean</code> (Java),
+and is initially false, then the following snippet is data-race-free:</p>
 
 <table>
 <tr>
@@ -254,800 +367,92 @@
 <th>Thread 2</th>
 </tr>
 <tr>
-<td><code>reg1 = B<br />
-<br />
-<br />
-A = true<br />
-if (reg1 == false)<br />
-&nbsp;&nbsp;&nbsp;&nbsp;<em>critical-stuff</em></code></td>
-
-<td><code><br />
-B = true<br />
-reg2 = A<br />
-<br />
-if (reg2 == false)<br />
-&nbsp;&nbsp;&nbsp;&nbsp;<em>critical-stuff</em></code></td>
+<td><code>A = ...<br />
+&nbsp;&nbsp;flag = true</code>
+</td>
+<td><code>while (!flag) {}<br />
+... = A</code>
+</td>
 </tr>
 </table>
 
-<p>This results in both reg1 and reg2 set to “false”, allowing the threads to
-execute code in the critical section simultaneously.  To understand how this can
-happen, it’s useful to know a little about CPU caches.</p>
+<p>Since Thread 2 waits for <code>flag</code> to be set, the access to
+<code>A</code> in Thread 2 must happen after, and not concurrently with, the
+assignment to <code>A</code> in Thread 1.  Thus there is no data race on
+<code>A</code>.  The race on <code>flag</code> doesn't count as a data race,
+since volatile/atomic accesses are not "ordinary memory accesses".</p>
 
-<h4 id="cpu_cache">CPU cache behavior</h4>
+<p>The implementation is required to prevent or hide memory reordering
+sufficiently to make code like the preceding litmus test behave as expected.
+This normally makes volatile/atomic memory accesses
+substantially more expensive than ordinary accesses.</p>
 
-<p>This is a substantial topic in and of itself.  An extremely brief overview
-follows.  (The motivation for this material is to provide some basis for
-understanding why SMP systems behave as they do.)</p>
+<p>Although the preceding example is data-race-free, locks together with
+<code>Object.wait()</code> in Java or condition variables in C/C++ usually
+provide a better solution that does not involve waiting in a loop while
+draining battery power.</p>
 
-<p>Modern CPUs have one or more caches between the processor and main memory.
-These are labeled L1, L2, and so on, with the higher numbers being successively
-“farther” from the CPU.  Cache memory adds size and cost to the hardware, and
-increases power consumption, so the ARM CPUs used in Android devices typically
-have small L1 caches and little or no L2/L3.</p>
+<h3 id="reordering">When memory reordering becomes visible</h3>
 
-<p>Loading or storing a value into the L1 cache is very fast.  Doing the same to
-main memory can be 10-100x slower.  The CPU will therefore try to operate out of
-the cache as much as possible.  The <em>write policy</em> of a cache determines when data
-written to it is forwarded to main memory.  A <em>write-through</em> cache will initiate
-a write to memory immediately, while a <em>write-back</em> cache will wait until it runs
-out of space and has to evict some entries.  In either case, the CPU will
-continue executing instructions past the one that did the store, possibly
-executing dozens of them before the write is visible in main memory.  (While the
-write-through cache has a policy of immediately forwarding the data to main
-memory, it only <strong>initiates</strong> the write.  It does not have to wait
-for it to finish.)</p>
-
-<p>The cache behavior becomes relevant to this discussion when each CPU core has
-its own private cache.  In a simple model, the caches have no way to interact
-with each other directly.  The values held by core #1’s cache are not shared
-with or visible to core #2’s cache except as loads or stores from main memory.
-The long latencies on memory accesses would make inter-thread interactions
-sluggish, so it’s useful to define a way for the caches to share data.  This
-sharing is called <em>cache coherency</em>, and the coherency rules are defined
-by the CPU architecture’s <em>cache consistency model</em>.</p>
-
-<p>With that in mind, let’s return to the Dekker example.  When core 1 executes
-“A = 1”, the value gets stored in core 1’s cache.  When core 2 executes “if (A
-== 0)”, it might read from main memory or it might read from core 2’s cache;
-either way it won’t see the store performed by core 1.  (“A” could be in core
-2’s cache because of a previous load from “A”.)</p>
-
-<p>For the memory consistency model to be sequentially consistent, core 1 would
-have to wait for all other cores to be aware of “A = 1” before it could execute
-“if (B == 0)” (either through strict cache coherency rules, or by disabling the
-caches entirely so everything operates out of main memory).  This would impose a
-performance penalty on every store operation.  Relaxing the rules for the
-ordering of stores followed by loads improves performance but imposes a burden
-on software developers.</p>
-
-<p>The other guarantees made by the processor consistency model are less
-expensive to make.  For example, to ensure that memory writes are not observed
-out of order, it just needs to ensure that the stores are published to other
-cores in the same order that they were issued.  It doesn’t need to wait for
-store #1 to <strong>finish</strong> being published before it can start on store
-#2, it just needs to ensure that it doesn’t finish publishing #2 before it
-finishes publishing #1.  This avoids a performance bubble.</p>
-
-<p>Relaxing the guarantees even further can provide additional opportunities for
-CPU optimization, but creates more opportunities for code to behave in ways the
-programmer didn’t expect.</p>
-
-<p>One additional note: CPU caches don’t operate on individual bytes.  Data is
-read or written as <em>cache lines</em>; for many ARM CPUs these are 32 bytes.  If you
-read data from a location in main memory, you will also be reading some adjacent
-values.  Writing data will cause the cache line to be read from memory and
-updated.  As a result, you can cause a value to be loaded into cache as a
-side-effect of reading or writing something nearby, adding to the general aura
-of mystery.</p>
-
-<h4 id="observability">Observability</h4>
-
-<p>Before going further, it’s useful to define in a more rigorous fashion what
-is meant by “observing” a load or store.  Suppose core 1 executes “A = 1”.  The
-store is <em>initiated</em> when the CPU executes the instruction.  At some
-point later, possibly through cache coherence activity, the store is
-<em>observed</em> by core 2.  In a write-through cache it doesn’t really
-<em>complete</em> until the store arrives in main memory, but the memory
-consistency model doesn’t dictate when something completes, just when it can be
-<em>observed</em>.</p>
-
-
-<p>(In a kernel device driver that accesses memory-mapped I/O locations, it may
-be very important to know when things actually complete.  We’re not going to go
-into that here.)</p>
-
-<p>Observability may be defined as follows:</p>
-
-<ul>
-<li>"A write to a location in memory is said to be observed by an observer Pn
-when a subsequent read of the location by Pn would return the value written by
-the write."</li>
-<li>"A read of a location in memory is said to be observed by an observer Pm
-when a subsequent write to the location by Pm would have no effect on the value
-returned by the read." <span style="font-size:.9em;color:#777">(<em><a
-href="#more" style="color:#777">Reasoning about the ARM weakly consistent memory
-model</a></em>)</span></li>
-</ul>
-
-
-<p>A less formal way to describe it (where “you” and “I” are CPU cores) would be:</p>
-
-<ul>
-<li>I have observed your write when I can read what you wrote</li>
-<li>I have observed your read when I can no longer affect the value you read</li>
-</ul>
-
-<p>The notion of observing a write is intuitive; observing a read is a bit less
-so (don’t worry, it grows on you).</p>
-
-<p>With this in mind, we’re ready to talk about ARM.</p>
-
-<h4 id="ordering">ARM's weak ordering</h4>
-
-<p>ARM SMP provides weak memory consistency guarantees.  It does not guarantee that
-loads or stores are ordered with respect to each other.</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-</tr>
-<tr>
-<td><code>A = 41<br />
-B = 1    // “A is ready”</code></td>
-<td><code>loop_until (B == 1)<br />
-reg = A</code></td>
-</tr>
-</table>
-
-<p>Recall that all addresses are initially zero.  The “loop_until” instruction
-reads B repeatedly, looping until we read 1 from B.  The idea here is that
-thread 2 is waiting for thread 1 to update A.  Thread 1 sets A, and then sets B
-to 1 to indicate data availability.</p>
-
-<p>On x86 SMP, this is guaranteed to work.  Thread 2 will observe the stores
-made by thread 1 in program order, and thread 1 will observe thread 2’s loads in
-program order.</p>
-
-<p>On ARM SMP, the loads and stores can be observed in any order.  It is
-possible, after all the code has executed, for reg to hold 0.  It’s also
-possible for it to hold 41.  Unless you explicitly define the ordering, you
-don’t know how this will come out.</p>
-
-<p>(For those with experience on other systems, ARM’s memory model is equivalent
-to PowerPC in most respects.)</p>
-
-
-<h3 id="datamem_barriers">Data memory barriers</h3>
-
-<p>Memory barriers provide a way for your code to tell the CPU that memory
-access ordering matters.  ARM/x86 uniprocessors offer sequential consistency,
-and thus have no need for them.  (The barrier instructions can be executed but
-aren’t useful; in at least one case they’re hideously expensive, motivating
-separate builds for SMP targets.)</p>
-
-<p>There are four basic situations to consider:</p>
+<p>Data-race-free programming normally saves us from having to explicitly deal
+with memory access reordering issues.  However, there are several cases in
+which reordering does become visible:</p>
 
 <ol>
-<li>store followed by another store</li>
-<li>load followed by another load</li>
-<li>load followed by store</li>
-<li>store followed by load</li>
+<li> If your program has a bug resulting in an unintentional data race,
+compiler and hardware transformations can become visible, and the behavior
+of your program may be surprising.  For example, if we forgot to declare
+<code>flag</code> volatile in the preceding example, Thread 2 may see an
+uninitialized <code>A</code>.  Or the compiler may decide that <code>flag</code> can't
+possibly change during Thread 2's loop and transform the program to:
+
+<table>
+<tr>
+<th>Thread 1</th>
+<th>Thread 2</th>
+</tr>
+<tr>
+<td><code>A = ...<br />
+&nbsp;&nbsp;flag = true</code>
+</td>
+<td><code>reg0 = flag;<br />
+while (!reg0) {}<br />
+... = A</code>
+</td>
+</tr>
+</table>
+
+When you debug, you may well see the loop continuing forever in spite of
+the fact that <code>flag</code> is true.</li>
+
+<li> C++ provides facilities for explicitly relaxing
+sequential consistency even if there are no races.  Atomic operations
+can take explicit <code>memory_order_</code>... arguments.  Similarly, the
+<code>java.util.concurrent.atomic</code> package provides a more restricted
+set of similar facilities, notably <code>lazySet()</code>.  And Java
+programmers occasionally use intentional data races for similar effect.
+All of these provide performance improvements at a large
+cost in programming complexity.  We discuss them only briefly
+<a href="#weak">below</a>.</li>
+
+<li> Some C and C++ code is written in an older style, not entirely
+consistent with current language standards, in which <code>volatile</code>
+variables are used instead of <code>atomic</code> ones, and memory reordering
+is explicitly disallowed by inserting so-called <i>fences</i> or
+<i>barriers</i>.  This requires explicit reasoning about access
+reordering and understanding of hardware memory models.  A coding style
+along these lines is still used in the Linux kernel.  It should not
+be used in new Android applications, and is also not further discussed here.
+</li>
 </ol>
 
-<h4 id="ss_ll">Store/store and load/load</h4>
-
-<p>Recall our earlier example:</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-</tr>
-<tr>
-<td><code>A = 41<br />
-B = 1    // “A is ready”</code></td>
-<td><code>loop_until (B == 1)<br />
-reg = A</code></td>
-</tr>
-</table>
-
-
-<p>Thread 1 needs to ensure that the store to A happens before the store to B.
-This is a “store/store” situation.  Similarly, thread 2 needs to ensure that the
-load of B happens before the load of A; this is a load/load situation.  As
-mentioned earlier, the loads and stores can be observed in any order.</p>
-
-<div style="padding:.5em 2em;">
-<div style="border-left:4px solid #ccc;padding:0 1em;font-style:italic;">
-<p>Going back to the cache discussion, assume A and B are on separate cache
-lines, with minimal cache coherency.  If the store to A stays local but the
-store to B is published, core 2 will see B=1 but won’t see the update to A.  On
-the other side, assume we read A earlier, or it lives on the same cache line as
-something else we recently read.  Core 2 spins until it sees the update to B,
-then loads A from its local cache, where the value is still zero.</p>
-</div>
-</div>
-
-<p>We can fix it like this:</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-</tr>
-<tr>
-<td><code>A = 41<br />
-<em>store/store barrier</em><br />
-B = 1    // “A is ready”</code></td>
-<td><code>loop_until (B == 1)<br />
-<em>load/load barrier</em><br />
-reg = A</code></td>
-</tr>
-</table>
-
-<p>The store/store barrier guarantees that <strong>all observers</strong> will
-observe the write to A before they observe the write to B.  It makes no
-guarantees about the ordering of loads in thread 1, but we don’t have any of
-those, so that’s okay.  The load/load barrier in thread 2 makes a similar
-guarantee for the loads there.</p>
-
-<p>Since the store/store barrier guarantees that thread 2 observes the stores in
-program order, why do we need the load/load barrier in thread 2?  Because we
-also need to guarantee that thread 1 observes the loads in program order.</p>
-
-<div style="padding:.5em 2em;">
-<div style="border-left:4px solid #ccc;padding:0 1em;font-style:italic;">
-<p>The store/store barrier could work by flushing all
-dirty entries out of the local cache, ensuring that other cores see them before
-they see any future stores.  The load/load barrier could purge the local cache
-completely and wait for any “in-flight” loads to finish, ensuring that future
-loads are observed after previous loads.  What the CPU actually does doesn’t
-matter, so long as the appropriate guarantees are kept.  If we use a barrier in
-core 1 but not in core 2, core 2 could still be reading A from its local
-cache.</p>
-</div>
-</div>
-
-<p>Because the architectures have different memory models, these barriers are
-required on ARM SMP but not x86 SMP.</p>
-
-<h4 id="ls_sl">Load/store and store/load</h4>
-
-<p>The Dekker’s Algorithm fragment shown earlier illustrated the need for a
-store/load barrier.  Here’s an example where a load/store barrier is
-required:</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-</tr>
-<tr>
-<td><code>reg = A<br />
-B = 1    // “I have latched A”</code></td>
-<td><code>loop_until (B == 1)<br />
-A = 41    // update A</code></td>
-</tr>
-</table>
-
-<p>Thread 2 could observe thread 1’s store of B=1 before it observe’s thread 1’s
-load from A, and as a result store A=41 before thread 1 has a chance to read A.
-Inserting a load/store barrier in each thread solves the problem:</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-</tr>
-<tr>
-<td><code>reg = A<br />
-<em>load/store barrier</em><br />
-B = 1    // “I have latched A”</code></td>
-<td><code>loop_until (B == 1)<br />
-<em>load/store barrier</em><br />
-A = 41    // update A</code></td>
-</tr>
-</table>
-
-<div style="padding:.5em 2em;">
-<div style="border-left:4px solid #ccc;padding:0 1em;font-style:italic;">
-<p>A store to local cache may be observed before a load from main memory,
-because accesses to main memory are so much slower.  In this case, assume core
-1’s cache has the cache line for B but not A.  The load from A is initiated, and
-while that’s in progress execution continues.  The store to B happens in local
-cache, and by some means becomes available to core 2 while the load from A is
-still in progress.  Thread 2 is able to exit the loop before it has observed
-thread 1’s load from A.</p>
-
-<p>A thornier question is: do we need a barrier in thread 2?  If the CPU doesn’t
-perform speculative writes, and doesn’t execute instructions out of order, can
-thread 2 store to A before thread 1’s read if thread 1 guarantees the load/store
-ordering?  (Answer: no.)  What if there’s a third core watching A and B?
-(Answer: now you need one, or you could observe B==0 / A==41 on the third core.)
- It’s safest to insert barriers in both places and not worry about the
-details.</p>
-</div>
-</div>
-
-<p>As mentioned earlier, store/load barriers are the only kind required on x86
-SMP.</p>
-
-<h4 id="barrier_inst">Barrier instructions</h4>
-
-<p>Different CPUs provide different flavors of barrier instruction.  For
-example:</p>
-
-<ul>
-<li>Sparc V8 has a “membar” instruction that takes a 4-element bit vector.  The
-four categories of barrier can be specified individually.</li>
-<li>Alpha provides “rmb” (load/load), “wmb” (store/store), and “mb” (full).
-(Trivia: the linux kernel provides three memory barrier functions with these
-names and behaviors.)</li>
-<li>x86 has a variety of options; “mfence” (introduced with SSE2) provides a
-full barrier.</li>
-<li>ARMv7 has “dmb st” (store/store) and “dmb sy” (full).</li>
-</ul>
-
-<p>“Full barrier” means all four categories are included.</p>
-
-<p>It is important to recognize that the only thing guaranteed by barrier
-instructions is ordering.  Do not treat them as cache coherency “sync points” or
-synchronous “flush” instructions.  The ARM “dmb” instruction has no direct
-effect on other cores.  This is important to understand when trying to figure
-out where barrier instructions need to be issued.</p>
-
-
-<h4 id="addr_dep">Address dependencies and causal consistency</h4>
-
-<p><em>(This is a slightly more advanced topic and can be skipped.)</em>
-
-<p>The ARM CPU provides one special case where a load/load barrier can be
-avoided.  Consider the following example from earlier, modified slightly:</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-</tr>
-<tr>
-<td><code>[A+8] = 41<br />
-<em>store/store barrier</em><br />
-B = 1    // “A is ready”</code></td>
-<td><code>loop:<br />
-&nbsp;&nbsp;&nbsp;&nbsp;reg0 = B<br />
-&nbsp;&nbsp;&nbsp;&nbsp;if (reg0 == 0) goto loop<br />
-reg1 = 8<br />
-reg2 = [A + reg1]</code></td>
-</tr>
-</table>
-
-<p>This introduces a new notation.  If “A” refers to a memory address, “A+n”
-refers to a memory address offset by 8 bytes from A.  If A is the base address
-of an object or array, [A+8] could be a field in the object or an element in the
-array.</p>
-
-<p>The “loop_until” seen in previous examples has been expanded to show the load
-of B into reg0.  reg1 is assigned the numeric value 8, and reg2 is loaded from
-the address [A+reg1] (the same location that thread 1 is accessing).</p>
-
-<p>This will not behave correctly because the load from B could be observed
-after the load from [A+reg1].  We can fix this with a load/load barrier after
-the loop, but on ARM we can also just do this:</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-</tr>
-<tr>
-<td><code>[A+8] = 41<br />
-<em>store/store barrier</em><br />
-B = 1    // “A is ready”</code></td>
-<td><code>loop:<br />
-&nbsp;&nbsp;&nbsp;&nbsp;reg0 = B<br />
-&nbsp;&nbsp;&nbsp;&nbsp;if (reg0 == 0) goto loop<br />
-reg1 = 8 <strong>+ (reg0 & 0)</strong><br />
-reg2 = [A + reg1]</code></td>
-</tr>
-</table>
-
-<p>What we’ve done here is change the assignment of reg1 from a constant (8) to
-a value that depends on what we loaded from B.  In this case, we do a bitwise
-AND of the value with 0, which yields zero, which means reg1 still has the value
-8.  However, the ARM CPU believes that the load from [A+reg1] depends upon the
-load from B, and will ensure that the two are observed in program order.</p>
-
-<p>This is called an <em>address dependency</em>.  Address dependencies exist
-when the value returned by a load is used to compute the address of a subsequent
-load or store.  It can let you avoid the need for an explicit barrier in certain
-situations.</p>
-
-<p>ARM does not provide <em>control dependency</em> guarantees.  To illustrate
-this it’s necessary to dip into ARM code for a moment: <span
-style="font-size:.9em;color:#777">(<em><a href="#more"
-style="color:#777">Barrier Litmus Tests and Cookbook</a></em>)</span>.</p>
-
-<pre>
-LDR r1, [r0]
-CMP r1, #55
-LDRNE r2, [r3]
-</pre>
-
-<p>The loads from r0 and r3 may be observed out of order, even though the load
-from r3 will not execute at all if [r0] doesn’t hold 55.  Inserting AND r1, r1,
-#0 and replacing the last instruction with LDRNE r2, [r3, r1] would ensure
-proper ordering without an explicit barrier.  (This is a prime example of why
-you can’t think about consistency issues in terms of instruction execution.
-Always think in terms of memory accesses.)</p>
-
-<p>While we’re hip-deep, it’s worth noting that ARM does not provide <em>causal
-consistency</em>:</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-<th>Thread 3</th>
-</tr>
-<tr>
-<td><code>A = 1</code></td>
-<td><code>loop_until (A == 1)<br />
-B = 1</code></td>
-<td><code>loop:<br />
-&nbsp;&nbsp;reg0 = B<br />
-&nbsp;&nbsp;if (reg0 == 0) goto loop<br />
-reg1 = reg0 & 0<br />
-reg2 = [A+reg1]</code></td>
-</tr>
-</table>
-
-<p>Here, thread 1 sets A, signaling thread 2. Thread 2 sees that and sets B to
-signal thread 3.  Thread 3 sees it and loads from A, using an address dependency
-to ensure that the load of B and the load of A are observed in program
-order.</p>
-
-<p>It’s possible for reg2 to hold zero at the end of this.  The fact that a
-store in thread 1 causes something to happen in thread 2 which causes something
-to happen in thread 3 does not mean that thread 3 will observe the stores in
-that order.  (Inserting a load/store barrier in thread 2 fixes this.)</p>
-
-<h4 id="membarrier_summary">Memory barrier summary</h4>
-
-<p>Barriers come in different flavors for different situations.  While there can
-be performance advantages to using exactly the right barrier type, there are
-code maintenance risks in doing so &mdash; unless the person updating the code
-fully understands it, they might introduce the wrong type of operation and cause
-a mysterious breakage.  Because of this, and because ARM doesn’t provide a wide
-variety of barrier choices, many atomic primitives use full
-barrier instructions when a barrier is required.</p>
-
-<p>The key thing to remember about barriers is that they define ordering.  Don’t
-think of them as a “flush” call that causes a series of actions to happen.
-Instead, think of them as a dividing line in time for operations on the current
-CPU core.</p>
-
-
-<h3 id="atomic_ops">Atomic operations</h3>
-
-<p>Atomic operations guarantee that an operation that requires a series of steps
-always behaves as if it were a single operation.  For example, consider a
-non-atomic increment (“++A”) executed on the same variable by two threads
-simultaneously:</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-</tr>
-<tr>
-<td><code>reg = A<br />
-reg = reg + 1<br />
-A = reg</code></td>
-<td><code>reg = A<br />
-reg = reg + 1<br />
-A = reg</code></td>
-</tr>
-</table>
-
-<p>If the threads execute concurrently from top to bottom, both threads will
-load 0 from A, increment it to 1, and store it back, leaving a final result of
-1.  If we used an atomic increment operation, you would be guaranteed that the
-final result will be 2.</p>
-
-<h4 id="atomic_essentials">Atomic essentials</h4>
-
-<p>The most fundamental operations &mdash; loading and storing 32-bit values
-&mdash; are inherently atomic on ARM so long as the data is aligned on a 32-bit
-boundary.  For example:</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-</tr>
-<tr>
-<td><code>reg = 0x00000000<br />
-A = reg</code></td>
-<td><code>reg = 0xffffffff<br />
-A = reg</code></td>
-</tr>
-</table>
-
-<p>The CPU guarantees that A will hold 0x00000000 or 0xffffffff.  It will never
-hold 0x0000ffff or any other partial “mix” of bytes.</p>
-
-<div style="padding:.5em 2em;">
-<div style="border-left:4px solid #ccc;padding:0 1em;font-style:italic;">
-<p>The atomicity guarantee is lost if the data isn’t aligned.  Misaligned data
-could straddle a cache line, so other cores could see the halves update
-independently.  Consequently, the ARMv7 documentation declares that it provides
-“single-copy atomicity” for all byte accesses, halfword accesses to
-halfword-aligned locations, and word accesses to word-aligned locations.
-Doubleword (64-bit) accesses are <strong>not</strong> atomic, unless the
-location is doubleword-aligned and special load/store instructions are used.
-This behavior is important to understand when multiple threads are performing
-unsynchronized updates to packed structures or arrays of primitive types.</p>
-</div>
-</div>
-
-<p>There is no need for 32-bit “atomic read” or “atomic write” functions on ARM
-or x86.  Where one is provided for completeness, it just does a trivial load or
-store.</p>
-
-<p>Operations that perform more complex actions on data in memory are
-collectively known as <em>read-modify-write</em> (RMW) instructions, because
-they load data, modify it in some way, and write it back.  CPUs vary widely in
-how these are implemented.  ARM uses a technique called “Load Linked / Store
-Conditional”, or LL/SC.</p>
-
-<div style="padding:.5em 2em;">
-<div style="border-left:4px solid #ccc;padding:0 1em;font-style:italic;">
-<p>A <em>linked</em> or <em>locked</em> load reads the data from memory as
-usual, but also establishes a reservation, tagging the physical memory address.
-The reservation is cleared when another core tries to write to that address.  To
-perform an LL/SC, the data is read with a reservation, modified, and then a
-conditional store instruction is used to try to write the data back.  If the
-reservation is still in place, the store succeeds; if not, the store will fail.
-Atomic functions based on LL/SC usually loop, retrying the entire
-read-modify-write sequence until it completes without interruption.</p>
-</div>
-</div>
-
-<p>It’s worth noting that the read-modify-write operations would not work
-correctly if they operated on stale data.  If two cores perform an atomic
-increment on the same address, and one of them is not able to see what the other
-did because each core is reading and writing from local cache, the operation
-won’t actually be atomic.  The CPU’s cache coherency rules ensure that the
-atomic RMW operations remain atomic in an SMP environment.</p>
-
-<p>This should not be construed to mean that atomic RMW operations use a memory
-barrier.  On ARM, atomics have no memory barrier semantics.  While a series of
-atomic RMW operations on a single address will be observed in program order by
-other cores, there are no guarantees when it comes to the ordering of atomic and
-non-atomic operations.</p>
-
-<p>It often makes sense to pair barriers and atomic operations together. The
-next section describes this in more detail.</p>
-
-<h4 id="atomic_barrierpairing">Atomic + barrier pairing</h4>
-
-<p>As usual, it’s useful to illuminate the discussion with an example.  We’re
-going to consider a basic mutual-exclusion primitive called a <em>spin
-lock</em>.  The idea is that a memory address (which we’ll call “lock”)
-initially holds zero.  When a thread wants to execute code in the critical
-section, it sets the lock to 1, executes the critical code, and then changes it
-back to zero when done.  If another thread has already set the lock to 1, we sit
-and spin until the lock changes back to zero.</p>
-
-<p>To make this work we use an atomic RMW primitive called
-<em>compare-and-swap</em>.  The function takes three arguments: the memory
-address, the expected current value, and the new value.  If the value currently
-in memory matches what we expect, it is replaced with the new value, and the old
-value is returned.  If the current value is not what we expect, we don’t change
-anything.  A minor variation on this is called <em>compare-and-set</em>; instead
-of returning the old value it returns a boolean indicating whether the swap
-succeeded.  For our needs either will work, but compare-and-set is slightly
-simpler for examples, so we use it and just refer to it as “CAS”.</p>
-
-<p>The acquisition of the spin lock is written like this (using a C-like
-language):</p>
-
-<pre>do {
-    success = atomic_cas(&lock, 0, 1)
-} while (!success)
-
-full_memory_barrier()
-
-<em>critical-section</em></pre>
-
-<p>If no thread holds the lock, the lock value will be 0, and the CAS operation
-will set it to 1 to indicate that we now have it.  If another thread has it, the
-lock value will be 1, and the CAS operation will fail because the expected
-current value does not match the actual current value.  We loop and retry.
-(Note this loop is on top of whatever loop the LL/SC code might be doing inside
-the atomic_cas function.)</p>
-
-<div style="padding:.5em 2em;">
-<div style="border-left:4px solid #ccc;padding:0 1em;font-style:italic;">
-<p>On SMP, a spin lock is a useful way to guard a small critical section.  If we
-know that another thread is going to execute a handful of instructions and then
-release the lock, we can just burn a few cycles while we wait our turn.
-However, if the other thread happens to be executing on the same core, we’re
-just wasting time because the other thread can’t make progress until the OS
-schedules it again (either by migrating it to a different core or by preempting
-us).  A proper spin lock implementation would optimistically spin a few times
-and then fall back on an OS primitive (such as a Linux futex) that allows the
-current thread to sleep while waiting for the other thread to finish up.  On a
-uniprocessor you never want to spin at all.  For the sake of brevity we’re
-ignoring all this.</p>
-</div>
-</div>
-
-<p>The memory barrier is necessary to ensure that other threads observe the
-acquisition of the lock before they observe any loads or stores in the critical
-section.  Without that barrier, the memory accesses could be observed while the
-lock is not held.</p>
-
-<p>The <code>full_memory_barrier</code> call here actually does
-<strong>two</strong> independent operations.  First, it issues the CPU’s full
-barrier instruction.  Second, it tells the compiler that it is not allowed to
-reorder code around the barrier.  That way, we know that the
-<code>atomic_cas</code> call will be executed before anything in the critical
-section.  Without this <em>compiler reorder barrier</em>, the compiler has a
-great deal of freedom in how it generates code, and the order of instructions in
-the compiled code might be much different from the order in the source code.</p>
-
-<p>Of course, we also want to make sure that none of the memory accesses
-performed in the critical section are observed after the lock is released.  The
-full version of the simple spin lock is:</p>
-
-<pre>do {
-    success = atomic_cas(&lock, 0, 1)   // acquire
-} while (!success)
-full_memory_barrier()
-
-<em>critical-section</em>
-
-full_memory_barrier()
-atomic_store(&lock, 0)                  // release</pre>
-
-<p>We perform our second CPU/compiler memory barrier immediately
-<strong>before</strong> we release the lock, so that loads and stores in the
-critical section are observed before the release of the lock.</p>
-
-<p>As mentioned earlier, the <code>atomic_store</code> operation is a simple
-assignment on ARM and x86.  Unlike the atomic RMW operations, we don’t guarantee
-that other threads will see this value immediately.  This isn’t a problem,
-though, because we only need to keep the other threads <strong>out</strong>. The
-other threads will stay out until they observe the store of 0.  If it takes a
-little while for them to observe it, the other threads will spin a little
-longer, but we will still execute code correctly.</p>
-
-<p>It’s convenient to combine the atomic operation and the barrier call into a
-single function.  It also provides other advantages, which will become clear
-shortly.</p>
-
-
-<h4 id="acq_rel">Acquire and release</h4>
-
-<p>When acquiring the spinlock, we issue the atomic CAS and then the barrier.
-When releasing the spinlock, we issue the barrier and then the atomic store.
-This inspires a particular naming convention: operations followed by a barrier
-are “acquiring” operations, while operations preceded by a barrier are
-“releasing” operations.  (It would be wise to install the spin lock example
-firmly in mind, as the names are not otherwise intuitive.)</p>
-
-<p>Rewriting the spin lock example with this in mind:</p>
-
-<pre>do {
-    success = atomic_<strong>acquire</strong>_cas(&lock, 0, 1)
-} while (!success)
-
-<em>critical-section</em>
-
-atomic_<strong>release</strong>_store(&lock, 0)</pre>
-
-<p>This is a little more succinct and easier to read, but the real motivation
-for doing this lies in a couple of optimizations we can now perform.</p>
-
-<p>First, consider <code>atomic_release_store</code>.  We need to ensure that
-the store of zero to the lock word is observed after any loads or stores in the
-critical section above it.  In other words, we need a load/store and store/store
-barrier.  In an earlier section we learned that these aren’t necessary on x86
-SMP -- only store/load barriers are required.  The implementation of
-<code>atomic_release_store</code> on x86 is therefore just a compiler reorder
-barrier followed by a simple store.  No CPU barrier is required.</p>
-
-<p>The second optimization mostly applies to the compiler (although some CPUs,
-such as the Itanium, can take advantage of it as well).  The basic principle is
-that code can move across acquire and release barriers, but only in one
-direction.</p>
-
-<p>Suppose we have a mix of locally-visible and globally-visible memory
-accesses, with some miscellaneous computation as well:</p>
-
-<pre>local1 = arg1 / 41
-local2 = threadStruct->field2
-threadStruct->field3 = local2
-
-do {
-    success = atomic_acquire_cas(&lock, 0, 1)
-} while (!success)
-
-local5 = globalStruct->field5
-globalStruct->field6 = local5
-
-atomic_release_store(&lock, 0)</pre>
-
-<p>Here we see two completely independent sets of operations.  The first set
-operates on a thread-local data structure, so we’re not concerned about clashes
-with other threads.  The second set operates on a global data structure, which
-must be protected with a lock.</p>
-
-<p>A full compiler reorder barrier in the atomic ops will ensure that the
-program order matches the source code order at the lock boundaries.  However,
-allowing the compiler to interleave instructions can improve performance.  Loads
-from memory can be slow, but the CPU can continue to execute instructions that
-don’t require the result of that load while waiting for it to complete.  The
-code might execute more quickly if it were written like this instead:</p>
-
-<pre>do {
-    success = atomic_acquire_cas(&lock, 0, 1)
-} while (!success)
-
-local2 = threadStruct->field2
-local5 = globalStruct->field5
-local1 = arg1 / 41
-threadStruct->field3 = local2
-globalStruct->field6 = local5
-
-atomic_release_store(&lock, 0)</pre>
-
-<p>We issue both loads, do some unrelated computation, and then execute the
-instructions that make use of the loads.  If the integer division takes less
-time than one of the loads, we essentially get it for free, since it happens
-during a period where the CPU would have stalled waiting for a load to
-complete.</p>
-
-<p>Note that <strong>all</strong> of the operations are now happening inside the
-critical section.  Since none of the “threadStruct” operations are visible
-outside the current thread, nothing else can see them until we’re finished here,
-so it doesn’t matter exactly when they happen.</p>
-
-<p>In general, it is always safe to move operations <strong>into</strong> a
-critical section, but never safe to move operations <strong>out of</strong> a
-critical section.  Put another way, you can migrate code “downward” across an
-acquire barrier, and “upward” across a release barrier.  If the atomic ops used
-a full barrier, this sort of migration would not be possible.</p>
-
-<p>Returning to an earlier point, we can state that on x86 all loads are
-acquiring loads, and all stores are releasing stores.  As a result:</p>
-
-<ul>
-<li>Loads may not be reordered with respect to each other.  You can’t take a
-load and move it “upward” across another load’s acquire barrier.</li>
-<li>Stores may not be reordered with respect to each other, because you can’t
-move a store “downward” across another store’s release barrier.</li>
-<li>A load followed by a store can’t be reordered, because neither instruction
-will tolerate it.</li>
-<li>A store followed by a load <strong>can</strong> be reordered, because each
-instruction can move across the other in that direction.</li>
-</ul>
-
-<p>Hence, you only need store/load barriers on x86 SMP.</p>
-
-<p>Labeling atomic operations with “acquire” or “release” describes not only
-whether the barrier is executed before or after the atomic operation, but also
-how the compiler is allowed to reorder code.</p>
-
 <h2 id="practice">Practice</h2>
 
 <p>Debugging memory consistency problems can be very difficult.  If a missing
-memory barrier causes some code to read stale data, you may not be able to
+lock, <code>atomic</code> or <code>volatile</code> declaration causes
+some code to read stale data, you may not be able to
 figure out why by examining memory dumps with a debugger.  By the time you can
-issue a debugger query, the CPU cores will have all observed the full set of
+issue a debugger query, the CPU cores may have all observed the full set of
 accesses, and the contents of memory and the CPU registers will appear to be in
 an “impossible” state.</p>
 
@@ -1059,51 +464,52 @@
 
 <h4 id="volatile">C/C++ and "volatile"</h4>
 
-<p>When writing single-threaded code, declaring a variable “volatile” can be
-very useful.  The compiler will not omit or reorder accesses to volatile
-locations.  Combine that with the sequential consistency provided by the
-hardware, and you’re guaranteed that the loads and stores will appear to happen
-in the expected order.</p>
+<p>C and C++ <code>volatile</code> declarations are a very special-purpose tool.
+They prevent <i>the compiler</i> from reordering or removing <i>volatile</i>
+accesses.  This can be helpful for code accessing hardware device registers,
+memory mapped to more than one location, or in connection with
+<code>setjmp</code>.  But C and C++ <code>volatile</code>, unlike Java
+<code>volatile</code>, is not designed for thread communication.</p>
 
-<p>However, accesses to volatile storage may be reordered with non-volatile
-accesses, so you have to be careful in multi-threaded uniprocessor environments
-(explicit compiler reorder barriers may be required).  There are no atomicity
-guarantees, and no memory barrier provisions, so “volatile” doesn’t help you at
-all in multi-threaded SMP environments.  The C and C++ language standards are
-being updated to address this with built-in atomic operations.</p>
+<p>In C and C++, accesses to <code>volatile</code>
+data may be reordered with accesses to non-volatile data, and there are no
+atomicity guarantees.  Thus <code>volatile</code> can't be used for sharing data between
+threads in portable code, even on a uniprocessor. C <code>volatile</code> usually does not
+prevent access reordering by the hardware, so by itself it is even less useful in
+multi-threaded SMP environments.  This is the reason C11 and C++11 support
+<code>atomic</code> objects.  You should use those instead.</p>
 
-<p>If you think you need to declare something “volatile”, that is a strong
-indicator that you should be using one of the atomic operations instead.</p>
+<p>A lot of older C and C++ code still abuses <code>volatile</code> for thread
+communication.  This often works correctly for data that fits
+in a machine register, provided it is used either with explicit fences or in cases
+in which memory ordering is not important. But it is not guaranteed to work
+correctly with future compilers.</p>
+
 
 <h4 id="examplesc">Examples</h4>
 
-<p>In most cases you’d be better off with a synchronization primitive (like a
-pthread mutex) rather than an atomic operation, but we will employ the latter to
-illustrate how they would be used in a practical situation.</p>
+<p>In most cases you’d be better off with a lock (like a
+<code>pthread_mutex_t</code> or C++11 <code>std::mutex</code>) rather than an
+atomic operation, but we will employ the latter to illustrate how they would be
+used in a practical situation.</p>
 
-<p>For the sake of brevity we’re ignoring the effects of compiler optimizations
-here &mdash; some of this code is broken even on uniprocessors &mdash; so for
-all of these examples you must assume that the compiler generates
-straightforward code (for example, compiled with gcc -O0).  The fixes presented here do
-solve both compiler-reordering and memory-access-ordering issues, but we’re only
-going to discuss the latter.</p>
 
-<pre>MyThing* gGlobalThing = NULL;
+<pre>MyThing* gGlobalThing = NULL;  // Wrong!  See below.
 
-void initGlobalThing()    // runs in thread 1
+void initGlobalThing()    // runs in Thread 1
 {
     MyStruct* thing = malloc(sizeof(*thing));
     memset(thing, 0, sizeof(*thing));
-    thing->x = 5;
-    thing->y = 10;
+    thing-&gt;x = 5;
+    thing-&gt;y = 10;
     /* initialization complete, publish */
     gGlobalThing = thing;
 }
 
-void useGlobalThing()    // runs in thread 2
+void useGlobalThing()    // runs in Thread 2
 {
     if (gGlobalThing != NULL) {
-        int i = gGlobalThing->x;    // could be 5, 0, or uninitialized data
+        int i = gGlobalThing-&gt;x;    // could be 5, 0, or uninitialized data
         ...
     }
 }</pre>
@@ -1111,162 +517,81 @@
 <p>The idea here is that we allocate a structure, initialize its fields, and at
 the very end we “publish” it by storing it in a global variable.  At that point,
 any other thread can see it, but that’s fine since it’s fully initialized,
-right?  At least, it would be on x86 SMP or a uniprocessor (again, making the
-erroneous assumption that the compiler outputs code exactly as we have it in the
-source).</p>
+right?</p>
 
-<p>Without a memory barrier, the store to <code>gGlobalThing</code> could be observed before
-the fields are initialized on ARM.  Another thread reading from <code>thing->x</code> could
+<p>The problem is that the store to <code>gGlobalThing</code> could be observed
+before the fields are initialized, typically because either the compiler or the
+processor reordered the stores to <code>gGlobalThing</code> and
+<code>thing-&gt;x</code>.  Another thread reading from <code>thing-&gt;x</code> could
 see 5, 0, or even uninitialized data.</p>
 
-<p>This can be fixed by changing the last assignment to:</p>
+<p>The core problem here is a data race on <code>gGlobalThing</code>.
+If Thread 1 calls <code>initGlobalThing()</code> while Thread 2
+calls <code>useGlobalThing()</code>, <code>gGlobalThing</code> can be
+read while being written.</p>
 
-<pre>    atomic_release_store(&gGlobalThing, thing);</pre>
+<p>This can be fixed by declaring <code>gGlobalThing</code> as
+atomic.  In C++11:</p>
 
-<p>That ensures that all other threads will observe the writes in the proper
-order, but what about reads?  In this case we should be okay on ARM, because the
-address dependency rules will ensure that any loads from an offset of
-<code>gGlobalThing</code> are observed after the load of
-<code>gGlobalThing</code>.  However, it’s unwise to rely on architectural
-details, since it means your code will be very subtly unportable.  The complete
-fix also requires a barrier after the load:</p>
+<pre>atomic&lt;MyThing*&gt; gGlobalThing(NULL);</pre>
 
-<pre>    MyThing* thing = atomic_acquire_load(&gGlobalThing);
-    int i = thing->x;</pre>
-
-<p>Now we know the ordering will be correct.  This may seem like an awkward way
-to write code, and it is, but that’s the price you pay for accessing data
-structures from multiple threads without using locks.  Besides, address
-dependencies won’t always save us:</p>
-
-<pre>MyThing gGlobalThing;
-
-void initGlobalThing()    // runs in thread 1
-{
-    gGlobalThing.x = 5;
-    gGlobalThing.y = 10;
-    /* initialization complete */
-    gGlobalThing.initialized = true;
-}
-
-void useGlobalThing()    // runs in thread 2
-{
-    if (gGlobalThing.initialized) {
-        int i = gGlobalThing.x;    // could be 5 or 0
-    }
-}</pre>
-
-<p>Because there is no relationship between the <code>initialized</code> field and the
-others, the reads and writes can be observed out of order.  (Note global data is
-initialized to zero by the OS, so it shouldn’t be possible to read “random”
-uninitialized data.)</p>
-
-<p>We need to replace the store with:</p>
-<pre>    atomic_release_store(&gGlobalThing.initialized, true);</pre>
-
-<p>and replace the load with:</p>
-<pre>    int initialized = atomic_acquire_load(&gGlobalThing.initialized);</pre>
-
-<p>Another example of the same problem occurs when implementing
-reference-counted data structures.  The reference count itself will be
-consistent so long as atomic increment and decrement operations are used, but
-you can still run into trouble at the edges, for example:</p>
-
-<pre>void RefCounted::release()
-{
-    int oldCount = atomic_dec(&mRefCount);
-    if (oldCount == 1) {    // was decremented to zero
-        recycleStorage();
-    }
-}
-
-void useSharedThing(RefCountedThing sharedThing)
-{
-    int localVar = sharedThing->x;
-    sharedThing->release();
-    sharedThing = NULL;    // can’t use this pointer any more
-    doStuff(localVar);    // value of localVar might be wrong
-}</pre>
-
-<p>The <code>release()</code> call decrements the reference count using a
-barrier-free atomic decrement operation.  Because this is an atomic RMW
-operation, we know that it will work correctly.  If the reference count goes to
-zero, we recycle the storage.</p>
-
-<p>The <code>useSharedThing()</code> function extracts what it needs from
-<code>sharedThing</code> and then releases its copy.  However, because we didn’t
-use a memory barrier, and atomic and non-atomic operations can be reordered,
-it’s possible for other threads to observe the read of
-<code>sharedThing->x</code> <strong>after</strong> they observe the recycle
-operation.  It’s therefore possible for <code>localVar</code> to hold a value
-from "recycled" memory, for example a new object created in the same
-location by another thread after <code>release()</code> is called.</p>
-
-<p>This can be fixed by replacing the call to <code>atomic_dec()</code> with
-<code>atomic_release_dec()</code>. The barrier ensures that the reads from
-<code>sharedThing</code> are observed before we recycle the object.</p>
-
-<div style="padding:.5em 2em;">
-<div style="border-left:4px solid #ccc;padding:0 1em;font-style:italic;">
-<p>In most cases the above won’t actually fail, because the “recycle” function
-is likely guarded by functions that themselves employ barriers (libc heap
-<code>free()</code>/<code>delete()</code>, or an object pool guarded by a
-mutex).  If the recycle function used a lock-free algorithm implemented without
-barriers, however, the above code could fail on ARM SMP.</p>
-</div>
-</div>
+<p>This ensures that the writes will become visible to other threads
+in the proper order.  It also prevents some other failure
+modes that are otherwise allowed, but unlikely to occur on real
+Android hardware.  For example, it ensures that we cannot see a
+<code>gGlobalThing</code> pointer that has only been partially written.</p>
 
 <h3 id="j_dont">What not to do in Java</h3>
 
 <p>We haven’t discussed some relevant Java language features, so we’ll take a
 quick look at those first.</p>
 
+<p>Java technically does not require code to be data-race-free.  And there
+is a small amount of very-carefully-written Java code that works correctly
+in the presence of data races.  However, writing such code is extremely
+tricky, and we discuss it only briefly below.  To make matters
+worse, the experts who specified the meaning of such code no longer believe the
+specification is correct.  (The specification is fine for data-race-free
+code.)</p>
+
+<p>For now we will adhere to the data-race-free model, for which Java provides
+essentially the same guarantees as C and C++.  Again, the language provides
+some primitives that explicitly relax sequential consistency, notably the
+<code>lazySet()</code> and <code>weakCompareAndSet()</code> calls
+in <code>java.util.concurrent.atomic</code>.
+As with C and C++, we will ignore these for now.</p>
+
 <h4 id="sync_volatile">Java's "synchronized" and "volatile" keywords</h4>
 
 <p>The “synchronized” keyword provides the Java language’s in-built locking
 mechanism.  Every object has an associated “monitor” that can be used to provide
-mutually exclusive access.</p>
+mutually exclusive access. If two threads try to "synchronize" on the
+same object, one of them will wait until the other completes.</p>
 
-<p>The implementation of the “synchronized” block has the same basic structure
-as the spin lock example: it begins with an acquiring CAS, and ends with a
-releasing store.  This means that compilers and code optimizers are free to
-migrate code into a “synchronized” block.  One practical consequence: you must
-<strong>not</strong> conclude that code inside a synchronized block happens
-after the stuff above it or before the stuff below it in a function.  Going
-further, if a method has two synchronized blocks that lock the same object, and
-there are no operations in the intervening code that are observable by another
-thread, the compiler may perform “lock coarsening” and combine them into a
-single block.</p>
+<p>As we mentioned above, Java's <code>volatile T</code> is the analog of
+C++11's <code>atomic&lt;T&gt;</code>.  Concurrent accesses to
+<code>volatile</code> fields are allowed, and don't result in data races.
+Ignoring <code>lazySet()</code> et al. and data races, it is the Java VM's job to
+make sure that the result still appears sequentially consistent.</p>
 
-<p>The other relevant keyword is “volatile”.  As defined in the specification
-for Java 1.4 and earlier, a volatile declaration was about as weak as its C
-counterpart.  The spec for Java 1.5 was updated to provide stronger guarantees,
-almost to the level of monitor synchronization.</p>
+<p>In particular, if thread 1 writes to a <code>volatile</code> field, and
+thread 2 subsequently reads from that same field and sees the newly written
+value, then thread 2 is also guaranteed to see all writes previously made by
+thread 1.  In terms of memory effect, writing to
+a volatile is analogous to a monitor release, and
+reading from a volatile is like a monitor acquire.</p>
 
-<p>The effects of volatile accesses can be illustrated with an example.  If
-thread 1 writes to a volatile field, and thread 2 subsequently reads from that
-same field, then thread 2 is guaranteed to see that write and all writes
-previously made by thread 1.  More generally, the writes made by
-<strong>any</strong> thread up to the point where it writes the field will be
-visible to thead 2 when it does the read.  In effect, writing to a volatile is
-like a monitor release, and reading from a volatile is like a monitor
-acquire.</p>
-
-<p>Non-volatile accesses may be reorded with respect to volatile accesses in the
-usual ways, for example the compiler could move a non-volatile load or store “above” a
-volatile store, but couldn’t move it “below”.  Volatile accesses may not be
-reordered with respect to each other.  The VM takes care of issuing the
-appropriate memory barriers.</p>
-
-<p>It should be mentioned that, while loads and stores of object references and
-most primitive types are atomic, <code>long</code> and <code>double</code>
-fields are not accessed atomically unless they are marked as volatile.
-Multi-threaded updates to non-volatile 64-bit fields are problematic even on
-uniprocessors.</p>
+<p>There is one notable difference from C++'s <code>atomic</code>:
+If we write <code>volatile int x;</code>
+in Java, then <code>x++</code> is the same as <code>x = x + 1</code>; it
+performs an atomic load, increments the result, and then performs an atomic
+store.  Unlike C++, the increment as a whole is not atomic.
+Atomic increment operations are instead provided by
+the <code>java.util.concurrent.atomic</code> package.</p>
 
 <h4 id="examplesj">Examples</h4>
 
-<p>Here’s a simple, incorrect implementation of a monotonic counter: <span
+<p>Here’s a simple, <em>incorrect</em> implementation of a monotonic counter: <span
 style="font-size:.9em;color:#777">(<em><a href="#more" style="color:#777">Java
 theory and practice: Managing volatility</a></em>)</span>.</p>
 
@@ -1294,23 +619,29 @@
 
 <p>If two threads execute in <code>incr()</code> simultaneously, one of the
 updates could be lost.  To make the increment atomic, we need to declare
-<code>incr()</code> “synchronized”.  With this change, the code will run
-correctly in multi-threaded uniprocessor environments.</p>
+<code>incr()</code> “synchronized”.</p>
 
-<p>It’s still broken on SMP, however.  Different threads might see different
-results from <code>get()</code>, because we’re reading the value with an ordinary load.  We
-can correct the problem by declaring <code>get()</code> to be synchronized.
-With this change, the code is obviously correct.</p>
+<p>It’s still broken however, especially on SMP.  There is still a data race,
+in that <code>get()</code> can access <code>mValue</code> concurrently with
+<code>incr()</code>.  Under Java rules, the <code>get()</code> call can
+appear to be reordered with respect to other code.  For example, if we read two
+counters in a row, the results might appear to be inconsistent
+because the <code>get()</code> calls were reordered, either by the hardware or
+compiler. We can correct the problem by declaring <code>get()</code> to be
+synchronized.  With this change, the code is obviously correct.</p>
 
 <p>Unfortunately, we’ve introduced the possibility of lock contention, which
 could hamper performance.  Instead of declaring <code>get()</code> to be
 synchronized, we could declare <code>mValue</code> with “volatile”.  (Note
-<code>incr()</code> must still use <code>synchronize</code>.)  Now we know that
-the volatile write to <code>mValue</code> will be visible to any subsequent volatile read of
-<code>mValue</code>. <code>incr()</code> will be slightly slower, but
+<code>incr()</code> must still use <code>synchronized</code> since
+<code>mValue++</code> is otherwise not a single atomic operation.)
+This also avoids all data races, so sequential consistency is preserved.
+<code>incr()</code> will be somewhat slower, since it incurs both monitor entry/exit
+overhead, and the overhead associated with a volatile store, but
 <code>get()</code> will be faster, so even in the absence of contention this is
-a win if reads outnumber writes. (See also {@link
-java.util.concurrent.atomic.AtomicInteger}.)</p>
+a win if reads greatly outnumber writes. (See also {@link
+java.util.concurrent.atomic.AtomicInteger} for a way to completely
+remove the synchronized block.)</p>
 
 <p>Here’s another example, similar in form to the earlier C examples:</p>
 
@@ -1335,19 +666,21 @@
     }
 }</pre>
 
-<p>This has the same problem as the C code, namely that the assignment
+<p>This has the same problem as the C code, namely that there is
+a data race on <code>sGoodies</code>.  Thus the assignment
 <code>sGoodies = goods</code> might be observed before the initialization of the
 fields in <code>goods</code>.  If you declare <code>sGoodies</code> with the
-volatile keyword, you can think about the loads as if they were
-<code>atomic_acquire_load()</code> calls, and the stores as if they were
-<code>atomic_release_store()</code> calls.</p>
+<code>volatile</code> keyword, sequential consistency is restored, and things will work
+as expected.</p>
 
-<p>(Note that only the <code>sGoodies</code> reference itself is volatile.  The
-accesses to the fields inside it are not.  The statement <code>z =
+<p>Note that only the <code>sGoodies</code> reference itself is volatile.  The
+accesses to the fields inside it are not.  Once <code>sGoodies</code> is
+<code>volatile</code>, and memory ordering is properly preserved, the fields
+cannot be concurrently accessed. The statement <code>z =
 sGoodies.x</code> will perform a volatile load of <code>MyClass.sGoodies</code>
 followed by a non-volatile load of <code>sGoodies.x</code>.  If you make a local
-reference <code>MyGoodies localGoods = sGoodies</code>, <code>z =
-localGoods.x</code> will not perform any volatile loads.)</p>
+reference <code>MyGoodies localGoods = sGoodies</code>, then a subsequent <code>z =
+localGoods.x</code> will not perform any volatile loads.</p>
 
 <p>A more common idiom in Java programming is the infamous “double-checked
 locking”:</p>
@@ -1375,41 +708,35 @@
 the “synchronized” block on every call, so we only do that part if
 <code>helper</code> is currently null.</p>
 
-<p>This doesn’t work correctly on uniprocessor systems, unless you’re using a
-traditional Java source compiler and an interpreter-only VM.  Once you add fancy
-code optimizers and JIT compilers it breaks down.  See the “‘Double Checked
-Locking is Broken’ Declaration” link in the appendix for more details, or Item
-71 (“Use lazy initialization judiciously”) in Josh Bloch’s <em>Effective Java,
-2nd Edition.</em>.</p>
+<p>This has a data race on the <code>helper</code> field.  It can be
+set concurrently with the <code>helper == null</code> check in another thread.
+</p>
 
-<p>Running this on an SMP system introduces an additional way to fail.  Consider
+<p>To see how this can fail, consider
 the same code rewritten slightly, as if it were compiled into a C-like language
 (I’ve added a couple of integer fields to represent <code>Helper’s</code>
 constructor activity):</p>
 
 <pre>if (helper == null) {
-    // acquire monitor using spinlock
-    while (atomic_acquire_cas(&this.lock, 0, 1) != success)
-        ;
-    if (helper == null) {
-        newHelper = malloc(sizeof(Helper));
-        newHelper->x = 5;
-        newHelper->y = 10;
-        helper = newHelper;
+    synchronized() {
+        if (helper == null) {
+            newHelper = malloc(sizeof(Helper));
+            newHelper-&gt;x = 5;
+            newHelper-&gt;y = 10;
+            helper = newHelper;
+        }
     }
-    atomic_release_store(&this.lock, 0);
+    return helper;
 }</pre>
 
-<p>Now the problem should be obvious: the store to <code>helper</code> is
-happening before the memory barrier, which means another thread could observe
-the non-null value of <code>helper</code> before the stores to the
-<code>x</code>/<code>y</code> fields.</p>
-
-<p>You could try to ensure that the store to <code>helper</code> happens after
-the <code>atomic_release_store()</code> on <code>this.lock</code> by rearranging
-the code, but that won’t help, because it’s okay to migrate code upward &mdash;
-the compiler could move the assignment back above the
-<code>atomic_release_store()</code> to its original position.</p>
+<p>There is nothing to prevent either the hardware or the compiler
+from reordering the store to <code>helper</code> with those to the
+<code>x</code>/<code>y</code> fields.  Another thread could find
+<code>helper</code> non-null but its fields not yet set and ready to use.
+For more details and more failure modes, see the “‘Double Checked
+Locking is Broken’ Declaration” link in the appendix, or Item
+71 (“Use lazy initialization judiciously”) in Josh Bloch’s <em>Effective Java,
+2nd Edition</em>.</p>
 
 <p>There are two ways to fix this:</p>
 <ol>
@@ -1420,125 +747,378 @@
 a minute to convince yourself that this is true.)</li>
 </ol>
 
-<p>This next example illustrates two important issues when using volatile:</p>
+<p>Here is another illustration of <code>volatile</code> behavior:</p>
 
 <pre>class MyClass {
     int data1, data2;
     volatile int vol1, vol2;
 
-    void setValues() {    // runs in thread 1
+    void setValues() {    // runs in Thread 1
         data1 = 1;
         vol1 = 2;
         data2 = 3;
     }
 
-    void useValues1() {    // runs in thread 2
+    void useValues() {    // runs in Thread 2
         if (vol1 == 2) {
             int l1 = data1;    // okay
             int l2 = data2;    // wrong
         }
     }
-    void useValues2() {    // runs in thread 2
-        int dummy = vol2;
-        int l1 = data1;    // wrong
-        int l2 = data2;    // wrong
-    }</pre>
+}</pre>
 
-<p>Looking at <code>useValues1()</code>, if thread 2 hasn’t yet observed the
+<p>Looking at <code>useValues()</code>, if Thread 2 hasn’t yet observed the
 update to <code>vol1</code>, then it can’t know if <code>data1</code> or
 <code>data2</code> has been set yet.  Once it sees the update to
-<code>vol1</code>, it knows that the change to <code>data1</code> is also
-visible, because that was made before <code>vol1</code> was changed.  However,
+<code>vol1</code>, it knows that <code>data1</code> can be safely accessed
+and correctly read without introducing a data race.  However,
 it can’t make any assumptions about <code>data2</code>, because that store was
 performed after the volatile store.</p>
 
-<P>The code in <code>useValues2()</code> uses a second volatile field,
-<code>vol2</code>, in an attempt to force the VM to generate a memory barrier.
-This doesn’t generally work.  To establish a proper “happens-before”
-relationship, both threads need to be interacting with the same volatile field.
-You’d have to know that <code>vol2</code> was set after <code>data1/data2</code>
-in thread 1.  (The fact that this doesn’t work is probably obvious from looking
-at the code; the caution here is against trying to cleverly “cause” a memory
-barrier instead of creating an ordered series of accesses.)</p>
+<p>Note that <code>volatile</code> cannot be used to prevent reordering
+of other memory accesses that race with each other.  It is not guaranteed to
+generate a machine memory fence instruction.  It can be used to prevent
+data races by executing code only when another thread has satisfied a
+certain condition.</p>
 
 <h3 id="bestpractice">What to do</h3>
 
-<h4 id="advice">General advice</h4>
+<p>In C/C++, prefer C++11
+synchronization classes, such as <code>std::mutex</code>.  If not, use
+the corresponding <code>pthread</code> operations.
+These include the proper memory fences, providing correct (sequentially consistent
+unless otherwise specified)
+and efficient behavior on all Android platform versions.  Be sure to use them
+correctly. For example, remember that condition variable waits may spuriously
+return without being signaled, and should thus appear in a loop.</p>
 
-<p>In C/C++, use the <code>pthread</code> operations, like mutexes and
-semaphores.  These include the proper memory barriers, providing correct and
-efficient behavior on all Android platform versions.  Be sure to use them
-correctly, for example be wary of signaling a condition variable without holding the
-corresponding mutex.</p>
-
-<p>It's best to avoid using atomic functions directly. Locking and
-unlocking a pthread mutex require a single atomic operation each if there’s no
+<p>It's best to avoid using atomic functions directly, unless the data structure
+you are implementing is extremely simple, like a counter. Locking and
+unlocking a pthread mutex require a single atomic operation each,
+and often cost less than a single cache miss, if there’s no
 contention, so you’re not going to save much by replacing mutex calls with
-atomic ops.  If you need a lock-free design, you must fully understand the
-concepts in this entire document before you begin (or, better yet, find an
-existing code library that is known to be correct on SMP ARM).</p>
+atomic ops.  Lock-free designs for non-trivial data structures require
+much more care to ensure that higher level operations on the data structure
+appear atomic (as a whole, not just their explicitly atomic pieces).</p>
 
-<p>Be extremely circumspect with "volatile” in C/C++.  It often indicates a
-concurrency problem waiting to happen.</p>
+<p>If you do use atomic operations, relaxing ordering with
+<code>memory_order</code>... or <code>lazySet()</code> may provide performance
+advantages, but requires deeper understanding than we have conveyed so far.
+A large fraction of existing code using
+these is discovered to have bugs after the fact.  Avoid these if possible.
+If your use case doesn't exactly fit one of those in the next section,
+make sure you either are an expert, or have consulted one.</p>
 
-<p>In Java, the best answer is usually to use an appropriate utility class from
+<p>Avoid using <code>volatile</code> for thread communication in C/C++.</p>
+
+<p>In Java, concurrency problems are often best solved by
+using an appropriate utility class from
 the {@link java.util.concurrent} package.  The code is well written and well
 tested on SMP.</p>
 
-<p>Perhaps the safest thing you can do is make your class immutable.  Objects
-from classes like String and Integer hold data that cannot be changed once the
-class is created, avoiding all synchronization issues.  The book <em>Effective
+<p>Perhaps the safest thing you can do is make your objects immutable. Objects
+from classes like Java's String and Integer hold data that cannot be changed once an
+object is created, avoiding all potential for data races on those objects.
+The book <em>Effective
 Java, 2nd Ed.</em> has specific instructions in “Item 15: Minimize Mutability”. Note in
-particular the importance of declaring fields “final" <span
+particular the importance of declaring Java fields “final” <span
 style="font-size:.9em;color:#777">(<a href="#more" style="color:#777">Bloch</a>)</span>.</p>
 
-<p>If neither of these options is viable, the Java “synchronized” statement
-should be used to guard any field that can be accessed by more than one thread.
-If mutexes won’t work for your situation, you should declare shared fields
-“volatile”, but you must take great care to understand the interactions between
-threads.  The volatile declaration won’t save you from common concurrent
-programming mistakes, but it will help you avoid the mysterious failures
-associated with optimizing compilers and SMP mishaps.</p>
+<p>Even if an object is immutable, remember that communicating it to another
+thread without any kind of synchronization is a data race. This can occasionally
+be acceptable in Java (see below), but requires great care, and is likely to result in
+brittle code.  If it's not extremely performance critical, add a
+<code>volatile</code> declaration.  In C++, communicating a pointer or
+reference to an immutable object without proper synchronization,
+like any data race, is a bug.
+In this case, it is reasonably likely to result in intermittent crashes since,
+for example, the receiving thread may see an uninitialized method table
+pointer due to store reordering.</p>
 
-<p>The Java Memory Model guarantees that assignments to final fields are visible
-to all threads once the constructor has finished &mdash; this is what ensures
-proper synchronization of fields in immutable classes.  This guarantee does not
-hold if a partially-constructed object is allowed to become visible to other
-threads.  It is necessary to follow safe construction practices.<span
-style="font-size:.9em;color:#777">(<a href="#more" style="color:#777">Safe
-Construction Techniques in Java</a>)</span>.</p>
+<p>If neither an existing library class, nor an immutable class is
+appropriate, the Java <code>synchronized</code> statement or C++
+<code>lock_guard</code> / <code>unique_lock</code> should be used to guard
+accesses to any field that can be accessed by more than one thread.  If mutexes won’t
+work for your situation, you should declare shared fields
+<code>volatile</code> or <code>atomic</code>, but you must take great care to
+understand the interactions between threads.  These declarations won’t
+save you from common concurrent programming mistakes, but they will help you
+avoid the mysterious failures associated with optimizing compilers and SMP
+mishaps.</p>
 
-<h4 id="sync_guarantees">Synchronization primitive guarantees</h4>
+<p>You should avoid
+"publishing" a reference to an object, i.e. making it available to other
+threads, in its constructor.  This is less critical in C++ or if you stick to
+our "no data races" advice in Java.  But it's always good advice, and becomes
+critical if your Java code is
+run in other contexts in which the Java security model matters, and untrusted
+code may introduce a data race by accessing that "leaked" object reference.
+It's also critical if you choose to ignore our warnings and use some of the techniques
+in the next section.
+See <span style="font-size:.9em;color:#777">(<a href="#more"
+style="color:#777">Safe Construction Techniques in Java</a>)</span> for
+details.</p>
 
-<p>The pthread library and VM make a couple of useful guarantees: all accesses
-previously performed by a thread that creates a new thread are observable by
-that new thread as soon as it starts, and all accesses performed by a thread
-that is exiting are observable when a <code>join()</code> on that thread
-returns.  This means you don’t need any additional synchronization when
-preparing data for a new thread or examining the results of a joined thread.</p>
+<h2 id="weak">A little more about weak memory orders</h2>
 
-<p>Whether or not these guarantees apply to interactions with pooled threads
-depends on the thread pool implementation.</p>
+<p>C++11 and later provide explicit mechanisms for relaxing the sequential
+consistency guarantees for data-race-free programs.  Explicit
+<code>memory_order_relaxed</code>, <code>memory_order_acquire</code> (loads
+only), and <code>memory_order_release</code> (stores only) arguments for atomic
+operations each provide strictly weaker guarantees than the default, typically
+implicit, <code>memory_order_seq_cst</code>.  <code>memory_order_acq_rel</code>
+provides both <code>memory_order_acquire</code> and
+<code>memory_order_release</code> guarantees for atomic read-modify-write
+operations.  <code>memory_order_consume</code> is not yet sufficiently
+well specified or implemented to be useful, and should be ignored for now.</p>
 
-<p>In C/C++, the pthread library guarantees that any accesses made by a thread
-before it unlocks a mutex will be observable by another thread after it locks
-that same mutex.  It also guarantees that any accesses made before calling
-<code>signal()</code> or <code>broadcast()</code> on a condition variable will
-be observable by the woken thread.</p>
+<p>The <code>lazySet</code> methods in <code>java.util.concurrent.atomic</code>
+are similar to C++ <code>memory_order_release</code> stores.  Java's
+ordinary variables are sometimes used as a replacement for
+<code>memory_order_relaxed</code> accesses, though they are actually
+even weaker.  Unlike C++, there is no real mechanism for unordered
+accesses to variables that are declared as <code>volatile</code>.</p>
 
-<p>Java language threads and monitors make similar guarantees for the comparable
-operations.</p>
+<p>You should generally avoid these unless there are pressing performance reasons to
+use them.  On weakly ordered machine architectures like ARM, using them will
+commonly save on the order of a few dozen machine cycles for each atomic operation.
+On x86, the performance win is limited to stores, and likely to be less
+noticeable.
+Somewhat counter-intuitively, the benefit may decrease with larger core counts,
+as the memory system becomes more of a limiting factor.</p>
 
-<h4 id="ccpp_changes">Upcoming changes to C/C++</h4>
+<p>The full semantics of weakly ordered atomics are complicated.
+In general they require
+precise understanding of the language rules, which we will
+not go into here.  For example:
 
-<p>The C and C++ language standards are evolving to include a sophisticated
-collection of atomic operations.  A full matrix of calls for common data types
-is defined, with selectable memory barrier semantics (choose from relaxed,
-consume, acquire, release, acq_rel, seq_cst).</p>
+<ul>
+<li> The compiler or hardware can move <code>memory_order_relaxed</code>
+accesses into (but not out of) a critical section bounded by a lock
+acquisition and release.  This means that two
+<code>memory_order_relaxed</code> stores may become visible out of order,
+even if they are separated by a critical section.
+<li> An ordinary Java variable, when abused as a shared counter, may appear
+to another thread to decrease, even though it is only incremented by a single
+other thread.  But this is not true for C++ atomic
+<code>memory_order_relaxed</code>.
+</ul>
 
-<p>See the <a href="#more">Further Reading</a> section for pointers to the
-specifications.</p>
+<p>With that as a warning,
+here we give a small number of idioms that seem to cover many of the use
+cases for weakly ordered atomics.  Many of these are applicable only to C++.</p>
 
+<h3 id="nonracing">Non-racing accesses</h3>
+<p>It is fairly common that a variable is atomic because it is <em>sometimes</em>
+read concurrently with a write, but not all accesses have this issue.
+For example a variable
+may need to be atomic because it is read outside a critical section, but all
+updates are protected by a lock.  In that case, a read that happens to be
+protected by the same lock
+cannot race, since there cannot be concurrent writes.  In such a case, the
+non-racing access (load in this case), can be annotated with
+<code>memory_order_relaxed</code> without changing the correctness of C++ code.
+The lock implementation already enforces the required memory ordering
+with respect to access by other threads, and <code>memory_order_relaxed</code>
+specifies that essentially no additional ordering constraints need to be
+enforced for the atomic access.</p>
+
+<p>There is no real analog to this in Java.</p>
+
+<h3 id="hint_only">Result is not relied upon for correctness</h3>
+
+<p>When we use a racing load only to generate a hint, it's generally also OK
+to not enforce any memory ordering for the load.  If the value is
+not reliable, we also can't reliably use the result to infer anything about
+other variables. Thus it's OK
+if memory ordering is not guaranteed, and the load is
+supplied with a <code>memory_order_relaxed</code> argument.</p>
+
+<p>A common
+instance of this is the use of C++ <code>compare_exchange</code>
+to atomically replace <code>x</code> by <code>f(x)</code>.
+The initial load of <code>x</code> to compute <code>f(x)</code>
+does not need to be reliable.  If we get it wrong, the
+<code>compare_exchange</code> will fail and we will retry.
+It is fine for the initial load of <code>x</code> to use
+a <code>memory_order_relaxed</code> argument; only memory ordering
+for the actual <code>compare_exchange</code> matters.</p>
+
+<h3 id="unread">Atomically modified but unread data</h3>
+
+<p>Occasionally data is modified in parallel by multiple threads, but
+not examined until the parallel computation is complete.  A good
+example of this is a counter that is atomically incremented (e.g.
+using <code>fetch_add()</code> in C++ or
+<code>atomic_fetch_add_explicit()</code>
+in C) by multiple threads in parallel, but the result of these calls
+is always ignored.  The resulting value is only read at the end,
+after all updates are complete.</p>
+
+<p>In this case, there is no way to tell whether accesses to this data
+were reordered, and hence C++ code may use a <code>memory_order_relaxed</code>
+argument.</p>
+
+<p>Simple event counters are a common example of this. Since it is
+so common, it is worth making some observations about this case:</p>
+
+<ul>
+<li> Use of <code>memory_order_relaxed</code> improves performance,
+but may not address the most important performance issue: Every update
+requires exclusive access to the cache line holding the counter.  This
+results in a cache miss every time a new thread accesses the counter.
+If updates are frequent and alternate between threads, it is much faster
+to avoid updating the shared counter every time by,
+for example, using thread-local counters and summing them at the end.
+<li> This technique is combinable with the previous section: It is possible to
+concurrently read approximate and unreliable values while they are being updated,
+with all operations using <code>memory_order_relaxed</code>.
+But it is important to treat the resulting values as completely unreliable.
+Just because the count appears to have been incremented once does not
+mean another thread can be counted on to have reached the point
+at which the increment has been performed.  The increment may instead have
+been reordered with earlier code.  (As for the similar case we mentioned
+earlier, C++ does guarantee that a second load of such a counter will not
+return a value less than an earlier load in the same thread.  Unless of
+course the counter overflowed.)
+<li> It is common to find code that tries to compute approximate
+counter values by performing individual atomic (or not) reads and writes, but
+not making the increment as a whole atomic.  The usual argument is that
+this is "close enough" for performance counters or the like.
+It's typically not.
+When updates are sufficiently frequent (a case
+you probably care about), a large fraction of the counts are typically
+lost.  On a quad core device, more than half the counts may commonly be lost.
+(Easy exercise: construct a two thread scenario in which the counter is
+updated a million times, but the final counter value is one.)
+</ul>
+
+<h3 id="flag">Simple flag communication</h3>
+
+<p>A <code>memory_order_release</code> store (or read-modify-write operation)
+ensures that if subsequently a <code>memory_order_acquire</code> load
+(or read-modify-write operation) reads the written value, then it will
+also observe any stores (ordinary or atomic) that preceded the
+<code>memory_order_release</code> store.  Conversely, any loads
+preceding the <code>memory_order_release</code> store will not observe any
+stores that followed the <code>memory_order_acquire</code> load.
+Unlike <code>memory_order_relaxed</code>, this allows such atomic operations
+to be used to communicate the progress of one thread to another.</p>
+
+<p>For example, we can rewrite the double-checked locking example
+from above in C++ as</p>
+
+<pre>
+class MyClass {
+  private:
+    atomic&lt;Helper*&gt; helper {nullptr};
+    mutex mtx;
+
+  public:
+    Helper* getHelper() {
+      Helper* myHelper = helper.load(memory_order_acquire);
+      if (myHelper == nullptr) {
+        lock_guard&lt;mutex&gt; lg(mtx);
+        myHelper = helper.load(memory_order_relaxed);
+        if (myHelper == nullptr) {
+          myHelper = new Helper();
+          helper.store(myHelper, memory_order_release);
+        }
+      }
+      return myHelper;
+    }
+};
+</pre>
+
+<p>The acquire load and release store ensure that if we see a non-null
+<code>helper</code>, then we will also see its fields correctly initialized.
+We've also incorporated the prior observation that non-racing loads
+can use <code>memory_order_relaxed</code>.</p>
+
+<p>A Java programmer could conceivably represent <code>helper</code> as a
+<code>java.util.concurrent.atomic.AtomicReference&lt;Helper&gt;</code>
+and use <code>lazySet()</code> as the release store.  The load
+operations would continue to use plain <code>get()</code> calls.</p>
+
+<p>In both cases, our performance tweaking concentrated on the initialization
+path, which is unlikely to be performance critical.
+A more readable compromise might be:</p>
+
+<pre>
+    Helper* getHelper() {
+      Helper* myHelper = helper.load(memory_order_acquire);
+      if (myHelper != nullptr) {
+        return myHelper;
+      }
+      lock_guard&lt;mutex&gt; lg(mtx);
+      if (helper == nullptr) {
+        helper = new Helper();
+      }
+      return helper;
+    }
+</pre>
+
+<p>This provides the same fast path, but resorts to default,
+sequentially-consistent, operations on the non-performance-critical slow
+path.</p>
+
+<p>Even here, <code>helper.load(memory_order_acquire)</code> is
+likely to generate the same code on current Android-supported
+architectures as a plain (sequentially-consistent) reference to
+<code>helper</code>.  Really the most beneficial optimization here
+may be the introduction of <code>myHelper</code> to eliminate a
+second load, though a future compiler might do that automatically.</p>
+
+<p>Acquire/release ordering does not prevent stores from getting visibly
+delayed, and does not ensure that stores become visible to other threads
+in a consistent order.  As a result, it does not support a tricky,
+but fairly common coding pattern exemplified by Dekker's mutual exclusion
+algorithm: All threads first set a flag indicating that they want to do
+something; if a thread <i>t</i> then notices that no other thread is
+trying to do something, it can safely proceed, knowing that there
+will be no interference.  No other thread will be
+able to proceed, since <i>t</i>'s flag is still set.  This fails
+if the flag is accessed using acquire/release ordering, since that doesn't
+prevent making a thread's flag visible to others late, after they have
+erroneously proceeded.  Default <code>memory_order_seq_cst</code>
+does prevent it.</p>
+
+<h3 id="immutable">Immutable fields</h3>
+
+<p> If an object field is initialized on first use and then never changed,
+it may be possible to initialize and subsequently read it using weakly
+ordered accesses.  In C++, it could be declared as <code>atomic</code>
+and accessed using <code>memory_order_relaxed</code> or in Java, it
+could be declared without <code>volatile</code> and accessed without
+special measures.  This requires that all of the following hold:</p>
+
+<ul>
+<li>It should be possible to tell from the value of the field itself
+whether it has already been initialized.  To access the field,
+the fast path test-and-return value should read the field only once.
+In Java the latter is essential.  Even if the field tests as initialized,
+a second load may read the earlier uninitialized value.  In C++
+the "read once" rule is merely good practice.
+<li>Both initialization and subsequent loads must be atomic,
+in that partial updates should not be visible.  For Java, the field
+should not be a <code>long</code> or <code>double</code>.  For C++,
+an atomic assignment is required; constructing it in place will not work, since
+construction of an <code>atomic</code> is not atomic.
+<li>Repeated initializations must be safe, since multiple threads
+may read the uninitialized value concurrently.  In C++, this generally
+follows from the "trivially copyable" requirement imposed for all
+atomic types; types with nested owned pointers would require
+deallocation in the
+copy constructor, and would not be trivially copyable.  For Java,
+certain reference types are acceptable:
+<li>Java references are limited to immutable types containing only final
+fields.  The constructor of the immutable type should not publish
+a reference to the object.  In this case the Java final field rules
+ensure that if a reader sees the reference, it will also see the
+initialized final fields.  C++ has no analog to these rules and
+pointers to owned objects are unacceptable for this reason as well (in
+addition to violating the "trivially copyable" requirements).
+</ul>
 
 <h2 id="closing_notes">Closing Notes</h2>
 
@@ -1547,10 +1127,18 @@
 areas for further exploration:</p>
 
 <ul>
-<li>Learn the definitions of <em>happens-before</em>,
-<em>synchronizes-with</em>, and other essential concepts from the Java Memory
-Model.  (It’s hard to understand what “volatile” really means without getting
-into this.)</li>
+<li>The actual Java and C++ memory models are expressed in terms of a
+<em>happens-before</em> relation that specifies when two actions are guaranteed
+to occur in a certain order.  When we defined a data race, we informally
+talked about two memory accesses happening "simultaneously".
+Officially this is defined as neither one happening before the other.
+It is instructive to learn the actual definitions of <em>happens-before</em>
+and <em>synchronizes-with</em> in the Java or C++ Memory Model.
+Although the intuitive notion of "simultaneously" is generally good
+enough, these definitions are instructive, particularly if you
+are contemplating using weakly ordered atomic operations in C++.
+(The current Java specification only defines <code>lazySet()</code>
+very informally.)</li>
 <li>Explore what compilers are and aren’t allowed to do when reordering code.
 (The JSR-133 spec has some great examples of legal transformations that lead to
 unexpected results.)</li>
@@ -1559,9 +1147,6 @@
 <li>Internalize the recommendations in the Concurrency section of <em>Effective
 Java, 2nd Edition.</em> (For example, you should avoid calling methods that are
 meant to be overridden while inside a synchronized block.)</li>
-<li>Understand what sorts of barriers you can use on x86 and ARM.  (And other
-CPUs for that matter, for example Itanium’s acquire/release instruction
-modifiers.)</li>
 <li>Read through the {@link java.util.concurrent} and {@link
 java.util.concurrent.atomic} APIs to see what's available.  Consider using
 concurrency annotations like <code>@ThreadSafe</code> and
@@ -1573,155 +1158,66 @@
 
 <h2 id="appendix">Appendix</h2>
 
-<h3 id="smp_failure_example">SMP failure example</h3>
-
-<p>This document describes a lot of “weird” things that can, in theory, happen.
-If you’re not convinced that these issues are real, a practical example may be
-useful.</p>
-
-<p>Bill Pugh’s Java memory model <a
-href="http://www.cs.umd.edu/~pugh/java/memoryModel/">web site</a> has a few
-test programs on it.  One interesting test is ReadAfterWrite.java, which does
-the following:</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-</tr>
-<tr>
-<td><code>for (int i = 0; i < ITERATIONS; i++) {<br />
-&nbsp;&nbsp;&nbsp;&nbsp;a = i;<br />
-&nbsp;&nbsp;&nbsp;&nbsp;BB[i] = b;<br />
-}</code></td>
-<td><code>for (int i = 0; i < ITERATIONS; i++) {<br />
-&nbsp;&nbsp;&nbsp;&nbsp;b = i;<br />
-&nbsp;&nbsp;&nbsp;&nbsp;AA[i] = a;<br />
-}</code></td>
-</tr>
-</table>
-
-<p>Where <code>a</code> and <code>b</code> are declared as volatile
-<code>int</code> fields, and <code>AA</code> and <code>BB</code> are ordinary
-integer arrays.
-
-<p>This is trying to determine if the VM ensures that, after a value is written
-to a volatile, the next read from that volatile sees the new value.  The test
-code executes these loops a million or so times, and then runs through afterward
-and searches the results for inconsistencies.</p>
-
-<p>At the end of execution,<code>AA</code> and <code>BB</code> will be full of
-gradually-increasing integers.  The threads will not run side-by-side in a
-predictable way, but we can assert a relationship between the array contents.
-For example, consider this execution fragment:</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-</tr>
-<tr>
-<td><code>(initially a == 1534)<br />
-a = 1535<br />
-BB[1535] = 165<br />
-a = 1536<br />
-BB[1536] = 165<br />
-<br />
-<br />
-<br />
-<br />
-a = 1537<br />
-BB[1537] = 167</code></td>
-<td><code>(initially b == 165)
-<br />
-<br />
-<br />
-<br />
-<br />
-b = 166<br />
-AA[166] = 1536<br />
-b = 167<br />
-AA[167] = 1536<br />
-<br /></code></td>
-</tr>
-</table>
-
-<p>(This is written as if the threads were taking turns executing so that it’s
-more obvious when results from one thread should be visible to the other, but in
-practice that won’t be the case.)</p>
-
-<p>Look at the assignment of <code>AA[166]</code> in thread 2.  We are capturing
-the fact that, at the point where thread 2 was on iteration 166, it can see that
-thread 1 was on iteration 1536.  If we look one step in the future, at thread
-1’s iteration 1537, we expect to see that thread 1 saw that thread 2 was at
-iteration 166 (or later).  <code>BB[1537]</code> holds 167, so it appears things
-are working.</p>
-
-<p>Now suppose we fail to observe a volatile write to <code>b</code>:</p>
-
-<table>
-<tr>
-<th>Thread 1</th>
-<th>Thread 2</th>
-</tr>
-<tr>
-<td><code>(initially a == 1534)<br />
-a = 1535<br />
-BB[1535] = 165<br />
-a = 1536<br />
-BB[1536] = 165<br />
-<br />
-<br />
-<br />
-<br />
-a = 1537<br />
-BB[1537] = 165  // stale b</code></td>
-<td><code>(initially b == 165)<br />
-<br />
-<br />
-<br />
-<br />
-b = 166<br />
-AA[166] = 1536<br />
-b = 167<br />
-AA[167] = 1536</code></td>
-</tr>
-</table>
-
-<p>Now, <code>BB[1537]</code> holds 165, a smaller value than we expected, so we
-know we have a problem.  Put succinctly, for i=166, BB[AA[i]+1] < i.  (This also
-catches failures by thread 2 to observe writes to <code>a</code>, for example if we
-miss an update and assign <code>AA[166] = 1535</code>, we will get
-<code>BB[AA[166]+1] == 165</code>.)</p>
-
-<p>If you run the test program under Dalvik (Android 3.0 “Honeycomb” or later)
-on an SMP ARM device, it will never fail.  If you remove the word “volatile”
-from the declarations of <code>a</code> and <code>b</code>, it will consistently
-fail.  The program is testing to see if the VM is providing sequentially
-consistent ordering for accesses to <code>a</code> and <code>b</code>, so you
-will only see correct behavior when the variables are volatile.  (It will also
-succeed if you run the code on a uniprocessor device, or run it while something
-else is using enough CPU that the kernel doesn’t schedule the test threads on
-separate cores.)</p>
-
-<p>If you run the modified test a few times you will note that it doesn’t fail
-in the same place every time.  The test fails consistently because it performs
-the operations a million times, and it only needs to see out-of-order accesses
-once.  In practice, failures will be infrequent and difficult to locate.  This
-test program could very well succeed on a broken VM if things just happen to
-work out.</p>
-
 <h3 id="sync_stores">Implementing synchronization stores</h3>
 
 <p><em>(This isn’t something most programmers will find themselves implementing,
 but the discussion is illuminating.)</em></p>
 
-<p>Consider once again volatile accesses in Java.  Earlier we made reference to
-their similarities with acquiring loads and releasing stores, which works as a
-starting point but doesn’t tell the full story.</p>
+<p> For small built-in types like <code>int</code>, and hardware supported by
+Android, ordinary load and store instructions ensure that a store
+will be made visible either in its entirety, or not at all, to another
+processor loading the same location.  Thus some basic notion
+of "atomicity" is provided for free.</p>
 
-<p>We start with a fragment of Dekker’s algorithm. Initially both
-<code>flag1</code> and <code>flag2</code> are false:</p>
+<p> As we saw before, this does not suffice.  In order to ensure sequential
+consistency we also need to prevent reordering of operations, and to ensure
+that memory operations become visible to other processes in a consistent
+order.  It turns out that the latter is automatic on Android-supported
+hardware, provided we make judicious choices for enforcing the former,
+so we largely ignore it here.</p>
+
+<p> Order of memory operations is preserved by both preventing reordering
+by the compiler, and preventing reordering by the hardware.  Here we focus
+on the latter.</p>
+
+<p> Memory ordering on current hardware is generally enforced with
+"fence" instructions that
+roughly prevent instructions following the fence from becoming visible
+before instructions preceding the fence. (These are also commonly
+called "barrier" instructions, but that risks confusion with
+<code>pthread_barrier</code>-style barriers, which do much more
+than this.) The precise meaning of
+fence instructions is a fairly complicated topic that has to address
+the way in which guarantees provided by multiple different kinds of fences
+interact, and how these combine with other ordering guarantees usually
+provided by the hardware.  This is a high level overview, so we will
+gloss over these details.</p>
+
+<p> The most basic kind of ordering guarantee is that provided by C++
+<code>memory_order_acquire</code> and <code>memory_order_release</code>
+atomic operations:  Memory operations preceding a release store
+should be visible following an acquire load.  On ARM, this is
+enforced by:</p>
+
+<ul>
+<li>Preceding the store instruction with a suitable fence instruction.
+This prevents all prior memory accesses from being reordered with the
+store instruction.  (It also unnecessarily prevents reordering with
+later store instructions.  ARMv8 provides an alternative that doesn't share
+this problem.)
+<li>Following the load instruction with a suitable fence instruction,
+preventing the load from being reordered with subsequent accesses.
+(And once again providing unneeded ordering with at least earlier loads.)
+</ul>
+
+<p> Together these suffice for C++ acquire/release ordering.
+They are necessary, but not sufficient, for Java <code>volatile</code>
+or C++ sequentially consistent <code>atomic</code>.</p>
+
+<p>To see what else we need, consider the fragment of Dekker’s algorithm
+we briefly mentioned earlier.
+<code>flag1</code> and <code>flag2</code> are C++ <code>atomic</code>
+or Java <code>volatile</code> variables, both initially false.</p>
 
 <table>
 <tr>
@@ -1736,40 +1232,27 @@
 if (flag1 == false)<br />
 &nbsp;&nbsp;&nbsp;&nbsp;<em>critical-stuff</em></code></td>
 </tr>
-</table
+</table>
 
-<p><code>flag1</code> and <code>flag2</code> are declared as volatile boolean
-fields.  The rules for acquiring loads and releasing stores would allow the
-accesses in each thread to be reordered, breaking the algorithm.  Fortunately,
-the JMM has a few things to say here.  Informally:</p>
-
-<ul>
-<li>A write to a volatile field <em>happens-before</em> every subsequent read of that
-same field.  (For this example, it means that if one thread updates a flag, and
-later on the other thread reads that flag, the reader is guaranteed to see the
-write.)</li>
-<li>Every execution has a total order over all volatile field accesses.  The
-order is consistent with program order.</li>
-</ul>
-
-<p>Taken together, these rules say that the volatile accesses in our example
-must be observable in program order by all threads. Thus, we will never see
+<p>Sequential consistency implies that one of the assignments to
+<code>flag</code><i>n</i> must be executed first, and be seen by the
+test in the other thread.  Thus, we will never see
 these threads executing the “critical-stuff” simultaneously.</p>
 
-<div style="padding:.5em 2em;">
-<div style="border-left:4px solid #999;padding:0 1em;font-style:italic;">
-<p>Another way to think about this is in terms of <em>data races</em>.  A data race
-occurs if two accesses to the same memory location by different threads are not
-ordered, at least one of them stores to the memory location, and at least one of
-them is not a synchronization action <span style="font-size:.9em;color:#777">(<a
-href="#more" style="color:#777">Boehm and McKenney</a>)</span>. The memory model
-declares that a program free of data races must behave as if executed by a
-sequentially-consistent machine.  Because both <code>flag1</code> and
-<code>flag2</code> are volatile, and volatile accesses are considered
-synchronization actions, there are no data races and this code must execute in a
-sequentially consistent manner.</p>
-</div>
-</div>
+<p>But the fencing required for acquire-release ordering only adds
+fences at the beginning and end of each thread, which doesn't help
+here.  We additionally need to ensure that if a
+<code>volatile</code>/<code>atomic</code> store is followed by
+a <code>volatile</code>/<code>atomic</code> load, the two are not reordered.
+This is normally enforced by adding a fence not just before a
+sequentially consistent store, but also after it.
+(This is again much stronger than required, since this fence typically orders
+all earlier memory accesses with respect to all later ones.  Again ARMv8
+offers a more targeted solution.)</p>
+
+<p>We could instead associate the extra fence with sequentially
+consistent loads.  Since stores are less frequent, the convention
+we described is more common and used on Android.</p>
 
 <p>As we saw in an earlier section, we need to insert a store/load barrier
 between the two operations.  The code executed in the VM for a volatile access
@@ -1782,30 +1265,29 @@
 </tr>
 <tr>
 <td><code>reg = A<br />
-<em>load/load + load/store barrier</em></code></td>
-<td><code><em>store/store barrier</em><br />
+<em>fence for "acquire" (1)</em></code></td>
+<td><code><em>fence for "release" (2)</em><br />
 A = reg<br />
-<em>store/load barrier</em></code></td>
+<em>fence for later atomic load (3)</em></code></td>
 </tr>
 </table>
 
-<p>The volatile load is just an acquiring load.  The volatile store is similar
-to a releasing store, but we’ve omitted load/store from the pre-store barrier,
-and added a store/load barrier afterward.</p>
+<p>Real machine architectures commonly provide multiple types of
+fences, which order different types of accesses and may have
+different cost.  The choice between these is subtle, and influenced
+by the need to ensure that stores are made visible to other cores in
+a consistent order, and that the memory ordering imposed by the
+combination of multiple fences composes correctly.  For more details,
+please see the University of Cambridge page with <a href="#more">
+collected mappings of atomics to actual processors</a>.</p>
 
-<p>What we’re really trying to guarantee, though, is that (using thread 1 as an
-example) the write to flag1 is observed before the read of flag2.  We could
-issue the store/load barrier before the volatile load instead and get the same
-result, but because loads tend to outnumber stores it’s best to associate it
-with the store.</p>
-
-<p>On some architectures, it’s possible to implement volatile stores with an
-atomic operation and skip the explicit store/load barrier. On x86, for example,
-atomics provide a full barrier. The ARM LL/SC operations don’t include a
-barrier, so for ARM we must use explicit barriers.</p>
-
-<p>(Much of this is due to Doug Lea and his “JSR-133 Cookbook for Compiler
-Writers” page.)</p>
+<p>On some architectures, notably x86, the "acquire" and "release"
+barriers are unnecessary, since the hardware always implicitly
+enforces sufficient ordering.  Thus on x86 only the last fence (3)
+is really generated.  Similarly on x86, atomic read-modify-write
+operations implicitly include a strong fence.  Thus these never
+require any fences.  On ARM all fences we discussed above are
+required.</p>
 
 <h3 id="more">Further reading</h3>
 
@@ -1813,7 +1295,7 @@
 
 <dl>
 <dt>Shared Memory Consistency Models: A Tutorial</dt>
-<dd>Written in 1995 by Adve & Gharachorloo, this is a good place to start if you want to dive more deeply into memory consistency models.
+<dd>Written in 1995 by Adve &amp; Gharachorloo, this is a good place to start if you want to dive more deeply into memory consistency models.
 <br /><a href="http://www.hpl.hp.com/techreports/Compaq-DEC/WRL-95-7.pdf">http://www.hpl.hp.com/techreports/Compaq-DEC/WRL-95-7.pdf</a></dd>
 
 <dt>Memory Barriers</dt>
@@ -1822,7 +1304,7 @@
 
 <dt>Threads Basics</dt>
 <dd>An introduction to multi-threaded programming in C++ and Java, by Hans Boehm.  Excellent discussion of data races and basic synchronization methods.
-<br /><a href="http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/threadsintro.html">http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/threadsintro.html</a></dd>
+<br /><a href="http://www.hboehm.info/c++mm/threadsintro.html">http://www.hboehm.info/c++mm/threadsintro.html</a></dd>
 
 <dt>Java Concurrency In Practice</dt>
 <dd>Published in 2006, this book covers a wide range of topics in great detail.  Highly recommended for anyone writing multi-threaded code in Java.
@@ -1830,8 +1312,15 @@
 
 <dt>JSR-133 (Java Memory Model) FAQ</dt>
 <dd>A gentle introduction to the Java memory model, including an explanation of synchronization, volatile variables, and construction of final fields.
+(A bit dated, particularly when it discusses other languages.)
 <br /><a href="http://www.cs.umd.edu/~pugh/java/memoryModel/jsr-133-faq.html">http://www.cs.umd.edu/~pugh/java/memoryModel/jsr-133-faq.html</a></dd>
 
+<dt>Validity of Program Transformations in the Java Memory Model</dt>
+<dd>A rather technical explanation of remaining problems with the
+Java memory model. These issues do not apply to data-race-free
+programs.
+<br /><a href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.112.1790&amp;type=pdf">http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.112.1790&amp;type=pdf</a></dd>
+
 <dt>Overview of package java.util.concurrent</dt>
 <dd>The documentation for the <code>java.util.concurrent</code> package.  Near the bottom of the page is a section entitled “Memory Consistency Properties” that explains the guarantees made by the various classes.
 <br />{@link java.util.concurrent java.util.concurrent} Package Summary</dd>
@@ -1845,25 +1334,36 @@
 <br /><a href="http://www.ibm.com/developerworks/java/library/j-jtp06197.html">http://www.ibm.com/developerworks/java/library/j-jtp06197.html</a></dd>
 
 <dt>The “Double-Checked Locking is Broken” Declaration</dt>
-<dd>Bill Pugh’s detailed explanation of the various ways in which double-checked locking is broken.  Includes C/C++ and Java.
+<dd>Bill Pugh’s detailed explanation of the various ways in which double-checked locking is broken without <code>volatile</code> or <code>atomic</code>.
+Includes C/C++ and Java.
 <br /><a href="http://www.cs.umd.edu/~pugh/java/memoryModel/DoubleCheckedLocking.html">http://www.cs.umd.edu/~pugh/java/memoryModel/DoubleCheckedLocking.html</a></dd>
 
 <dt>[ARM] Barrier Litmus Tests and Cookbook</dt>
-<dd>A discussion of ARM SMP issues, illuminated with short snippets of ARM code.  If you found the examples in this document too un-specific, or want to read the formal description of the DMB instruction, read this.  Also describes the instructions used for memory barriers on executable code (possibly useful if you’re generating code on the fly).
+<dd>A discussion of ARM SMP issues, illuminated with short snippets of ARM code.  If you found the examples in this document too un-specific, or want to read the formal description of the DMB instruction, read this.  Also describes the instructions used for memory barriers on executable code (possibly useful if you’re generating code on the fly).  Note that this predates ARMv8, which also
+supports an additional set of memory ordering instructions.
 <br /><a href="http://infocenter.arm.com/help/topic/com.arm.doc.genc007826/Barrier_Litmus_Tests_and_Cookbook_A08.pdf">http://infocenter.arm.com/help/topic/com.arm.doc.genc007826/Barrier_Litmus_Tests_and_Cookbook_A08.pdf</a></dd>
 
 <dt>Linux Kernel Memory Barriers
 <dd>Documentation for Linux kernel memory barriers.  Includes some useful examples and ASCII art.
 <br/><a href="http://www.kernel.org/doc/Documentation/memory-barriers.txt">http://www.kernel.org/doc/Documentation/memory-barriers.txt</a></dd>
 
-<dt>ISO/IEC JTC1 SC22 WG21 (C++ standards) 14882 (C++ programming language), chapter 29 (“Atomic operations library”)</dt>
-<dd>Draft standard for C++ atomic operation features.
-<br /><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2010/n3090.pdf">http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2010/n3090.pdf</a>
-<br/ >(intro: <a href="http://www.hpl.hp.com/techreports/2008/HPL-2008-56.pdf">http://www.hpl.hp.com/techreports/2008/HPL-2008-56.pdf</a>)</dd>
+<dt>ISO/IEC JTC1 SC22 WG21 (C++ standards) 14882 (C++ programming language), section 1.10 and clause 29 (“Atomic operations library”)</dt>
+<dd>Draft standard for C++ atomic operation features.  This version is
+close to the C++14 standard, which includes minor changes in this area
+from C++11.
+<br /><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4527.pdf">http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4527.pdf</a>
+<br />(intro: <a href="http://www.hpl.hp.com/techreports/2008/HPL-2008-56.pdf">http://www.hpl.hp.com/techreports/2008/HPL-2008-56.pdf</a>)</dd>
 
 <dt>ISO/IEC JTC1 SC22 WG14 (C standards) 9899 (C programming language) chapter 7.16 (“Atomics &lt;stdatomic.h&gt;”)</dt>
-<dd>Draft standard for ISO/IEC 9899-201x C atomic operation features. (See also n1484 for errata.)
-<br /><a href="http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1425.pdf">http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1425.pdf</a></dd>
+<dd>Draft standard for ISO/IEC 9899-201x C atomic operation features.
+For details, also check later defect reports.
+<br /><a href="http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf">http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf</a></dd>
+
+<dt>C/C++11 mappings to processors (University of Cambridge)</dt>
+<dd>Jaroslav Sevcik and Peter Sewell's collection of translations
+of C++ atomics to various common processor instruction sets.
+<br /><a href="http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html">
+http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html</a></dd>
 
 <dt>Dekker’s algorithm</dt>
 <dd>The “first known correct solution to the mutual exclusion problem in concurrent programming”.  The wikipedia article has the full algorithm, with a discussion about how it would need to be updated to work with modern optimizing compilers and SMP hardware.
@@ -1882,10 +1382,16 @@
 <br /><a href="http://portal.acm.org/ft_gateway.cfm?id=1353528&type=pdf&coll=&dl=&CFID=96099715&CFTOKEN=57505711">http://portal.acm.org/ft_gateway.cfm?id=1353528&type=pdf&coll=&dl=&CFID=96099715&CFTOKEN=57505711</a></dd>
 
 <dt>The JSR-133 Cookbook for Compiler Writers</dt>
-<dd>Doug Lea wrote this as a companion to the JSR-133 (Java Memory Model) documentation.  It goes much deeper into the details than most people will need to worry about, but it provides good fodder for contemplation.
+<dd>Doug Lea wrote this as a companion to the JSR-133 (Java Memory Model) documentation.  It contains the initial set of implementation guidelines
+for the Java memory model that was used by many compiler writers, and is
+still widely cited and likely to provide insight.
+Unfortunately, the four fence varieties discussed here are not a good
+match for Android-supported architectures, and the above C++11 mappings
+are now a better source of precise recipes, even for Java.
 <br /><a href="http://g.oswego.edu/dl/jmm/cookbook.html">http://g.oswego.edu/dl/jmm/cookbook.html</a></dd>
 
-<dt>The Semantics of Power and ARM Multiprocessor Machine Code</dt>
-<dd>If you prefer your explanations in rigorous mathematical form, this is a fine place to go next.
-<br /><a href="http://www.cl.cam.ac.uk/~pes20/weakmemory/draft-ppc-arm.pdf">http://www.cl.cam.ac.uk/~pes20/weakmemory/draft-ppc-arm.pdf</a></dd>
+<dt>x86-TSO: A Rigorous and Usable Programmer’s Model for x86 Multiprocessors</dt>
+<dd>A precise description of the x86 memory model.  Precise descriptions of
+the ARM memory model are unfortunately significantly more complicated.
+<br /><a href="http://www.cl.cam.ac.uk/~pes20/weakmemory/cacm.pdf">http://www.cl.cam.ac.uk/~pes20/weakmemory/cacm.pdf</a></dd>
 </dl>
diff --git a/libs/common_time/common_clock_service.h b/libs/common_time/common_clock_service.h
index bd663f0..aea507e 100644
--- a/libs/common_time/common_clock_service.h
+++ b/libs/common_time/common_clock_service.h
@@ -53,7 +53,7 @@
     void notifyOnTimelineChanged(uint64_t timelineID);
 
   private:
-    CommonClockService(CommonTimeServer& timeServer)
+    explicit CommonClockService(CommonTimeServer& timeServer)
         : mTimeServer(timeServer) { };
 
     virtual void binderDied(const wp<IBinder>& who);
diff --git a/libs/common_time/common_time_config_service.h b/libs/common_time/common_time_config_service.h
index 89806dd..23abb1a 100644
--- a/libs/common_time/common_time_config_service.h
+++ b/libs/common_time/common_time_config_service.h
@@ -49,7 +49,7 @@
     virtual status_t forceNetworklessMasterMode();
 
   private:
-    CommonTimeConfigService(CommonTimeServer& timeServer)
+    explicit CommonTimeConfigService(CommonTimeServer& timeServer)
         : mTimeServer(timeServer) { }
     CommonTimeServer& mTimeServer;
 
diff --git a/libs/common_time/common_time_server.cpp b/libs/common_time/common_time_server.cpp
index f72ffaa..b1495ef 100644
--- a/libs/common_time/common_time_server.cpp
+++ b/libs/common_time/common_time_server.cpp
@@ -615,12 +615,11 @@
 
     ssize_t recvBytes = recvfrom(
             mSocket, buf, sizeof(buf), 0,
-            reinterpret_cast<const sockaddr *>(&srcAddr), &srcAddrLen);
+            reinterpret_cast<sockaddr *>(&srcAddr), &srcAddrLen);
 
     if (recvBytes < 0) {
-        mBadPktLog.log(ANDROID_LOG_ERROR, LOG_TAG,
-                       "recvfrom failed (res %d, errno %d)",
-                       recvBytes, errno);
+        mBadPktLog.log(ANDROID_LOG_ERROR, LOG_TAG, "recvfrom failed (%s)",
+                       strerror(errno));
         return false;
     }
 
diff --git a/libs/hwui/Animator.h b/libs/hwui/Animator.h
index 1b3d8e7..cd8d383 100644
--- a/libs/hwui/Animator.h
+++ b/libs/hwui/Animator.h
@@ -77,7 +77,7 @@
     void forceEndNow(AnimationContext& context);
 
 protected:
-    BaseRenderNodeAnimator(float finalValue);
+    explicit BaseRenderNodeAnimator(float finalValue);
     virtual ~BaseRenderNodeAnimator();
 
     virtual float getValue(RenderNode* target) const = 0;
diff --git a/libs/hwui/AssetAtlas.cpp b/libs/hwui/AssetAtlas.cpp
index 2889d2f..037bb0c 100644
--- a/libs/hwui/AssetAtlas.cpp
+++ b/libs/hwui/AssetAtlas.cpp
@@ -29,7 +29,7 @@
 // Lifecycle
 ///////////////////////////////////////////////////////////////////////////////
 
-void AssetAtlas::init(sp<GraphicBuffer> buffer, int64_t* map, int count) {
+void AssetAtlas::init(const sp<GraphicBuffer>& buffer, int64_t* map, int count) {
     if (mImage) {
         return;
     }
diff --git a/libs/hwui/AssetAtlas.h b/libs/hwui/AssetAtlas.h
index f1cd0b4..23bca24 100644
--- a/libs/hwui/AssetAtlas.h
+++ b/libs/hwui/AssetAtlas.h
@@ -111,7 +111,7 @@
      * initialized. To re-initialize the atlas, you must
      * first call terminate().
      */
-    ANDROID_API void init(sp<GraphicBuffer> buffer, int64_t* map, int count);
+    ANDROID_API void init(const sp<GraphicBuffer>& buffer, int64_t* map, int count);
 
     /**
      * Destroys the atlas texture. This object can be
diff --git a/libs/hwui/DisplayListOp.h b/libs/hwui/DisplayListOp.h
index 0d7911c..6be6510 100644
--- a/libs/hwui/DisplayListOp.h
+++ b/libs/hwui/DisplayListOp.h
@@ -108,7 +108,7 @@
 class DrawOp : public DisplayListOp {
 friend class MergingDrawBatch;
 public:
-    DrawOp(const SkPaint* paint)
+    explicit DrawOp(const SkPaint* paint)
             : mPaint(paint), mQuickRejected(false) {}
 
     virtual void defer(DeferStateStruct& deferStruct, int saveCount, int level,
@@ -245,7 +245,7 @@
     }
 
     // default empty constructor for bounds, to be overridden in child constructor body
-    DrawBoundedOp(const SkPaint* paint): DrawOp(paint) { }
+    explicit DrawBoundedOp(const SkPaint* paint): DrawOp(paint) { }
 
     virtual bool getLocalBounds(Rect& localBounds) override {
         localBounds.set(mLocalBounds);
@@ -270,7 +270,7 @@
 
 class SaveOp : public StateOp {
 public:
-    SaveOp(int flags)
+    explicit SaveOp(int flags)
             : mFlags(flags) {}
 
     virtual void defer(DeferStateStruct& deferStruct, int saveCount, int level,
@@ -296,7 +296,7 @@
 
 class RestoreToCountOp : public StateOp {
 public:
-    RestoreToCountOp(int count)
+    explicit RestoreToCountOp(int count)
             : mCount(count) {}
 
     virtual void defer(DeferStateStruct& deferStruct, int saveCount, int level,
@@ -409,7 +409,7 @@
 
 class RotateOp : public StateOp {
 public:
-    RotateOp(float degrees)
+    explicit RotateOp(float degrees)
             : mDegrees(degrees) {}
 
     virtual void applyState(OpenGLRenderer& renderer, int saveCount) const override {
@@ -468,7 +468,7 @@
 
 class SetMatrixOp : public StateOp {
 public:
-    SetMatrixOp(const SkMatrix& matrix)
+    explicit SetMatrixOp(const SkMatrix& matrix)
             : mMatrix(matrix) {}
 
     virtual void applyState(OpenGLRenderer& renderer, int saveCount) const override {
@@ -491,7 +491,7 @@
 
 class SetLocalMatrixOp : public StateOp {
 public:
-    SetLocalMatrixOp(const SkMatrix& matrix)
+    explicit SetLocalMatrixOp(const SkMatrix& matrix)
             : mMatrix(matrix) {}
 
     virtual void applyState(OpenGLRenderer& renderer, int saveCount) const override {
@@ -510,7 +510,7 @@
 
 class ConcatMatrixOp : public StateOp {
 public:
-    ConcatMatrixOp(const SkMatrix& matrix)
+    explicit ConcatMatrixOp(const SkMatrix& matrix)
             : mMatrix(matrix) {}
 
     virtual void applyState(OpenGLRenderer& renderer, int saveCount) const override {
@@ -529,7 +529,7 @@
 
 class ClipOp : public StateOp {
 public:
-    ClipOp(SkRegion::Op op) : mOp(op) {}
+    explicit ClipOp(SkRegion::Op op) : mOp(op) {}
 
     virtual void defer(DeferStateStruct& deferStruct, int saveCount, int level,
             bool useQuickReject) override {
@@ -1396,7 +1396,7 @@
 
 class DrawFunctorOp : public DrawOp {
 public:
-    DrawFunctorOp(Functor* functor)
+    explicit DrawFunctorOp(Functor* functor)
             : DrawOp(nullptr), mFunctor(functor) {}
 
     virtual void applyDraw(OpenGLRenderer& renderer, Rect& dirty) override {
diff --git a/libs/hwui/GammaFontRenderer.h b/libs/hwui/GammaFontRenderer.h
index ca55bf1..7b4d375 100644
--- a/libs/hwui/GammaFontRenderer.h
+++ b/libs/hwui/GammaFontRenderer.h
@@ -91,7 +91,7 @@
     void endPrecaching() override;
 
 private:
-    ShaderGammaFontRenderer(bool multiGamma);
+    explicit ShaderGammaFontRenderer(bool multiGamma);
 
     FontRenderer* mRenderer;
     bool mMultiGamma;
diff --git a/libs/hwui/Image.h b/libs/hwui/Image.h
index 2514535..b8f5a5b 100644
--- a/libs/hwui/Image.h
+++ b/libs/hwui/Image.h
@@ -38,7 +38,7 @@
      * cannot be created, getTexture() will return 0 and getImage() will
      * return EGL_NO_IMAGE_KHR.
      */
-    Image(sp<GraphicBuffer> buffer);
+    explicit Image(sp<GraphicBuffer> buffer);
     ~Image();
 
     /**
diff --git a/libs/hwui/Interpolator.h b/libs/hwui/Interpolator.h
index 66ce119..6512008 100644
--- a/libs/hwui/Interpolator.h
+++ b/libs/hwui/Interpolator.h
@@ -43,7 +43,7 @@
 
 class ANDROID_API AccelerateInterpolator : public Interpolator {
 public:
-    AccelerateInterpolator(float factor) : mFactor(factor), mDoubleFactor(factor*2) {}
+    explicit AccelerateInterpolator(float factor) : mFactor(factor), mDoubleFactor(factor*2) {}
     virtual float interpolate(float input) override;
 private:
     const float mFactor;
@@ -52,7 +52,7 @@
 
 class ANDROID_API AnticipateInterpolator : public Interpolator {
 public:
-    AnticipateInterpolator(float tension) : mTension(tension) {}
+    explicit AnticipateInterpolator(float tension) : mTension(tension) {}
     virtual float interpolate(float input) override;
 private:
     const float mTension;
@@ -60,7 +60,7 @@
 
 class ANDROID_API AnticipateOvershootInterpolator : public Interpolator {
 public:
-    AnticipateOvershootInterpolator(float tension) : mTension(tension) {}
+    explicit AnticipateOvershootInterpolator(float tension) : mTension(tension) {}
     virtual float interpolate(float input) override;
 private:
     const float mTension;
@@ -73,7 +73,7 @@
 
 class ANDROID_API CycleInterpolator : public Interpolator {
 public:
-    CycleInterpolator(float cycles) : mCycles(cycles) {}
+    explicit CycleInterpolator(float cycles) : mCycles(cycles) {}
     virtual float interpolate(float input) override;
 private:
     const float mCycles;
@@ -81,7 +81,7 @@
 
 class ANDROID_API DecelerateInterpolator : public Interpolator {
 public:
-    DecelerateInterpolator(float factor) : mFactor(factor) {}
+    explicit DecelerateInterpolator(float factor) : mFactor(factor) {}
     virtual float interpolate(float input) override;
 private:
     const float mFactor;
@@ -94,7 +94,7 @@
 
 class ANDROID_API OvershootInterpolator : public Interpolator {
 public:
-    OvershootInterpolator(float tension) : mTension(tension) {}
+    explicit OvershootInterpolator(float tension) : mTension(tension) {}
     virtual float interpolate(float input) override;
 private:
     const float mTension;
diff --git a/libs/hwui/Snapshot.cpp b/libs/hwui/Snapshot.cpp
index beb2e1d..3da3a57 100644
--- a/libs/hwui/Snapshot.cpp
+++ b/libs/hwui/Snapshot.cpp
@@ -40,6 +40,7 @@
         , mClipArea(&mClipAreaRoot) {
     transform = &mTransformRoot;
     region = nullptr;
+    mRelativeLightCenter.x = mRelativeLightCenter.y = mRelativeLightCenter.z = 0;
 }
 
 /**
diff --git a/libs/hwui/renderstate/RenderState.h b/libs/hwui/renderstate/RenderState.h
index 4fd792c..ac9445b 100644
--- a/libs/hwui/renderstate/RenderState.h
+++ b/libs/hwui/renderstate/RenderState.h
@@ -101,7 +101,7 @@
     void resumeFromFunctorInvoke();
     void assertOnGLThread();
 
-    RenderState(renderthread::RenderThread& thread);
+    explicit RenderState(renderthread::RenderThread& thread);
     ~RenderState();
 
 
diff --git a/libs/hwui/renderthread/EglManager.h b/libs/hwui/renderthread/EglManager.h
index 0a8cfd3..de932fb 100644
--- a/libs/hwui/renderthread/EglManager.h
+++ b/libs/hwui/renderthread/EglManager.h
@@ -58,7 +58,7 @@
 private:
     friend class RenderThread;
 
-    EglManager(RenderThread& thread);
+    explicit EglManager(RenderThread& thread);
     // EglContext is never destroyed, method is purposely not implemented
     ~EglManager();
 
diff --git a/libs/hwui/thread/TaskManager.cpp b/libs/hwui/thread/TaskManager.cpp
index e9dde29..6a4587e 100644
--- a/libs/hwui/thread/TaskManager.cpp
+++ b/libs/hwui/thread/TaskManager.cpp
@@ -104,7 +104,7 @@
     return true;
 }
 
-bool TaskManager::WorkerThread::addTask(TaskWrapper task) {
+bool TaskManager::WorkerThread::addTask(const TaskWrapper& task) {
     if (!isRunning()) {
         run(mName.string(), PRIORITY_DEFAULT);
     } else if (exitPending()) {
diff --git a/libs/hwui/thread/TaskManager.h b/libs/hwui/thread/TaskManager.h
index 4d3b8f0..7d16581 100644
--- a/libs/hwui/thread/TaskManager.h
+++ b/libs/hwui/thread/TaskManager.h
@@ -77,9 +77,9 @@
 
     class WorkerThread: public Thread {
     public:
-        explicit WorkerThread(const String8 name): mSignal(Condition::WAKE_UP_ONE), mName(name) { }
+        explicit WorkerThread(const String8& name): mSignal(Condition::WAKE_UP_ONE), mName(name) { }
 
-        bool addTask(TaskWrapper task);
+        bool addTask(const TaskWrapper& task);
         size_t getTaskCount() const;
         void exit();
 
diff --git a/libs/hwui/utils/Timing.h b/libs/hwui/utils/Timing.h
index dd8847a..4b1fabe 100644
--- a/libs/hwui/utils/Timing.h
+++ b/libs/hwui/utils/Timing.h
@@ -22,7 +22,7 @@
 #define TIME_METHOD() MethodTimer __method_timer(__func__)
 class MethodTimer {
 public:
-    MethodTimer(const char* name)
+    explicit MethodTimer(const char* name)
             : mMethodName(name) {
         gettimeofday(&mStart, nullptr);
     }
diff --git a/media/java/android/media/midi/IMidiDeviceServer.aidl b/media/java/android/media/midi/IMidiDeviceServer.aidl
index c2cc2b9..d5115de 100644
--- a/media/java/android/media/midi/IMidiDeviceServer.aidl
+++ b/media/java/android/media/midi/IMidiDeviceServer.aidl
@@ -28,7 +28,8 @@
     void closeDevice();
 
     // connects the input port pfd to the specified output port
-    void connectPorts(IBinder token, in ParcelFileDescriptor pfd, int outputPortNumber);
+    // Returns the PID of the called process.
+    int connectPorts(IBinder token, in ParcelFileDescriptor pfd, int outputPortNumber);
 
     MidiDeviceInfo getDeviceInfo();
     void setDeviceInfo(in MidiDeviceInfo deviceInfo);
diff --git a/media/java/android/media/midi/MidiDevice.java b/media/java/android/media/midi/MidiDevice.java
index e1990cd..da44ca6 100644
--- a/media/java/android/media/midi/MidiDevice.java
+++ b/media/java/android/media/midi/MidiDevice.java
@@ -19,6 +19,7 @@
 import android.os.Binder;
 import android.os.IBinder;
 import android.os.ParcelFileDescriptor;
+import android.os.Process;
 import android.os.RemoteException;
 import android.util.Log;
 
@@ -41,6 +42,7 @@
     private final IMidiManager mMidiManager;
     private final IBinder mClientToken;
     private final IBinder mDeviceToken;
+    private boolean mIsDeviceClosed;
 
     private final CloseGuard mGuard = CloseGuard.get();
 
@@ -122,6 +124,9 @@
      *         or null in case of failure.
      */
     public MidiInputPort openInputPort(int portNumber) {
+        if (mIsDeviceClosed) {
+            return null;
+        }
         try {
             IBinder token = new Binder();
             ParcelFileDescriptor pfd = mDeviceServer.openInputPort(token, portNumber);
@@ -145,6 +150,9 @@
      *         or null in case of failure.
      */
     public MidiOutputPort openOutputPort(int portNumber) {
+        if (mIsDeviceClosed) {
+            return null;
+        }
         try {
             IBinder token = new Binder();
             ParcelFileDescriptor pfd = mDeviceServer.openOutputPort(token, portNumber);
@@ -174,16 +182,26 @@
         if (outputPortNumber < 0 || outputPortNumber >= mDeviceInfo.getOutputPortCount()) {
             throw new IllegalArgumentException("outputPortNumber out of range");
         }
+        if (mIsDeviceClosed) {
+            return null;
+        }
 
         ParcelFileDescriptor pfd = inputPort.claimFileDescriptor();
         if (pfd == null) {
             return null;
         }
-         try {
+        try {
             IBinder token = new Binder();
-            mDeviceServer.connectPorts(token, pfd, outputPortNumber);
-            // close our copy of the file descriptor
-            IoUtils.closeQuietly(pfd);
+            int calleePid = mDeviceServer.connectPorts(token, pfd, outputPortNumber);
+            // If the service is in a different Process then it will duplicate the pfd
+            // and we can safely close this one.
+            // But if the service is in the same Process then closing the pfd will
+            // kill the connection. So don't do that.
+            if (calleePid != Process.myPid()) {
+                // close our copy of the file descriptor
+                IoUtils.closeQuietly(pfd);
+            }
+
             return new MidiConnection(token, inputPort);
         } catch (RemoteException e) {
             Log.e(TAG, "RemoteException in connectPorts");
@@ -194,11 +212,14 @@
     @Override
     public void close() throws IOException {
         synchronized (mGuard) {
-            mGuard.close();
-            try {
-                mMidiManager.closeDevice(mClientToken, mDeviceToken);
-            } catch (RemoteException e) {
-                Log.e(TAG, "RemoteException in closeDevice");
+            if (!mIsDeviceClosed) {
+                mGuard.close();
+                mIsDeviceClosed = true;
+                try {
+                    mMidiManager.closeDevice(mClientToken, mDeviceToken);
+                } catch (RemoteException e) {
+                    Log.e(TAG, "RemoteException in closeDevice");
+                }
             }
         }
     }
diff --git a/media/java/android/media/midi/MidiDeviceServer.java b/media/java/android/media/midi/MidiDeviceServer.java
index 19ff624..4c49f67 100644
--- a/media/java/android/media/midi/MidiDeviceServer.java
+++ b/media/java/android/media/midi/MidiDeviceServer.java
@@ -73,6 +73,10 @@
 
     private final Callback mCallback;
 
+    private final HashMap<IBinder, PortClient> mPortClients = new HashMap<IBinder, PortClient>();
+    private final HashMap<MidiInputPort, PortClient> mInputPortClients =
+            new HashMap<MidiInputPort, PortClient>();
+
     public interface Callback {
         /**
          * Called to notify when an our device status has changed
@@ -102,6 +106,10 @@
 
         abstract void close();
 
+        MidiInputPort getInputPort() {
+            return null;
+        }
+
         @Override
         public void binderDied() {
             close();
@@ -152,9 +160,12 @@
             mInputPorts.remove(mInputPort);
             IoUtils.closeQuietly(mInputPort);
         }
-    }
 
-    private final HashMap<IBinder, PortClient> mPortClients = new HashMap<IBinder, PortClient>();
+        @Override
+        MidiInputPort getInputPort() {
+            return mInputPort;
+        }
+    }
 
     // Binder interface stub for receiving connection requests from clients
     private final IMidiDeviceServer mServer = new IMidiDeviceServer.Stub() {
@@ -215,6 +226,12 @@
                 ParcelFileDescriptor[] pair = ParcelFileDescriptor.createSocketPair(
                                                     OsConstants.SOCK_SEQPACKET);
                 MidiInputPort inputPort = new MidiInputPort(pair[0], portNumber);
+                // Undo the default blocking-mode of the server-side socket for
+                // physical devices to avoid stalling the Java device handler if
+                // client app code gets stuck inside 'onSend' handler.
+                if (mDeviceInfo.getType() != MidiDeviceInfo.TYPE_VIRTUAL) {
+                    IoUtils.setBlocking(pair[0].getFileDescriptor(), false);
+                }
                 MidiDispatcher dispatcher = mOutputPortDispatchers[portNumber];
                 synchronized (dispatcher) {
                     dispatcher.getSender().connect(inputPort);
@@ -228,6 +245,9 @@
                 synchronized (mPortClients) {
                     mPortClients.put(token, client);
                 }
+                synchronized (mInputPortClients) {
+                    mInputPortClients.put(inputPort, client);
+                }
                 return pair[1];
             } catch (IOException e) {
                 Log.e(TAG, "unable to create ParcelFileDescriptors in openOutputPort");
@@ -237,12 +257,19 @@
 
         @Override
         public void closePort(IBinder token) {
+            MidiInputPort inputPort = null;
             synchronized (mPortClients) {
                 PortClient client = mPortClients.remove(token);
                 if (client != null) {
+                    inputPort = client.getInputPort();
                     client.close();
                 }
             }
+            if (inputPort != null) {
+                synchronized (mInputPortClients) {
+                    mInputPortClients.remove(inputPort);
+                }
+            }
         }
 
         @Override
@@ -254,7 +281,7 @@
         }
 
         @Override
-        public void connectPorts(IBinder token, ParcelFileDescriptor pfd,
+        public int connectPorts(IBinder token, ParcelFileDescriptor pfd,
                 int outputPortNumber) {
             MidiInputPort inputPort = new MidiInputPort(pfd, outputPortNumber);
             MidiDispatcher dispatcher = mOutputPortDispatchers[outputPortNumber];
@@ -270,6 +297,10 @@
             synchronized (mPortClients) {
                 mPortClients.put(token, client);
             }
+            synchronized (mInputPortClients) {
+                mInputPortClients.put(inputPort, client);
+            }
+            return Process.myPid(); // for caller to detect same process ID
         }
 
         @Override
@@ -302,7 +333,7 @@
 
         mOutputPortDispatchers = new MidiDispatcher[numOutputPorts];
         for (int i = 0; i < numOutputPorts; i++) {
-            mOutputPortDispatchers[i] = new MidiDispatcher();
+            mOutputPortDispatchers[i] = new MidiDispatcher(mInputPortFailureHandler);
         }
 
         mInputPortOpen = new boolean[mInputPortCount];
@@ -311,6 +342,20 @@
         mGuard.open("close");
     }
 
+    private final MidiDispatcher.MidiReceiverFailureHandler mInputPortFailureHandler =
+            new MidiDispatcher.MidiReceiverFailureHandler() {
+                public void onReceiverFailure(MidiReceiver receiver, IOException failure) {
+                    Log.e(TAG, "MidiInputPort failed to send data", failure);
+                    PortClient client = null;
+                    synchronized (mInputPortClients) {
+                        client = mInputPortClients.remove(receiver);
+                    }
+                    if (client != null) {
+                        client.close();
+                    }
+                }
+            };
+
     // Constructor for MidiDeviceService.onCreate()
     /* package */ MidiDeviceServer(IMidiManager midiManager, MidiReceiver[] inputPortReceivers,
            MidiDeviceInfo deviceInfo, Callback callback) {
diff --git a/media/java/android/media/midi/MidiOutputPort.java b/media/java/android/media/midi/MidiOutputPort.java
index 0096995..54c31e3 100644
--- a/media/java/android/media/midi/MidiOutputPort.java
+++ b/media/java/android/media/midi/MidiOutputPort.java
@@ -83,7 +83,7 @@
                 }
             } catch (IOException e) {
                 // FIXME report I/O failure?
-                Log.e(TAG, "read failed");
+                Log.e(TAG, "read failed", e);
             } finally {
                 IoUtils.closeQuietly(mInputStream);
             }
diff --git a/media/java/android/media/midi/package.html b/media/java/android/media/midi/package.html
index 45fb579..eead8d8 100644
--- a/media/java/android/media/midi/package.html
+++ b/media/java/android/media/midi/package.html
@@ -1,12 +1,13 @@
 <html>
 <body>
-<p>Android MIDI User Guide</p>
+
+<p>
+Provides classes for sending and receiving messages using the standard MIDI
+event protocol over USB, Bluetooth LE, and virtual (inter-app) transports.
+</p>
 
 <h1 id=overview>Overview</h1>
 
-
-<p>This document describes how to use the Android MIDI API in Java.</p>
-
 <p>The Android MIDI package allows users to:</p>
 
 <ul>
diff --git a/native/android/Android.mk b/native/android/Android.mk
index 5386e6f..da4e4ba 100644
--- a/native/android/Android.mk
+++ b/native/android/Android.mk
@@ -44,7 +44,4 @@
 
 LOCAL_CFLAGS += -Wall -Werror -Wunused -Wunreachable-code
 
-# Required because of b/25642296
-LOCAL_CLANG_arm64 := false
-
 include $(BUILD_SHARED_LIBRARY)
diff --git a/obex/javax/obex/ClientOperation.java b/obex/javax/obex/ClientOperation.java
index eb7e280..c627dfb 100644
--- a/obex/javax/obex/ClientOperation.java
+++ b/obex/javax/obex/ClientOperation.java
@@ -207,7 +207,6 @@
      *         object
      */
     public synchronized int getResponseCode() throws IOException {
-        //avoid dup validateConnection
         if ((mReplyHeader.responseCode == -1)
                 || (mReplyHeader.responseCode == ResponseCodes.OBEX_HTTP_CONTINUE)) {
             validateConnection();
@@ -423,8 +422,9 @@
     private void validateConnection() throws IOException {
         ensureOpen();
 
-        // to sure only one privateInput object exist.
-        if (mPrivateInput == null) {
+        // Make sure that a response has been received from remote
+        // before continuing
+        if (mPrivateInput == null || mReplyHeader.responseCode == -1) {
             startProcessing();
         }
     }
diff --git a/preloaded-classes b/preloaded-classes
index 8e8faf46..cd146f1 100644
--- a/preloaded-classes
+++ b/preloaded-classes
@@ -2257,7 +2257,8 @@
 com.android.internal.os.RuntimeInit
 com.android.internal.os.RuntimeInit$1
 com.android.internal.os.RuntimeInit$Arguments
-com.android.internal.os.RuntimeInit$UncaughtHandler
+com.android.internal.os.RuntimeInit$KillApplicationHandler
+com.android.internal.os.RuntimeInit$LoggingHandler
 com.android.internal.os.SamplingProfilerIntegration
 com.android.internal.os.SomeArgs
 com.android.internal.os.Zygote
diff --git a/services/appwidget/java/com/android/server/appwidget/AppWidgetService.java b/services/appwidget/java/com/android/server/appwidget/AppWidgetService.java
index 3f95427..c0f77ca 100644
--- a/services/appwidget/java/com/android/server/appwidget/AppWidgetService.java
+++ b/services/appwidget/java/com/android/server/appwidget/AppWidgetService.java
@@ -40,7 +40,7 @@
 
     @Override
     public void onBootPhase(int phase) {
-        if (phase == PHASE_THIRD_PARTY_APPS_CAN_START) {
+        if (phase == PHASE_ACTIVITY_MANAGER_READY) {
             mImpl.setSafeMode(isSafeMode());
         }
     }
diff --git a/services/core/java/com/android/server/DropBoxManagerService.java b/services/core/java/com/android/server/DropBoxManagerService.java
index 3cf00bb..5948477 100644
--- a/services/core/java/com/android/server/DropBoxManagerService.java
+++ b/services/core/java/com/android/server/DropBoxManagerService.java
@@ -696,7 +696,7 @@
      * Trims the files on disk to make sure they aren't using too much space.
      * @return the overall quota for storage (in bytes)
      */
-    private synchronized long trimToFit() {
+    private synchronized long trimToFit() throws IOException {
         // Expunge aged items (including tombstones marking deleted data).
 
         int ageSeconds = Settings.Global.getInt(mContentResolver,
@@ -728,7 +728,12 @@
             int quotaKb = Settings.Global.getInt(mContentResolver,
                     Settings.Global.DROPBOX_QUOTA_KB, DEFAULT_QUOTA_KB);
 
-            mStatFs.restat(mDropBoxDir.getPath());
+            String dirPath = mDropBoxDir.getPath();
+            try {
+                mStatFs.restat(dirPath);
+            } catch (IllegalArgumentException e) {  // restat throws this on error
+                throw new IOException("Can't restat: " + mDropBoxDir);
+            }
             int available = mStatFs.getAvailableBlocks();
             int nonreserved = available - mStatFs.getBlockCount() * reservePercent / 100;
             int maximum = quotaKb * 1024 / mBlockSize;
diff --git a/services/core/java/com/android/server/am/ActiveServices.java b/services/core/java/com/android/server/am/ActiveServices.java
old mode 100755
new mode 100644
diff --git a/services/core/java/com/android/server/am/ActivityRecord.java b/services/core/java/com/android/server/am/ActivityRecord.java
old mode 100755
new mode 100644
diff --git a/services/core/java/com/android/server/am/BaseErrorDialog.java b/services/core/java/com/android/server/am/BaseErrorDialog.java
old mode 100755
new mode 100644
diff --git a/services/core/java/com/android/server/net/NetworkPolicyManagerService.java b/services/core/java/com/android/server/net/NetworkPolicyManagerService.java
index aff87ff..0dba0d7 100644
--- a/services/core/java/com/android/server/net/NetworkPolicyManagerService.java
+++ b/services/core/java/com/android/server/net/NetworkPolicyManagerService.java
@@ -1206,7 +1206,7 @@
         final ArrayList<Pair<String, NetworkIdentity>> connIdents = new ArrayList<>(states.length);
         final ArraySet<String> connIfaces = new ArraySet<String>(states.length);
         for (NetworkState state : states) {
-            if (state.networkInfo.isConnected()) {
+            if (state.networkInfo != null && state.networkInfo.isConnected()) {
                 final NetworkIdentity ident = NetworkIdentity.buildNetworkIdentity(mContext, state);
 
                 final String baseIface = state.linkProperties.getInterfaceName();
diff --git a/services/java/com/android/server/SystemServer.java b/services/java/com/android/server/SystemServer.java
index 0e475b0..33b8840 100644
--- a/services/java/com/android/server/SystemServer.java
+++ b/services/java/com/android/server/SystemServer.java
@@ -1153,6 +1153,8 @@
         }
         Trace.traceEnd(Trace.TRACE_TAG_SYSTEM_SERVER);
 
+        mSystemServiceManager.setSafeMode(safeMode);
+
         // These are needed to propagate to the runnable below.
         final NetworkManagementService networkManagementF = networkManagement;
         final NetworkStatsService networkStatsF = networkStats;
diff --git a/services/midi/java/com/android/server/midi/MidiService.java b/services/midi/java/com/android/server/midi/MidiService.java
index c6d5a7e..723be24 100644
--- a/services/midi/java/com/android/server/midi/MidiService.java
+++ b/services/midi/java/com/android/server/midi/MidiService.java
@@ -126,8 +126,8 @@
         // This client's PID
         private final int mPid;
         // List of all receivers for this client
-        private final ArrayList<IMidiDeviceListener> mListeners
-                = new ArrayList<IMidiDeviceListener>();
+        private final HashMap<IBinder, IMidiDeviceListener> mListeners
+                = new HashMap<IBinder, IMidiDeviceListener>();
         // List of all device connections for this client
         private final HashMap<IBinder, DeviceConnection> mDeviceConnections
                 = new HashMap<IBinder, DeviceConnection>();
@@ -143,11 +143,13 @@
         }
 
         public void addListener(IMidiDeviceListener listener) {
-            mListeners.add(listener);
+            // Use asBinder() so that we can match it in removeListener().
+            // The listener proxy objects themselves do not match.
+            mListeners.put(listener.asBinder(), listener);
         }
 
         public void removeListener(IMidiDeviceListener listener) {
-            mListeners.remove(listener);
+            mListeners.remove(listener.asBinder());
             if (mListeners.size() == 0 && mDeviceConnections.size() == 0) {
                 close();
             }
@@ -184,7 +186,7 @@
 
             MidiDeviceInfo deviceInfo = device.getDeviceInfo();
             try {
-                for (IMidiDeviceListener listener : mListeners) {
+                for (IMidiDeviceListener listener : mListeners.values()) {
                     listener.onDeviceAdded(deviceInfo);
                 }
             } catch (RemoteException e) {
@@ -198,7 +200,7 @@
 
             MidiDeviceInfo deviceInfo = device.getDeviceInfo();
             try {
-                for (IMidiDeviceListener listener : mListeners) {
+                for (IMidiDeviceListener listener : mListeners.values()) {
                     listener.onDeviceRemoved(deviceInfo);
                 }
             } catch (RemoteException e) {
@@ -211,7 +213,7 @@
             if (!device.isUidAllowed(mUid)) return;
 
             try {
-                for (IMidiDeviceListener listener : mListeners) {
+                for (IMidiDeviceListener listener : mListeners.values()) {
                     listener.onDeviceStatusChanged(status);
                 }
             } catch (RemoteException e) {
diff --git a/services/usb/java/com/android/server/usb/UsbMidiDevice.java b/services/usb/java/com/android/server/usb/UsbMidiDevice.java
index 38ede87..cd19795 100644
--- a/services/usb/java/com/android/server/usb/UsbMidiDevice.java
+++ b/services/usb/java/com/android/server/usb/UsbMidiDevice.java
@@ -51,7 +51,7 @@
 
     private MidiDeviceServer mServer;
 
-    // event schedulers for each output port
+    // event schedulers for each input port of the physical device
     private MidiEventScheduler[] mEventSchedulers;
 
     private static final int BUFFER_SIZE = 512;
@@ -127,6 +127,14 @@
         public void setReceiver(MidiReceiver receiver) {
             mReceiver = receiver;
         }
+
+        @Override
+        public void onFlush() throws IOException {
+            MidiReceiver receiver = mReceiver;
+            if (receiver != null) {
+                receiver.flush();
+            }
+        }
     }
 
     public static UsbMidiDevice create(Context context, Bundle properties, int card, int device) {
@@ -152,9 +160,9 @@
         mSubdeviceCount = subdeviceCount;
 
         // FIXME - support devices with different number of input and output ports
-        int inputCount = subdeviceCount;
-        mInputPortReceivers = new InputReceiverProxy[inputCount];
-        for (int port = 0; port < inputCount; port++) {
+        int inputPortCount = subdeviceCount;
+        mInputPortReceivers = new InputReceiverProxy[inputPortCount];
+        for (int port = 0; port < inputPortCount; port++) {
             mInputPortReceivers[port] = new InputReceiverProxy();
         }
     }
@@ -168,14 +176,14 @@
         }
 
         mFileDescriptors = fileDescriptors;
-        int inputCount = fileDescriptors.length;
+        int inputStreamCount = fileDescriptors.length;
         // last file descriptor returned from nativeOpen() is only used for unblocking Os.poll()
         // in our input thread
-        int outputCount = fileDescriptors.length - 1;
+        int outputStreamCount = fileDescriptors.length - 1;
 
-        mPollFDs = new StructPollfd[inputCount];
-        mInputStreams = new FileInputStream[inputCount];
-        for (int i = 0; i < inputCount; i++) {
+        mPollFDs = new StructPollfd[inputStreamCount];
+        mInputStreams = new FileInputStream[inputStreamCount];
+        for (int i = 0; i < inputStreamCount; i++) {
             FileDescriptor fd = fileDescriptors[i];
             StructPollfd pollfd = new StructPollfd();
             pollfd.fd = fd;
@@ -184,9 +192,9 @@
             mInputStreams[i] = new FileInputStream(fd);
         }
 
-        mOutputStreams = new FileOutputStream[outputCount];
-        mEventSchedulers = new MidiEventScheduler[outputCount];
-        for (int i = 0; i < outputCount; i++) {
+        mOutputStreams = new FileOutputStream[outputStreamCount];
+        mEventSchedulers = new MidiEventScheduler[outputStreamCount];
+        for (int i = 0; i < outputStreamCount; i++) {
             mOutputStreams[i] = new FileOutputStream(fileDescriptors[i]);
 
             MidiEventScheduler scheduler = new MidiEventScheduler();
@@ -196,7 +204,7 @@
 
         final MidiReceiver[] outputReceivers = mServer.getOutputPortReceivers();
 
-        // Create input thread which will read from all input ports
+        // Create input thread which will read from all output ports of the physical device
         new Thread("UsbMidiDevice input thread") {
             @Override
             public void run() {
@@ -241,8 +249,8 @@
             }
         }.start();
 
-        // Create output thread for each output port
-        for (int port = 0; port < outputCount; port++) {
+        // Create output thread for each input port of the physical device
+        for (int port = 0; port < outputStreamCount; port++) {
             final MidiEventScheduler eventSchedulerF = mEventSchedulers[port];
             final FileOutputStream outputStreamF = mOutputStreams[port];
             final int portF = port;
diff --git a/tools/aapt/AaptAssets.cpp b/tools/aapt/AaptAssets.cpp
index d346731..f906ca3 100644
--- a/tools/aapt/AaptAssets.cpp
+++ b/tools/aapt/AaptAssets.cpp
@@ -1306,8 +1306,8 @@
 
 status_t AaptAssets::filter(Bundle* bundle)
 {
-    WeakResourceFilter reqFilter;
-    status_t err = reqFilter.parse(bundle->getConfigurations());
+    sp<WeakResourceFilter> reqFilter(new WeakResourceFilter());
+    status_t err = reqFilter->parse(bundle->getConfigurations());
     if (err != NO_ERROR) {
         return err;
     }
@@ -1323,12 +1323,12 @@
         preferredDensity = preferredConfig.density;
     }
 
-    if (reqFilter.isEmpty() && preferredDensity == 0) {
+    if (reqFilter->isEmpty() && preferredDensity == 0) {
         return NO_ERROR;
     }
 
     if (bundle->getVerbose()) {
-        if (!reqFilter.isEmpty()) {
+        if (!reqFilter->isEmpty()) {
             printf("Applying required filter: %s\n",
                     bundle->getConfigurations().string());
         }
@@ -1380,7 +1380,7 @@
                     continue;
                 }
                 const ResTable_config& config(file->getGroupEntry().toParams());
-                if (!reqFilter.match(config)) {
+                if (!reqFilter->match(config)) {
                     if (bundle->getVerbose()) {
                         printf("Pruning unneeded resource: %s\n",
                                 file->getPrintableSource().string());
diff --git a/tools/aapt/AaptAssets.h b/tools/aapt/AaptAssets.h
index 4fdc964..eadd48a 100644
--- a/tools/aapt/AaptAssets.h
+++ b/tools/aapt/AaptAssets.h
@@ -103,7 +103,7 @@
 {
 public:
     AaptGroupEntry() {}
-    AaptGroupEntry(const ConfigDescription& config) : mParams(config) {}
+    explicit AaptGroupEntry(const ConfigDescription& config) : mParams(config) {}
 
     bool initFromDirName(const char* dir, String8* resType);
 
@@ -312,7 +312,7 @@
         : isPublic(false), isJavaSymbol(false), typeCode(TYPE_UNKNOWN)
     {
     }
-    AaptSymbolEntry(const String8& _name)
+    explicit AaptSymbolEntry(const String8& _name)
         : name(_name), isPublic(false), isJavaSymbol(false), typeCode(TYPE_UNKNOWN)
     {
     }
diff --git a/tools/aapt/ApkBuilder.h b/tools/aapt/ApkBuilder.h
index 0d7f06b..5d3abc6 100644
--- a/tools/aapt/ApkBuilder.h
+++ b/tools/aapt/ApkBuilder.h
@@ -32,7 +32,7 @@
 
 class ApkBuilder : public android::RefBase {
 public:
-    ApkBuilder(const sp<WeakResourceFilter>& configFilter);
+    explicit ApkBuilder(const sp<WeakResourceFilter>& configFilter);
 
     /**
      * Tells the builder to generate a separate APK for resources that
diff --git a/tools/aapt/CacheUpdater.h b/tools/aapt/CacheUpdater.h
index 10a1bbc..6fa96d6 100644
--- a/tools/aapt/CacheUpdater.h
+++ b/tools/aapt/CacheUpdater.h
@@ -51,7 +51,7 @@
 class SystemCacheUpdater : public CacheUpdater {
 public:
     // Constructor to set bundle to pass to preProcessImage
-    SystemCacheUpdater (Bundle* b)
+    explicit SystemCacheUpdater (Bundle* b)
         : bundle(b) { };
 
     // Make sure all the directories along this path exist
diff --git a/tools/aapt/Command.cpp b/tools/aapt/Command.cpp
index ba4aac6..8eeb84b 100644
--- a/tools/aapt/Command.cpp
+++ b/tools/aapt/Command.cpp
@@ -248,7 +248,7 @@
 }
 
 static void printResolvedResourceAttribute(const ResTable& resTable, const ResXMLTree& tree,
-        uint32_t attrRes, String8 attrLabel, String8* outError)
+        uint32_t attrRes, const String8& attrLabel, String8* outError)
 {
     Res_value value;
     AaptXml::getResolvedResourceAttribute(resTable, tree, attrRes, &value, outError);
@@ -399,7 +399,7 @@
             ResTable::normalizeForOutput(reason.string()).string());
 }
 
-Vector<String8> getNfcAidCategories(AssetManager& assets, String8 xmlPath, bool offHost,
+Vector<String8> getNfcAidCategories(AssetManager& assets, const String8& xmlPath, bool offHost,
         String8 *outError = NULL)
 {
     Asset* aidAsset = assets.openNonAsset(xmlPath, Asset::ACCESS_BUFFER);
diff --git a/tools/aapt/ConfigDescription.h b/tools/aapt/ConfigDescription.h
index 4f999a2..09430f2 100644
--- a/tools/aapt/ConfigDescription.h
+++ b/tools/aapt/ConfigDescription.h
@@ -29,7 +29,7 @@
         size = sizeof(android::ResTable_config);
     }
 
-    ConfigDescription(const android::ResTable_config&o) {
+    ConfigDescription(const android::ResTable_config&o) {  // NOLINT(implicit)
         *static_cast<android::ResTable_config*>(this) = o;
         size = sizeof(android::ResTable_config);
     }
diff --git a/tools/aapt/CrunchCache.cpp b/tools/aapt/CrunchCache.cpp
index 0d574cf..7b8a576 100644
--- a/tools/aapt/CrunchCache.cpp
+++ b/tools/aapt/CrunchCache.cpp
@@ -94,7 +94,7 @@
     delete dw;
 }
 
-bool CrunchCache::needsUpdating(String8 relativePath) const
+bool CrunchCache::needsUpdating(const String8& relativePath) const
 {
     // Retrieve modification dates for this file entry under the source and
     // cache directory trees. The vectors will return a modification date of 0
diff --git a/tools/aapt/CrunchCache.h b/tools/aapt/CrunchCache.h
index be3da5c..4d6a169 100644
--- a/tools/aapt/CrunchCache.h
+++ b/tools/aapt/CrunchCache.h
@@ -81,7 +81,7 @@
      *          // Recrunch sourceFile out to destFile.
      *
      */
-    bool needsUpdating(String8 relativePath) const;
+    bool needsUpdating(const String8& relativePath) const;
 
     // DATA MEMBERS ====================================================
 
diff --git a/tools/aapt/FileFinder.cpp b/tools/aapt/FileFinder.cpp
index 18775c0..c9d0744 100644
--- a/tools/aapt/FileFinder.cpp
+++ b/tools/aapt/FileFinder.cpp
@@ -77,7 +77,7 @@
     return true;
 }
 
-void SystemFileFinder::checkAndAddFile(String8 path, const struct stat* stats,
+void SystemFileFinder::checkAndAddFile(const String8& path, const struct stat* stats,
                                        Vector<String8>& extensions,
                                        KeyedVector<String8,time_t>& fileStore)
 {
diff --git a/tools/aapt/FileFinder.h b/tools/aapt/FileFinder.h
index 6974aee..f405381 100644
--- a/tools/aapt/FileFinder.h
+++ b/tools/aapt/FileFinder.h
@@ -72,7 +72,7 @@
      *    time as the value.
      *
      */
-    static void checkAndAddFile(String8 path, const struct stat* stats,
+    static void checkAndAddFile(const String8& path, const struct stat* stats,
                                 Vector<String8>& extensions,
                                 KeyedVector<String8,time_t>& fileStore);
 
diff --git a/tools/aapt/IndentPrinter.h b/tools/aapt/IndentPrinter.h
index 6fc94bc..bd0edcb 100644
--- a/tools/aapt/IndentPrinter.h
+++ b/tools/aapt/IndentPrinter.h
@@ -3,7 +3,7 @@
 
 class IndentPrinter {
 public:
-    IndentPrinter(FILE* stream, int indentSize=2)
+    explicit IndentPrinter(FILE* stream, int indentSize=2)
         : mStream(stream)
         , mIndentSize(indentSize)
         , mIndent(0)
diff --git a/tools/aapt/Resource.cpp b/tools/aapt/Resource.cpp
index d05ae3c..b6f9aaf 100644
--- a/tools/aapt/Resource.cpp
+++ b/tools/aapt/Resource.cpp
@@ -394,7 +394,7 @@
     const DefaultKeyedVector<String8, sp<AaptGroup> >& groups = dir->getFiles();
     int N = groups.size();
     for (int i=0; i<N; i++) {
-        String8 leafName = groups.keyAt(i);
+        const String8& leafName = groups.keyAt(i);
         const sp<AaptGroup>& group = groups.valueAt(i);
 
         const DefaultKeyedVector<AaptGroupEntry, sp<AaptFile> >& files
@@ -417,7 +417,7 @@
             set->add(leafName, group);
             resources->add(resType, set);
         } else {
-            sp<ResourceTypeSet> set = resources->valueAt(index);
+            const sp<ResourceTypeSet>& set = resources->valueAt(index);
             index = set->indexOfKey(leafName);
             if (index < 0) {
                 if (kIsDebug) {
@@ -452,7 +452,7 @@
     int N = dirs.size();
 
     for (int i=0; i<N; i++) {
-        sp<AaptDir> d = dirs.itemAt(i);
+        const sp<AaptDir>& d = dirs.itemAt(i);
         if (kIsDebug) {
             printf("Collecting dir #%d %p: %s, leaf %s\n", i, d.get(), d->getPath().string(),
                     d->getLeaf().string());
@@ -610,7 +610,7 @@
         // get the overlay resources of the requested type
         ssize_t index = overlayRes->indexOfKey(resTypeString);
         if (index >= 0) {
-            sp<ResourceTypeSet> overlaySet = overlayRes->valueAt(index);
+            const sp<ResourceTypeSet>& overlaySet = overlayRes->valueAt(index);
 
             // for each of the resources, check for a match in the previously built
             // non-overlay "baseset".
@@ -760,7 +760,7 @@
     return addTagAttribute(node, ns8, attr8, value, errorOnFailedInsert, false);
 }
 
-static void fullyQualifyClassName(const String8& package, sp<XMLNode> node,
+static void fullyQualifyClassName(const String8& package, const sp<XMLNode>& node,
         const String16& attrName) {
     XMLNode::attribute_entry* attr = node->editAttribute(
             String16("http://schemas.android.com/apk/res/android"), attrName);
@@ -1339,7 +1339,7 @@
             ResourceDirIterator it(resources->valueAt(index), String8("values"));
             ssize_t res;
             while ((res=it.next()) == NO_ERROR) {
-                sp<AaptFile> file = it.getFile();
+                const sp<AaptFile>& file = it.getFile();
                 res = compileResourceFile(bundle, assets, file, it.getParams(), 
                                           (current!=assets), &table);
                 if (res != NO_ERROR) {
@@ -2674,7 +2674,7 @@
         String8 dest(bundle->getRClassDir());
 
         if (bundle->getMakePackageDirs()) {
-            String8 pkg(package);
+            const String8& pkg(package);
             const char* last = pkg.string();
             const char* s = last-1;
             do {
diff --git a/tools/aapt/ResourceFilter.h b/tools/aapt/ResourceFilter.h
index d6430c0..40d5b75 100644
--- a/tools/aapt/ResourceFilter.h
+++ b/tools/aapt/ResourceFilter.h
@@ -78,7 +78,7 @@
 class StrongResourceFilter : public ResourceFilter {
 public:
     StrongResourceFilter() {}
-    StrongResourceFilter(const std::set<ConfigDescription>& configs)
+    explicit StrongResourceFilter(const std::set<ConfigDescription>& configs)
         : mConfigs(configs) {}
 
     android::status_t parse(const android::String8& str);
@@ -106,7 +106,7 @@
  */
 class InverseResourceFilter : public ResourceFilter {
 public:
-    InverseResourceFilter(const android::sp<ResourceFilter>& filter)
+    explicit InverseResourceFilter(const android::sp<ResourceFilter>& filter)
         : mFilter(filter) {}
 
     bool match(const android::ResTable_config& config) const {
diff --git a/tools/aapt/ResourceTable.cpp b/tools/aapt/ResourceTable.cpp
index d5a09d8..6601872 100644
--- a/tools/aapt/ResourceTable.cpp
+++ b/tools/aapt/ResourceTable.cpp
@@ -4023,7 +4023,7 @@
     
     j = 0;
     for (i=0; i<N; i++) {
-        sp<ConfigList> e = origOrder.itemAt(i);
+        const sp<ConfigList>& e = origOrder.itemAt(i);
         // There will always be enough room for the remaining entries.
         while (mOrderedConfigs.itemAt(j) != NULL) {
             j++;
@@ -4145,7 +4145,7 @@
 
     size_t j=0;
     for (i=0; i<N; i++) {
-        sp<Type> t = origOrder.itemAt(i);
+        const sp<Type>& t = origOrder.itemAt(i);
         // There will always be enough room for the remaining types.
         while (mOrderedTypes.itemAt(j) != NULL) {
             j++;
@@ -4577,7 +4577,7 @@
                         c->getEntries();
                 const size_t entryCount = entries.size();
                 for (size_t ei = 0; ei < entryCount; ei++) {
-                    sp<Entry> e = entries.valueAt(ei);
+                    const sp<Entry>& e = entries.valueAt(ei);
                     if (e == NULL || e->getType() != Entry::TYPE_BAG) {
                         continue;
                     }
diff --git a/tools/aapt/StringPool.h b/tools/aapt/StringPool.h
index 4b0d920..625b0bf 100644
--- a/tools/aapt/StringPool.h
+++ b/tools/aapt/StringPool.h
@@ -41,7 +41,7 @@
 public:
     struct entry {
         entry() : offset(0) { }
-        entry(const String16& _value) : value(_value), offset(0), hasStyles(false) { }
+        explicit entry(const String16& _value) : value(_value), offset(0), hasStyles(false) { }
         entry(const entry& o) : value(o.value), offset(o.offset),
                 hasStyles(o.hasStyles), indices(o.indices),
                 configTypeName(o.configTypeName), configs(o.configs) { }
diff --git a/tools/aapt/WorkQueue.h b/tools/aapt/WorkQueue.h
index d38f05d..ab5f969 100644
--- a/tools/aapt/WorkQueue.h
+++ b/tools/aapt/WorkQueue.h
@@ -47,7 +47,7 @@
     };
 
     /* Creates a work queue with the specified maximum number of work threads. */
-    WorkQueue(size_t maxThreads, bool canCallJava = true);
+    explicit WorkQueue(size_t maxThreads, bool canCallJava = true);
 
     /* Destroys the work queue.
      * Cancels pending work and waits for all remaining threads to complete.
diff --git a/tools/aapt/XMLNode.cpp b/tools/aapt/XMLNode.cpp
index dc08eb8..e87a05f 100644
--- a/tools/aapt/XMLNode.cpp
+++ b/tools/aapt/XMLNode.cpp
@@ -67,7 +67,7 @@
 static const String16 RESOURCES_PRV_PREFIX(RESOURCES_ROOT_PRV_NAMESPACE);
 static const String16 RESOURCES_TOOLS_NAMESPACE("http://schemas.android.com/tools");
 
-String16 getNamespaceResourcePackage(String16 appPackage, String16 namespaceUri, bool* outIsPublic)
+String16 getNamespaceResourcePackage(const String16& appPackage, const String16& namespaceUri, bool* outIsPublic)
 {
     //printf("%s starts with %s?\n", String8(namespaceUri).string(),
     //       String8(RESOURCES_PREFIX).string());
@@ -98,7 +98,7 @@
 
 status_t hasSubstitutionErrors(const char* fileName,
                                ResXMLTree* inXml,
-                               String16 str16)
+                               const String16& str16)
 {
     const char16_t* str = str16.string();
     const char16_t* p = str;
diff --git a/tools/aapt/XMLNode.h b/tools/aapt/XMLNode.h
index b9e5cd5..d9d86ec 100644
--- a/tools/aapt/XMLNode.h
+++ b/tools/aapt/XMLNode.h
@@ -176,7 +176,7 @@
     XMLNode(const String8& filename, const String16& s1, const String16& s2, bool isNamespace);
     
     // Creating a CDATA node.
-    XMLNode(const String8& filename);
+    explicit XMLNode(const String8& filename);
     
     status_t collect_strings(StringPool* dest, Vector<uint32_t>* outResIds,
             bool stripComments, bool stripRawValues) const;
diff --git a/tools/aapt/pseudolocalize.h b/tools/aapt/pseudolocalize.h
index 1faecd1..9bb1fd8 100644
--- a/tools/aapt/pseudolocalize.h
+++ b/tools/aapt/pseudolocalize.h
@@ -43,7 +43,7 @@
 
 class Pseudolocalizer {
  public:
-  Pseudolocalizer(PseudolocalizationMethod m);
+  explicit Pseudolocalizer(PseudolocalizationMethod m);
   ~Pseudolocalizer() { if (mImpl) delete mImpl; }
   void setMethod(PseudolocalizationMethod m);
   String16 start() { return mImpl->start(); }
diff --git a/tools/aapt2/BigBuffer.h b/tools/aapt2/BigBuffer.h
index 8b6569c..b4b42b4 100644
--- a/tools/aapt2/BigBuffer.h
+++ b/tools/aapt2/BigBuffer.h
@@ -62,7 +62,7 @@
      * Create a BigBuffer with block allocation sizes
      * of blockSize.
      */
-    BigBuffer(size_t blockSize);
+    explicit BigBuffer(size_t blockSize);
 
     BigBuffer(const BigBuffer&) = delete; // No copying.
 
diff --git a/tools/aapt2/BindingXmlPullParser.h b/tools/aapt2/BindingXmlPullParser.h
index cfb16ef..b34c00b 100644
--- a/tools/aapt2/BindingXmlPullParser.h
+++ b/tools/aapt2/BindingXmlPullParser.h
@@ -27,7 +27,7 @@
 
 class BindingXmlPullParser : public XmlPullParser {
 public:
-    BindingXmlPullParser(const std::shared_ptr<XmlPullParser>& parser);
+    explicit BindingXmlPullParser(const std::shared_ptr<XmlPullParser>& parser);
     BindingXmlPullParser(const BindingXmlPullParser& rhs) = delete;
 
     Event getEvent() const override;
diff --git a/tools/aapt2/ConfigDescription.h b/tools/aapt2/ConfigDescription.h
index 67b4b75..1250ad2 100644
--- a/tools/aapt2/ConfigDescription.h
+++ b/tools/aapt2/ConfigDescription.h
@@ -46,7 +46,7 @@
     static void applyVersionForCompatibility(ConfigDescription* config);
 
     ConfigDescription();
-    ConfigDescription(const android::ResTable_config& o);
+    ConfigDescription(const android::ResTable_config& o);  // NOLINT(implicit)
     ConfigDescription(const ConfigDescription& o);
     ConfigDescription(ConfigDescription&& o);
 
diff --git a/tools/aapt2/Flag.cpp b/tools/aapt2/Flag.cpp
index 76985da..774c1ba 100644
--- a/tools/aapt2/Flag.cpp
+++ b/tools/aapt2/Flag.cpp
@@ -32,20 +32,20 @@
 }
 
 void optionalFlag(const StringPiece& name, const StringPiece& description,
-                  std::function<void(const StringPiece&)> action) {
+                  const std::function<void(const StringPiece&)>& action) {
     sFlags.push_back(Flag{
             name.toString(), description.toString(), wrap(action),
             false, nullptr, false, false });
 }
 
 void requiredFlag(const StringPiece& name, const StringPiece& description,
-                  std::function<void(const StringPiece&)> action) {
+                  const std::function<void(const StringPiece&)>& action) {
     sFlags.push_back(Flag{ name.toString(), description.toString(), wrap(action),
             true, nullptr, false, false });
 }
 
 void requiredFlag(const StringPiece& name, const StringPiece& description,
-                  std::function<bool(const StringPiece&, std::string*)> action) {
+                  const std::function<bool(const StringPiece&, std::string*)>& action) {
     sFlags.push_back(Flag{ name.toString(), description.toString(), action,
             true, nullptr, false, false });
 }
diff --git a/tools/aapt2/Flag.h b/tools/aapt2/Flag.h
index e863742..57aceb4 100644
--- a/tools/aapt2/Flag.h
+++ b/tools/aapt2/Flag.h
@@ -11,13 +11,13 @@
 namespace flag {
 
 void requiredFlag(const StringPiece& name, const StringPiece& description,
-                  std::function<void(const StringPiece&)> action);
+                  const std::function<void(const StringPiece&)>& action);
 
 void requiredFlag(const StringPiece& name, const StringPiece& description,
-                  std::function<bool(const StringPiece&, std::string*)> action);
+                  const std::function<bool(const StringPiece&, std::string*)>& action);
 
 void optionalFlag(const StringPiece& name, const StringPiece& description,
-                  std::function<void(const StringPiece&)> action);
+                  const std::function<void(const StringPiece&)>& action);
 
 void optionalSwitch(const StringPiece& name, const StringPiece& description, bool resultWhenSet,
                     bool* result);
diff --git a/tools/aapt2/JavaClassGenerator_test.cpp b/tools/aapt2/JavaClassGenerator_test.cpp
index b385ff4..3d1bf89 100644
--- a/tools/aapt2/JavaClassGenerator_test.cpp
+++ b/tools/aapt2/JavaClassGenerator_test.cpp
@@ -35,7 +35,7 @@
         mTable->setPackageId(0x01);
     }
 
-    bool addResource(const ResourceNameRef& name, ResourceId id) {
+    bool addResource(const ResourceNameRef& name, const ResourceId& id) {
         return mTable->addResource(name, id, {}, SourceLine{ "test.xml", 21 },
                                    util::make_unique<Id>());
     }
diff --git a/tools/aapt2/Logger.h b/tools/aapt2/Logger.h
index 1d437eb..27a79eb 100644
--- a/tools/aapt2/Logger.h
+++ b/tools/aapt2/Logger.h
@@ -56,7 +56,7 @@
 
 class SourceLogger {
 public:
-    SourceLogger(const Source& source);
+    SourceLogger(const Source& source);  // NOLINT(implicit)
 
     std::ostream& error();
     std::ostream& error(size_t line);
diff --git a/tools/aapt2/ManifestMerger.h b/tools/aapt2/ManifestMerger.h
index c6219db..9d34479 100644
--- a/tools/aapt2/ManifestMerger.h
+++ b/tools/aapt2/ManifestMerger.h
@@ -15,7 +15,7 @@
     struct Options {
     };
 
-    ManifestMerger(const Options& options);
+    explicit ManifestMerger(const Options& options);
 
     bool setAppManifest(const Source& source, const std::u16string& package,
                         std::unique_ptr<xml::Node> root);
diff --git a/tools/aapt2/ManifestParser.cpp b/tools/aapt2/ManifestParser.cpp
index b8f0a43..5b539c6 100644
--- a/tools/aapt2/ManifestParser.cpp
+++ b/tools/aapt2/ManifestParser.cpp
@@ -24,7 +24,7 @@
 
 namespace aapt {
 
-bool ManifestParser::parse(const Source& source, std::shared_ptr<XmlPullParser> parser,
+bool ManifestParser::parse(const Source& source, const std::shared_ptr<XmlPullParser>& parser,
                            AppInfo* outInfo) {
     SourceLogger logger = { source };
 
@@ -70,7 +70,7 @@
     return true;
 }
 
-bool ManifestParser::parseManifest(SourceLogger& logger, std::shared_ptr<XmlPullParser> parser,
+bool ManifestParser::parseManifest(SourceLogger& logger, const std::shared_ptr<XmlPullParser>& parser,
                                    AppInfo* outInfo) {
     auto attrIter = parser->findAttribute(u"", u"package");
     if (attrIter == parser->endAttributes() || attrIter->value.empty()) {
diff --git a/tools/aapt2/ManifestParser.h b/tools/aapt2/ManifestParser.h
index f2e43d4..76201ab 100644
--- a/tools/aapt2/ManifestParser.h
+++ b/tools/aapt2/ManifestParser.h
@@ -33,10 +33,10 @@
     ManifestParser() = default;
     ManifestParser(const ManifestParser&) = delete;
 
-    bool parse(const Source& source, std::shared_ptr<XmlPullParser> parser, AppInfo* outInfo);
+    bool parse(const Source& source, const std::shared_ptr<XmlPullParser>& parser, AppInfo* outInfo);
 
 private:
-    bool parseManifest(SourceLogger& logger, std::shared_ptr<XmlPullParser> parser,
+    bool parseManifest(SourceLogger& logger, const std::shared_ptr<XmlPullParser>& parser,
                        AppInfo* outInfo);
 };
 
diff --git a/tools/aapt2/ManifestValidator.h b/tools/aapt2/ManifestValidator.h
index 3188784..ecbecde 100644
--- a/tools/aapt2/ManifestValidator.h
+++ b/tools/aapt2/ManifestValidator.h
@@ -28,7 +28,7 @@
 
 class ManifestValidator {
 public:
-    ManifestValidator(const android::ResTable& table);
+    explicit ManifestValidator(const android::ResTable& table);
     ManifestValidator(const ManifestValidator&) = delete;
 
     bool validate(const Source& source, android::ResXMLParser* parser);
diff --git a/tools/aapt2/Maybe.h b/tools/aapt2/Maybe.h
index ff6625f..fe8e9a7 100644
--- a/tools/aapt2/Maybe.h
+++ b/tools/aapt2/Maybe.h
@@ -41,12 +41,12 @@
     Maybe(const Maybe& rhs);
 
     template <typename U>
-    Maybe(const Maybe<U>& rhs);
+    Maybe(const Maybe<U>& rhs);  // NOLINT(implicit)
 
     Maybe(Maybe&& rhs);
 
     template <typename U>
-    Maybe(Maybe<U>&& rhs);
+    Maybe(Maybe<U>&& rhs);  // NOLINT(implicit)
 
     Maybe& operator=(const Maybe& rhs);
 
@@ -61,12 +61,12 @@
     /**
      * Construct a Maybe holding a value.
      */
-    Maybe(const T& value);
+    Maybe(const T& value);  // NOLINT(implicit)
 
     /**
      * Construct a Maybe holding a value.
      */
-    Maybe(T&& value);
+    Maybe(T&& value);  // NOLINT(implicit)
 
     /**
      * True if this holds a value, false if
diff --git a/tools/aapt2/Resource.h b/tools/aapt2/Resource.h
index fa9ac07..b948ea5 100644
--- a/tools/aapt2/Resource.h
+++ b/tools/aapt2/Resource.h
@@ -94,7 +94,7 @@
     ResourceNameRef() = default;
     ResourceNameRef(const ResourceNameRef&) = default;
     ResourceNameRef(ResourceNameRef&&) = default;
-    ResourceNameRef(const ResourceName& rhs);
+    ResourceNameRef(const ResourceName& rhs);  // NOLINT(implicit)
     ResourceNameRef(const StringPiece16& p, ResourceType t, const StringPiece16& e);
     ResourceNameRef& operator=(const ResourceNameRef& rhs) = default;
     ResourceNameRef& operator=(ResourceNameRef&& rhs) = default;
@@ -124,7 +124,7 @@
 
     ResourceId();
     ResourceId(const ResourceId& rhs);
-    ResourceId(uint32_t resId);
+    ResourceId(uint32_t resId);  // NOLINT(implicit)
     ResourceId(size_t p, size_t t, size_t e);
 
     bool isValid() const;
diff --git a/tools/aapt2/ResourceParser.cpp b/tools/aapt2/ResourceParser.cpp
index 13f916b..ec7bfa5 100644
--- a/tools/aapt2/ResourceParser.cpp
+++ b/tools/aapt2/ResourceParser.cpp
@@ -391,7 +391,7 @@
 
 std::unique_ptr<Item> ResourceParser::parseItemForAttribute(
         const StringPiece16& value, uint32_t typeMask,
-        std::function<void(const ResourceName&)> onCreateReference) {
+        const std::function<void(const ResourceName&)>& onCreateReference) {
     std::unique_ptr<BinaryPrimitive> nullOrEmpty = tryParseNullOrEmpty(value);
     if (nullOrEmpty) {
         return std::move(nullOrEmpty);
@@ -451,7 +451,7 @@
  */
 std::unique_ptr<Item> ResourceParser::parseItemForAttribute(
         const StringPiece16& str, const Attribute& attr,
-        std::function<void(const ResourceName&)> onCreateReference) {
+        const std::function<void(const ResourceName&)>& onCreateReference) {
     const uint32_t typeMask = attr.typeMask;
     std::unique_ptr<Item> value = parseItemForAttribute(str, typeMask, onCreateReference);
     if (value) {
diff --git a/tools/aapt2/ResourceParser.h b/tools/aapt2/ResourceParser.h
index 7618999..6fd58fa 100644
--- a/tools/aapt2/ResourceParser.h
+++ b/tools/aapt2/ResourceParser.h
@@ -133,11 +133,11 @@
      */
     static std::unique_ptr<Item> parseItemForAttribute(
             const StringPiece16& value, const Attribute& attr,
-            std::function<void(const ResourceName&)> onCreateReference = {});
+            const std::function<void(const ResourceName&)>& onCreateReference = {});
 
     static std::unique_ptr<Item> parseItemForAttribute(
             const StringPiece16& value, uint32_t typeMask,
-            std::function<void(const ResourceName&)> onCreateReference = {});
+            const std::function<void(const ResourceName&)>& onCreateReference = {});
 
     static uint32_t androidTypeToAttributeTypeMask(uint16_t type);
 
diff --git a/tools/aapt2/ResourceTable.cpp b/tools/aapt2/ResourceTable.cpp
index c93ecc7..eeec8da 100644
--- a/tools/aapt2/ResourceTable.cpp
+++ b/tools/aapt2/ResourceTable.cpp
@@ -151,7 +151,7 @@
     return addResourceImpl(name, ResourceId{}, config, source, std::move(value), kValidNameChars);
 }
 
-bool ResourceTable::addResource(const ResourceNameRef& name, const ResourceId resId,
+bool ResourceTable::addResource(const ResourceNameRef& name, const ResourceId& resId,
                                 const ConfigDescription& config, const SourceLine& source,
                                 std::unique_ptr<Value> value) {
     return addResourceImpl(name, resId, config, source, std::move(value), kValidNameChars);
@@ -165,7 +165,7 @@
                            kValidNameMangledChars);
 }
 
-bool ResourceTable::addResourceImpl(const ResourceNameRef& name, const ResourceId resId,
+bool ResourceTable::addResourceImpl(const ResourceNameRef& name, const ResourceId& resId,
                                     const ConfigDescription& config, const SourceLine& source,
                                     std::unique_ptr<Value> value, const char16_t* validChars) {
     if (!name.package.empty() && name.package != mPackage) {
@@ -255,17 +255,17 @@
     return true;
 }
 
-bool ResourceTable::markPublic(const ResourceNameRef& name, const ResourceId resId,
+bool ResourceTable::markPublic(const ResourceNameRef& name, const ResourceId& resId,
                                const SourceLine& source) {
     return markPublicImpl(name, resId, source, kValidNameChars);
 }
 
-bool ResourceTable::markPublicAllowMangled(const ResourceNameRef& name, const ResourceId resId,
+bool ResourceTable::markPublicAllowMangled(const ResourceNameRef& name, const ResourceId& resId,
                                            const SourceLine& source) {
     return markPublicImpl(name, resId, source, kValidNameMangledChars);
 }
 
-bool ResourceTable::markPublicImpl(const ResourceNameRef& name, const ResourceId resId,
+bool ResourceTable::markPublicImpl(const ResourceNameRef& name, const ResourceId& resId,
                                    const SourceLine& source, const char16_t* validChars) {
     if (!name.package.empty() && name.package != mPackage) {
         Logger::error(source)
diff --git a/tools/aapt2/ResourceTable.h b/tools/aapt2/ResourceTable.h
index 706f56a..e00fb3e 100644
--- a/tools/aapt2/ResourceTable.h
+++ b/tools/aapt2/ResourceTable.h
@@ -81,8 +81,8 @@
      */
     std::vector<ResourceConfigValue> values;
 
-    inline ResourceEntry(const StringPiece16& _name);
-    inline ResourceEntry(const ResourceEntry* rhs);
+    inline explicit ResourceEntry(const StringPiece16& _name);
+    inline explicit ResourceEntry(const ResourceEntry* rhs);
 };
 
 /**
@@ -115,8 +115,8 @@
      */
     std::vector<std::unique_ptr<ResourceEntry>> entries;
 
-    ResourceTableType(const ResourceType _type);
-    ResourceTableType(const ResourceTableType* rhs);
+    explicit ResourceTableType(const ResourceType _type);
+    explicit ResourceTableType(const ResourceTableType* rhs);
 };
 
 /**
@@ -151,12 +151,12 @@
     bool addResourceAllowMangled(const ResourceNameRef& name, const ConfigDescription& config,
                                  const SourceLine& source, std::unique_ptr<Value> value);
 
-    bool addResource(const ResourceNameRef& name, const ResourceId resId,
+    bool addResource(const ResourceNameRef& name, const ResourceId& resId,
                      const ConfigDescription& config, const SourceLine& source,
                      std::unique_ptr<Value> value);
 
-    bool markPublic(const ResourceNameRef& name, const ResourceId resId, const SourceLine& source);
-    bool markPublicAllowMangled(const ResourceNameRef& name, const ResourceId resId,
+    bool markPublic(const ResourceNameRef& name, const ResourceId& resId, const SourceLine& source);
+    bool markPublicAllowMangled(const ResourceNameRef& name, const ResourceId& resId,
                                 const SourceLine& source);
 
     /*
@@ -186,10 +186,10 @@
     std::unique_ptr<ResourceEntry>& findOrCreateEntry(std::unique_ptr<ResourceTableType>& type,
                                                       const StringPiece16& name);
 
-    bool addResourceImpl(const ResourceNameRef& name, const ResourceId resId,
+    bool addResourceImpl(const ResourceNameRef& name, const ResourceId& resId,
                          const ConfigDescription& config, const SourceLine& source,
                          std::unique_ptr<Value> value, const char16_t* validChars);
-    bool markPublicImpl(const ResourceNameRef& name, const ResourceId resId,
+    bool markPublicImpl(const ResourceNameRef& name, const ResourceId& resId,
                         const SourceLine& source, const char16_t* validChars);
 
     std::u16string mPackage;
diff --git a/tools/aapt2/ResourceValues.h b/tools/aapt2/ResourceValues.h
index ef6594e..2635e34 100644
--- a/tools/aapt2/ResourceValues.h
+++ b/tools/aapt2/ResourceValues.h
@@ -128,8 +128,8 @@
     bool privateReference = false;
 
     Reference();
-    Reference(const ResourceNameRef& n, Type type = Type::kResource);
-    Reference(const ResourceId& i, Type type = Type::kResource);
+    Reference(const ResourceNameRef& n, Type type = Type::kResource);  // NOLINT(implicit)
+    explicit Reference(const ResourceId& i, Type type = Type::kResource);
 
     bool flatten(android::Res_value& outValue) const override;
     Reference* clone(StringPool* newPool) const override;
@@ -154,7 +154,7 @@
 struct RawString : public BaseItem<RawString> {
     StringPool::Ref value;
 
-    RawString(const StringPool::Ref& ref);
+    explicit RawString(const StringPool::Ref& ref);
 
     bool flatten(android::Res_value& outValue) const override;
     RawString* clone(StringPool* newPool) const override;
@@ -164,7 +164,7 @@
 struct String : public BaseItem<String> {
     StringPool::Ref value;
 
-    String(const StringPool::Ref& ref);
+    explicit String(const StringPool::Ref& ref);
 
     bool flatten(android::Res_value& outValue) const override;
     String* clone(StringPool* newPool) const override;
@@ -174,7 +174,7 @@
 struct StyledString : public BaseItem<StyledString> {
     StringPool::StyleRef value;
 
-    StyledString(const StringPool::StyleRef& ref);
+    explicit StyledString(const StringPool::StyleRef& ref);
 
     bool flatten(android::Res_value& outValue) const override;
     StyledString* clone(StringPool* newPool) const override;
@@ -185,7 +185,7 @@
     StringPool::Ref path;
 
     FileReference() = default;
-    FileReference(const StringPool::Ref& path);
+    explicit FileReference(const StringPool::Ref& path);
 
     bool flatten(android::Res_value& outValue) const override;
     FileReference* clone(StringPool* newPool) const override;
@@ -199,7 +199,7 @@
     android::Res_value value;
 
     BinaryPrimitive() = default;
-    BinaryPrimitive(const android::Res_value& val);
+    explicit BinaryPrimitive(const android::Res_value& val);
 
     bool flatten(android::Res_value& outValue) const override;
     BinaryPrimitive* clone(StringPool* newPool) const override;
@@ -218,7 +218,7 @@
     uint32_t maxInt;
     std::vector<Symbol> symbols;
 
-    Attribute(bool w, uint32_t t = 0u);
+    explicit Attribute(bool w, uint32_t t = 0u);
 
     bool isWeak() const override;
     virtual Attribute* clone(StringPool* newPool) const override;
@@ -388,7 +388,7 @@
 struct ValueVisitorFunc : ValueVisitor {
     TFunc func;
 
-    ValueVisitorFunc(TFunc f) : func(f) {
+    explicit ValueVisitorFunc(TFunc f) : func(f) {
     }
 
     void visit(T& value, ValueVisitorArgs&) override {
@@ -403,7 +403,7 @@
 struct ConstValueVisitorFunc : ConstValueVisitor {
     TFunc func;
 
-    ConstValueVisitorFunc(TFunc f) : func(f) {
+    explicit ConstValueVisitorFunc(TFunc f) : func(f) {
     }
 
     void visit(const T& value, ValueVisitorArgs&) override {
diff --git a/tools/aapt2/ScopedXmlPullParser.h b/tools/aapt2/ScopedXmlPullParser.h
index a040f60..5b6b321 100644
--- a/tools/aapt2/ScopedXmlPullParser.h
+++ b/tools/aapt2/ScopedXmlPullParser.h
@@ -47,7 +47,7 @@
  */
 class ScopedXmlPullParser : public XmlPullParser {
 public:
-    ScopedXmlPullParser(XmlPullParser* parser);
+    explicit ScopedXmlPullParser(XmlPullParser* parser);
     ScopedXmlPullParser(const ScopedXmlPullParser&) = delete;
     ScopedXmlPullParser& operator=(const ScopedXmlPullParser&) = delete;
     ~ScopedXmlPullParser();
diff --git a/tools/aapt2/SdkConstants.cpp b/tools/aapt2/SdkConstants.cpp
index 9bdae49..cd75e98 100644
--- a/tools/aapt2/SdkConstants.cpp
+++ b/tools/aapt2/SdkConstants.cpp
@@ -48,7 +48,7 @@
     return p.first < entryId;
 }
 
-size_t findAttributeSdkLevel(ResourceId id) {
+size_t findAttributeSdkLevel(const ResourceId& id) {
     if (id.packageId() != 0x01 && id.typeId() != 0x01) {
         return 0;
     }
diff --git a/tools/aapt2/SdkConstants.h b/tools/aapt2/SdkConstants.h
index 803da03..4a57828 100644
--- a/tools/aapt2/SdkConstants.h
+++ b/tools/aapt2/SdkConstants.h
@@ -44,7 +44,7 @@
     SDK_LOLLIPOP_MR1 = 22,
 };
 
-size_t findAttributeSdkLevel(ResourceId id);
+size_t findAttributeSdkLevel(const ResourceId& id);
 size_t findAttributeSdkLevel(const ResourceName& name);
 
 } // namespace aapt
diff --git a/tools/aapt2/SourceXmlPullParser.h b/tools/aapt2/SourceXmlPullParser.h
index d8ed459..66d54a8 100644
--- a/tools/aapt2/SourceXmlPullParser.h
+++ b/tools/aapt2/SourceXmlPullParser.h
@@ -30,7 +30,7 @@
 
 class SourceXmlPullParser : public XmlPullParser {
 public:
-    SourceXmlPullParser(std::istream& in);
+    explicit SourceXmlPullParser(std::istream& in);
     SourceXmlPullParser(const SourceXmlPullParser& rhs) = delete;
     ~SourceXmlPullParser();
 
diff --git a/tools/aapt2/StringPiece.h b/tools/aapt2/StringPiece.h
index e2a1597..2221e23 100644
--- a/tools/aapt2/StringPiece.h
+++ b/tools/aapt2/StringPiece.h
@@ -39,8 +39,8 @@
 
     BasicStringPiece();
     BasicStringPiece(const BasicStringPiece<TChar>& str);
-    BasicStringPiece(const std::basic_string<TChar>& str);
-    BasicStringPiece(const TChar* str);
+    BasicStringPiece(const std::basic_string<TChar>& str);  // NOLINT(implicit)
+    BasicStringPiece(const TChar* str);  // NOLINT(implicit)
     BasicStringPiece(const TChar* str, size_t len);
 
     BasicStringPiece<TChar>& operator=(const BasicStringPiece<TChar>& rhs);
diff --git a/tools/aapt2/StringPool.h b/tools/aapt2/StringPool.h
index 14304a6..610a553 100644
--- a/tools/aapt2/StringPool.h
+++ b/tools/aapt2/StringPool.h
@@ -65,7 +65,7 @@
     private:
         friend class StringPool;
 
-        Ref(Entry* entry);
+        explicit Ref(Entry* entry);
 
         Entry* mEntry;
     };
@@ -88,7 +88,7 @@
     private:
         friend class StringPool;
 
-        StyleRef(StyleEntry* entry);
+        explicit StyleRef(StyleEntry* entry);
 
         StyleEntry* mEntry;
     };
diff --git a/tools/aapt2/TableFlattener.h b/tools/aapt2/TableFlattener.h
index ccbb737..55914db 100644
--- a/tools/aapt2/TableFlattener.h
+++ b/tools/aapt2/TableFlattener.h
@@ -46,7 +46,7 @@
         bool useExtendedChunks = true;
     };
 
-    TableFlattener(Options options);
+    explicit TableFlattener(Options options);
 
     bool flatten(BigBuffer* out, const ResourceTable& table);
 
diff --git a/tools/aapt2/Util.h b/tools/aapt2/Util.h
index 9cdb152..b2b6d8e 100644
--- a/tools/aapt2/Util.h
+++ b/tools/aapt2/Util.h
@@ -311,7 +311,7 @@
  * In the aapt namespace for lookup.
  */
 inline ::std::ostream& operator<<(::std::ostream& out,
-                                  ::std::function<::std::ostream&(::std::ostream&)> f) {
+                                  const ::std::function<::std::ostream&(::std::ostream&)>& f) {
     return f(out);
 }
 
diff --git a/tools/aapt2/XliffXmlPullParser.h b/tools/aapt2/XliffXmlPullParser.h
index 7791227..e89d8bb 100644
--- a/tools/aapt2/XliffXmlPullParser.h
+++ b/tools/aapt2/XliffXmlPullParser.h
@@ -30,7 +30,7 @@
  */
 class XliffXmlPullParser : public XmlPullParser {
 public:
-    XliffXmlPullParser(const std::shared_ptr<XmlPullParser>& parser);
+    explicit XliffXmlPullParser(const std::shared_ptr<XmlPullParser>& parser);
     XliffXmlPullParser(const XliffXmlPullParser& rhs) = delete;
 
     Event getEvent() const override;
diff --git a/tools/aapt2/XmlDom.h b/tools/aapt2/XmlDom.h
index 035e7c4..105a074 100644
--- a/tools/aapt2/XmlDom.h
+++ b/tools/aapt2/XmlDom.h
@@ -52,7 +52,7 @@
     std::u16string comment;
     std::vector<std::unique_ptr<Node>> children;
 
-    Node(NodeType type);
+    explicit Node(NodeType type);
     void addChild(std::unique_ptr<Node> child);
     virtual std::unique_ptr<Node> clone() const = 0;
     virtual void accept(Visitor* visitor) = 0;
@@ -65,7 +65,7 @@
  */
 template <typename Derived>
 struct BaseNode : public Node {
-    BaseNode(NodeType t);
+    explicit BaseNode(NodeType t);
     virtual void accept(Visitor* visitor) override;
 };
 
diff --git a/tools/split-select/SplitSelector.h b/tools/split-select/SplitSelector.h
index 193fda7..120354f 100644
--- a/tools/split-select/SplitSelector.h
+++ b/tools/split-select/SplitSelector.h
@@ -29,7 +29,7 @@
 class SplitSelector {
 public:
     SplitSelector();
-    SplitSelector(const android::Vector<SplitDescription>& splits);
+    explicit SplitSelector(const android::Vector<SplitDescription>& splits);
 
     android::Vector<SplitDescription> getBestSplits(const SplitDescription& target) const;