Make HWUI's surface Reliable^TM

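Wrap the window Surface in a new ReliableSurface (an ANativeWindow
that forwards to the real Surface) so that dequeueBuffer failures are
absorbed inside HWUI instead of reaching EGL, where they show up as
EGL_BAD_ALLOC. CanvasContext now reserves the next buffer up front
and skips the frame if that fails; if a later dequeue still fails, a
1x1 scratch buffer is handed out in its place.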

Bug: way too many to list
Test: none - there probably should be, though
Change-Id: I8e7fc3e584f90c01e0fd932497604a1d93710ba6
diff --git a/libs/hwui/renderthread/CanvasContext.cpp b/libs/hwui/renderthread/CanvasContext.cpp
index f1a522e..182233f 100644
--- a/libs/hwui/renderthread/CanvasContext.cpp
+++ b/libs/hwui/renderthread/CanvasContext.cpp
@@ -142,7 +142,12 @@
 void CanvasContext::setSurface(sp<Surface>&& surface) {
     ATRACE_CALL();
 
-    mNativeSurface = std::move(surface);
+    if (surface) {
+        mNativeSurface = new ReliableSurface{std::move(surface)};
+        mNativeSurface->setDequeueTimeout(500_ms);
+    } else {
+        mNativeSurface = nullptr;
+    }
 
     ColorMode colorMode = mWideColorGamut ? ColorMode::WideColorGamut : ColorMode::SRGB;
     bool hasSurface = mRenderPipeline->setSurface(mNativeSurface.get(), mSwapBehavior, colorMode);
@@ -285,6 +290,7 @@
 
     info.damageAccumulator = &mDamageAccumulator;
     info.layerUpdateQueue = &mLayerUpdateQueue;
+    info.out.canDrawThisFrame = true;
 
     mAnimationContext->startFrame(info.mode);
     mRenderPipeline->onPrepareTree();
@@ -304,7 +310,7 @@
 
     mIsDirty = true;
 
-    if (CC_UNLIKELY(!mNativeSurface.get())) {
+    if (CC_UNLIKELY(!hasSurface())) {
         mCurrentFrameInfo->addFlag(FrameInfoFlags::SkippedFrame);
         info.out.canDrawThisFrame = false;
         return;
@@ -323,27 +329,6 @@
             // the deadline for RT animations
             info.out.canDrawThisFrame = false;
         }
-        /* This logic exists to try and recover from a display latch miss, which essentially
-         * results in the bufferqueue being double-buffered instead of triple-buffered.
-         * SurfaceFlinger itself now tries to handle & recover from this situation, so this
-         * logic should no longer be necessary. As it's occasionally triggering when
-         * undesired disable it.
-         * TODO: Remove this entirely if the results are solid.
-        else if (vsyncDelta >= mRenderThread.timeLord().frameIntervalNanos() * 3 ||
-                   (latestVsync - mLastDropVsync) < 500_ms) {
-            // It's been several frame intervals, assume the buffer queue is fine
-            // or the last drop was too recent
-            info.out.canDrawThisFrame = true;
-        } else {
-            info.out.canDrawThisFrame = !isSwapChainStuffed();
-            if (!info.out.canDrawThisFrame) {
-                // dropping frame
-                mLastDropVsync = mRenderThread.timeLord().latestVsync();
-            }
-        }
-        */
-    } else {
-        info.out.canDrawThisFrame = true;
     }
 
     // TODO: Do we need to abort out if the backdrop is added but not ready? Should that even
@@ -354,6 +339,19 @@
 
     if (!info.out.canDrawThisFrame) {
         mCurrentFrameInfo->addFlag(FrameInfoFlags::SkippedFrame);
+        return;
+    }
+
+    int err = mNativeSurface->reserveNext();
+    if (err != OK) {
+        mCurrentFrameInfo->addFlag(FrameInfoFlags::SkippedFrame);
+        info.out.canDrawThisFrame = false;
+        ALOGW("reserveNext failed, error = %d", err);
+        if (err != TIMED_OUT) {
+            // A timed out surface can still recover, but assume others are permanently dead.
+            setSurface(nullptr);
+        }
+        return;
     }
 
     bool postedFrameCallback = false;
diff --git a/libs/hwui/renderthread/CanvasContext.h b/libs/hwui/renderthread/CanvasContext.h
index 70be4a6..9e7abf4 100644
--- a/libs/hwui/renderthread/CanvasContext.h
+++ b/libs/hwui/renderthread/CanvasContext.h
@@ -25,6 +25,7 @@
 #include "IRenderPipeline.h"
 #include "LayerUpdateQueue.h"
 #include "RenderNode.h"
+#include "ReliableSurface.h"
 #include "renderthread/RenderTask.h"
 #include "renderthread/RenderThread.h"
 #include "thread/Task.h"
@@ -219,7 +220,7 @@
     EGLint mLastFrameHeight = 0;
 
     RenderThread& mRenderThread;
-    sp<Surface> mNativeSurface;
+    sp<ReliableSurface> mNativeSurface;
     // stopped indicates the CanvasContext will reject actual redraw operations,
     // and defer repaint until it is un-stopped
     bool mStopped = false;
diff --git a/libs/hwui/renderthread/EglManager.cpp b/libs/hwui/renderthread/EglManager.cpp
index 65ced6a..8230dfd 100644
--- a/libs/hwui/renderthread/EglManager.cpp
+++ b/libs/hwui/renderthread/EglManager.cpp
@@ -31,6 +31,8 @@
 
 #include <string>
 #include <vector>
+#include <system/window.h>
+#include <gui/Surface.h>
 
 #define GLES_VERSION 2
 
@@ -106,7 +108,7 @@
     LOG_ALWAYS_FATAL_IF(eglInitialize(mEglDisplay, &major, &minor) == EGL_FALSE,
                         "Failed to initialize display %p! err=%s", mEglDisplay, eglErrorString());
 
-    ALOGI("Initialized EGL, version %d.%d", (int)major, (int)minor);
+    ALOGV("Initialized EGL, version %d.%d", (int)major, (int)minor);
 
     initExtensions();
 
diff --git a/libs/hwui/renderthread/IRenderPipeline.h b/libs/hwui/renderthread/IRenderPipeline.h
index 4972554..42e17b273 100644
--- a/libs/hwui/renderthread/IRenderPipeline.h
+++ b/libs/hwui/renderthread/IRenderPipeline.h
@@ -28,9 +28,9 @@
 
 class GrContext;
 
-namespace android {
+struct ANativeWindow;
 
-class Surface;
+namespace android {
 
 namespace uirenderer {
 
@@ -67,7 +67,7 @@
     virtual bool swapBuffers(const Frame& frame, bool drew, const SkRect& screenDirty,
                              FrameInfo* currentFrameInfo, bool* requireSwap) = 0;
     virtual DeferredLayerUpdater* createTextureLayer() = 0;
-    virtual bool setSurface(Surface* window, SwapBehavior swapBehavior, ColorMode colorMode) = 0;
+    virtual bool setSurface(ANativeWindow* window, SwapBehavior swapBehavior, ColorMode colorMode) = 0;
     virtual void onStop() = 0;
     virtual bool isSurfaceReady() = 0;
     virtual bool isContextReady() = 0;
diff --git a/libs/hwui/renderthread/ReliableSurface.cpp b/libs/hwui/renderthread/ReliableSurface.cpp
new file mode 100644
index 0000000..0ab4cd2
--- /dev/null
+++ b/libs/hwui/renderthread/ReliableSurface.cpp
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReliableSurface.h"
+
+#include <private/android/AHardwareBufferHelpers.h>
+
+#include <unistd.h>
+
+namespace android::uirenderer::renderthread {
+
+// TODO: Make surface less protected
+// This exists because perform is a varargs, and ANativeWindow has no va_list perform.
+// So wrapping/chaining that is hard. Telling the compiler to ignore protected is easy, so we do
+// that instead
+//
+// SurfaceExposer is never instantiated; its using-declarations only re-export the listed
+// Surface members so that callProtected() can name them.
+struct SurfaceExposer : Surface {
+    // Make warnings happy
+    SurfaceExposer() = delete;
+
+    using Surface::setBufferCount;
+    using Surface::setSwapInterval;
+    using Surface::dequeueBuffer;
+    using Surface::queueBuffer;
+    using Surface::cancelBuffer;
+    using Surface::lockBuffer_DEPRECATED;
+    using Surface::perform;
+};
+
+// Invokes Surface::func(...) on `surface` (anything that dereferences to a Surface) through a
+// pointer-to-member named via SurfaceExposer.
+#define callProtected(surface, func, ...) ((*surface).*&SurfaceExposer::func)(__VA_ARGS__)
+
+/**
+ * Wraps |surface| and points the ANativeWindow function pointers at the hook_* statics below,
+ * so that users of this object's ANativeWindow interface are routed through ReliableSurface.
+ * Aborts if |surface| is null.
+ */
+ReliableSurface::ReliableSurface(sp<Surface>&& surface) : mSurface(std::move(surface)) {
+    LOG_ALWAYS_FATAL_IF(!mSurface, "Error, unable to wrap a nullptr");
+
+    ANativeWindow::setSwapInterval = hook_setSwapInterval;
+    ANativeWindow::dequeueBuffer = hook_dequeueBuffer;
+    ANativeWindow::cancelBuffer = hook_cancelBuffer;
+    ANativeWindow::queueBuffer = hook_queueBuffer;
+    ANativeWindow::query = hook_query;
+    ANativeWindow::perform = hook_perform;
+
+    ANativeWindow::dequeueBuffer_DEPRECATED = hook_dequeueBuffer_DEPRECATED;
+    ANativeWindow::cancelBuffer_DEPRECATED = hook_cancelBuffer_DEPRECATED;
+    ANativeWindow::lockBuffer_DEPRECATED = hook_lockBuffer_DEPRECATED;
+    ANativeWindow::queueBuffer_DEPRECATED = hook_queueBuffer_DEPRECATED;
+}
+
+/**
+ * Remembers the usage / format last set through ANativeWindow::perform so that
+ * acquireFallbackBuffer() can describe a matching scratch buffer. Other operations are ignored.
+ * |args| must be positioned at the first operation-specific argument.
+ */
+void ReliableSurface::perform(int operation, va_list args) {
+    std::lock_guard _lock{mMutex};
+
+    switch (operation) {
+        case NATIVE_WINDOW_SET_USAGE:
+            mUsage = va_arg(args, uint32_t);
+            break;
+        case NATIVE_WINDOW_SET_USAGE64:
+            mUsage = va_arg(args, uint64_t);
+            break;
+        case NATIVE_WINDOW_SET_BUFFERS_GEOMETRY:
+            /* width */ va_arg(args, uint32_t);
+            /* height */ va_arg(args, uint32_t);
+            mFormat = va_arg(args, PixelFormat);
+            break;
+        case NATIVE_WINDOW_SET_BUFFERS_FORMAT:
+            mFormat = va_arg(args, PixelFormat);
+            break;
+    }
+}
+
+/**
+ * Dequeues the next buffer from the wrapped Surface ahead of time and parks it, together with
+ * its fence, until the next dequeueBuffer() call.
+ *
+ * @return OK if a buffer is reserved (including when one already was), UNKNOWN_ERROR once
+ *         acquireFallbackBuffer() has flagged the error state, otherwise the dequeue result.
+ */
+int ReliableSurface::reserveNext() {
+    {
+        std::lock_guard _lock{mMutex};
+        if (mReservedBuffer) {
+            ALOGW("reserveNext called but there was already a buffer reserved?");
+            return OK;
+        }
+        if (mInErrorState) {
+            return UNKNOWN_ERROR;
+        }
+    }
+
+    // Dequeue without holding mMutex so the lock is not held across the call into the Surface.
+    int fenceFd = -1;
+    ANativeWindowBuffer* buffer = nullptr;
+    int result = callProtected(mSurface, dequeueBuffer, &buffer, &fenceFd);
+
+    {
+        std::lock_guard _lock{mMutex};
+        LOG_ALWAYS_FATAL_IF(mReservedBuffer, "race condition in reserveNext");
+        mReservedBuffer = buffer;
+        mReservedFenceFd.reset(fenceFd);
+        if (result != OK) {
+            ALOGW("reserveNext failed, error %d", result);
+        }
+    }
+
+    return result;
+}
+
+/**
+ * Drops the reservation made by reserveNext(), if any.
+ *
+ * A buffer that is still reserved at this point was dequeued from the wrapped Surface but never
+ * handed to a caller, so it is cancelled back to the Surface (with its fence) rather than
+ * being forgotten while still dequeued.
+ */
+void ReliableSurface::clearReservedBuffer() {
+    ANativeWindowBuffer* unusedBuffer = nullptr;
+    int unusedFenceFd = -1;
+    {
+        std::lock_guard _lock{mMutex};
+        if (mReservedBuffer) {
+            ALOGW("Reserved buffer %p was never used", mReservedBuffer);
+            unusedBuffer = mReservedBuffer;
+            unusedFenceFd = mReservedFenceFd.release();
+        }
+        mReservedBuffer = nullptr;
+        mReservedFenceFd.reset();
+    }
+    if (unusedBuffer) {
+        // Done outside the lock; the fence fd is handed over to the Surface with the buffer.
+        callProtected(mSurface, cancelBuffer, unusedBuffer, unusedFenceFd);
+    }
+}
+
+/**
+ * ANativeWindow cancelBuffer implementation. The fallback scratch buffer did not come from the
+ * wrapped Surface, so cancelling it only closes |fenceFd|; any other buffer is forwarded.
+ */
+int ReliableSurface::cancelBuffer(ANativeWindowBuffer* buffer, int fenceFd) {
+    clearReservedBuffer();
+    if (isFallbackBuffer(buffer)) {
+        // -1 means "no fence"; 0 is a valid descriptor and has to be closed as well.
+        if (fenceFd >= 0) {
+            close(fenceFd);
+        }
+        return OK;
+    }
+    return callProtected(mSurface, cancelBuffer, buffer, fenceFd);
+}
+
+/**
+ * ANativeWindow dequeueBuffer implementation.
+ *
+ * Hands out the buffer parked by reserveNext() when there is one; otherwise dequeues from the
+ * wrapped Surface. If that fails, the scratch buffer from acquireFallbackBuffer() is returned
+ * (with no fence) so the caller still gets a buffer.
+ *
+ * @return OK, or INVALID_OPERATION when the Surface failed and no scratch buffer is available.
+ */
+int ReliableSurface::dequeueBuffer(ANativeWindowBuffer** buffer, int* fenceFd) {
+    {
+        std::lock_guard _lock{mMutex};
+        if (mReservedBuffer) {
+            *buffer = mReservedBuffer;
+            *fenceFd = mReservedFenceFd.release();
+            mReservedBuffer = nullptr;
+            return OK;
+        }
+    }
+
+    int result = callProtected(mSurface, dequeueBuffer, buffer, fenceFd);
+    if (result != OK) {
+        ALOGW("dequeueBuffer failed, error = %d; switching to fallback", result);
+        *buffer = acquireFallbackBuffer();
+        *fenceFd = -1;
+        return *buffer ? OK : INVALID_OPERATION;
+    }
+    return OK;
+}
+
+/**
+ * ANativeWindow queueBuffer implementation. The fallback scratch buffer is not forwarded to the
+ * wrapped Surface: queuing it only closes |fenceFd|. Any other buffer is forwarded.
+ */
+int ReliableSurface::queueBuffer(ANativeWindowBuffer* buffer, int fenceFd) {
+    clearReservedBuffer();
+
+    if (isFallbackBuffer(buffer)) {
+        // -1 means "no fence"; 0 is a valid descriptor and has to be closed as well.
+        if (fenceFd >= 0) {
+            close(fenceFd);
+        }
+        return OK;
+    }
+
+    return callProtected(mSurface, queueBuffer, buffer, fenceFd);
+}
+
+/** Returns true if |windowBuffer| is the scratch buffer created by acquireFallbackBuffer(). */
+bool ReliableSurface::isFallbackBuffer(const ANativeWindowBuffer* windowBuffer) const {
+    if (!mScratchBuffer || !windowBuffer) {
+        return false;
+    }
+    ANativeWindowBuffer* scratchBuffer =
+            AHardwareBuffer_to_ANativeWindowBuffer(mScratchBuffer.get());
+    return windowBuffer == scratchBuffer;
+}
+
+/**
+ * Marks the surface as being in the error state and returns the scratch buffer, allocating it
+ * on first use as a 1x1, single-layer buffer with the usage and format tracked by perform().
+ *
+ * @return the scratch buffer, or nullptr if it could not be allocated.
+ */
+ANativeWindowBuffer* ReliableSurface::acquireFallbackBuffer() {
+    std::lock_guard _lock{mMutex};
+    mInErrorState = true;
+
+    if (mScratchBuffer) {
+        return AHardwareBuffer_to_ANativeWindowBuffer(mScratchBuffer.get());
+    }
+
+    AHardwareBuffer_Desc desc = {};  // zero the fields that are not set explicitly below
+    desc.usage = mUsage;
+    desc.format = mFormat;
+    desc.width = 1;
+    desc.height = 1;
+    desc.layers = 1;
+    desc.rfu0 = 0;
+    desc.rfu1 = 0;
+    AHardwareBuffer* newBuffer = nullptr;
+    int err = AHardwareBuffer_allocate(&desc, &newBuffer);
+    if (err) {
+        // Allocate failed, that sucks
+        ALOGW("Failed to allocate scratch buffer, error=%d", err);
+        return nullptr;
+    }
+    mScratchBuffer.reset(newBuffer);
+    return AHardwareBuffer_to_ANativeWindowBuffer(newBuffer);
+}
+
+/** Returns the Surface wrapped by the ReliableSurface that |window| belongs to. */
+Surface* ReliableSurface::getWrapped(const ANativeWindow* window) {
+    return getSelf(window)->mSurface.get();
+}
+
+// ANativeWindow hooks. Each one recovers the ReliableSurface (or the wrapped Surface) from
+// |window| and forwards to it.
+
+int ReliableSurface::hook_setSwapInterval(ANativeWindow* window, int interval) {
+    return callProtected(getWrapped(window), setSwapInterval, interval);
+}
+
+int ReliableSurface::hook_dequeueBuffer(ANativeWindow* window, ANativeWindowBuffer** buffer,
+                                        int* fenceFd) {
+    return getSelf(window)->dequeueBuffer(buffer, fenceFd);
+}
+
+int ReliableSurface::hook_cancelBuffer(ANativeWindow* window, ANativeWindowBuffer* buffer,
+                                       int fenceFd) {
+    return getSelf(window)->cancelBuffer(buffer, fenceFd);
+}
+
+int ReliableSurface::hook_queueBuffer(ANativeWindow* window, ANativeWindowBuffer* buffer,
+                                      int fenceFd) {
+    return getSelf(window)->queueBuffer(buffer, fenceFd);
+}
+
+/**
+ * Fence-less dequeue: dequeues through the regular hook, then waits on the buffer's fence via
+ * Fence::waitForever. If the wait fails the buffer is cancelled and the wait error is returned.
+ */
+int ReliableSurface::hook_dequeueBuffer_DEPRECATED(ANativeWindow* window,
+                                                   ANativeWindowBuffer** buffer) {
+    ANativeWindowBuffer* buf;
+    int fenceFd = -1;
+    int result = window->dequeueBuffer(window, &buf, &fenceFd);
+    if (result != OK) {
+        return result;
+    }
+    sp<Fence> fence(new Fence(fenceFd));
+    int waitResult = fence->waitForever("dequeueBuffer_DEPRECATED");
+    if (waitResult != OK) {
+        ALOGE("dequeueBuffer_DEPRECATED: Fence::wait returned an error: %d", waitResult);
+        window->cancelBuffer(window, buf, -1);
+        return waitResult;
+    }
+    *buffer = buf;
+    return result;
+}
+
+/** Cancels |buffer| through the regular hook with no fence. */
+int ReliableSurface::hook_cancelBuffer_DEPRECATED(ANativeWindow* window,
+                                                  ANativeWindowBuffer* buffer) {
+    return window->cancelBuffer(window, buffer, -1);
+}
+
+int ReliableSurface::hook_lockBuffer_DEPRECATED(ANativeWindow* window,
+                                                ANativeWindowBuffer* buffer) {
+    // This method is a no-op in Surface as well
+    return OK;
+}
+
+/** Queues |buffer| through the regular hook with no fence. */
+int ReliableSurface::hook_queueBuffer_DEPRECATED(ANativeWindow* window,
+                                                 ANativeWindowBuffer* buffer) {
+    return window->queueBuffer(window, buffer, -1);
+}
+
+int ReliableSurface::hook_query(const ANativeWindow* window, int what, int* value) {
+    return getWrapped(window)->query(what, value);
+}
+
+/**
+ * Forwards the operation to the wrapped Surface, then replays the operations that perform()
+ * tracks (usage, format, geometry) into this object so the fallback buffer description stays
+ * in sync. Returns the wrapped Surface's result.
+ */
+int ReliableSurface::hook_perform(ANativeWindow* window, int operation, ...) {
+    va_list args;
+    va_start(args, operation);
+    int result = callProtected(getWrapped(window), perform, operation, args);
+    va_end(args);
+
+    switch (operation) {
+        case NATIVE_WINDOW_SET_BUFFERS_FORMAT:
+        case NATIVE_WINDOW_SET_BUFFERS_GEOMETRY:
+        case NATIVE_WINDOW_SET_USAGE:
+        case NATIVE_WINDOW_SET_USAGE64:
+            // Restart the va_list: the first pass was handed to the wrapped Surface.
+            va_start(args, operation);
+            getSelf(window)->perform(operation, args);
+            va_end(args);
+            break;
+        default:
+            break;
+    }
+
+    return result;
+}
+
+}  // namespace android::uirenderer::renderthread
\ No newline at end of file
diff --git a/libs/hwui/renderthread/ReliableSurface.h b/libs/hwui/renderthread/ReliableSurface.h
new file mode 100644
index 0000000..9ae53a9
--- /dev/null
+++ b/libs/hwui/renderthread/ReliableSurface.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <gui/Surface.h>
+#include <utils/Macros.h>
+#include <utils/StrongPointer.h>
+
+#include <cstdarg>
+#include <memory>
+#include <mutex>
+
+namespace android::uirenderer::renderthread {
+
+/**
+ * An ANativeWindow that wraps a Surface and forwards to it.
+ *
+ * Two things differ from using the Surface directly:
+ *  - reserveNext() dequeues a buffer ahead of time; the next dequeueBuffer through the
+ *    ANativeWindow interface returns that buffer.
+ *  - if dequeueBuffer on the wrapped Surface fails, a 1x1 scratch buffer is handed out instead
+ *    and reserveNext() reports an error from then on.
+ */
+class ReliableSurface : public ANativeObjectBase<ANativeWindow, ReliableSurface, RefBase> {
+    PREVENT_COPY_AND_ASSIGN(ReliableSurface);
+
+public:
+    // |surface| must not be null.
+    explicit ReliableSurface(sp<Surface>&& surface);
+
+    // The members below that are defined inline forward directly to the wrapped Surface.
+    void setDequeueTimeout(nsecs_t timeout) { mSurface->setDequeueTimeout(timeout); }
+
+    // Dequeues and holds the buffer for the next dequeueBuffer call. Returns OK on success.
+    int reserveNext();
+
+    void allocateBuffers() { mSurface->allocateBuffers(); }
+
+    int query(int what, int* value) const { return mSurface->query(what, value); }
+
+    nsecs_t getLastDequeueStartTime() const { return mSurface->getLastDequeueStartTime(); }
+
+    uint64_t getNextFrameNumber() const { return mSurface->getNextFrameNumber(); }
+
+private:
+    const sp<Surface> mSurface;
+
+    // Held while the fields below are updated (see the std::lock_guard uses in the .cpp).
+    mutable std::mutex mMutex;
+
+    // Usage and format for the scratch buffer; updated from ANativeWindow::perform calls.
+    uint64_t mUsage = AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER;
+    PixelFormat mFormat = PIXEL_FORMAT_RGBA_8888;
+    // Lazily allocated buffer handed out when the wrapped Surface fails to dequeue.
+    std::unique_ptr<AHardwareBuffer, void (*)(AHardwareBuffer*)> mScratchBuffer{
+            nullptr, AHardwareBuffer_release};
+    // Set once the scratch buffer has been requested; makes reserveNext() fail afterwards.
+    bool mInErrorState = false;
+    // Buffer (and its fence) dequeued by reserveNext() and not yet handed out.
+    ANativeWindowBuffer* mReservedBuffer = nullptr;
+    base::unique_fd mReservedFenceFd;
+
+    bool isFallbackBuffer(const ANativeWindowBuffer* windowBuffer) const;
+    ANativeWindowBuffer* acquireFallbackBuffer();
+    void clearReservedBuffer();
+
+    void perform(int operation, va_list args);
+    int cancelBuffer(ANativeWindowBuffer* buffer, int fenceFd);
+    int dequeueBuffer(ANativeWindowBuffer** buffer, int* fenceFd);
+    int queueBuffer(ANativeWindowBuffer* buffer, int fenceFd);
+
+    static Surface* getWrapped(const ANativeWindow*);
+
+    // ANativeWindow hooks
+    static int hook_cancelBuffer(ANativeWindow* window, ANativeWindowBuffer* buffer, int fenceFd);
+    static int hook_dequeueBuffer(ANativeWindow* window, ANativeWindowBuffer** buffer,
+                                  int* fenceFd);
+    static int hook_queueBuffer(ANativeWindow* window, ANativeWindowBuffer* buffer, int fenceFd);
+
+    static int hook_perform(ANativeWindow* window, int operation, ...);
+    static int hook_query(const ANativeWindow* window, int what, int* value);
+    static int hook_setSwapInterval(ANativeWindow* window, int interval);
+
+    static int hook_cancelBuffer_DEPRECATED(ANativeWindow* window, ANativeWindowBuffer* buffer);
+    static int hook_dequeueBuffer_DEPRECATED(ANativeWindow* window, ANativeWindowBuffer** buffer);
+    static int hook_lockBuffer_DEPRECATED(ANativeWindow* window, ANativeWindowBuffer* buffer);
+    static int hook_queueBuffer_DEPRECATED(ANativeWindow* window, ANativeWindowBuffer* buffer);
+};
+
+}  // namespace android::uirenderer::renderthread
\ No newline at end of file