Add tracing thread in RenderEngine.

Times GPU work done by RenderEngine by creating sync fences during
flush() and finish() calls, asynchronously waits, and writes to
systrace.

As implemented here a frame is double-reported because SurfaceFlinger
calls flush an extra time, but we should get rid of that call anyways.

Bug: 120992906
Test: systrace
Change-Id: I6e1446fdf389157bfd533a96263c8e0cd1ed8514
diff --git a/libs/renderengine/gl/GLESRenderEngine.cpp b/libs/renderengine/gl/GLESRenderEngine.cpp
index d0a0ac8..ebe514c 100644
--- a/libs/renderengine/gl/GLESRenderEngine.cpp
+++ b/libs/renderengine/gl/GLESRenderEngine.cpp
@@ -30,6 +30,7 @@
 #include <GLES2/gl2ext.h>
 #include <android-base/stringprintf.h>
 #include <cutils/compiler.h>
+#include <cutils/properties.h>
 #include <renderengine/Mesh.h>
 #include <renderengine/Texture.h>
 #include <renderengine/private/Description.h>
@@ -416,6 +417,13 @@
         mBt2020ToSrgb = mXyzToSrgb * mBt2020ToXyz;
         mBt2020ToDisplayP3 = mXyzToDisplayP3 * mBt2020ToXyz;
     }
+
+    char value[PROPERTY_VALUE_MAX];
+    property_get("debug.egl.traceGpuCompletion", value, "0");
+    if (atoi(value)) {
+        mTraceGpuCompletion = true;
+        mFlushTracer = std::make_unique<FlushTracer>(this);
+    }
 }
 
 GLESRenderEngine::~GLESRenderEngine() {
@@ -461,6 +469,12 @@
         ALOGW("failed to dup EGL native fence sync: %#x", eglGetError());
     }
 
+    // Only trace if we have a valid fence, as current usage falls back to
+    // calling finish() if the fence fd is invalid.
+    if (CC_UNLIKELY(mTraceGpuCompletion && mFlushTracer) && fenceFd.get() >= 0) {
+        mFlushTracer->queueSync(eglCreateSyncKHR(mEGLDisplay, EGL_SYNC_FENCE_KHR, nullptr));
+    }
+
     return fenceFd;
 }
 
@@ -476,8 +490,15 @@
         return false;
     }
 
-    EGLint result = eglClientWaitSyncKHR(mEGLDisplay, sync, EGL_SYNC_FLUSH_COMMANDS_BIT_KHR,
-                                         2000000000 /*2 sec*/);
+    if (CC_UNLIKELY(mTraceGpuCompletion && mFlushTracer)) {
+        mFlushTracer->queueSync(eglCreateSyncKHR(mEGLDisplay, EGL_SYNC_FENCE_KHR, nullptr));
+    }
+
+    return waitSync(sync, EGL_SYNC_FLUSH_COMMANDS_BIT_KHR);
+}
+
+bool GLESRenderEngine::waitSync(EGLSyncKHR sync, EGLint flags) {
+    EGLint result = eglClientWaitSyncKHR(mEGLDisplay, sync, flags, 2000000000 /*2 sec*/);
     EGLint error = eglGetError();
     eglDestroySyncKHR(mEGLDisplay, sync);
     if (result != EGL_CONDITION_SATISFIED_KHR) {
@@ -1200,6 +1221,61 @@
     return (isInputHdrDataSpace || isOutputHdrDataSpace) && inputTransfer != outputTransfer;
 }
 
+// FlushTracer implementation
+GLESRenderEngine::FlushTracer::FlushTracer(GLESRenderEngine* engine) : mEngine(engine) {
+    mThread = std::thread(&GLESRenderEngine::FlushTracer::loop, this);
+}
+
+GLESRenderEngine::FlushTracer::~FlushTracer() {
+    {
+        std::lock_guard<std::mutex> lock(mMutex);
+        mRunning = false;
+    }
+    mCondition.notify_all();
+    if (mThread.joinable()) {
+        mThread.join();
+    }
+}
+
+void GLESRenderEngine::FlushTracer::queueSync(EGLSyncKHR sync) {
+    std::lock_guard<std::mutex> lock(mMutex);
+    char name[64];
+    const uint64_t frameNum = mFramesQueued++;
+    snprintf(name, sizeof(name), "Queueing sync for frame: %lu",
+             static_cast<unsigned long>(frameNum));
+    ATRACE_NAME(name);
+    mQueue.push({sync, frameNum});
+    ATRACE_INT("GPU Frames Outstanding", mQueue.size());
+    mCondition.notify_one();
+}
+
+void GLESRenderEngine::FlushTracer::loop() {
+    while (mRunning) {
+        QueueEntry entry;
+        {
+            std::lock_guard<std::mutex> lock(mMutex);
+
+            mCondition.wait(mMutex,
+                            [&]() REQUIRES(mMutex) { return !mQueue.empty() && !mRunning; });
+
+            if (!mRunning) {
+                // if mRunning is false, then FlushTracer is being destroyed, so
+                // bail out now.
+                break;
+            }
+            entry = mQueue.front();
+            mQueue.pop();
+        }
+        {
+            char name[64];
+            snprintf(name, sizeof(name), "waiting for frame %lu",
+                     static_cast<unsigned long>(entry.mFrameNum));
+            ATRACE_NAME(name);
+            mEngine->waitSync(entry.mSync, 0);
+        }
+    }
+}
+
 } // namespace gl
 } // namespace renderengine
 } // namespace android