Reduce number of Fence syscalls made.

This patch saves 6 or more Fence syscalls per frame.

* Timelines are updated just before adding a new fence, rather than
  just after, since the newly added fence is unlikely to have signaled
  yet and polling it would waste a syscall.
* Layer::latch uses a FenceTime now, so the signal time is
  automatically shared with other owners of the FenceTime.
* DispSync uses FenceTime now, only using cached values of
  the signal time that have been populated by a Timeline.

Test: SurfaceFlinger boots and dumps still work.
Change-Id: Ie0cfc1af2aca143dd8d5f08f08dbe1e597376f2f
diff --git a/services/surfaceflinger/DispSync.cpp b/services/surfaceflinger/DispSync.cpp
index bd9b8aa..ac8aa04 100644
--- a/services/surfaceflinger/DispSync.cpp
+++ b/services/surfaceflinger/DispSync.cpp
@@ -30,7 +30,7 @@
 #include <utils/Trace.h>
 #include <utils/Vector.h>
 
-#include <ui/Fence.h>
+#include <ui/FenceTime.h>
 
 #include "DispSync.h"
 #include "SurfaceFlinger.h"
@@ -419,25 +419,13 @@
     resetErrorLocked();
 }
 
-bool DispSync::addPresentFence(const sp<Fence>& fence) {
+bool DispSync::addPresentFence(const std::shared_ptr<FenceTime>& fenceTime) {
     Mutex::Autolock lock(mMutex);
 
-    mPresentFences[mPresentSampleOffset] = fence;
-    mPresentTimes[mPresentSampleOffset] = 0;
+    mPresentFences[mPresentSampleOffset] = fenceTime;
     mPresentSampleOffset = (mPresentSampleOffset + 1) % NUM_PRESENT_SAMPLES;
     mNumResyncSamplesSincePresent = 0;
 
-    for (size_t i = 0; i < NUM_PRESENT_SAMPLES; i++) {
-        const sp<Fence>& f(mPresentFences[i]);
-        if (f != NULL) {
-            nsecs_t t = f->getSignalTime();
-            if (t < INT64_MAX) {
-                mPresentFences[i].clear();
-                mPresentTimes[i] = t + mPresentTimeOffset;
-            }
-        }
-    }
-
     updateErrorLocked();
 
     return !mModelUpdated || mError > kErrorThreshold;
@@ -602,21 +590,39 @@
     nsecs_t sqErrSum = 0;
 
     for (size_t i = 0; i < NUM_PRESENT_SAMPLES; i++) {
-        nsecs_t sample = mPresentTimes[i] - mReferenceTime;
-        if (sample > mPhase) {
-            nsecs_t sampleErr = (sample - mPhase) % period;
-            if (sampleErr > period / 2) {
-                sampleErr -= period;
-            }
-            sqErrSum += sampleErr * sampleErr;
-            numErrSamples++;
+        // Only check for the cached value of signal time to avoid unnecessary
+        // syscalls. It is the responsibility of the DispSync owner to
+        // call getSignalTime() periodically so the cache is updated when the
+        // fence signals.
+        nsecs_t time = mPresentFences[i]->getCachedSignalTime();
+        if (time == Fence::SIGNAL_TIME_PENDING ||
+                time == Fence::SIGNAL_TIME_INVALID) {
+            continue;
         }
+
+        nsecs_t sample = time - mReferenceTime;
+        if (sample <= mPhase) {
+            continue;
+        }
+
+        nsecs_t sampleErr = (sample - mPhase) % period;
+        if (sampleErr > period / 2) {
+            sampleErr -= period;
+        }
+        sqErrSum += sampleErr * sampleErr;
+        numErrSamples++;
     }
 
     if (numErrSamples > 0) {
         mError = sqErrSum / numErrSamples;
+        mZeroErrSamplesCount = 0;
     } else {
         mError = 0;
+        // Use mod ACCEPTABLE_ZERO_ERR_SAMPLES_COUNT to avoid log spam.
+        mZeroErrSamplesCount++;
+        ALOGE_IF(
+                (mZeroErrSamplesCount % ACCEPTABLE_ZERO_ERR_SAMPLES_COUNT) == 0,
+                "No present times for model error.");
     }
 
     if (kTraceDetailedInfo) {
@@ -627,9 +633,9 @@
 void DispSync::resetErrorLocked() {
     mPresentSampleOffset = 0;
     mError = 0;
+    mZeroErrSamplesCount = 0;
     for (size_t i = 0; i < NUM_PRESENT_SAMPLES; i++) {
-        mPresentFences[i].clear();
-        mPresentTimes[i] = 0;
+        mPresentFences[i] = FenceTime::NO_FENCE;
     }
 }
 
@@ -668,19 +674,19 @@
         previous = sampleTime;
     }
 
-    result.appendFormat("mPresentFences / mPresentTimes [%d]:\n",
+    result.appendFormat("mPresentFences [%d]:\n",
             NUM_PRESENT_SAMPLES);
     nsecs_t now = systemTime(SYSTEM_TIME_MONOTONIC);
-    previous = 0;
+    previous = Fence::SIGNAL_TIME_INVALID;
     for (size_t i = 0; i < NUM_PRESENT_SAMPLES; i++) {
         size_t idx = (i + mPresentSampleOffset) % NUM_PRESENT_SAMPLES;
-        bool signaled = mPresentFences[idx] == NULL;
-        nsecs_t presentTime = mPresentTimes[idx];
-        if (!signaled) {
+        nsecs_t presentTime = mPresentFences[idx]->getSignalTime();
+        if (presentTime == Fence::SIGNAL_TIME_PENDING) {
             result.appendFormat("  [unsignaled fence]\n");
-        } else if (presentTime == 0) {
-            result.appendFormat("  0\n");
-        } else if (previous == 0) {
+        } else if(presentTime == Fence::SIGNAL_TIME_INVALID) {
+            result.appendFormat("  [invalid fence]\n");
+        } else if (previous == Fence::SIGNAL_TIME_PENDING ||
+                previous == Fence::SIGNAL_TIME_INVALID) {
             result.appendFormat("  %" PRId64 "  (%.3f ms ago)\n", presentTime,
                     (now - presentTime) / 1000000.0);
         } else {
diff --git a/services/surfaceflinger/DispSync.h b/services/surfaceflinger/DispSync.h
index 82ae795..c9f3b04 100644
--- a/services/surfaceflinger/DispSync.h
+++ b/services/surfaceflinger/DispSync.h
@@ -23,10 +23,14 @@
 #include <utils/Timers.h>
 #include <utils/RefBase.h>
 
+#include <ui/FenceTime.h>
+
+#include <memory>
+
 namespace android {
 
 class String8;
-class Fence;
+class FenceTime;
 class DispSyncThread;
 
 // DispSync maintains a model of the periodic hardware-based vsync events of a
@@ -67,7 +71,7 @@
     //
     // This method should be called with the retire fence from each HWComposer
     // set call that affects the display.
-    bool addPresentFence(const sp<Fence>& fence);
+    bool addPresentFence(const std::shared_ptr<FenceTime>& fenceTime);
 
     // The beginResync, addResyncSample, and endResync methods are used to re-
     // synchronize the DispSync's model to the hardware vsync events.  The re-
@@ -129,6 +133,7 @@
     enum { MIN_RESYNC_SAMPLES_FOR_UPDATE = 6 };
     enum { NUM_PRESENT_SAMPLES = 8 };
     enum { MAX_RESYNC_SAMPLES_WITHOUT_PRESENT = 4 };
+    enum { ACCEPTABLE_ZERO_ERR_SAMPLES_COUNT = 64 };
 
     const char* const mName;
 
@@ -146,9 +151,14 @@
 
     // mError is the computed model error.  It is based on the difference
     // between the estimated vsync event times and those observed in the
-    // mPresentTimes array.
+    // mPresentFences array.
     nsecs_t mError;
 
+    // mZeroErrSamplesCount keeps track of how many times in a row there were
+    // zero timestamps available in the mPresentFences array.
+    // Used to sanity check that we are able to calculate the model error.
+    size_t mZeroErrSamplesCount;
+
     // Whether we have updated the vsync event model since the last resync.
     bool mModelUpdated;
 
@@ -162,8 +172,8 @@
 
     // These member variables store information about the present fences used
     // to validate the currently computed model.
-    sp<Fence> mPresentFences[NUM_PRESENT_SAMPLES];
-    nsecs_t mPresentTimes[NUM_PRESENT_SAMPLES];
+    std::shared_ptr<FenceTime>
+            mPresentFences[NUM_PRESENT_SAMPLES] {FenceTime::NO_FENCE};
     size_t mPresentSampleOffset;
 
     int mRefreshSkipCount;
diff --git a/services/surfaceflinger/Layer.cpp b/services/surfaceflinger/Layer.cpp
index 1b9a230..3e9b10d 100755
--- a/services/surfaceflinger/Layer.cpp
+++ b/services/surfaceflinger/Layer.cpp
@@ -1308,7 +1308,8 @@
         // able to be latched. To avoid this, grab this buffer anyway.
         return true;
     }
-    return mQueueItems[0].mFence->getSignalTime() != INT64_MAX;
+    return mQueueItems[0].mFenceTime->getSignalTime() !=
+            Fence::SIGNAL_TIME_PENDING;
 #else
     return true;
 #endif
@@ -2011,9 +2012,6 @@
 bool Layer::onPostComposition(const std::shared_ptr<FenceTime>& glDoneFence,
         const std::shared_ptr<FenceTime>& presentFence,
         const CompositorTiming& compositorTiming) {
-    mAcquireTimeline.updateSignalTimes();
-    mReleaseTimeline.updateSignalTimes();
-
     // mFrameLatencyNeeded is true when a new frame was latched for the
     // composition.
     if (!mFrameLatencyNeeded)
@@ -2064,6 +2062,7 @@
 
     auto releaseFenceTime = std::make_shared<FenceTime>(
             mSurfaceFlingerConsumer->getPrevFinalReleaseFence());
+    mReleaseTimeline.updateSignalTimes();
     mReleaseTimeline.push(releaseFenceTime);
 
     Mutex::Autolock lock(mFrameEventHistoryMutex);
@@ -2254,6 +2253,7 @@
 #ifndef USE_HWC2
         auto releaseFenceTime = std::make_shared<FenceTime>(
                 mSurfaceFlingerConsumer->getPrevFinalReleaseFence());
+        mReleaseTimeline.updateSignalTimes();
         mReleaseTimeline.push(releaseFenceTime);
         if (mPreviousFrameNumber != 0) {
             mFrameEventHistory.addRelease(mPreviousFrameNumber,
@@ -2509,6 +2509,12 @@
         FrameEventHistoryDelta *outDelta) {
     Mutex::Autolock lock(mFrameEventHistoryMutex);
     if (newTimestamps) {
+        // If there are any unsignaled fences in the acquire timeline at this
+        // point, the previously queued frame hasn't been latched yet. Go ahead
+        // and try to get the signal time here so the syscall is taken out of
+        // the main thread's critical path.
+        mAcquireTimeline.updateSignalTimes();
+        // Push the new fence after updating since it's likely still pending.
         mAcquireTimeline.push(newTimestamps->acquireFence);
         mFrameEventHistory.addQueue(*newTimestamps);
     }
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index a6b34c2..99b39cb 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -1553,6 +1553,7 @@
     // |mStateLock| not needed as we are on the main thread
     const sp<const DisplayDevice> hw(getDefaultDisplayDeviceLocked());
 
+    mGlCompositionDoneTimeline.updateSignalTimes();
     std::shared_ptr<FenceTime> glCompositionDoneFenceTime;
     if (mHwc->hasClientComposition(HWC_DISPLAY_PRIMARY)) {
         glCompositionDoneFenceTime =
@@ -1561,12 +1562,11 @@
     } else {
         glCompositionDoneFenceTime = FenceTime::NO_FENCE;
     }
-    mGlCompositionDoneTimeline.updateSignalTimes();
 
+    mDisplayTimeline.updateSignalTimes();
     sp<Fence> presentFence = mHwc->getPresentFence(HWC_DISPLAY_PRIMARY);
     auto presentFenceTime = std::make_shared<FenceTime>(presentFence);
     mDisplayTimeline.push(presentFenceTime);
-    mDisplayTimeline.updateSignalTimes();
 
     nsecs_t vsyncPhase = mPrimaryDispSync.computeNextRefresh(0);
     nsecs_t vsyncInterval = mPrimaryDispSync.getPeriod();
@@ -1591,8 +1591,8 @@
         }
     });
 
-    if (presentFence->isValid()) {
-        if (mPrimaryDispSync.addPresentFence(presentFence)) {
+    if (presentFenceTime->isValid()) {
+        if (mPrimaryDispSync.addPresentFence(presentFenceTime)) {
             enableHardwareVsync();
         } else {
             disableHardwareVsync(false);
diff --git a/services/surfaceflinger/SurfaceFlinger_hwc1.cpp b/services/surfaceflinger/SurfaceFlinger_hwc1.cpp
index 6ea070d..7741681 100644
--- a/services/surfaceflinger/SurfaceFlinger_hwc1.cpp
+++ b/services/surfaceflinger/SurfaceFlinger_hwc1.cpp
@@ -1257,6 +1257,7 @@
     const HWComposer& hwc = getHwComposer();
     const sp<const DisplayDevice> hw(getDefaultDisplayDevice());
 
+    mGlCompositionDoneTimeline.updateSignalTimes();
     std::shared_ptr<FenceTime> glCompositionDoneFenceTime;
     if (getHwComposer().hasGlesComposition(hw->getHwcDisplayId())) {
         glCompositionDoneFenceTime =
@@ -1265,12 +1266,11 @@
     } else {
         glCompositionDoneFenceTime = FenceTime::NO_FENCE;
     }
-    mGlCompositionDoneTimeline.updateSignalTimes();
 
+    mDisplayTimeline.updateSignalTimes();
     sp<Fence> retireFence = mHwc->getDisplayFence(HWC_DISPLAY_PRIMARY);
     auto retireFenceTime = std::make_shared<FenceTime>(retireFence);
     mDisplayTimeline.push(retireFenceTime);
-    mDisplayTimeline.updateSignalTimes();
 
     nsecs_t vsyncPhase = mPrimaryDispSync.computeNextRefresh(0);
     nsecs_t vsyncInterval = mPrimaryDispSync.getPeriod();
@@ -1298,7 +1298,7 @@
     });
 
     if (retireFence->isValid()) {
-        if (mPrimaryDispSync.addPresentFence(retireFence)) {
+        if (mPrimaryDispSync.addPresentFence(retireFenceTime)) {
             enableHardwareVsync();
         } else {
             disableHardwareVsync(false);