SurfaceFlinger: Reset VsyncReactor's timer thread

If any of the syscalls the timer uses returns an error, reset the timer.
This is a workaround for an issue seen where VsyncReactor is not calling
new callbacks. In addition this change adds more log prints to help
identifying the underlying issue.

Bug: 151892277
Test: simulate an error in syscall and observe timer resets
Change-Id: I6fd62f9d73f75f32a87d1483cea14f3f1b358507
diff --git a/services/surfaceflinger/Scheduler/EventThread.cpp b/services/surfaceflinger/Scheduler/EventThread.cpp
index 0d6a92e..8347650 100644
--- a/services/surfaceflinger/Scheduler/EventThread.cpp
+++ b/services/surfaceflinger/Scheduler/EventThread.cpp
@@ -429,7 +429,13 @@
                     ALOGW("Faking VSYNC due to driver stall for thread %s", mThreadName);
                     std::string debugInfo = "VsyncSource debug info:\n";
                     mVSyncSource->dump(debugInfo);
-                    ALOGW("%s", debugInfo.c_str());
+                    // Log the debug info line-by-line to avoid logcat overflow
+                    auto pos = debugInfo.find('\n');
+                    while (pos != std::string::npos) {
+                        ALOGW("%s", debugInfo.substr(0, pos).c_str());
+                        debugInfo = debugInfo.substr(pos + 1);
+                        pos = debugInfo.find('\n');
+                    }
                 }
 
                 LOG_FATAL_IF(!mVSyncState);
diff --git a/services/surfaceflinger/Scheduler/TimeKeeper.h b/services/surfaceflinger/Scheduler/TimeKeeper.h
index 38f0708..da2195c 100644
--- a/services/surfaceflinger/Scheduler/TimeKeeper.h
+++ b/services/surfaceflinger/Scheduler/TimeKeeper.h
@@ -53,6 +53,8 @@
      */
     virtual void alarmCancel() = 0;
 
+    virtual void dump(std::string& result) const = 0;
+
 protected:
     TimeKeeper(TimeKeeper const&) = delete;
     TimeKeeper& operator=(TimeKeeper const&) = delete;
diff --git a/services/surfaceflinger/Scheduler/Timer.cpp b/services/surfaceflinger/Scheduler/Timer.cpp
index 8f81c2c..7c5058e 100644
--- a/services/surfaceflinger/Scheduler/Timer.cpp
+++ b/services/surfaceflinger/Scheduler/Timer.cpp
@@ -17,6 +17,7 @@
 #undef LOG_TAG
 #define LOG_TAG "SchedulerTimer"
 #define ATRACE_TAG ATRACE_TAG_GRAPHICS
+#include <android-base/stringprintf.h>
 #include <log/log.h>
 #include <sys/epoll.h>
 #include <sys/timerfd.h>
@@ -29,28 +30,53 @@
 #include "Timer.h"
 
 namespace android::scheduler {
+using base::StringAppendF;
 
 static constexpr size_t kReadPipe = 0;
 static constexpr size_t kWritePipe = 1;
 
-Timer::Timer()
-      : mTimerFd(timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK)),
-        mEpollFd(epoll_create1(EPOLL_CLOEXEC)) {
-    if (pipe2(mPipes.data(), O_CLOEXEC | O_NONBLOCK)) {
-        ALOGE("could not create TimerDispatch mPipes");
-    };
-
-    mDispatchThread = std::thread(std::bind(&Timer::dispatch, this));
+Timer::Timer() {
+    reset();
+    mDispatchThread = std::thread([this]() { threadMain(); });
 }
 
 Timer::~Timer() {
     endDispatch();
     mDispatchThread.join();
+    cleanup();
+}
 
-    close(mPipes[kWritePipe]);
-    close(mPipes[kReadPipe]);
-    close(mEpollFd);
-    close(mTimerFd);
+void Timer::reset() {
+    cleanup();
+    mTimerFd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
+    mEpollFd = epoll_create1(EPOLL_CLOEXEC);
+    if (pipe2(mPipes.data(), O_CLOEXEC | O_NONBLOCK)) {
+        ALOGE("could not create TimerDispatch mPipes");
+        return;
+    };
+    setDebugState(DebugState::Reset);
+}
+
+void Timer::cleanup() {
+    if (mTimerFd != -1) {
+        close(mTimerFd);
+        mTimerFd = -1;
+    }
+
+    if (mEpollFd != -1) {
+        close(mEpollFd);
+        mEpollFd = -1;
+    }
+
+    if (mPipes[kReadPipe] != -1) {
+        close(mPipes[kReadPipe]);
+        mPipes[kReadPipe] = -1;
+    }
+
+    if (mPipes[kWritePipe] != -1) {
+        close(mPipes[kWritePipe]);
+        mPipes[kWritePipe] = -1;
+    }
 }
 
 void Timer::endDispatch() {
@@ -99,7 +125,14 @@
     }
 }
 
-void Timer::dispatch() {
+void Timer::threadMain() {
+    while (dispatch()) {
+        reset();
+    }
+}
+
+bool Timer::dispatch() {
+    setDebugState(DebugState::Running);
     struct sched_param param = {0};
     param.sched_priority = 2;
     if (pthread_setschedparam(pthread_self(), SCHED_FIFO, &param) != 0) {
@@ -116,7 +149,7 @@
     timerEvent.data.u32 = DispatchType::TIMER;
     if (epoll_ctl(mEpollFd, EPOLL_CTL_ADD, mTimerFd, &timerEvent) == -1) {
         ALOGE("Error adding timer fd to epoll dispatch loop");
-        return;
+        return true;
     }
 
     epoll_event terminateEvent;
@@ -124,18 +157,20 @@
     terminateEvent.data.u32 = DispatchType::TERMINATE;
     if (epoll_ctl(mEpollFd, EPOLL_CTL_ADD, mPipes[kReadPipe], &terminateEvent) == -1) {
         ALOGE("Error adding control fd to dispatch loop");
-        return;
+        return true;
     }
 
     uint64_t iteration = 0;
     char const traceNamePrefix[] = "TimerIteration #";
     static constexpr size_t maxlen = arrayLen(traceNamePrefix) + max64print;
     std::array<char, maxlen> str_buffer;
-    auto timing = true;
-    while (timing) {
+
+    while (true) {
+        setDebugState(DebugState::Waiting);
         epoll_event events[DispatchType::MAX_DISPATCH_TYPE];
         int nfds = epoll_wait(mEpollFd, events, DispatchType::MAX_DISPATCH_TYPE, -1);
 
+        setDebugState(DebugState::Running);
         if (ATRACE_ENABLED()) {
             snprintf(str_buffer.data(), str_buffer.size(), "%s%" PRIu64, traceNamePrefix,
                      iteration++);
@@ -144,29 +179,62 @@
 
         if (nfds == -1) {
             if (errno != EINTR) {
-                timing = false;
-                continue;
+                ALOGE("Error waiting on epoll: %s", strerror(errno));
+                return true;
             }
         }
 
         for (auto i = 0; i < nfds; i++) {
             if (events[i].data.u32 == DispatchType::TIMER) {
                 static uint64_t mIgnored = 0;
+                setDebugState(DebugState::Reading);
                 read(mTimerFd, &mIgnored, sizeof(mIgnored));
+                setDebugState(DebugState::Running);
                 std::function<void()> cb;
                 {
                     std::lock_guard<decltype(mMutex)> lk(mMutex);
                     cb = mCallback;
                 }
                 if (cb) {
+                    setDebugState(DebugState::InCallback);
                     cb();
+                    setDebugState(DebugState::Running);
                 }
             }
             if (events[i].data.u32 == DispatchType::TERMINATE) {
-                timing = false;
+                ALOGE("Terminated");
+                setDebugState(DebugState::Running);
+                return false;
             }
         }
     }
 }
 
+void Timer::setDebugState(DebugState state) {
+    std::lock_guard lk(mMutex);
+    mDebugState = state;
+}
+
+const char* Timer::strDebugState(DebugState state) const {
+    switch (state) {
+        case DebugState::Reset:
+            return "Reset";
+        case DebugState::Running:
+            return "Running";
+        case DebugState::Waiting:
+            return "Waiting";
+        case DebugState::Reading:
+            return "Reading";
+        case DebugState::InCallback:
+            return "InCallback";
+        case DebugState::Terminated:
+            return "Terminated";
+    }
+}
+
+void Timer::dump(std::string& result) const {
+    std::lock_guard lk(mMutex);
+    StringAppendF(&result, "\t\tDebugState: %s\n", strDebugState(mDebugState));
+}
+
 } // namespace android::scheduler
diff --git a/services/surfaceflinger/Scheduler/Timer.h b/services/surfaceflinger/Scheduler/Timer.h
index 0ae82c8..a8e2d5a 100644
--- a/services/surfaceflinger/Scheduler/Timer.h
+++ b/services/surfaceflinger/Scheduler/Timer.h
@@ -34,18 +34,27 @@
     //     Most users will want to serialize thes calls so as to be aware of the timer state.
     void alarmIn(std::function<void()> const& cb, nsecs_t fireIn) final;
     void alarmCancel() final;
+    void dump(std::string& result) const final;
 
 private:
-    int const mTimerFd;
-    int const mEpollFd;
-    std::array<int, 2> mPipes;
+    enum class DebugState { Reset, Running, Waiting, Reading, InCallback, Terminated };
+    void reset();
+    void cleanup();
+    void setDebugState(DebugState state) EXCLUDES(mMutex);
+    const char* strDebugState(DebugState state) const;
+
+    int mTimerFd = -1;
+    int mEpollFd = -1;
+    std::array<int, 2> mPipes = {-1, -1};
 
     std::thread mDispatchThread;
-    void dispatch();
+    void threadMain();
+    bool dispatch();
     void endDispatch();
 
-    std::mutex mMutex;
+    mutable std::mutex mMutex;
     std::function<void()> mCallback GUARDED_BY(mMutex);
+    DebugState mDebugState GUARDED_BY(mMutex);
 };
 
 } // namespace android::scheduler
diff --git a/services/surfaceflinger/Scheduler/VSyncDispatchTimerQueue.cpp b/services/surfaceflinger/Scheduler/VSyncDispatchTimerQueue.cpp
index 460d4a5..cd15617 100644
--- a/services/surfaceflinger/Scheduler/VSyncDispatchTimerQueue.cpp
+++ b/services/surfaceflinger/Scheduler/VSyncDispatchTimerQueue.cpp
@@ -168,6 +168,7 @@
     mIntendedWakeupTime = targetTime;
     mTimeKeeper->alarmIn(std::bind(&VSyncDispatchTimerQueue::timerCallback, this),
                          targetTime - now);
+    mLastTimerSchedule = mTimeKeeper->now();
 }
 
 void VSyncDispatchTimerQueue::rearmTimer(nsecs_t now) {
@@ -226,6 +227,7 @@
     std::vector<Invocation> invocations;
     {
         std::lock_guard<decltype(mMutex)> lk(mMutex);
+        mLastTimerCallback = mTimeKeeper->now();
         for (auto it = mCallbacks.begin(); it != mCallbacks.end(); it++) {
             auto& callback = it->second;
             auto const wakeupTime = callback->wakeupTime();
@@ -322,10 +324,15 @@
 
 void VSyncDispatchTimerQueue::dump(std::string& result) const {
     std::lock_guard<decltype(mMutex)> lk(mMutex);
+    StringAppendF(&result, "\tTimer:\n");
+    mTimeKeeper->dump(result);
     StringAppendF(&result, "\tmTimerSlack: %.2fms mMinVsyncDistance: %.2fms\n", mTimerSlack / 1e6f,
                   mMinVsyncDistance / 1e6f);
     StringAppendF(&result, "\tmIntendedWakeupTime: %.2fms from now\n",
-                  (mIntendedWakeupTime - systemTime()) / 1e6f);
+                  (mIntendedWakeupTime - mTimeKeeper->now()) / 1e6f);
+    StringAppendF(&result, "\tmLastTimerCallback: %.2fms ago mLastTimerSchedule: %.2fms ago\n",
+                  (mTimeKeeper->now() - mLastTimerCallback) / 1e6f,
+                  (mTimeKeeper->now() - mLastTimerSchedule) / 1e6f);
     StringAppendF(&result, "\tCallbacks:\n");
     for (const auto& [token, entry] : mCallbacks) {
         entry->dump(result);
diff --git a/services/surfaceflinger/Scheduler/VSyncDispatchTimerQueue.h b/services/surfaceflinger/Scheduler/VSyncDispatchTimerQueue.h
index 390e0c4..26a8ec0 100644
--- a/services/surfaceflinger/Scheduler/VSyncDispatchTimerQueue.h
+++ b/services/surfaceflinger/Scheduler/VSyncDispatchTimerQueue.h
@@ -152,6 +152,10 @@
         std::array<char, maxlen> str_buffer;
         void note(std::string_view name, nsecs_t in, nsecs_t vs);
     } mTraceBuffer GUARDED_BY(mMutex);
+
+    // For debugging purposes
+    nsecs_t mLastTimerCallback GUARDED_BY(mMutex) = kInvalidTime;
+    nsecs_t mLastTimerSchedule GUARDED_BY(mMutex) = kInvalidTime;
 };
 
 } // namespace android::scheduler
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index 2da92b3..4d66c38 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -4373,6 +4373,7 @@
                   mDebugDisplayConfigSetByBackdoor ? "yes" : "no");
 
     mScheduler->dump(mAppConnectionHandle, result);
+    mScheduler->getPrimaryDispSync().dump(result);
 }
 
 void SurfaceFlinger::dumpStaticScreenStats(std::string& result) const {
diff --git a/services/surfaceflinger/tests/unittests/VSyncDispatchTimerQueueTest.cpp b/services/surfaceflinger/tests/unittests/VSyncDispatchTimerQueueTest.cpp
index 3543361..1899bed 100644
--- a/services/surfaceflinger/tests/unittests/VSyncDispatchTimerQueueTest.cpp
+++ b/services/surfaceflinger/tests/unittests/VSyncDispatchTimerQueueTest.cpp
@@ -71,6 +71,7 @@
     MOCK_CONST_METHOD0(now, nsecs_t());
     MOCK_METHOD2(alarmIn, void(std::function<void()> const&, nsecs_t time));
     MOCK_METHOD0(alarmCancel, void());
+    MOCK_CONST_METHOD1(dump, void(std::string&));
 
     void alarmInDefaultBehavior(std::function<void()> const& callback, nsecs_t time) {
         mCallback = callback;
@@ -188,6 +189,7 @@
             }
             void alarmCancel() final { mControllableClock.alarmCancel(); }
             nsecs_t now() const final { return mControllableClock.now(); }
+            void dump(std::string&) const final {}
 
         private:
             TimeKeeper& mControllableClock;