diff --git a/libs/hwui/DeferredDisplayList.cpp b/libs/hwui/DeferredDisplayList.cpp
index 020c1e9..5ff92be 100644
--- a/libs/hwui/DeferredDisplayList.cpp
+++ b/libs/hwui/DeferredDisplayList.cpp
@@ -75,7 +75,7 @@
         for (unsigned int i = 0; i < mOps.size(); i++) {
             DrawOp* op = mOps[i];
 
-            renderer.restoreDisplayState(op->state, kStateDeferFlag_Draw);
+            renderer.restoreDisplayState(op->state);
 
 #if DEBUG_DISPLAY_LIST_OPS_AS_EVENTS
             renderer.eventMark(op->name());
@@ -106,7 +106,7 @@
 
     virtual status_t replay(OpenGLRenderer& renderer, Rect& dirty) {
         DEFER_LOGD("replaying state op batch %p", this);
-        renderer.restoreDisplayState(mOp->state, 0);
+        renderer.restoreDisplayState(mOp->state);
 
         // use invalid save count because it won't be used at flush time - RestoreToCountOp is the
         // only one to use it, and we don't use that class at flush time, instead calling
@@ -117,12 +117,12 @@
     }
 
 private:
-    StateOp* mOp;
+    const StateOp* mOp;
 };
 
 class RestoreToCountBatch : public DrawOpBatch {
 public:
-    RestoreToCountBatch(int restoreCount) : mRestoreCount(restoreCount) {}
+    RestoreToCountBatch(StateOp* op, int restoreCount) : mOp(op), mRestoreCount(restoreCount) {}
 
     bool intersects(Rect& rect) {
         // if something checks for intersection, it's trying to go backwards across a state op,
@@ -133,11 +133,15 @@
 
     virtual status_t replay(OpenGLRenderer& renderer, Rect& dirty) {
         DEFER_LOGD("batch %p restoring to count %d", this, mRestoreCount);
+
+        renderer.restoreDisplayState(mOp->state);
         renderer.restoreToCount(mRestoreCount);
         return DrawGlInfo::kStatusDone;
     }
 
 private:
+    // we use the state storage for the RestoreToCountOp, but don't replay the op itself
+    const StateOp* mOp;
     /*
      * The count used here represents the flush() time saveCount. This is as opposed to the
      * DisplayList record time, or defer() time values (which are RestoreToCountOp's mCount, and
@@ -251,7 +255,8 @@
  * Either will act as a barrier to draw operation reordering, as we want to play back layer
  * save/restore and complex canvas modifications (including save/restore) in order.
  */
-void DeferredDisplayList::addRestoreToCount(OpenGLRenderer& renderer, int newSaveCount) {
+void DeferredDisplayList::addRestoreToCount(OpenGLRenderer& renderer, StateOp* op,
+        int newSaveCount) {
     DEFER_LOGD("%p addRestoreToCount %d", this, newSaveCount);
 
     if (recordingComplexClip() && newSaveCount <= mComplexClipStackStart) {
@@ -265,7 +270,7 @@
 
     while (!mSaveStack.isEmpty() && mSaveStack.top() >= newSaveCount) mSaveStack.pop();
 
-    storeRestoreToCountBarrier(mSaveStack.size() + 1);
+    storeRestoreToCountBarrier(renderer, op, mSaveStack.size() + 1);
 }
 
 void DeferredDisplayList::addDrawOp(OpenGLRenderer& renderer, DrawOp* op) {
@@ -338,11 +343,15 @@
     resetBatchingState();
 }
 
-void DeferredDisplayList::storeRestoreToCountBarrier(int newSaveCount) {
+void DeferredDisplayList::storeRestoreToCountBarrier(OpenGLRenderer& renderer, StateOp* op,
+        int newSaveCount) {
     DEFER_LOGD("%p adding restore to count %d barrier, pos %d",
             this, newSaveCount, mBatches.size());
 
-    mBatches.add(new RestoreToCountBatch(newSaveCount));
+    // store displayState for the restore operation, as it may be associated with a saveLayer that
+    // doesn't have kClip_SaveFlag set
+    renderer.storeDisplayState(op->state, getStateOpDeferFlags());
+    mBatches.add(new RestoreToCountBatch(op, newSaveCount));
     resetBatchingState();
 }
 
diff --git a/libs/hwui/DeferredDisplayList.h b/libs/hwui/DeferredDisplayList.h
index 2afc8c1..3e450da 100644
--- a/libs/hwui/DeferredDisplayList.h
+++ b/libs/hwui/DeferredDisplayList.h
@@ -65,7 +65,7 @@
     void addClip(OpenGLRenderer& renderer, ClipOp* op);
     void addSaveLayer(OpenGLRenderer& renderer, SaveLayerOp* op, int newSaveCount);
     void addSave(OpenGLRenderer& renderer, SaveOp* op, int newSaveCount);
-    void addRestoreToCount(OpenGLRenderer& renderer, int newSaveCount);
+    void addRestoreToCount(OpenGLRenderer& renderer, StateOp* op, int newSaveCount);
 
     /**
      * Add a draw op into the DeferredDisplayList, reordering as needed (for performance) if
@@ -81,7 +81,7 @@
     void resetBatchingState();
 
     void storeStateOpBarrier(OpenGLRenderer& renderer, StateOp* op);
-    void storeRestoreToCountBarrier(int newSaveCount);
+    void storeRestoreToCountBarrier(OpenGLRenderer& renderer, StateOp* op, int newSaveCount);
 
     bool recordingComplexClip() const { return mComplexClipStackStart >= 0; }
 
diff --git a/libs/hwui/DisplayListOp.h b/libs/hwui/DisplayListOp.h
index 4f2db69..9c3d058 100644
--- a/libs/hwui/DisplayListOp.h
+++ b/libs/hwui/DisplayListOp.h
@@ -117,7 +117,7 @@
         applyState(replayStruct.mRenderer, saveCount);
     }
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) = 0;
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const = 0;
 };
 
 class DrawOp : public DisplayListOp {
@@ -223,7 +223,7 @@
         deferStruct.mDeferredList.addSave(deferStruct.mRenderer, this, newSaveCount);
     }
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.save(mFlags);
     }
 
@@ -251,11 +251,12 @@
             : mCount(count) {}
 
     virtual void defer(DeferStateStruct& deferStruct, int saveCount, int level) {
-        deferStruct.mDeferredList.addRestoreToCount(deferStruct.mRenderer, saveCount + mCount);
+        deferStruct.mDeferredList.addRestoreToCount(deferStruct.mRenderer,
+                this, saveCount + mCount);
         deferStruct.mRenderer.restoreToCount(saveCount + mCount);
     }
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.restoreToCount(saveCount + mCount);
     }
 
@@ -293,7 +294,7 @@
                 mAlpha, mMode, mFlags);
     }
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.saveLayer(mArea.left, mArea.top, mArea.right, mArea.bottom, mAlpha, mMode, mFlags);
     }
 
@@ -330,7 +331,7 @@
     TranslateOp(float dx, float dy)
             : mDx(dx), mDy(dy) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.translate(mDx, mDy);
     }
 
@@ -350,7 +351,7 @@
     RotateOp(float degrees)
             : mDegrees(degrees) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.rotate(mDegrees);
     }
 
@@ -369,7 +370,7 @@
     ScaleOp(float sx, float sy)
             : mSx(sx), mSy(sy) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.scale(mSx, mSy);
     }
 
@@ -389,7 +390,7 @@
     SkewOp(float sx, float sy)
             : mSx(sx), mSy(sy) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.skew(mSx, mSy);
     }
 
@@ -409,7 +410,7 @@
     SetMatrixOp(SkMatrix* matrix)
             : mMatrix(matrix) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.setMatrix(mMatrix);
     }
 
@@ -428,7 +429,7 @@
     ConcatMatrixOp(SkMatrix* matrix)
             : mMatrix(matrix) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.concatMatrix(mMatrix);
     }
 
@@ -471,7 +472,7 @@
     ClipRectOp(float left, float top, float right, float bottom, SkRegion::Op op)
             : ClipOp(op), mArea(left, top, right, bottom) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.clipRect(mArea.left, mArea.top, mArea.right, mArea.bottom, mOp);
     }
 
@@ -500,7 +501,7 @@
     ClipPathOp(SkPath* path, SkRegion::Op op)
             : ClipOp(op), mPath(path) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.clipPath(mPath, mOp);
     }
 
@@ -521,7 +522,7 @@
     ClipRegionOp(SkRegion* region, SkRegion::Op op)
             : ClipOp(op), mRegion(region) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.clipRegion(mRegion, mOp);
     }
 
@@ -540,7 +541,7 @@
 
 class ResetShaderOp : public StateOp {
 public:
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.resetShader();
     }
 
@@ -555,7 +556,7 @@
 public:
     SetupShaderOp(SkiaShader* shader)
             : mShader(shader) {}
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.setupShader(mShader);
     }
 
@@ -571,7 +572,7 @@
 
 class ResetColorFilterOp : public StateOp {
 public:
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.resetColorFilter();
     }
 
@@ -587,7 +588,7 @@
     SetupColorFilterOp(SkiaColorFilter* colorFilter)
             : mColorFilter(colorFilter) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.setupColorFilter(mColorFilter);
     }
 
@@ -603,7 +604,7 @@
 
 class ResetShadowOp : public StateOp {
 public:
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.resetShadow();
     }
 
@@ -619,7 +620,7 @@
     SetupShadowOp(float radius, float dx, float dy, int color)
             : mRadius(radius), mDx(dx), mDy(dy), mColor(color) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.setupShadow(mRadius, mDx, mDy, mColor);
     }
 
@@ -638,7 +639,7 @@
 
 class ResetPaintFilterOp : public StateOp {
 public:
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.resetPaintFilter();
     }
 
@@ -654,7 +655,7 @@
     SetupPaintFilterOp(int clearBits, int setBits)
             : mClearBits(clearBits), mSetBits(setBits) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) const {
         renderer.setupPaintFilter(mClearBits, mSetBits);
     }
 
diff --git a/libs/hwui/OpenGLRenderer.cpp b/libs/hwui/OpenGLRenderer.cpp
index 2903bcd..aeabe366 100644
--- a/libs/hwui/OpenGLRenderer.cpp
+++ b/libs/hwui/OpenGLRenderer.cpp
@@ -696,7 +696,10 @@
     }
 
     if (restoreLayer) {
+        endMark(); // Savelayer
+        startMark("ComposeLayer");
         composeLayer(current, previous);
+        endMark();
     }
 
     return restoreClip;
@@ -874,6 +877,7 @@
     mSnapshot->flags |= Snapshot::kFlagIsLayer;
     mSnapshot->layer = layer;
 
+    startMark("SaveLayer");
     if (fboLayer) {
         return createFboLayer(layer, bounds, clip, previousFbo);
     } else {
@@ -1326,8 +1330,6 @@
         } else {
             state.mBounds.set(currentClip);
         }
-        state.mDrawModifiers = mDrawModifiers;
-        state.mAlpha = mSnapshot->alpha;
     }
 
     if (stateDeferFlags & kStateDeferFlag_Clip) {
@@ -1336,18 +1338,18 @@
         state.mClip.setEmpty();
     }
 
-    // transform always deferred
+    // Transform, drawModifiers, and alpha always deferred, since they are used by state operations
+    // (Note: saveLayer/restore use colorFilter and alpha, so we just save restore everything)
     state.mMatrix.load(currentMatrix);
+    state.mDrawModifiers = mDrawModifiers;
+    state.mAlpha = mSnapshot->alpha;
     return false;
 }
 
-void OpenGLRenderer::restoreDisplayState(const DeferredDisplayState& state, int stateDeferFlags) {
+void OpenGLRenderer::restoreDisplayState(const DeferredDisplayState& state) {
     currentTransform().load(state.mMatrix);
-
-    if (stateDeferFlags & kStateDeferFlag_Draw) {
-        mDrawModifiers = state.mDrawModifiers;
-        mSnapshot->alpha = state.mAlpha;
-    }
+    mDrawModifiers = state.mDrawModifiers;
+    mSnapshot->alpha = state.mAlpha;
 
     if (!state.mClip.isEmpty()) {
         mSnapshot->setClip(state.mClip.left, state.mClip.top, state.mClip.right, state.mClip.bottom);
diff --git a/libs/hwui/OpenGLRenderer.h b/libs/hwui/OpenGLRenderer.h
index b17bc3f..04a47fc 100644
--- a/libs/hwui/OpenGLRenderer.h
+++ b/libs/hwui/OpenGLRenderer.h
@@ -278,7 +278,7 @@
     SkPaint* filterPaint(SkPaint* paint);
 
     bool storeDisplayState(DeferredDisplayState& state, int stateDeferFlags);
-    void restoreDisplayState(const DeferredDisplayState& state, int stateDeferFlags);
+    void restoreDisplayState(const DeferredDisplayState& state);
 
     const DrawModifiers& getDrawModifiers() { return mDrawModifiers; }
     void setDrawModifiers(const DrawModifiers& drawModifiers) { mDrawModifiers = drawModifiers; }
