Move DeferredDisplayState out of ops

bug:9969358

Instead of storing DeferredDisplayState within an op (thus forcing ops
to be tied to a single state instance), associate each op with a new
state at DeferredDisplayList insertion time.

Now, DisplayLists (and the ops within) can be reused in a single
DeferredDisplayList draw call, as ops will use different state
instances at different points in the frame.

Change-Id: I525ab2abe0c3883679f2fa00b219b293e9ec53d9
diff --git a/libs/hwui/DeferredDisplayList.cpp b/libs/hwui/DeferredDisplayList.cpp
index 7ce15c5..fd9257a 100644
--- a/libs/hwui/DeferredDisplayList.cpp
+++ b/libs/hwui/DeferredDisplayList.cpp
@@ -66,23 +66,23 @@
 
     virtual ~DrawBatch() { mOps.clear(); }
 
-    virtual void add(DrawOp* op, bool opaqueOverBounds) {
+    virtual void add(DrawOp* op, const DeferredDisplayState* state, bool opaqueOverBounds) {
         // NOTE: ignore empty bounds special case, since we don't merge across those ops
-        mBounds.unionWith(op->state.mBounds);
+        mBounds.unionWith(state->mBounds);
         mAllOpsOpaque &= opaqueOverBounds;
-        mOps.add(op);
+        mOps.add(OpStatePair(op, state));
     }
 
-    bool intersects(Rect& rect) {
+    bool intersects(const Rect& rect) {
         if (!rect.intersects(mBounds)) return false;
 
         for (unsigned int i = 0; i < mOps.size(); i++) {
-            if (rect.intersects(mOps[i]->state.mBounds)) {
+            if (rect.intersects(mOps[i].state->mBounds)) {
 #if DEBUG_DEFER
-                DEFER_LOGD("op intersects with op %p with bounds %f %f %f %f:", mOps[i],
-                        mOps[i]->state.mBounds.left, mOps[i]->state.mBounds.top,
-                        mOps[i]->state.mBounds.right, mOps[i]->state.mBounds.bottom);
-                mOps[i]->output(2);
+                DEFER_LOGD("op intersects with op %p with bounds %f %f %f %f:", mOps[i].op,
+                        mOps[i].state->mBounds.left, mOps[i].state->mBounds.top,
+                        mOps[i].state->mBounds.right, mOps[i].state->mBounds.bottom);
+                mOps[i].op->output(2);
 #endif
                 return true;
             }
@@ -97,9 +97,9 @@
         status_t status = DrawGlInfo::kStatusDone;
         DisplayListLogBuffer& logBuffer = DisplayListLogBuffer::getInstance();
         for (unsigned int i = 0; i < mOps.size(); i++) {
-            DrawOp* op = mOps[i];
-
-            renderer.restoreDisplayState(op->state);
+            DrawOp* op = mOps[i].op;
+            const DeferredDisplayState* state = mOps[i].state;
+            renderer.restoreDisplayState(*state);
 
 #if DEBUG_DISPLAY_LIST_OPS_AS_EVENTS
             renderer.eventMark(op->name());
@@ -108,7 +108,7 @@
             status |= op->applyDraw(renderer, dirty);
 
 #if DEBUG_MERGE_BEHAVIOR
-            Rect& bounds = mOps[i]->state.mBounds;
+            const Rect& bounds = state->mBounds;
             int batchColor = 0x1f000000;
             if (getBatchId() & 0x1) batchColor |= 0x0000ff;
             if (getBatchId() & 0x2) batchColor |= 0x00ff00;
@@ -127,7 +127,7 @@
 
         Region uncovered(android::Rect(bounds.left, bounds.top, bounds.right, bounds.bottom));
         for (unsigned int i = 0; i < mOps.size(); i++) {
-            Rect &r = mOps[i]->state.mBounds;
+            const Rect &r = mOps[i].state->mBounds;
             uncovered.subtractSelf(android::Rect(r.left, r.top, r.right, r.bottom));
         }
         return uncovered.isEmpty();
@@ -138,7 +138,7 @@
     inline int count() const { return mOps.size(); }
 
 protected:
-    Vector<DrawOp*> mOps;
+    Vector<OpStatePair> mOps;
     Rect mBounds; // union of bounds of contained ops
 private:
     bool mAllOpsOpaque;
@@ -184,19 +184,19 @@
      * False positives can lead to information from the paints of subsequent merged operations being
      * dropped, so we make simplifying qualifications on the ops that can merge, per op type.
      */
-    bool canMergeWith(DrawOp* op) {
+    bool canMergeWith(const DrawOp* op, const DeferredDisplayState* state) {
         bool isTextBatch = getBatchId() == DeferredDisplayList::kOpBatch_Text ||
                 getBatchId() == DeferredDisplayList::kOpBatch_ColorText;
 
         // Overlapping other operations is only allowed for text without shadow. For other ops,
         // multiDraw isn't guaranteed to overdraw correctly
-        if (!isTextBatch || op->state.mDrawModifiers.mHasShadow) {
-            if (intersects(op->state.mBounds)) return false;
+        if (!isTextBatch || state->mDrawModifiers.mHasShadow) {
+            if (intersects(state->mBounds)) return false;
         }
-        const DeferredDisplayState& lhs = op->state;
-        const DeferredDisplayState& rhs = mOps[0]->state;
+        const DeferredDisplayState* lhs = state;
+        const DeferredDisplayState* rhs = mOps[0].state;
 
-        if (NEQ_FALPHA(lhs.mAlpha, rhs.mAlpha)) return false;
+        if (NEQ_FALPHA(lhs->mAlpha, rhs->mAlpha)) return false;
 
         /* Clipping compatibility check
          *
@@ -204,9 +204,9 @@
          * clip for that side.
          */
         const int currentFlags = mClipSideFlags;
-        const int newFlags = op->state.mClipSideFlags;
+        const int newFlags = state->mClipSideFlags;
         if (currentFlags != kClipSide_None || newFlags != kClipSide_None) {
-            const Rect& opBounds = op->state.mBounds;
+            const Rect& opBounds = state->mBounds;
             float boundsDelta = mBounds.left - opBounds.left;
             if (!checkSide(currentFlags, newFlags, kClipSide_Left, boundsDelta)) return false;
             boundsDelta = mBounds.top - opBounds.top;
@@ -220,9 +220,9 @@
         }
 
         // if paints are equal, then modifiers + paint attribs don't need to be compared
-        if (op->mPaint == mOps[0]->mPaint) return true;
+        if (op->mPaint == mOps[0].op->mPaint) return true;
 
-        if (op->getPaintAlpha() != mOps[0]->getPaintAlpha()) return false;
+        if (op->getPaintAlpha() != mOps[0].op->getPaintAlpha()) return false;
 
         /* Draw Modifiers compatibility check
          *
@@ -236,8 +236,8 @@
          *
          * These ignore cases prevent us from simply memcmp'ing the drawModifiers
          */
-        const DrawModifiers& lhsMod = lhs.mDrawModifiers;
-        const DrawModifiers& rhsMod = rhs.mDrawModifiers;
+        const DrawModifiers& lhsMod = lhs->mDrawModifiers;
+        const DrawModifiers& rhsMod = rhs->mDrawModifiers;
         if (lhsMod.mShader != rhsMod.mShader) return false;
         if (lhsMod.mColorFilter != rhsMod.mColorFilter) return false;
 
@@ -249,15 +249,15 @@
         return true;
     }
 
-    virtual void add(DrawOp* op, bool opaqueOverBounds) {
-        DrawBatch::add(op, opaqueOverBounds);
+    virtual void add(DrawOp* op, const DeferredDisplayState* state, bool opaqueOverBounds) {
+        DrawBatch::add(op, state, opaqueOverBounds); // params mirror DrawBatch::add so this overrides it
 
-        const int newClipSideFlags = op->state.mClipSideFlags;
+        const int newClipSideFlags = state->mClipSideFlags;
         mClipSideFlags |= newClipSideFlags;
-        if (newClipSideFlags & kClipSide_Left) mClipRect.left = op->state.mClip.left;
-        if (newClipSideFlags & kClipSide_Top) mClipRect.top = op->state.mClip.top;
-        if (newClipSideFlags & kClipSide_Right) mClipRect.right = op->state.mClip.right;
-        if (newClipSideFlags & kClipSide_Bottom) mClipRect.bottom = op->state.mClip.bottom;
+        if (newClipSideFlags & kClipSide_Left) mClipRect.left = state->mClip.left;
+        if (newClipSideFlags & kClipSide_Top) mClipRect.top = state->mClip.top;
+        if (newClipSideFlags & kClipSide_Right) mClipRect.right = state->mClip.right;
+        if (newClipSideFlags & kClipSide_Bottom) mClipRect.bottom = state->mClip.bottom;
     }
 
     virtual status_t replay(OpenGLRenderer& renderer, Rect& dirty, int index) {
@@ -271,7 +271,7 @@
         // clipping in the merged case is done ahead of time since all ops share the clip (if any)
         renderer.setupMergedMultiDraw(mClipSideFlags ? &mClipRect : NULL);
 
-        DrawOp* op = mOps[0];
+        DrawOp* op = mOps[0].op;
         DisplayListLogBuffer& buffer = DisplayListLogBuffer::getInstance();
         buffer.writeCommand(0, "multiDraw");
         buffer.writeCommand(1, op->name());
@@ -297,11 +297,11 @@
 class StateOpBatch : public Batch {
 public:
     // creates a single operation batch
-    StateOpBatch(StateOp* op) : mOp(op) {}
+    StateOpBatch(const StateOp* op, const DeferredDisplayState* state) : mOp(op), mState(state) {}
 
     virtual status_t replay(OpenGLRenderer& renderer, Rect& dirty, int index) {
         DEFER_LOGD("replaying state op batch %p", this);
-        renderer.restoreDisplayState(mOp->state);
+        renderer.restoreDisplayState(*mState);
 
         // use invalid save count because it won't be used at flush time - RestoreToCountOp is the
         // only one to use it, and we don't use that class at flush time, instead calling
@@ -313,16 +313,18 @@
 
 private:
     const StateOp* mOp;
+    const DeferredDisplayState* mState;
 };
 
 class RestoreToCountBatch : public Batch {
 public:
-    RestoreToCountBatch(StateOp* op, int restoreCount) : mOp(op), mRestoreCount(restoreCount) {}
+    RestoreToCountBatch(const StateOp* op, const DeferredDisplayState* state, int restoreCount) :
+            mOp(op), mState(state), mRestoreCount(restoreCount) {}
 
     virtual status_t replay(OpenGLRenderer& renderer, Rect& dirty, int index) {
         DEFER_LOGD("batch %p restoring to count %d", this, mRestoreCount);
 
-        renderer.restoreDisplayState(mOp->state);
+        renderer.restoreDisplayState(*mState);
         renderer.restoreToCount(mRestoreCount);
         return DrawGlInfo::kStatusDone;
     }
@@ -330,6 +332,8 @@
 private:
     // we use the state storage for the RestoreToCountOp, but don't replay the op itself
     const StateOp* mOp;
+    const DeferredDisplayState* mState;
+
     /*
      * The count used here represents the flush() time saveCount. This is as opposed to the
      * DisplayList record time, or defer() time values (which are RestoreToCountOp's mCount, and
@@ -480,12 +484,27 @@
 }
 
 void DeferredDisplayList::addDrawOp(OpenGLRenderer& renderer, DrawOp* op) {
-    if (renderer.storeDisplayState(op->state, getDrawOpDeferFlags())) {
+    /* 1: op calculates local bounds */
+    DeferredDisplayState* const state = createState();
+    if (op->getLocalBounds(renderer.getDrawModifiers(), state->mBounds)) {
+        if (state->mBounds.isEmpty()) {
+            // valid empty bounds, don't bother deferring
+            tryRecycleState(state);
+            return;
+        }
+    } else {
+        state->mBounds.setEmpty();
+    }
+
+    /* 2: renderer calculates global bounds + stores state */
+    if (renderer.storeDisplayState(*state, getDrawOpDeferFlags())) {
+        tryRecycleState(state);
         return; // quick rejected
     }
 
+    /* 3: ask op for defer info, given renderer state */
     DeferInfo deferInfo;
-    op->onDefer(renderer, deferInfo);
+    op->onDefer(renderer, deferInfo, *state);
 
     // complex clip has a complex set of expectations on the renderer state - for now, avoid taking
     // the merge path in those cases
@@ -493,8 +512,8 @@
     deferInfo.opaqueOverBounds &= !recordingComplexClip() && mSaveStack.isEmpty();
 
     if (CC_LIKELY(mAvoidOverdraw) && mBatches.size() &&
-            op->state.mClipSideFlags != kClipSide_ConservativeFull &&
-            deferInfo.opaqueOverBounds && op->state.mBounds.contains(mBounds)) {
+            state->mClipSideFlags != kClipSide_ConservativeFull &&
+            deferInfo.opaqueOverBounds && state->mBounds.contains(mBounds)) {
         // avoid overdraw by resetting drawing state + discarding drawing ops
         discardDrawingBatches(mBatches.size() - 1);
         resetBatchingState();
@@ -503,7 +522,7 @@
     if (CC_UNLIKELY(renderer.getCaches().drawReorderDisabled)) {
         // TODO: elegant way to reuse batches?
         DrawBatch* b = new DrawBatch(deferInfo);
-        b->add(op, deferInfo.opaqueOverBounds);
+        b->add(op, state, deferInfo.opaqueOverBounds);
         mBatches.add(b);
         return;
     }
@@ -515,10 +534,10 @@
     // (eventually, should be similar shader)
     int insertBatchIndex = mBatches.size();
     if (!mBatches.isEmpty()) {
-        if (op->state.mBounds.isEmpty()) {
+        if (state->mBounds.isEmpty()) {
             // don't know the bounds for op, so add to last batch and start from scratch on next op
             DrawBatch* b = new DrawBatch(deferInfo);
-            b->add(op, deferInfo.opaqueOverBounds);
+            b->add(op, state, deferInfo.opaqueOverBounds);
             mBatches.add(b);
             resetBatchingState();
 #if DEBUG_DEFER
@@ -531,7 +550,7 @@
         if (deferInfo.mergeable) {
             // Try to merge with any existing batch with same mergeId.
             if (mMergingBatches[deferInfo.batchId].get(deferInfo.mergeId, targetBatch)) {
-                if (!((MergingDrawBatch*) targetBatch)->canMergeWith(op)) {
+                if (!((MergingDrawBatch*) targetBatch)->canMergeWith(op, state)) {
                     targetBatch = NULL;
                 }
             }
@@ -554,14 +573,14 @@
                     if (!targetBatch) break; // found insert position, quit
                 }
 
-                if (overBatch->intersects(op->state.mBounds)) {
+                if (overBatch->intersects(state->mBounds)) {
                     // NOTE: it may be possible to optimize for special cases where two operations
                     // of the same batch/paint could swap order, such as with a non-mergeable
                     // (clipped) and a mergeable text operation
-                    targetBatch = NULL;
 #if DEBUG_DEFER
-                    DEFER_LOGD("op couldn't join batch %d, was intersected by batch %d",
-                            targetIndex, i);
+                    DEFER_LOGD("op couldn't join batch %p, was intersected by batch %d",
+                            targetBatch, i);
                     op->output(2);
 #endif
+                    targetBatch = NULL; // cleared after the debug log so %p shows the rejected batch
                     break;
@@ -586,14 +605,15 @@
         mBatches.insertAt(targetBatch, insertBatchIndex);
     }
 
-    targetBatch->add(op, deferInfo.opaqueOverBounds);
+    targetBatch->add(op, state, deferInfo.opaqueOverBounds);
 }
 
 void DeferredDisplayList::storeStateOpBarrier(OpenGLRenderer& renderer, StateOp* op) {
     DEFER_LOGD("%p adding state op barrier at pos %d", this, mBatches.size());
 
-    renderer.storeDisplayState(op->state, getStateOpDeferFlags());
-    mBatches.add(new StateOpBatch(op));
+    DeferredDisplayState* state = createState();
+    renderer.storeDisplayState(*state, getStateOpDeferFlags());
+    mBatches.add(new StateOpBatch(op, state));
     resetBatchingState();
 }
 
@@ -604,8 +624,9 @@
 
     // store displayState for the restore operation, as it may be associated with a saveLayer that
     // doesn't have kClip_SaveFlag set
-    renderer.storeDisplayState(op->state, getStateOpDeferFlags());
-    mBatches.add(new RestoreToCountBatch(op, newSaveCount));
+    DeferredDisplayState* state = createState();
+    renderer.storeDisplayState(*state, getStateOpDeferFlags());
+    mBatches.add(new RestoreToCountBatch(op, state, newSaveCount));
     resetBatchingState();
 }
 
diff --git a/libs/hwui/DeferredDisplayList.h b/libs/hwui/DeferredDisplayList.h
index 1ef0152..3dcbd0b 100644
--- a/libs/hwui/DeferredDisplayList.h
+++ b/libs/hwui/DeferredDisplayList.h
@@ -18,11 +18,13 @@
 #define ANDROID_HWUI_DEFERRED_DISPLAY_LIST_H
 
 #include <utils/Errors.h>
+#include <utils/LinearAllocator.h>
 #include <utils/Vector.h>
+#include <utils/TinyHashMap.h>
 
 #include "Matrix.h"
+#include "OpenGLRenderer.h"
 #include "Rect.h"
-#include "utils/TinyHashMap.h"
 
 class SkBitmap;
 
@@ -34,6 +36,8 @@
 class SaveOp;
 class SaveLayerOp;
 class StateOp;
+
+class DeferredDisplayState;
 class OpenGLRenderer;
 
 class Batch;
@@ -42,6 +46,38 @@
 
 typedef const void* mergeid_t;
 
+class DeferredDisplayState { // filled by storeDisplayState(), read back by restoreDisplayState()
+public:
+    /** static void* operator new(size_t size); PURPOSELY OMITTED **/
+    static void* operator new(size_t size, LinearAllocator& allocator) {
+        return allocator.alloc(size); // storage comes from the supplied allocator
+    }
+
+    // global op bounds, mapped by mMatrix to be in screen space coordinates, clipped
+    Rect mBounds;
+
+    // the below are set and used by the OpenGLRenderer at record and deferred playback
+    bool mClipValid;
+    Rect mClip;
+    int mClipSideFlags; // specifies which sides of the bounds are clipped, unclipped if cleared
+    bool mClipped;
+    mat4 mMatrix;
+    DrawModifiers mDrawModifiers;
+    float mAlpha;
+};
+
+class OpStatePair { // couples a draw op with the state instance created for it at insertion
+public:
+    OpStatePair()
+            : op(NULL), state(NULL) {}
+    OpStatePair(DrawOp* newOp, const DeferredDisplayState* newState)
+            : op(newOp), state(newState) {}
+    OpStatePair(const OpStatePair& other)
+            : op(other.op), state(other.state) {} // shallow copy: both pointers are shared
+    DrawOp* op; // never deleted by this class
+    const DeferredDisplayState* state; // never deleted by this class
+};
+
 class DeferredDisplayList {
 public:
     DeferredDisplayList(const Rect& bounds, bool avoidOverdraw = true) :
@@ -84,6 +120,14 @@
     void addDrawOp(OpenGLRenderer& renderer, DrawOp* op);
 
 private:
+    DeferredDisplayState* createState() { // new state placed in mAllocator's storage
+        return new (mAllocator) DeferredDisplayState();
+    }
+
+    void tryRecycleState(DeferredDisplayState* state) { // called when an op ends up not deferred
+        mAllocator.rewindIfLastAlloc(state, sizeof(DeferredDisplayState)); // NOTE(review): presumably a no-op unless state was the latest allocation - confirm
+    }
+
     /**
      * Resets the batching back-pointers, creating a barrier in the operation stream so that no ops
      * added in the future will be inserted into a batch that already exist.
@@ -131,6 +175,8 @@
      * collide, which avoids the need to resolve mergeid collisions.
      */
     TinyHashMap<mergeid_t, DrawBatch*> mMergingBatches[kOpBatch_Count];
+
+    LinearAllocator mAllocator;
 };
 
 /**
diff --git a/libs/hwui/DisplayList.h b/libs/hwui/DisplayList.h
index 194be9e..1cd5f1c 100644
--- a/libs/hwui/DisplayList.h
+++ b/libs/hwui/DisplayList.h
@@ -26,6 +26,7 @@
 
 #include <private/hwui/DrawGlInfo.h>
 
+#include <utils/LinearAllocator.h>
 #include <utils/RefBase.h>
 #include <utils/SortedVector.h>
 #include <utils/String8.h>
@@ -35,8 +36,6 @@
 
 #include <androidfw/ResourceTypes.h>
 
-#include "utils/LinearAllocator.h"
-
 #include "Debug.h"
 
 #define TRANSLATION 0x0001
@@ -114,7 +113,6 @@
 
     void initFromDisplayListRenderer(const DisplayListRenderer& recorder, bool reusing = false);
 
-
     void defer(DeferStateStruct& deferStruct, const int level);
     void replay(ReplayStateStruct& replayStruct, const int level);
 
diff --git a/libs/hwui/DisplayListOp.h b/libs/hwui/DisplayListOp.h
index 42e11d0..a17942e 100644
--- a/libs/hwui/DisplayListOp.h
+++ b/libs/hwui/DisplayListOp.h
@@ -82,15 +82,6 @@
     // NOTE: it would be nice to declare constants and overriding the implementation in each op to
     // point at the constants, but that seems to require a .cpp file
     virtual const char* name() = 0;
-
-    /**
-     * Stores the relevant canvas state of the object between deferral and replay (if the canvas
-     * state supports being stored) See OpenGLRenderer::simpleClipAndState()
-     *
-     * TODO: don't reserve space for StateOps that won't be deferred
-     */
-    DeferredDisplayState state;
-
 };
 
 class StateOp : public DisplayListOp {
@@ -129,14 +120,6 @@
             return;
         }
 
-        if (getLocalBounds(state.mBounds)) {
-            // valid empty bounds, don't bother deferring
-            if (state.mBounds.isEmpty()) return;
-        } else {
-            // empty bounds signify bounds can't be calculated
-            state.mBounds.setEmpty();
-        }
-
         deferStruct.mDeferredList.addDrawOp(deferStruct.mRenderer, this);
     }
 
@@ -159,11 +142,11 @@
      * reducing which operations are tagged as mergeable.
      */
     virtual status_t multiDraw(OpenGLRenderer& renderer, Rect& dirty,
-            const Vector<DrawOp*>& ops, const Rect& bounds) {
+            const Vector<OpStatePair>& ops, const Rect& bounds) {
         status_t status = DrawGlInfo::kStatusDone;
         for (unsigned int i = 0; i < ops.size(); i++) {
-            renderer.restoreDisplayState(ops[i]->state, true);
-            status |= ops[i]->applyDraw(renderer, dirty);
+            renderer.restoreDisplayState(*(ops[i].state), true);
+            status |= ops[i].op->applyDraw(renderer, dirty);
         }
         return status;
     }
@@ -178,20 +161,23 @@
      *
      * if a subclass can set deferInfo.mergeable to true, it should implement multiDraw()
      */
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {}
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {}
 
     /**
      * Query the conservative, local bounds (unmapped) bounds of the op.
      *
      * returns true if bounds exist
      */
-    virtual bool getLocalBounds(Rect& localBounds) { return false; }
+    virtual bool getLocalBounds(const DrawModifiers& drawModifiers, Rect& localBounds) {
+        return false;
+    }
 
     // TODO: better refine localbounds usage
     void setQuickRejected(bool quickRejected) { mQuickRejected = quickRejected; }
     bool getQuickRejected() { return mQuickRejected; }
 
-    inline int getPaintAlpha() {
+    inline int getPaintAlpha() const {
         return OpenGLRenderer::getAlphaDirect(mPaint);
     }
 
@@ -208,7 +194,7 @@
 
     // Helper method for determining op opaqueness. Assumes op fills its bounds in local
     // coordinates, and that paint's alpha is used
-    inline bool isOpaqueOverBounds() {
+    inline bool isOpaqueOverBounds(const DeferredDisplayState& state) {
         // ensure that local bounds cover mapped bounds
         if (!state.mMatrix.isSimple()) return false;
 
@@ -251,12 +237,13 @@
     // default empty constructor for bounds, to be overridden in child constructor body
     DrawBoundedOp(SkPaint* paint): DrawOp(paint) { }
 
-    bool getLocalBounds(Rect& localBounds) {
+    bool getLocalBounds(const DrawModifiers& drawModifiers, Rect& localBounds) {
         localBounds.set(mLocalBounds);
-        if (state.mDrawModifiers.mHasShadow) {
+        if (drawModifiers.mHasShadow) {
+            // TODO: inspect paint's looper directly
             Rect shadow(mLocalBounds);
-            shadow.translate(state.mDrawModifiers.mShadowDx, state.mDrawModifiers.mShadowDy);
-            shadow.outset(state.mDrawModifiers.mShadowRadius);
+            shadow.translate(drawModifiers.mShadowDx, drawModifiers.mShadowDy);
+            shadow.outset(drawModifiers.mShadowRadius);
             localBounds.unionWith(shadow);
         }
         return true;
@@ -777,8 +764,10 @@
      * the current layer, if any.
      */
     virtual status_t multiDraw(OpenGLRenderer& renderer, Rect& dirty,
-            const Vector<DrawOp*>& ops, const Rect& bounds) {
-        renderer.restoreDisplayState(state, true); // restore all but the clip
+            const Vector<OpStatePair>& ops, const Rect& bounds) {
+        const DeferredDisplayState& firstState = *(ops[0].state);
+        renderer.restoreDisplayState(firstState, true); // restore all but the clip
+
         TextureVertex vertices[6 * ops.size()];
         TextureVertex* vertex = &vertices[0];
 
@@ -788,14 +777,15 @@
         // TODO: manually handle rect clip for bitmaps by adjusting texCoords per op,
         // and allowing them to be merged in getBatchId()
         for (unsigned int i = 0; i < ops.size(); i++) {
-            const Rect& opBounds = ops[i]->state.mBounds;
+            const DeferredDisplayState& state = *(ops[i].state);
+            const Rect& opBounds = state.mBounds;
             // When we reach multiDraw(), the matrix can be either
             // pureTranslate or simple (translate and/or scale).
             // If the matrix is not pureTranslate, then we have a scale
-            if (!ops[i]->state.mMatrix.isPureTranslate()) transformed = true;
+            if (!state.mMatrix.isPureTranslate()) transformed = true;
 
             Rect texCoords(0, 0, 1, 1);
-            ((DrawBitmapOp*) ops[i])->mUvMapper.map(texCoords);
+            ((DrawBitmapOp*) ops[i].op)->mUvMapper.map(texCoords);
 
             SET_TEXTURE(vertex, opBounds, bounds, texCoords, left, top);
             SET_TEXTURE(vertex, opBounds, bounds, texCoords, right, top);
@@ -806,8 +796,7 @@
             SET_TEXTURE(vertex, opBounds, bounds, texCoords, right, bottom);
 
             if (hasLayer) {
-                const Rect& dirty = ops[i]->state.mBounds;
-                renderer.dirtyLayer(dirty.left, dirty.top, dirty.right, dirty.bottom);
+                renderer.dirtyLayer(opBounds.left, opBounds.top, opBounds.right, opBounds.bottom);
             }
         }
 
@@ -821,7 +810,8 @@
 
     virtual const char* name() { return "DrawBitmap"; }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
         deferInfo.batchId = DeferredDisplayList::kOpBatch_Bitmap;
         deferInfo.mergeId = getAtlasEntry() ? (mergeid_t) mEntry->getMergeId() : (mergeid_t) mBitmap;
 
@@ -861,7 +851,8 @@
 
     virtual const char* name() { return "DrawBitmapMatrix"; }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
         deferInfo.batchId = DeferredDisplayList::kOpBatch_Bitmap;
     }
 
@@ -890,7 +881,8 @@
 
     virtual const char* name() { return "DrawBitmapRect"; }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
         deferInfo.batchId = DeferredDisplayList::kOpBatch_Bitmap;
     }
 
@@ -915,7 +907,8 @@
 
     virtual const char* name() { return "DrawBitmapData"; }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
         deferInfo.batchId = DeferredDisplayList::kOpBatch_Bitmap;
     }
 };
@@ -939,7 +932,8 @@
 
     virtual const char* name() { return "DrawBitmapMesh"; }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
         deferInfo.batchId = DeferredDisplayList::kOpBatch_Bitmap;
     }
 
@@ -989,15 +983,16 @@
      * is also responsible for dirtying the current layer, if any.
      */
     virtual status_t multiDraw(OpenGLRenderer& renderer, Rect& dirty,
-            const Vector<DrawOp*>& ops, const Rect& bounds) {
-        renderer.restoreDisplayState(state, true);
+            const Vector<OpStatePair>& ops, const Rect& bounds) {
+        const DeferredDisplayState& firstState = *(ops[0].state);
+        renderer.restoreDisplayState(firstState, true); // restore all but the clip
 
         // Batches will usually contain a small number of items so it's
         // worth performing a first iteration to count the exact number
         // of vertices we need in the new mesh
         uint32_t totalVertices = 0;
         for (unsigned int i = 0; i < ops.size(); i++) {
-            totalVertices += ((DrawPatchOp*) ops[i])->getMesh(renderer)->verticesCount;
+            totalVertices += ((DrawPatchOp*) ops[i].op)->getMesh(renderer)->verticesCount;
         }
 
         const bool hasLayer = renderer.hasLayer();
@@ -1012,7 +1007,8 @@
         // enforces ops drawn by this function to have a pure translate or
         // identity matrix
         for (unsigned int i = 0; i < ops.size(); i++) {
-            DrawPatchOp* patchOp = (DrawPatchOp*) ops[i];
+            DrawPatchOp* patchOp = (DrawPatchOp*) ops[i].op;
+            const DeferredDisplayState* state = ops[i].state;
             const Patch* opMesh = patchOp->getMesh(renderer);
             uint32_t vertexCount = opMesh->verticesCount;
             if (vertexCount == 0) continue;
@@ -1020,9 +1016,9 @@
             // We use the bounds to know where to translate our vertices
             // Using patchOp->state.mBounds wouldn't work because these
             // bounds are clipped
-            const float tx = (int) floorf(patchOp->state.mMatrix.getTranslateX() +
+            const float tx = (int) floorf(state->mMatrix.getTranslateX() +
                     patchOp->mLocalBounds.left + 0.5f);
-            const float ty = (int) floorf(patchOp->state.mMatrix.getTranslateY() +
+            const float ty = (int) floorf(state->mMatrix.getTranslateY() +
                     patchOp->mLocalBounds.top + 0.5f);
 
             // Copy & transform all the vertices for the current operation
@@ -1074,12 +1070,13 @@
 
     virtual const char* name() { return "DrawPatch"; }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
         deferInfo.batchId = DeferredDisplayList::kOpBatch_Patch;
         deferInfo.mergeId = getAtlasEntry() ? (mergeid_t) mEntry->getMergeId() : (mergeid_t) mBitmap;
         deferInfo.mergeable = state.mMatrix.isPureTranslate() &&
                 OpenGLRenderer::getXfermodeDirect(mPaint) == SkXfermode::kSrcOver_Mode;
-        deferInfo.opaqueOverBounds = isOpaqueOverBounds() && mBitmap->isOpaque();
+        deferInfo.opaqueOverBounds = isOpaqueOverBounds(state) && mBitmap->isOpaque();
     }
 
 private:
@@ -1119,7 +1116,7 @@
     DrawStrokableOp(float left, float top, float right, float bottom, SkPaint* paint)
             : DrawBoundedOp(left, top, right, bottom, paint) {};
 
-    bool getLocalBounds(Rect& localBounds) {
+    bool getLocalBounds(const DrawModifiers& drawModifiers, Rect& localBounds) {
         localBounds.set(mLocalBounds);
         if (mPaint && mPaint->getStyle() != SkPaint::kFill_Style) {
             localBounds.outset(strokeWidthOutset());
@@ -1127,7 +1124,8 @@
         return true;
     }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
         if (mPaint->getPathEffect()) {
             deferInfo.batchId = DeferredDisplayList::kOpBatch_AlphaMaskTexture;
         } else {
@@ -1152,9 +1150,10 @@
         OP_LOG("Draw Rect "RECT_STRING, RECT_ARGS(mLocalBounds));
     }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
-        DrawStrokableOp::onDefer(renderer, deferInfo);
-        deferInfo.opaqueOverBounds = isOpaqueOverBounds() &&
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
+        DrawStrokableOp::onDefer(renderer, deferInfo, state);
+        deferInfo.opaqueOverBounds = isOpaqueOverBounds(state) &&
                 mPaint->getStyle() == SkPaint::kFill_Style;
     }
 
@@ -1177,7 +1176,8 @@
 
     virtual const char* name() { return "DrawRects"; }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
         deferInfo.batchId = DeferredDisplayList::kOpBatch_Vertices;
     }
 
@@ -1289,7 +1289,8 @@
         return renderer.drawPath(mPath, getPaint(renderer));
     }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
         SkPaint* paint = getPaint(renderer);
         renderer.getCaches().pathCache.precache(mPath, paint);
 
@@ -1324,7 +1325,8 @@
 
     virtual const char* name() { return "DrawLines"; }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
         deferInfo.batchId = mPaint->isAntiAlias() ?
                 DeferredDisplayList::kOpBatch_AlphaVertices :
                 DeferredDisplayList::kOpBatch_Vertices;
@@ -1360,7 +1362,8 @@
         OP_LOG("Draw some text, %d bytes", mBytesCount);
     }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
         SkPaint* paint = getPaint(renderer);
         FontRenderer& fontRenderer = renderer.getCaches().fontRenderer->getFontRenderer(paint);
         fontRenderer.precache(paint, mText, mCount, mat4::identity());
@@ -1425,7 +1428,8 @@
         memset(&mPrecacheTransform.data[0], 0xff, 16 * sizeof(float));
     }
 
-    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo) {
+    virtual void onDefer(OpenGLRenderer& renderer, DeferInfo& deferInfo,
+            const DeferredDisplayState& state) {
         SkPaint* paint = getPaint(renderer);
         FontRenderer& fontRenderer = renderer.getCaches().fontRenderer->getFontRenderer(paint);
         const mat4& transform = renderer.findBestFontTransform(state.mMatrix);
@@ -1448,19 +1452,20 @@
 
     virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty) {
         Rect bounds;
-        getLocalBounds(bounds);
+        getLocalBounds(renderer.getDrawModifiers(), bounds);
         return renderer.drawText(mText, mBytesCount, mCount, mX, mY,
                 mPositions, getPaint(renderer), mTotalAdvance, bounds);
     }
 
     virtual status_t multiDraw(OpenGLRenderer& renderer, Rect& dirty,
-            const Vector<DrawOp*>& ops, const Rect& bounds) {
+            const Vector<OpStatePair>& ops, const Rect& bounds) {
         status_t status = DrawGlInfo::kStatusDone;
         for (unsigned int i = 0; i < ops.size(); i++) {
+            const DeferredDisplayState& state = *(ops[i].state);
             DrawOpMode drawOpMode = (i == ops.size() - 1) ? kDrawOpMode_Flush : kDrawOpMode_Defer;
-            renderer.restoreDisplayState(ops[i]->state, true); // restore all but the clip
+            renderer.restoreDisplayState(state, true); // restore all but the clip
 
-            DrawTextOp& op = *((DrawTextOp*)ops[i]);
+            DrawTextOp& op = *((DrawTextOp*)ops[i].op);
             // quickReject() will not occure in drawText() so we can use mLocalBounds
             // directly, we do not need to account for shadow by calling getLocalBounds()
             status |= renderer.drawText(op.mText, op.mBytesCount, op.mCount, op.mX, op.mY,
diff --git a/libs/hwui/DisplayListRenderer.cpp b/libs/hwui/DisplayListRenderer.cpp
index 90dcf93..8866029 100644
--- a/libs/hwui/DisplayListRenderer.cpp
+++ b/libs/hwui/DisplayListRenderer.cpp
@@ -505,17 +505,12 @@
 
 void DisplayListRenderer::addDrawOp(DrawOp* op) {
     Rect localBounds;
-    if (mDrawModifiers.mHasShadow) {
-        op->state.mDrawModifiers = mDrawModifiers;
-    }
-    if (op->getLocalBounds(localBounds)) {
+    if (op->getLocalBounds(mDrawModifiers, localBounds)) {
         bool rejected = quickRejectNoScissor(localBounds.left, localBounds.top,
                 localBounds.right, localBounds.bottom);
         op->setQuickRejected(rejected);
     }
-    if (mDrawModifiers.mHasShadow) {
-        op->state.mDrawModifiers.reset();
-    }
+
     mHasDrawOps = true;
     addOpInternal(op);
 }
diff --git a/libs/hwui/OpenGLRenderer.h b/libs/hwui/OpenGLRenderer.h
index 54f6d76..f74df97 100644
--- a/libs/hwui/OpenGLRenderer.h
+++ b/libs/hwui/OpenGLRenderer.h
@@ -98,24 +98,11 @@
     kClipSide_ConservativeFull = 0x1F
 };
 
-struct DeferredDisplayState {
-    // global op bounds, mapped by mMatrix to be in screen space coordinates, clipped
-    Rect mBounds;
-
-    // the below are set and used by the OpenGLRenderer at record and deferred playback
-    bool mClipValid;
-    Rect mClip;
-    int mClipSideFlags; // specifies which sides of the bounds are clipped, unclipped if cleared
-    bool mClipped;
-    mat4 mMatrix;
-    DrawModifiers mDrawModifiers;
-    float mAlpha;
-};
-
 ///////////////////////////////////////////////////////////////////////////////
 // Renderer
 ///////////////////////////////////////////////////////////////////////////////
 
+class DeferredDisplayState;
 class DisplayList;
 class TextSetupFunctor;
 class VertexBuffer;
@@ -423,7 +410,7 @@
         return getXfermode(paint->getXfermode());
     }
 
-    static inline int getAlphaDirect(SkPaint* paint) {
+    static inline int getAlphaDirect(const SkPaint* paint) {
         if (!paint) return 255;
         return paint->getAlpha();
     }