3d view system!

True 3d transformations are now supported by DisplayLists and the
renderer, initially with the translationZ property on view.

Renderer operations issued directly by DisplayList (formerly just
clip/save/restore/saveLayer) are now allocated temporarily on the
handler's allocator, which exists for a single frame. This is much
simpler than continuing to expand the pool of pre-allocated
DisplayListOps now that more operations are issued directly by
DisplayList, especially with z-ordered drawing.

Still TODO:
-APIs for camera positioning, shadows
-Make Z apis public, and expose through XML
-Make invalidation / input 3d aware

Change-Id: I95fe6fa03f9b6ddd34a7e0c6ec8dd9fe47c6c6eb
diff --git a/libs/hwui/Debug.h b/libs/hwui/Debug.h
index 786f12a..79afad1 100644
--- a/libs/hwui/Debug.h
+++ b/libs/hwui/Debug.h
@@ -85,6 +85,9 @@
 // Turn on to highlight drawing batches and merged batches with different colors
 #define DEBUG_MERGE_BEHAVIOR 0
 
+// Turn on to enable 3D support in the renderer (off by default until API for control exists)
+#define DEBUG_ENABLE_3D 0
+
 #if DEBUG_INIT
     #define INIT_LOGD(...) ALOGD(__VA_ARGS__)
 #else
diff --git a/libs/hwui/DeferredDisplayList.h b/libs/hwui/DeferredDisplayList.h
index 3dcbd0b..fca3588 100644
--- a/libs/hwui/DeferredDisplayList.h
+++ b/libs/hwui/DeferredDisplayList.h
@@ -79,13 +79,13 @@
 };
 
 class DeferredDisplayList {
+    friend class DeferStateStruct; // used to give access to allocator
 public:
     DeferredDisplayList(const Rect& bounds, bool avoidOverdraw = true) :
             mBounds(bounds), mAvoidOverdraw(avoidOverdraw) {
         clear();
     }
     ~DeferredDisplayList() { clear(); }
-    void reset(const Rect& bounds) { mBounds.set(bounds); }
 
     enum OpBatchId {
         kOpBatch_None = 0, // Don't batch
@@ -120,6 +120,8 @@
     void addDrawOp(OpenGLRenderer& renderer, DrawOp* op);
 
 private:
+    DeferredDisplayList(const DeferredDisplayList& other); // disallow copy
+
     DeferredDisplayState* createState() {
         return new (mAllocator) DeferredDisplayState();
     }
diff --git a/libs/hwui/DisplayList.cpp b/libs/hwui/DisplayList.cpp
index a3e4bb4..c616cd8 100644
--- a/libs/hwui/DisplayList.cpp
+++ b/libs/hwui/DisplayList.cpp
@@ -14,8 +14,12 @@
  * limitations under the License.
  */
 
+#define ATRACE_TAG ATRACE_TAG_VIEW
+
 #include <SkCanvas.h>
 
+#include <utils/Trace.h>
+
 #include "Debug.h"
 #include "DisplayList.h"
 #include "DisplayListOp.h"
@@ -65,11 +69,6 @@
 void DisplayList::clearResources() {
     mDisplayListData = NULL;
 
-    mClipRectOp = NULL;
-    mSaveLayerOp = NULL;
-    mSaveOp = NULL;
-    mRestoreToCountOp = NULL;
-
     delete mTransformMatrix;
     delete mTransformCamera;
     delete mTransformMatrix3D;
@@ -168,17 +167,6 @@
         return;
     }
 
-    // allocate reusable ops for state-deferral
-    LinearAllocator& alloc = mDisplayListData->allocator;
-    mClipRectOp = new (alloc) ClipRectOp();
-    mSaveLayerOp = new (alloc) SaveLayerOp();
-    mSaveOp = new (alloc) SaveOp();
-    mRestoreToCountOp = new (alloc) RestoreToCountOp();
-    if (CC_UNLIKELY(!mSaveOp)) { // temporary debug logging
-        ALOGW("Error: %s's SaveOp not allocated, size %d", getName(), mSize);
-        CRASH();
-    }
-
     mFunctorCount = recorder.getFunctorCount();
 
     Caches& caches = Caches::getInstance();
@@ -253,6 +241,7 @@
     mHasOverlappingRendering = true;
     mTranslationX = 0;
     mTranslationY = 0;
+    mTranslationZ = 0;
     mRotation = 0;
     mRotationX = 0;
     mRotationY= 0;
@@ -269,6 +258,7 @@
     mHeight = 0;
     mPivotExplicitlySet = false;
     mCaching = false;
+    mIs3dRoot = true; // TODO: setter, java side impl
 }
 
 size_t DisplayList::getSize() {
@@ -320,27 +310,38 @@
                     mPivotY = mPrevHeight / 2.0f;
                 }
             }
-            if ((mMatrixFlags & ROTATION_3D) == 0) {
+            if (!DEBUG_ENABLE_3D && (mMatrixFlags & ROTATION_3D) == 0) {
                 mTransformMatrix->setTranslate(mTranslationX, mTranslationY);
                 mTransformMatrix->preRotate(mRotation, mPivotX, mPivotY);
                 mTransformMatrix->preScale(mScaleX, mScaleY, mPivotX, mPivotY);
             } else {
-                if (!mTransformCamera) {
-                    mTransformCamera = new Sk3DView();
-                    mTransformMatrix3D = new SkMatrix();
+                if (DEBUG_ENABLE_3D) {
+                    mTransform.loadTranslate(mPivotX + mTranslationX, mPivotY + mTranslationY,
+                            mTranslationZ);
+                    mTransform.rotate(mRotationX, 1, 0, 0);
+                    mTransform.rotate(mRotationY, 0, 1, 0);
+                    mTransform.rotate(mRotation, 0, 0, 1);
+                    mTransform.scale(mScaleX, mScaleY, 1);
+                    mTransform.translate(-mPivotX, -mPivotY);
+                } else {
+                    /* TODO: support this old transform approach, based on API level */
+                    if (!mTransformCamera) {
+                        mTransformCamera = new Sk3DView();
+                        mTransformMatrix3D = new SkMatrix();
+                    }
+                    mTransformMatrix->reset();
+                    mTransformCamera->save();
+                    mTransformMatrix->preScale(mScaleX, mScaleY, mPivotX, mPivotY);
+                    mTransformCamera->rotateX(mRotationX);
+                    mTransformCamera->rotateY(mRotationY);
+                    mTransformCamera->rotateZ(-mRotation);
+                    mTransformCamera->getMatrix(mTransformMatrix3D);
+                    mTransformMatrix3D->preTranslate(-mPivotX, -mPivotY);
+                    mTransformMatrix3D->postTranslate(mPivotX + mTranslationX,
+                            mPivotY + mTranslationY);
+                    mTransformMatrix->postConcat(*mTransformMatrix3D);
+                    mTransformCamera->restore();
                 }
-                mTransformMatrix->reset();
-                mTransformCamera->save();
-                mTransformMatrix->preScale(mScaleX, mScaleY, mPivotX, mPivotY);
-                mTransformCamera->rotateX(mRotationX);
-                mTransformCamera->rotateY(mRotationY);
-                mTransformCamera->rotateZ(-mRotation);
-                mTransformCamera->getMatrix(mTransformMatrix3D);
-                mTransformMatrix3D->preTranslate(-mPivotX, -mPivotY);
-                mTransformMatrix3D->postTranslate(mPivotX + mTranslationX,
-                        mPivotY + mTranslationY);
-                mTransformMatrix->postConcat(*mTransformMatrix3D);
-                mTransformCamera->restore();
             }
         }
         mMatrixDirty = false;
@@ -417,8 +418,13 @@
     if (mMatrixFlags != 0) {
         if (mMatrixFlags == TRANSLATION) {
             renderer.translate(mTranslationX, mTranslationY);
+            renderer.translateZ(mTranslationZ);
         } else {
+#if DEBUG_ENABLE_3D
+            renderer.concatMatrix(mTransform);
+#else
             renderer.concatMatrix(mTransformMatrix);
+#endif
         }
     }
     bool clipToBoundsNeeded = mCaching ? false : mClipToBounds;
@@ -436,14 +442,107 @@
                 saveFlags |= SkCanvas::kClipToLayer_SaveFlag;
                 clipToBoundsNeeded = false; // clipping done by saveLayer
             }
-            handler(mSaveLayerOp->reinit(0, 0, mRight - mLeft, mBottom - mTop,
-                    mAlpha * 255, SkXfermode::kSrcOver_Mode, saveFlags), PROPERTY_SAVECOUNT,
-                    mClipToBounds);
+
+            SaveLayerOp* op = new (handler.allocator()) SaveLayerOp(
+                    0, 0, mRight - mLeft, mBottom - mTop,
+                    mAlpha * 255, SkXfermode::kSrcOver_Mode, saveFlags);
+            handler(op, PROPERTY_SAVECOUNT, mClipToBounds);
         }
     }
     if (clipToBoundsNeeded) {
-        handler(mClipRectOp->reinit(0, 0, mRight - mLeft, mBottom - mTop, SkRegion::kIntersect_Op),
-                PROPERTY_SAVECOUNT, mClipToBounds);
+        ClipRectOp* op = new (handler.allocator()) ClipRectOp(0, 0,
+                mRight - mLeft, mBottom - mTop, SkRegion::kIntersect_Op);
+        handler(op, PROPERTY_SAVECOUNT, mClipToBounds);
+    }
+}
+
+/**
+ * Applies the left/top offset and the matrix properties of this DisplayList to 'matrix' in place.
+ */
+void DisplayList::applyViewPropertyTransforms(mat4& matrix) {
+    if (mLeft != 0 || mTop != 0) {
+        matrix.translate(mLeft, mTop);
+    }
+    if (mStaticMatrix) {
+        mat4 stat(*mStaticMatrix);
+        matrix.multiply(stat);
+    } else if (mAnimationMatrix) { // only consulted when no static matrix is set
+        mat4 anim(*mAnimationMatrix);
+        matrix.multiply(anim);
+    }
+    if (mMatrixFlags != 0) {
+        if (mMatrixFlags == TRANSLATION) { // TRANSLATION is the only flag set
+            matrix.translate(mTranslationX, mTranslationY, mTranslationZ);
+        } else { // NOTE(review): presumably relies on the transform being up to date - confirm
+#if DEBUG_ENABLE_3D
+            matrix.multiply(mTransform);
+#else
+            mat4 temp(*mTransformMatrix);
+            matrix.multiply(temp);
+#endif
+        }
+    }
+}
+
+/**
+ * Organizes the DisplayList hierarchy to prepare for Z-based draw order.
+ * This should be called before a call to defer() or drawDisplayList().
+ *
+ * Each DisplayList that serves as a 3d root builds its list of composited children,
+ * which are flagged to not draw in the standard draw loop.
+ */
+void DisplayList::computeOrdering() {
+    ATRACE_CALL();
+    m3dNodes.clear(); // drop entries from the previous pass, the traversal below rebuilds them
+    if (mDisplayListData == NULL) return; // nothing recorded (see clearResources()), no children
+    for (unsigned int i = 0; i < mDisplayListData->children.size(); i++) {
+        DrawDisplayListOp* childOp = mDisplayListData->children[i];
+        childOp->mDisplayList->computeOrderingImpl(childOp, &m3dNodes, &mat4::identity());
+    }
+}
+
+/**
+ * Recursive step of computeOrdering(): records opState's transform relative to its 3d root and,
+ * when this DisplayList has a non-zero translationZ, registers opState with that root for
+ * out of order (z sorted) drawing. Then recurses into this DisplayList's own children.
+ */
+void DisplayList::computeOrderingImpl(
+        DrawDisplayListOp* opState,
+        KeyedVector<float, Vector<DrawDisplayListOp*> >* compositedChildrenOf3dRoot,
+        const mat4* transformFrom3dRoot) {
+    // TODO: should avoid this calculation in most cases
+    opState->mTransformFrom3dRoot.load(*transformFrom3dRoot);
+    opState->mTransformFrom3dRoot.multiply(opState->mTransformFromParent);
+    if (mTranslationZ != 0.0f) { // TODO: other signals, such as custom 4x4 matrix
+        // composited layer, insert into current 3d root and flag for out of order draw
+        opState->mSkipInOrderDraw = true;
+        // sort key is the z of the pivot after the root-relative and property transforms
+        Vector3 pivot(mPivotX, mPivotY, 0.0f);
+        mat4 totalTransform(opState->mTransformFrom3dRoot);
+        applyViewPropertyTransforms(totalTransform);
+        totalTransform.mapPoint3d(pivot);
+        const float key = pivot.z;
+        if (compositedChildrenOf3dRoot->indexOfKey(key) < 0) {
+            compositedChildrenOf3dRoot->add(key, Vector<DrawDisplayListOp*>());
+        }
+        compositedChildrenOf3dRoot->editValueFor(key).push(opState);
+    } else {
+        opState->mSkipInOrderDraw = false; // standard in order draw
+    }
+
+    m3dNodes.clear();
+    if (mIs3dRoot) {
+        // create a new 3d space for children by separating their ordering
+        compositedChildrenOf3dRoot = &m3dNodes;
+        transformFrom3dRoot = &mat4::identity();
+    } else {
+        transformFrom3dRoot = &(opState->mTransformFrom3dRoot);
+    }
+    if (mDisplayListData == NULL) return; // data released (see clearResources()), no children
+    for (unsigned int i = 0; i < mDisplayListData->children.size(); i++) {
+        DrawDisplayListOp* childOp = mDisplayListData->children[i];
+        childOp->mDisplayList->computeOrderingImpl(childOp,
+                compositedChildrenOf3dRoot, transformFrom3dRoot);
     }
 }
 
@@ -454,6 +553,8 @@
     inline void operator()(DisplayListOp* operation, int saveCount, bool clipToBounds) {
         operation->defer(mDeferStruct, saveCount, mLevel, clipToBounds);
     }
+    inline LinearAllocator& allocator() { return *(mDeferStruct.mAllocator); }
+
 private:
     DeferStateStruct& mDeferStruct;
     const int mLevel;
@@ -474,6 +575,8 @@
 #endif
         operation->replay(mReplayStruct, saveCount, mLevel, clipToBounds);
     }
+    inline LinearAllocator& allocator() { return *(mReplayStruct.mAllocator); }
+
 private:
     ReplayStateStruct& mReplayStruct;
     const int mLevel;
@@ -490,11 +593,60 @@
             replayStruct.mDrawGlStatus);
 }
 
+/**
+ * Issues, in ascending z order, the composited children of this 3d root selected by 'mode'.
+ */
+template <class T>
+void DisplayList::iterate3dChildren(ChildrenSelectMode mode, OpenGLRenderer& renderer,
+        T& handler, const int level) {
+    if (m3dNodes.size() == 0 ||
+            (mode == kNegativeZChildren && m3dNodes.keyAt(0) > 0.0f) ||
+            (mode == kPositiveZChildren && m3dNodes.keyAt(m3dNodes.size() - 1) < 0.0f)) {
+        // nothing to draw
+        return;
+    }
+
+    LinearAllocator& alloc = handler.allocator();
+    ClipRectOp* clipOp = new (alloc) ClipRectOp(0, 0, mWidth, mHeight,
+            SkRegion::kIntersect_Op); // clip to 3d root bounds for now
+    handler(clipOp, PROPERTY_SAVECOUNT, mClipToBounds);
+    int rootRestoreTo = renderer.save(SkCanvas::kMatrix_SaveFlag | SkCanvas::kClip_SaveFlag);
+
+    for (size_t i = 0; i < m3dNodes.size(); i++) {
+        const float zValue = m3dNodes.keyAt(i);
+        if (mode == kPositiveZChildren && zValue < 0.0f) continue;
+        if (mode == kNegativeZChildren && zValue > 0.0f) break;
+        const Vector<DrawDisplayListOp*>& nodesAtZ = m3dNodes[i];
+        for (size_t j = 0; j < nodesAtZ.size(); j++) {
+            DrawDisplayListOp* op = nodesAtZ[j];
+            if (mode == kPositiveZChildren) {
+                // Draw shadow on renderer with parent matrix applied, passing in the child's
+                // total matrix. TODO: determine and pass background shape (and possibly drawable
+                // alpha), view must opt-in to shadows, consider shadows for other content
+                mat4 shadowMatrix(op->mTransformFrom3dRoot);
+                op->mDisplayList->applyViewPropertyTransforms(shadowMatrix);
+                DisplayListOp* shadowOp  = new (alloc) DrawShadowOp(shadowMatrix, op->mDisplayList->mAlpha,
+                        op->mDisplayList->getWidth(), op->mDisplayList->getHeight());
+                handler(shadowOp, PROPERTY_SAVECOUNT, mClipToBounds);
+            }
+            // mTransformFrom3dRoot is relative to this root, so restore the root's matrix after
+            // each child instead of letting the transforms of earlier children accumulate
+            const int childRestoreTo = renderer.save(SkCanvas::kMatrix_SaveFlag);
+            renderer.concatMatrix(op->mTransformFrom3dRoot);
+            op->mSkipInOrderDraw = false; // this is horrible, I'm so sorry everyone
+            handler(op, renderer.getSaveCount() - 1, mClipToBounds);
+            op->mSkipInOrderDraw = true;
+            renderer.restoreToCount(childRestoreTo);
+        }
+    }
+    handler(new (alloc) RestoreToCountOp(rootRestoreTo), PROPERTY_SAVECOUNT, mClipToBounds);
+}
+
 /**
  * This function serves both defer and replay modes, and will organize the displayList's component
  * operations for a single frame:
  *
- * Every 'simple' operation that affects just the matrix and alpha (or other factors of
+ * Every 'simple' state operation that affects just the matrix and alpha (or other factors of
  * DeferredDisplayState) may be issued directly to the renderer, but complex operations (with custom
  * defer logic) and operations in displayListOps are issued through the 'handler' which handles the
  * defer vs replay logic, per operation
@@ -517,8 +669,9 @@
             clipRect->right, clipRect->bottom);
 #endif
 
+    LinearAllocator& alloc = handler.allocator();
     int restoreTo = renderer.getSaveCount();
-    handler(mSaveOp->reinit(SkCanvas::kMatrix_SaveFlag | SkCanvas::kClip_SaveFlag),
+    handler(new (alloc) SaveOp(SkCanvas::kMatrix_SaveFlag | SkCanvas::kClip_SaveFlag),
             PROPERTY_SAVECOUNT, mClipToBounds);
 
     DISPLAY_LIST_LOGD("%*sSave %d %d", (level + 1) * 2, "",
@@ -526,30 +679,31 @@
 
     setViewProperties<T>(renderer, handler, level + 1);
 
-    if (mClipToBounds && renderer.quickRejectConservative(0, 0, mWidth, mHeight)) {
-        DISPLAY_LIST_LOGD("%*sRestoreToCount %d", (level + 1) * 2, "", restoreTo);
-        handler(mRestoreToCountOp->reinit(restoreTo), PROPERTY_SAVECOUNT, mClipToBounds);
-        renderer.restoreToCount(restoreTo);
-        renderer.setOverrideLayerAlpha(1.0f);
-        return;
-    }
+    bool quickRejected = mClipToBounds && renderer.quickRejectConservative(0, 0, mWidth, mHeight);
+    if (!quickRejected) {
+        // for 3d root, draw children with negative z values
+        iterate3dChildren(kNegativeZChildren, renderer, handler, level);
 
-    DisplayListLogBuffer& logBuffer = DisplayListLogBuffer::getInstance();
-    int saveCount = renderer.getSaveCount() - 1;
-    for (unsigned int i = 0; i < mDisplayListData->displayListOps.size(); i++) {
-        DisplayListOp *op = mDisplayListData->displayListOps[i];
+        DisplayListLogBuffer& logBuffer = DisplayListLogBuffer::getInstance();
+        const int saveCountOffset = renderer.getSaveCount() - 1;
+        for (unsigned int i = 0; i < mDisplayListData->displayListOps.size(); i++) {
+            DisplayListOp *op = mDisplayListData->displayListOps[i];
 
 #if DEBUG_DISPLAY_LIST
-        op->output(level + 1);
+            op->output(level + 1);
 #endif
 
-        logBuffer.writeCommand(level, op->name());
-        handler(op, saveCount, mClipToBounds);
+            logBuffer.writeCommand(level, op->name());
+            handler(op, saveCountOffset, mClipToBounds);
+        }
+
+        // for 3d root, draw children with positive z values
+        iterate3dChildren(kPositiveZChildren, renderer, handler, level);
     }
 
     DISPLAY_LIST_LOGD("%*sRestoreToCount %d", (level + 1) * 2, "", restoreTo);
-    handler(mRestoreToCountOp->reinit(restoreTo), PROPERTY_SAVECOUNT, mClipToBounds);
-    renderer.restoreToCount(restoreTo);
+    handler(new (alloc) RestoreToCountOp(restoreTo),
+            PROPERTY_SAVECOUNT, mClipToBounds);
     renderer.setOverrideLayerAlpha(1.0f);
 }
 
diff --git a/libs/hwui/DisplayList.h b/libs/hwui/DisplayList.h
index 1cd5f1c..983cc02 100644
--- a/libs/hwui/DisplayList.h
+++ b/libs/hwui/DisplayList.h
@@ -26,6 +26,7 @@
 
 #include <private/hwui/DrawGlInfo.h>
 
+#include <utils/KeyedVector.h>
 #include <utils/LinearAllocator.h>
 #include <utils/RefBase.h>
 #include <utils/SortedVector.h>
@@ -37,6 +38,8 @@
 #include <androidfw/ResourceTypes.h>
 
 #include "Debug.h"
+#include "Matrix.h"
+#include "DeferredDisplayList.h"
 
 #define TRANSLATION 0x0001
 #define ROTATION    0x0002
@@ -65,36 +68,70 @@
 class SaveLayerOp;
 class SaveOp;
 class RestoreToCountOp;
+class DrawDisplayListOp;
 
-struct DeferStateStruct {
-    DeferStateStruct(DeferredDisplayList& deferredList, OpenGLRenderer& renderer, int replayFlags)
-            : mDeferredList(deferredList), mRenderer(renderer), mReplayFlags(replayFlags) {}
-    DeferredDisplayList& mDeferredList;
+/**
+ * Holds data used in the playback of a tree of DisplayLists.
+ */
+class PlaybackStateStruct {
+protected:
+    PlaybackStateStruct(OpenGLRenderer& renderer, int replayFlags, LinearAllocator* allocator)
+            : mRenderer(renderer), mReplayFlags(replayFlags), mAllocator(allocator){}
+
+public:
     OpenGLRenderer& mRenderer;
     const int mReplayFlags;
+
+    // Allocator with the lifetime of a single frame.
+    // replay uses an Allocator owned by the struct, while defer shares the DeferredDisplayList's Allocator
+    LinearAllocator * const mAllocator;
 };
 
-struct ReplayStateStruct {
+class DeferStateStruct : public PlaybackStateStruct {
+public:
+    DeferStateStruct(DeferredDisplayList& deferredList, OpenGLRenderer& renderer, int replayFlags)
+            : PlaybackStateStruct(renderer, replayFlags, &(deferredList.mAllocator)), mDeferredList(deferredList) {}
+
+    DeferredDisplayList& mDeferredList;
+};
+
+class ReplayStateStruct : public PlaybackStateStruct {
+public:
     ReplayStateStruct(OpenGLRenderer& renderer, Rect& dirty, int replayFlags)
-            : mRenderer(renderer), mDirty(dirty), mReplayFlags(replayFlags),
-            mDrawGlStatus(DrawGlInfo::kStatusDone) {}
-    OpenGLRenderer& mRenderer;
+            : PlaybackStateStruct(renderer, replayFlags, &mReplayAllocator),
+            mDirty(dirty), mDrawGlStatus(DrawGlInfo::kStatusDone) {}
+
     Rect& mDirty;
-    const int mReplayFlags;
     status_t mDrawGlStatus;
+    LinearAllocator mReplayAllocator;
 };
 
 /**
- * Refcounted structure that holds data used in display list stream
+ * Refcounted structure that holds the list of commands used in display list stream.
  */
 class DisplayListData : public LightRefBase<DisplayListData> {
 public:
+    // allocator into which all ops were allocated
     LinearAllocator allocator;
+
+    // pointers to all ops within display list, pointing into allocator data
     Vector<DisplayListOp*> displayListOps;
+
+    // list of children display lists for quick, non-drawing traversal
+    Vector<DrawDisplayListOp*> children;
 };
 
 /**
- * Replays recorded drawing commands.
+ * Primary class for storing recorded canvas commands, as well as per-View/ViewGroup display properties.
+ *
+ * Recording of canvas commands is somewhat similar to SkPicture, except the canvas-recording
+ * functionality is split between DisplayListRenderer (which manages the recording), DisplayListData
+ * (which holds the actual data), and DisplayList (which holds properties and performs playback onto
+ * a renderer).
+ *
+ * Note that DisplayListData is swapped out from beneath an individual DisplayList when a view's
+ * recorded stream of canvas operations is refreshed. The DisplayList (and its properties) stay
+ * attached.
  */
 class DisplayList {
 public:
@@ -113,6 +150,7 @@
 
     void initFromDisplayListRenderer(const DisplayListRenderer& recorder, bool reusing = false);
 
+    void computeOrdering();
     void defer(DeferStateStruct& deferStruct, const int level);
     void replay(ReplayStateStruct& replayStruct, const int level);
 
@@ -188,12 +226,7 @@
     void setTranslationX(float translationX) {
         if (translationX != mTranslationX) {
             mTranslationX = translationX;
-            mMatrixDirty = true;
-            if (mTranslationX == 0.0f && mTranslationY == 0.0f) {
-                mMatrixFlags &= ~TRANSLATION;
-            } else {
-                mMatrixFlags |= TRANSLATION;
-            }
+            onTranslationUpdate();
         }
     }
 
@@ -204,12 +237,7 @@
     void setTranslationY(float translationY) {
         if (translationY != mTranslationY) {
             mTranslationY = translationY;
-            mMatrixDirty = true;
-            if (mTranslationX == 0.0f && mTranslationY == 0.0f) {
-                mMatrixFlags &= ~TRANSLATION;
-            } else {
-                mMatrixFlags |= TRANSLATION;
-            }
+            onTranslationUpdate();
         }
     }
 
@@ -217,6 +245,17 @@
         return mTranslationY;
     }
 
+    void setTranslationZ(float translationZ) {
+        if (translationZ != mTranslationZ) {
+            mTranslationZ = translationZ;
+            onTranslationUpdate();
+        }
+    }
+
+    float getTranslationZ() const {
+        return mTranslationZ;
+    }
+
     void setRotation(float rotation) {
         if (rotation != mRotation) {
             mRotation = rotation;
@@ -454,12 +493,36 @@
     }
 
 private:
+    enum ChildrenSelectMode {
+        kNegativeZChildren,
+        kPositiveZChildren
+    };
+
+    void onTranslationUpdate() {
+        mMatrixDirty = true;
+        if (mTranslationX == 0.0f && mTranslationY == 0.0f && mTranslationZ == 0.0f) {
+            mMatrixFlags &= ~TRANSLATION;
+        } else {
+            mMatrixFlags |= TRANSLATION;
+        }
+    }
+
     void outputViewProperties(const int level);
 
+    void applyViewPropertyTransforms(mat4& matrix);
+
+    void computeOrderingImpl(DrawDisplayListOp* opState,
+            KeyedVector<float, Vector<DrawDisplayListOp*> >* compositedChildrenOf3dRoot,
+            const mat4* transformFromRoot);
+
     template <class T>
     inline void setViewProperties(OpenGLRenderer& renderer, T& handler, const int level);
 
     template <class T>
+    inline void iterate3dChildren(ChildrenSelectMode mode, OpenGLRenderer& renderer,
+        T& handler, const int level);
+
+    template <class T>
     inline void iterate(OpenGLRenderer& renderer, T& handler, const int level);
 
     void init();
@@ -509,7 +572,7 @@
     bool mClipToBounds;
     float mAlpha;
     bool mHasOverlappingRendering;
-    float mTranslationX, mTranslationY;
+    float mTranslationX, mTranslationY, mTranslationZ;
     float mRotation, mRotationX, mRotationY;
     float mScaleX, mScaleY;
     float mPivotX, mPivotY;
@@ -526,23 +589,17 @@
     SkMatrix* mTransformMatrix3D;
     SkMatrix* mStaticMatrix;
     SkMatrix* mAnimationMatrix;
+    Matrix4 mTransform;
     bool mCaching;
+    bool mIs3dRoot;
+
 
     /**
-     * State operations - needed to defer displayList property operations (for example, when setting
-     * an alpha causes a SaveLayerAlpha to occur). These operations point into mDisplayListData's
-     * allocation, or null if uninitialized.
-     *
-     * These are initialized (via friend re-constructors) when a displayList is issued in either
-     * replay or deferred mode. If replaying, the ops are not used until the next frame. If
-     * deferring, the ops may be stored in the DeferredDisplayList to be played back a second time.
-     *
-     * They should be used at most once per frame (one call to 'iterate') to avoid overwriting data
+     * Draw time state - these properties are only set and used during rendering
      */
-    ClipRectOp* mClipRectOp;
-    SaveLayerOp* mSaveLayerOp;
-    SaveOp* mSaveOp;
-    RestoreToCountOp* mRestoreToCountOp;
+
+    // for 3d roots, contains a z sorted list of all children items
+    KeyedVector<float, Vector<DrawDisplayListOp*> > m3dNodes; // TODO: good data structure
 }; // class DisplayList
 
 }; // namespace uirenderer
diff --git a/libs/hwui/DisplayListOp.h b/libs/hwui/DisplayListOp.h
index 88077d4..1980b03 100644
--- a/libs/hwui/DisplayListOp.h
+++ b/libs/hwui/DisplayListOp.h
@@ -262,7 +262,6 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 class SaveOp : public StateOp {
-    friend class DisplayList; // give DisplayList private constructor/reinit access
 public:
     SaveOp(int flags)
             : mFlags(flags) {}
@@ -295,7 +294,6 @@
 };
 
 class RestoreToCountOp : public StateOp {
-    friend class DisplayList; // give DisplayList private constructor/reinit access
 public:
     RestoreToCountOp(int count)
             : mCount(count) {}
@@ -328,7 +326,6 @@
 };
 
 class SaveLayerOp : public StateOp {
-    friend class DisplayList; // give DisplayList private constructor/reinit access
 public:
     SaveLayerOp(float left, float top, float right, float bottom,
             int alpha, SkXfermode::Mode mode, int flags)
@@ -524,7 +521,6 @@
 };
 
 class ClipRectOp : public ClipOp {
-    friend class DisplayList; // give DisplayList private constructor/reinit access
 public:
     ClipRectOp(float left, float top, float right, float bottom, SkRegion::Op op)
             : ClipOp(op), mArea(left, top, right, bottom) {}
@@ -1100,7 +1096,7 @@
 class DrawColorOp : public DrawOp {
 public:
     DrawColorOp(int color, SkXfermode::Mode mode)
-            : DrawOp(0), mColor(color), mMode(mode) {};
+            : DrawOp(NULL), mColor(color), mMode(mode) {};
 
     virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty) {
         return renderer.drawColor(mColor, mMode);
@@ -1505,7 +1501,7 @@
 class DrawFunctorOp : public DrawOp {
 public:
     DrawFunctorOp(Functor* functor)
-            : DrawOp(0), mFunctor(functor) {}
+            : DrawOp(NULL), mFunctor(functor) {}
 
     virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty) {
         renderer.startMark("GL functor");
@@ -1525,20 +1521,21 @@
 };
 
 class DrawDisplayListOp : public DrawBoundedOp {
+    friend class DisplayList; // grant DisplayList access to info of child
 public:
-    DrawDisplayListOp(DisplayList* displayList, int flags)
+    DrawDisplayListOp(DisplayList* displayList, int flags, const mat4& transformFromParent)
             : DrawBoundedOp(0, 0, displayList->getWidth(), displayList->getHeight(), 0),
-            mDisplayList(displayList), mFlags(flags) {}
+            mDisplayList(displayList), mFlags(flags), mTransformFromParent(transformFromParent) {}
 
     virtual void defer(DeferStateStruct& deferStruct, int saveCount, int level,
             bool useQuickReject) {
-        if (mDisplayList && mDisplayList->isRenderable()) {
+        if (mDisplayList && mDisplayList->isRenderable() && !mSkipInOrderDraw) {
             mDisplayList->defer(deferStruct, level + 1);
         }
     }
     virtual void replay(ReplayStateStruct& replayStruct, int saveCount, int level,
             bool useQuickReject) {
-        if (mDisplayList && mDisplayList->isRenderable()) {
+        if (mDisplayList && mDisplayList->isRenderable() && !mSkipInOrderDraw) {
             mDisplayList->replay(replayStruct, level + 1);
         }
     }
@@ -1559,13 +1556,58 @@
 
 private:
     DisplayList* mDisplayList;
-    int mFlags;
+    const int mFlags;
+
+    ///////////////////////////
+    // Properties below are used by DisplayList::computeOrderingImpl() and iterate()
+    ///////////////////////////
+    /**
+     * Records transform vs parent, used for computing total transform without rerunning DL contents
+     */
+    const mat4 mTransformFromParent;
+
+    /**
+     * Holds the transformation between the 3d root ViewGroup and this DisplayList drawing
+     * instance. Represents any translations / transformations done within the drawing of the 3d
+     * root ViewGroup's draw, before the draw of the View represented by this DisplayList draw
+     * instance.
+     *
+     * Note: doesn't include any transformation recorded within the DisplayList and its properties.
+     */
+    mat4 mTransformFrom3dRoot;
+    bool mSkipInOrderDraw;
+};
+
+/**
+ * Not a canvas operation, used only by 3d / z ordering logic in DisplayList::iterate()
+ */
+class DrawShadowOp : public DrawOp {
+public:
+    DrawShadowOp(const mat4& casterTransform, float casterAlpha, float width, float height)
+            : DrawOp(NULL), mCasterTransform(casterTransform), mCasterAlpha(casterAlpha),
+            mWidth(width), mHeight(height) {}
+
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty) {
+        return renderer.drawShadow(mCasterTransform, mCasterAlpha, mWidth, mHeight);
+    }
+
+    virtual void output(int level, uint32_t logFlags) const {
+        OP_LOG("DrawShadow of width %.2f, height %.2f", mWidth, mHeight);
+    }
+
+    virtual const char* name() { return "DrawShadow"; }
+
+private:
+    const mat4 mCasterTransform;
+    const float mCasterAlpha;
+    const float mWidth;
+    const float mHeight;
 };
 
 class DrawLayerOp : public DrawOp {
 public:
     DrawLayerOp(Layer* layer, float x, float y)
-            : DrawOp(0), mLayer(layer), mX(x), mY(y) {}
+            : DrawOp(NULL), mLayer(layer), mX(x), mY(y) {}
 
     virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty) {
         return renderer.drawLayer(mLayer, mX, mY);
diff --git a/libs/hwui/DisplayListRenderer.cpp b/libs/hwui/DisplayListRenderer.cpp
index d024923..19a027d 100644
--- a/libs/hwui/DisplayListRenderer.cpp
+++ b/libs/hwui/DisplayListRenderer.cpp
@@ -111,6 +111,7 @@
     } else {
         displayList->initFromDisplayListRenderer(*this, true);
     }
+    // TODO: should just avoid setting the DisplayList's DisplayListData
     displayList->setRenderable(mHasDrawOps);
     return displayList;
 }
@@ -120,7 +121,8 @@
 }
 
 void DisplayListRenderer::setViewport(int width, int height) {
-    mOrthoMatrix.loadOrtho(0, width, height, 0, -1, 1);
+    // TODO: DisplayListRenderer shouldn't have a projection matrix, as it should never be used
+    mViewProjMatrix.loadOrtho(0, width, height, 0, -1, 1);
 
     mWidth = width;
     mHeight = height;
@@ -248,7 +250,10 @@
     //       resources cache, but we rely on the caller (UI toolkit) to
     //       do the right thing for now
 
-    addDrawOp(new (alloc()) DrawDisplayListOp(displayList, flags));
+    DrawDisplayListOp* op = new (alloc()) DrawDisplayListOp(displayList, flags, currentTransform());
+    addDrawOp(op);
+    mDisplayListData->children.push(op);
+
     return DrawGlInfo::kStatusDone;
 }
 
diff --git a/libs/hwui/DisplayListRenderer.h b/libs/hwui/DisplayListRenderer.h
index d233150..7269378 100644
--- a/libs/hwui/DisplayListRenderer.h
+++ b/libs/hwui/DisplayListRenderer.h
@@ -328,6 +328,7 @@
         return patch;
     }
 
+    // TODO: move these to DisplayListData
     Vector<SkBitmap*> mBitmapResources;
     Vector<SkBitmap*> mOwnedBitmapResources;
     Vector<SkiaColorFilter*> mFilterResources;
diff --git a/libs/hwui/Layer.cpp b/libs/hwui/Layer.cpp
index bd371a3..742ffd47 100644
--- a/libs/hwui/Layer.cpp
+++ b/libs/hwui/Layer.cpp
@@ -194,11 +194,9 @@
         dirtyRect.set(0, 0, width, height);
     }
 
-    if (deferredList) {
-        deferredList->reset(dirtyRect);
-    } else {
-        deferredList = new DeferredDisplayList(dirtyRect);
-    }
+    delete deferredList;
+    deferredList = new DeferredDisplayList(dirtyRect);
+
     DeferStateStruct deferredState(*deferredList, *renderer,
             DisplayList::kReplayFlag_ClipChildren);
 
@@ -206,6 +204,7 @@
     renderer->setupFrameState(dirtyRect.left, dirtyRect.top,
             dirtyRect.right, dirtyRect.bottom, !isBlend());
 
+    displayList->computeOrdering();
     displayList->defer(deferredState, 0);
 
     deferredUpdateScheduled = false;
diff --git a/libs/hwui/Matrix.cpp b/libs/hwui/Matrix.cpp
index ba22071..4f5cd26 100644
--- a/libs/hwui/Matrix.cpp
+++ b/libs/hwui/Matrix.cpp
@@ -89,8 +89,9 @@
         float m01 = data[kSkewX];
         float m10 = data[kSkewY];
         float m11 = data[kScaleY];
+        float m32 = data[kTranslateZ];
 
-        if (m01 != 0.0f || m10 != 0.0f) {
+        if (m01 != 0.0f || m10 != 0.0f || m32 != 0.0f) {
             mType |= kTypeAffine;
         }
 
@@ -131,11 +132,13 @@
 }
 
 bool Matrix4::isPureTranslate() const {
-    return getGeometryType() <= kTypeTranslate;
+    // NOTE: temporary hack to work around ignoreTransform behavior with Z values
+    // TODO: separate this into isPure2dTranslate vs isPure3dTranslate
+    return getGeometryType() <= kTypeTranslate && (data[kTranslateZ] == 0.0f);
 }
 
 bool Matrix4::isSimple() const {
-    return getGeometryType() <= (kTypeScale | kTypeTranslate);
+    return getGeometryType() <= (kTypeScale | kTypeTranslate) && (data[kTranslateZ] == 0.0f);
 }
 
 bool Matrix4::isIdentity() const {
@@ -369,6 +372,84 @@
     mType = kTypeUnknown;
 }
 
+// translated from android.opengl.Matrix#frustumM(); NOTE: args are (left, top, right, bottom)
+void Matrix4::loadFrustum(float left, float top, float right, float bottom, float near, float far) {
+    float r_width  = 1.0f / (right - left); // not guarded against right == left
+    float r_height = 1.0f / (top - bottom); // not guarded against top == bottom
+    float r_depth  = 1.0f / (near - far); // not guarded against near == far
+    float x = 2.0f * (near * r_width);
+    float y = 2.0f * (near * r_height);
+    float A = (right + left) * r_width;
+    float B = (top + bottom) * r_height;
+    float C = (far + near) * r_depth;
+    float D = 2.0f * (far * near * r_depth);
+
+    memset(&data, 0, sizeof(data)); // zero every entry; only the non-zero ones are set below
+    mType = kTypeUnknown; // previously stored type no longer applies
+
+    data[kScaleX] = x;
+    data[kScaleY] = y;
+    data[8] = A;
+    data[9] = B;
+    data[kScaleZ] = C;
+    data[kTranslateZ] = D;
+    data[11] = -1.0f;
+}
+
+// Loads a view matrix for an eye looking at center; from android.opengl.Matrix#setLookAtM()
+void Matrix4::loadLookAt(float eyeX, float eyeY, float eyeZ,
+        float centerX, float centerY, float centerZ,
+        float upX, float upY, float upZ) {
+    float fx = centerX - eyeX; // f = direction from the eye towards the center
+    float fy = centerY - eyeY;
+    float fz = centerZ - eyeZ;
+
+    // Normalize f
+    float rlf = 1.0f / sqrt(fx*fx + fy*fy + fz*fz); // not guarded against eye == center
+    fx *= rlf;
+    fy *= rlf;
+    fz *= rlf;
+
+    // compute s = f x up (x means "cross product")
+    float sx = fy * upZ - fz * upY;
+    float sy = fz * upX - fx * upZ;
+    float sz = fx * upY - fy * upX;
+
+    // and normalize s
+    float rls = 1.0f / sqrt(sx*sx + sy*sy + sz*sz); // not guarded against up parallel to f
+    sx *= rls;
+    sy *= rls;
+    sz *= rls;
+
+    // compute u = s x f
+    float ux = sy * fz - sz * fy;
+    float uy = sz * fx - sx * fz;
+    float uz = sx * fy - sy * fx;
+
+    mType = kTypeUnknown; // all 16 entries are overwritten below
+    data[0] = sx;
+    data[1] = ux;
+    data[2] = -fx;
+    data[3] = 0.0f;
+
+    data[4] = sy;
+    data[5] = uy;
+    data[6] = -fy;
+    data[7] = 0.0f;
+
+    data[8] = sz;
+    data[9] = uz;
+    data[10] = -fz;
+    data[11] = 0.0f;
+
+    data[12] = 0.0f;
+    data[13] = 0.0f;
+    data[14] = 0.0f;
+    data[15] = 1.0f;
+
+    translate(-eyeX, -eyeY, -eyeZ); // finally translate by the negated eye position
+}
+
 void Matrix4::loadOrtho(float left, float right, float bottom, float top, float near, float far) {
     loadIdentity();
 
@@ -382,6 +463,14 @@
     mType = kTypeTranslate | kTypeScale | kTypeRectToRect;
 }
 
+void Matrix4::mapPoint3d(Vector3& vec) const { // transforms vec in place, no perspective divide
+    //TODO: optimize simple case
+    Vector3 orig(vec); // copy of the input, since vec is overwritten one component at a time
+    vec.x = orig.x * data[kScaleX] + orig.y * data[kSkewX] + orig.z * data[8] + data[kTranslateX];
+    vec.y = orig.x * data[kSkewY] + orig.y * data[kScaleY] + orig.z * data[9] + data[kTranslateY];
+    vec.z = orig.x * data[2] + orig.y * data[6] + orig.z * data[kScaleZ] + data[kTranslateZ];
+}
+
 #define MUL_ADD_STORE(a, b, c) a = (a) * (b) + (c)
 
 void Matrix4::mapPoint(float& x, float& y) const {
diff --git a/libs/hwui/Matrix.h b/libs/hwui/Matrix.h
index b861ba4..00ca050 100644
--- a/libs/hwui/Matrix.h
+++ b/libs/hwui/Matrix.h
@@ -121,6 +121,10 @@
     void loadRotate(float angle);
     void loadRotate(float angle, float x, float y, float z);
     void loadMultiply(const Matrix4& u, const Matrix4& v);
+    void loadFrustum(float left, float top, float right, float bottom, float near, float far);
+    void loadLookAt(float eyeX, float eyeY, float eyeZ,
+            float centerX, float centerY, float centerZ,
+            float upX, float upY, float upZ);
 
     void loadOrtho(float left, float right, float bottom, float top, float near, float far);
 
@@ -134,17 +138,18 @@
 
     void multiply(float v);
 
-    void translate(float x, float y) {
+    void translate(float x, float y, float z = 0) {
         if ((getType() & sGeometryMask) <= kTypeTranslate) {
             data[kTranslateX] += x;
             data[kTranslateY] += y;
+            data[kTranslateZ] += z;
         } else {
             // Doing a translation will only affect the translate bit of the type
             // Save the type
             uint8_t type = mType;
 
             Matrix4 u;
-            u.loadTranslate(x, y, 0.0f);
+            u.loadTranslate(x, y, z);
             multiply(u);
 
             // Restore the type and fix the translate bit
@@ -190,8 +195,9 @@
     void copyTo(float* v) const;
     void copyTo(SkMatrix& v) const;
 
-    void mapRect(Rect& r) const;
-    void mapPoint(float& x, float& y) const;
+    void mapPoint3d(Vector3& vec) const;
+    void mapPoint(float& x, float& y) const; // 2d only
+    void mapRect(Rect& r) const; // 2d only
 
     float getTranslateX() const;
     float getTranslateY() const;
diff --git a/libs/hwui/OpenGLRenderer.cpp b/libs/hwui/OpenGLRenderer.cpp
index e256ec2..6599b20 100644
--- a/libs/hwui/OpenGLRenderer.cpp
+++ b/libs/hwui/OpenGLRenderer.cpp
@@ -180,7 +180,21 @@
 }
 
 void OpenGLRenderer::initViewport(int width, int height) {
-    mOrthoMatrix.loadOrtho(0, width, height, 0, -1, 1);
+    float dist = std::max(width, height) * 1.5;
+
+    if (DEBUG_ENABLE_3D) {
+        // TODO: make view proj app configurable
+        Matrix4 projection;
+        projection.loadFrustum(-width / 2, -height / 2, width / 2, height / 2, dist, 0);
+        Matrix4 view;
+        view.loadLookAt(0, 0, dist,
+                0, 0, 0,
+                0, 1, 0);
+        mViewProjMatrix.loadMultiply(projection, view);
+        mViewProjMatrix.translate(-width/2, -height/2);
+    } else {
+        mViewProjMatrix.loadOrtho(0, width, height, 0, -1, 1);
+    }
 
     mWidth = width;
     mHeight = height;
@@ -753,7 +767,7 @@
     if (restoreOrtho) {
         Rect& r = previous->viewport;
         glViewport(r.left, r.top, r.right, r.bottom);
-        mOrthoMatrix.load(current->orthoMatrix);
+        mViewProjMatrix.load(current->orthoMatrix);
     }
 
     mSaveCount--;
@@ -984,7 +998,7 @@
     mSnapshot->resetClip(clip.left, clip.top, clip.right, clip.bottom);
     mSnapshot->viewport.set(0.0f, 0.0f, bounds.getWidth(), bounds.getHeight());
     mSnapshot->height = bounds.getHeight();
-    mSnapshot->orthoMatrix.load(mOrthoMatrix);
+    mSnapshot->orthoMatrix.load(mViewProjMatrix);
 
     endTiling();
     debugOverdraw(false, false);
@@ -1013,7 +1027,9 @@
 
     // Change the ortho projection
     glViewport(0, 0, bounds.getWidth(), bounds.getHeight());
-    mOrthoMatrix.loadOrtho(0.0f, bounds.getWidth(), bounds.getHeight(), 0.0f, -1.0f, 1.0f);
+
+    // TODO: determine best way to support 3d drawing within HW layers
+    mViewProjMatrix.loadOrtho(0.0f, bounds.getWidth(), bounds.getHeight(), 0.0f, -1.0f, 1.0f);
 
     return true;
 }
@@ -1539,10 +1555,8 @@
 }
 
 void OpenGLRenderer::concatMatrix(SkMatrix* matrix) {
-    SkMatrix transform;
-    currentTransform().copyTo(transform);
-    transform.preConcat(*matrix);
-    currentTransform().load(transform);
+    mat4 transform(*matrix);
+    currentTransform().multiply(transform);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -1927,10 +1941,10 @@
 
     bool dirty = right - left > 0.0f && bottom - top > 0.0f;
     if (!ignoreTransform) {
-        mCaches.currentProgram->set(mOrthoMatrix, mModelView, currentTransform(), offset);
+        mCaches.currentProgram->set(mViewProjMatrix, mModelView, currentTransform(), offset);
         if (dirty && mTrackDirtyRegions) dirtyLayer(left, top, right, bottom, currentTransform());
     } else {
-        mCaches.currentProgram->set(mOrthoMatrix, mModelView, mat4::identity(), offset);
+        mCaches.currentProgram->set(mViewProjMatrix, mModelView, mat4::identity(), offset);
         if (dirty && mTrackDirtyRegions) dirtyLayer(left, top, right, bottom);
     }
 }
@@ -2064,9 +2078,12 @@
 status_t OpenGLRenderer::drawDisplayList(DisplayList* displayList, Rect& dirty,
         int32_t replayFlags) {
     status_t status;
+
     // All the usual checks and setup operations (quickReject, setupDraw, etc.)
     // will be performed by the display list itself
     if (displayList && displayList->isRenderable()) {
+        // compute 3d ordering
+        displayList->computeOrdering();
         if (CC_UNLIKELY(mCaches.drawDeferDisabled)) {
             status = startFrame();
             ReplayStateStruct replayStruct(*this, dirty, replayFlags);
@@ -2082,7 +2099,7 @@
         flushLayers();
         status = startFrame();
 
-        return status | deferredList.flush(*this, dirty);
+        return deferredList.flush(*this, dirty) | status;
     }
 
     return DrawGlInfo::kStatusDone;
@@ -3372,6 +3389,34 @@
     return drawColorRects(rects, count, color, mode);
 }
 
+status_t OpenGLRenderer::drawShadow(const mat4& casterTransform, float casterAlpha,
+        float width, float height) { // NOTE(review): casterAlpha is not read in this body
+    if (mSnapshot->isIgnored()) return DrawGlInfo::kStatusDone;
+
+    // For now, always enable scissor
+    // TODO: use quickReject
+    mCaches.enableScissor();
+
+    SkPaint paint;
+    paint.setColor(0x3f000000); // fixed shadow color: black with alpha 0x3f (ARGB)
+    paint.setAntiAlias(true);
+    VertexBuffer vertexBuffer;
+    {
+        //TODO: populate vertex buffer with better shadow geometry.
+        Vector3 pivot(width/2, height/2, 0.0f); // center of the untransformed caster
+        casterTransform.mapPoint3d(pivot); // center after applying the caster's transform
+
+        float zScaleFactor = 0.5 + 0.0005f * pivot.z; // half-extent scale; exactly 0.5 at z == 0
+
+        SkPath path; // rectangle centered on pivot, extents scaled by zScaleFactor
+        path.addRect(pivot.x - width * zScaleFactor, pivot.y - height * zScaleFactor,
+                pivot.x + width * zScaleFactor, pivot.y + height * zScaleFactor);
+        PathTessellator::tessellatePath(path, &paint, mSnapshot->transform, vertexBuffer);
+    }
+
+    return drawVertexBuffer(vertexBuffer, &paint);
+}
+
 status_t OpenGLRenderer::drawColorRects(const float* rects, int count, int color,
         SkXfermode::Mode mode, bool ignoreTransform, bool dirty, bool clip) {
     if (count == 0) {
diff --git a/libs/hwui/OpenGLRenderer.h b/libs/hwui/OpenGLRenderer.h
index cfc5931..185e247 100644
--- a/libs/hwui/OpenGLRenderer.h
+++ b/libs/hwui/OpenGLRenderer.h
@@ -265,6 +265,12 @@
     ANDROID_API void getMatrix(SkMatrix* matrix);
     virtual void setMatrix(SkMatrix* matrix);
     virtual void concatMatrix(SkMatrix* matrix);
+    virtual void concatMatrix(Matrix4& matrix) { // Matrix4 overload of concatMatrix(SkMatrix*)
+        currentTransform().multiply(matrix); // multiplies the current transform by matrix
+    }
+    void translateZ(float z) { // translates the current transform along Z only
+        currentTransform().translate(0,0,z); // x and y offsets are zero
+    }
 
     ANDROID_API const Rect& getClipBounds();
 
@@ -315,6 +321,9 @@
             DrawOpMode drawOpMode = kDrawOpMode_Immediate);
     virtual status_t drawRects(const float* rects, int count, SkPaint* paint);
 
+    status_t drawShadow(const mat4& casterTransform, float casterAlpha,
+            float width, float height);
+
     virtual void resetShader();
     virtual void setupShader(SkiaShader* shader);
 
@@ -1070,8 +1079,8 @@
     // Dimensions of the drawing surface
     int mWidth, mHeight;
 
-    // Matrix used for ortho projection in shaders
-    mat4 mOrthoMatrix;
+    // Matrix used for view/projection in shaders
+    mat4 mViewProjMatrix;
 
     /**
      * Model-view matrix used to position/size objects
diff --git a/libs/hwui/Vector.h b/libs/hwui/Vector.h
index 497924e..5110272 100644
--- a/libs/hwui/Vector.h
+++ b/libs/hwui/Vector.h
@@ -107,6 +107,21 @@
     }
 }; // class Vector2
 
+class Vector3 { // three-component float vector with public x, y, z members
+public:
+    float x;
+    float y;
+    float z;
+
+    Vector3() : // default: all components zero
+        x(0.0f), y(0.0f), z(0.0f) {
+    }
+
+    Vector3(float px, float py, float pz) : // component-wise initialization
+        x(px), y(py), z(pz) {
+    }
+};
+
 ///////////////////////////////////////////////////////////////////////////////
 // Types
 ///////////////////////////////////////////////////////////////////////////////