Batch consecutive draw rects.

Review URL: http://codereview.appspot.com/4178057/

git-svn-id: http://skia.googlecode.com/svn/trunk@800 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/gpu/src/GrContext.cpp b/gpu/src/GrContext.cpp
index c11d02f..ad7f776 100644
--- a/gpu/src/GrContext.cpp
+++ b/gpu/src/GrContext.cpp
@@ -27,11 +27,18 @@
 

 #define DEFER_TEXT_RENDERING 1

 

+#define BATCH_RECT_TO_RECT (1 && !GR_STATIC_RECT_VB)

+

 static const size_t MAX_TEXTURE_CACHE_COUNT = 128;

 static const size_t MAX_TEXTURE_CACHE_BYTES = 8 * 1024 * 1024;

 

-static const uint32_t TEXT_POOL_VB_SIZE = 1 << 18; // enough to draw 4K untextured glyphs

-static const uint32_t NUM_TEXT_POOL_VBS = 4;

+static const size_t DRAW_BUFFER_VBPOOL_BUFFER_SIZE = 1 << 18;

+static const int DRAW_BUFFER_VBPOOL_PREALLOC_BUFFERS = 4;

+

+// We are currently only batching Text and drawRectToRect, both

+// of which use the quad index buffer.

+static const size_t DRAW_BUFFER_IBPOOL_BUFFER_SIZE = 0;

+static const int DRAW_BUFFER_IBPOOL_PREALLOC_BUFFERS = 0;

 

 GrContext* GrContext::Create(GrGpu::Engine engine,

                              GrGpu::Platform3DContext context3D) {

@@ -52,9 +59,9 @@
     fGpu->unref();

     delete fTextureCache;

     delete fFontCache;

-    delete fTextDrawBuffer;

-    delete fTextVBAllocPool;

-    delete fTextIBAllocPool;

+    delete fDrawBuffer; // constructed from the two pools below; freed before them

+    delete fDrawBufferVBAllocPool;

+    delete fDrawBufferIBAllocPool; // the index pool is a separate allocation with its own delete

 }

 

 void GrContext::abandonAllTextures() {

@@ -331,18 +338,18 @@
                          const GrMatrix* matrix) {

 

     bool textured = NULL != paint.getTexture();

-    GrVertexLayout layout = (textured) ?

-                            GrDrawTarget::StagePosAsTexCoordVertexLayoutBit(0) :

-                            0;

 

-    this->prepareToDraw(paint);

+    GrDrawTarget* target = this->prepareToDraw(paint, kUnbuffered_DrawCategory);

 

     if (width >= 0) {

         // TODO: consider making static vertex buffers for these cases.

         // Hairline could be done by just adding closing vertex to

         // unitSquareVertexBuffer()

+        GrVertexLayout layout = (textured) ?

+                                 GrDrawTarget::StagePosAsTexCoordVertexLayoutBit(0) :

+                                 0;

         static const int worstCaseVertCount = 10;

-        GrDrawTarget::AutoReleaseGeometry geo(fGpu, layout, worstCaseVertCount, 0);

+        GrDrawTarget::AutoReleaseGeometry geo(target, layout, worstCaseVertCount, 0);

 

         if (!geo.succeeded()) {

             return;

@@ -369,16 +376,20 @@
 

         GrDrawTarget::AutoViewMatrixRestore avmr;

         if (NULL != matrix) {

-            avmr.set(fGpu);

-            fGpu->concatViewMatrix(*matrix);

-            fGpu->concatTextureMatrix(0, *matrix);

+            avmr.set(target);

+            target->concatViewMatrix(*matrix);

+            target->concatTextureMatrix(0, *matrix);

         }

 

-        fGpu->drawNonIndexed(primType, 0, vertCount);

+        target->drawNonIndexed(primType, 0, vertCount);

     } else {

         #if GR_STATIC_RECT_VB

-            fGpu->setVertexSourceToBuffer(layout, fGpu->unitSquareVertexBuffer());

-            GrDrawTarget::AutoViewMatrixRestore avmr(fGpu);

+            GrVertexLayout layout = (textured) ?

+                            GrDrawTarget::StagePosAsTexCoordVertexLayoutBit(0) :

+                            0;

+            target->setVertexSourceToBuffer(layout, 

+                                            fGpu->getUnitSquareVertexBuffer());

+            GrDrawTarget::AutoViewMatrixRestore avmr(target);

             GrMatrix m;

             m.setAll(rect.width(), 0,             rect.fLeft,

                      0,            rect.height(), rect.fTop,

@@ -388,25 +399,15 @@
                 m.postConcat(*matrix);

             }

 

-            fGpu->concatViewMatrix(m);

+            target->concatViewMatrix(m);

 

             if (textured) {

-                fGpu->concatTextureMatrix(0, m);

+                target->concatTextureMatrix(0, m);

             }

+            target->drawNonIndexed(GrDrawTarget::kTriangleFan_PrimitiveType, 0, 4);

         #else

-            GrDrawTarget::AutoReleaseGeometry geo(fGpu, layout, 4, 0);

-            GrPoint* vertex = geo.positions();

-            vertex->setRectFan(rect.fLeft, rect.fTop, rect.fRight, rect.fBottom);

-

-            GrDrawTarget::AutoViewMatrixRestore avmr;

-            if (NULL != matrix) {

-                avmr.set(fGpu);

-                fGpu->concatViewMatrix(*matrix);

-                fGpu->concatTextureMatrix(0, *matrix);

-            }

+            target->drawSimpleRect(rect, matrix, textured ? 1 : 0);

         #endif

-

-        fGpu->drawNonIndexed(GrDrawTarget::kTriangleFan_PrimitiveType, 0, 4);

     }

 }

 

@@ -420,12 +421,14 @@
         drawRect(paint, dstRect, -1, dstMatrix);

         return;

     }

-

-    this->prepareToDraw(paint);

+    

+    GR_STATIC_ASSERT(!BATCH_RECT_TO_RECT || !GR_STATIC_RECT_VB);

 

 #if GR_STATIC_RECT_VB

+    GrDrawTarget* target = this->prepareToDraw(paint, kUnbuffered_DrawCategory);

+

     GrVertexLayout layout = GrDrawTarget::StagePosAsTexCoordVertexLayoutBit(0);

-    GrDrawTarget::AutoViewMatrixRestore avmr(fGpu);

+    GrDrawTarget::AutoViewMatrixRestore avmr(target);

 

     GrMatrix m;

 

@@ -435,7 +438,7 @@
     if (NULL != dstMatrix) {

         m.postConcat(*dstMatrix);

     }

-    fGpu->concatViewMatrix(m);

+    target->concatViewMatrix(m);

 

     m.setAll(srcRect.width(), 0,                srcRect.fLeft,

              0,               srcRect.height(), srcRect.fTop,

@@ -443,34 +446,26 @@
     if (NULL != srcMatrix) {

         m.postConcat(*srcMatrix);

     }

-    fGpu->concatTextureMatrix(0, m);

+    target->concatTextureMatrix(0, m);

 

-    fGpu->setVertexSourceToBuffer(layout, fGpu->unitSquareVertexBuffer());

+    target->setVertexSourceToBuffer(layout, fGpu->getUnitSquareVertexBuffer());

+    target->drawNonIndexed(GrDrawTarget::kTriangleFan_PrimitiveType, 0, 4);

 #else

-    GrVertexLayout layout = GrDrawTarget::StageTexCoordVertexLayoutBit(0,0);

 

-    GrDrawTarget::AutoReleaseGeometry geo(fGpu, layout, 4, 0);

-    GrPoint* pos = geo.positions();

-    GrPoint* tex = pos + 1;

-    static const size_t stride = 2 * sizeof(GrPoint);

-    pos[0].setRectFan(dstRect.fLeft, dstRect.fTop,

-                      dstRect.fRight, dstRect.fBottom,

-                      stride);

-    tex[0].setRectFan(srcRect.fLeft, srcRect.fTop,

-                      srcRect.fRight, srcRect.fBottom,

-                      stride);

-

-    GrDrawTarget::AutoViewMatrixRestore avmr;

-    if (NULL != dstMatrix) {

-        avmr.set(fGpu);

-        fGpu->concatViewMatrix(*dstMatrix);

-    }

-    if (NULL != srcMatrix) {

-        fGpu->concatTextureMatrix(0, *srcMatrix);

-    }

-

+    GrDrawTarget* target;

+#if BATCH_RECT_TO_RECT 

+    target = this->prepareToDraw(paint, kBuffered_DrawCategory);

+#else 

+    target = this->prepareToDraw(paint, kUnbuffered_DrawCategory);

 #endif

-    fGpu->drawNonIndexed(GrDrawTarget::kTriangleFan_PrimitiveType, 0, 4);

+

+    const GrRect* srcRects[GrDrawTarget::kNumStages] = {NULL};

+    const GrMatrix* srcMatrices[GrDrawTarget::kNumStages] = {NULL};

+    srcRects[0] = &srcRect;

+    srcMatrices[0] = srcMatrix;

+

+    target->drawRect(dstRect, dstMatrix, 1, srcRects, srcMatrices);

+#endif

 }

 

 void GrContext::drawVertices(const GrPaint& paint,

@@ -486,7 +481,7 @@
 

     GrDrawTarget::AutoReleaseGeometry geo;

 

-    this->prepareToDraw(paint);

+    GrDrawTarget* target = this->prepareToDraw(paint, kUnbuffered_DrawCategory);

 

     if (NULL != paint.getTexture()) {

         if (NULL == texCoords) {

@@ -503,7 +498,7 @@
     }

 

     if (sizeof(GrPoint) != vertexSize) {

-        if (!geo.set(fGpu, layout, vertexCount, 0)) {

+        if (!geo.set(target, layout, vertexCount, 0)) {

             GrPrintf("Failed to get space for vertices!");

             return;

         }

@@ -526,14 +521,14 @@
             curVertex = (void*)((intptr_t)curVertex + vsize);

         }

     } else {

-        fGpu->setVertexSourceToArray(layout, positions, vertexCount);

+        target->setVertexSourceToArray(layout, positions, vertexCount);

     }

 

     if (NULL != indices) {

-        fGpu->setIndexSourceToArray(indices, indexCount);

-        fGpu->drawIndexed(primitiveType, 0, 0, vertexCount, indexCount);

+        target->setIndexSourceToArray(indices, indexCount);

+        target->drawIndexed(primitiveType, 0, 0, vertexCount, indexCount);

     } else {

-        fGpu->drawNonIndexed(primitiveType, 0, vertexCount);

+        target->drawNonIndexed(primitiveType, 0, vertexCount);

     }

 }

 

@@ -666,7 +661,7 @@
 

 static inline bool single_pass_path(const GrPathIter& path,

                                     GrContext::PathFills fill,

-                                    const GrGpu& gpu) {

+                                    const GrDrawTarget& target) {

 #if STENCIL_OFF

     return true;

 #else

@@ -679,7 +674,7 @@
         return hint == GrPathIter::kConvex_ConvexHint ||

                hint == GrPathIter::kNonOverlappingConvexPieces_ConvexHint ||

                (hint == GrPathIter::kSameWindingConvexPieces_ConvexHint &&

-                gpu.canDisableBlend() && !gpu.isDitherState());

+                target.canDisableBlend() && !target.isDitherState());

 

     }

     return false;

@@ -692,11 +687,11 @@
                          const GrPoint* translate) {

 

 

-    this->prepareToDraw(paint);

+    GrDrawTarget* target = this->prepareToDraw(paint, kUnbuffered_DrawCategory);

 

-    GrDrawTarget::AutoStateRestore asr(fGpu);

+    GrDrawTarget::AutoStateRestore asr(target);

 

-    GrMatrix viewM = fGpu->getViewMatrix();

+    GrMatrix viewM = target->getViewMatrix();

     // In order to tesselate the path we get a bound on how much the matrix can

     // stretch when mapping to screen coordinates.

     GrScalar stretch = viewM.getMaxStretch();

@@ -722,7 +717,7 @@
         layout = GrDrawTarget::StagePosAsTexCoordVertexLayoutBit(0);

     }

     // add 4 to hold the bounding rect

-    GrDrawTarget::AutoReleaseGeometry arg(fGpu, layout, maxPts + 4, 0);

+    GrDrawTarget::AutoReleaseGeometry arg(target, layout, maxPts + 4, 0);

 

     GrPoint* base = (GrPoint*) arg.vertices();

     GrPoint* vert = base;

@@ -744,7 +739,7 @@
         passes[0] = GrDrawTarget::kNone_StencilPass;

     } else {

         type = GrDrawTarget::kTriangleFan_PrimitiveType;

-        if (single_pass_path(*path, fill, *fGpu)) {

+        if (single_pass_path(*path, fill, *target)) {

             passCount = 1;

             passes[0] = GrDrawTarget::kNone_StencilPass;

         } else {

@@ -778,7 +773,7 @@
             }

         }

     }

-    fGpu->setReverseFill(reverse);

+    target->setReverseFill(reverse);

 

     GrPoint pts[4];

 

@@ -840,13 +835,13 @@
     if (useBounds) {

         GrRect bounds;

         if (reverse) {

-            GrAssert(NULL != fGpu->getRenderTarget());

+            GrAssert(NULL != target->getRenderTarget());

             // draw over the whole world.

             bounds.setLTRB(0, 0,

-                           GrIntToScalar(fGpu->getRenderTarget()->width()),

-                           GrIntToScalar(fGpu->getRenderTarget()->height()));

+                           GrIntToScalar(target->getRenderTarget()->width()),

+                           GrIntToScalar(target->getRenderTarget()->height()));

             GrMatrix vmi;

-            if (fGpu->getViewInverse(&vmi)) {

+            if (target->getViewInverse(&vmi)) {

                 vmi.mapRect(&bounds);

             }

         } else {

@@ -857,16 +852,16 @@
     }

 

     for (int p = 0; p < passCount; ++p) {

-        fGpu->setStencilPass(passes[p]);

+        target->setStencilPass(passes[p]);

         if (useBounds && (GrDrawTarget::kEvenOddColor_StencilPass == passes[p] ||

                           GrDrawTarget::kWindingColor_StencilPass == passes[p])) {

-            fGpu->drawNonIndexed(GrDrawTarget::kTriangleFan_PrimitiveType,

+            target->drawNonIndexed(GrDrawTarget::kTriangleFan_PrimitiveType,

                                  maxPts, 4);

 

         } else {

             int baseVertex = 0;

             for (int sp = 0; sp < subpathCnt; ++sp) {

-                fGpu->drawNonIndexed(type,

+                target->drawNonIndexed(type,

                                      baseVertex,

                                      subpathVertCount[sp]);

                 baseVertex += subpathVertCount[sp];

@@ -878,19 +873,25 @@
 ////////////////////////////////////////////////////////////////////////////////

 

 void GrContext::flush(bool flushRenderTarget) {

-    flushText();

+    flushDrawBuffer();

     if (flushRenderTarget) {

         fGpu->forceRenderTargetFlush();

     }

 }

 

 void GrContext::flushText() {

-    if (NULL != fTextDrawBuffer) {

-        fTextDrawBuffer->playback(fGpu);

-        fTextDrawBuffer->reset();

+    if (kText_DrawCategory == fLastDrawCategory) {

+        flushDrawBuffer();

     }

 }

 

+void GrContext::flushDrawBuffer() { // plays the deferred draw buffer back on fGpu, then resets it

+#if BATCH_RECT_TO_RECT || DEFER_TEXT_RENDERING

+    fDrawBuffer->playback(fGpu); // fDrawBuffer is only allocated when one of the two macros above is on

+    fDrawBuffer->reset();

+#endif

+}

+

 bool GrContext::readPixels(int left, int top, int width, int height,

                            GrTexture::PixelConfig config, void* buffer) {

     this->flush(true);

@@ -962,10 +963,32 @@
     target->setBlendFunc(paint.fSrcBlendCoeff, paint.fDstBlendCoeff);

 }

 

-void GrContext::prepareToDraw(const GrPaint& paint) {

-

-    flushText();

+GrDrawTarget* GrContext::prepareToDraw(const GrPaint& paint, 

+                                       DrawCategory category) { // returns the target the caller should draw on

+    if (category != fLastDrawCategory) { // category switch: play back what was buffered before going on

+        flushDrawBuffer();

+        fLastDrawCategory = category;

+    }

     SetPaint(paint, fGpu); // the paint always goes to fGpu; the buffered cases copy fGpu's state below

+    GrDrawTarget* target = fGpu;

+    switch (category) {

+    case kText_DrawCategory:

+#if DEFER_TEXT_RENDERING

+        target = fDrawBuffer;

+        fDrawBuffer->initializeDrawStateAndClip(*fGpu);

+#else

+        target = fGpu;

+#endif

+        break;

+    case kUnbuffered_DrawCategory:

+        target = fGpu; // drawn immediately

+        break;

+    case kBuffered_DrawCategory:

+        target = fDrawBuffer; // NOTE(review): NULL when neither batching nor deferred text is compiled in

+        fDrawBuffer->initializeDrawStateAndClip(*fGpu);

+        break;

+    }

+    return target;

 }

 

 ////////////////////////////////////////////////////////////////////////////////

@@ -975,7 +998,7 @@
 }

 

 void GrContext::setRenderTarget(GrRenderTarget* target) {

-    flushText();

+    flush(false);

     fGpu->setRenderTarget(target);

 }

 

@@ -1028,19 +1051,28 @@
                                        MAX_TEXTURE_CACHE_BYTES);

     fFontCache = new GrFontCache(fGpu);

 

-#if DEFER_TEXT_RENDERING

-    fTextVBAllocPool = new GrVertexBufferAllocPool(gpu,

-                                                   false,

-                                                   TEXT_POOL_VB_SIZE,

-                                                   NUM_TEXT_POOL_VBS);

-    fTextIBAllocPool = new GrIndexBufferAllocPool(gpu, false, 0, 0);

+    fLastDrawCategory = kUnbuffered_DrawCategory;

 

-    fTextDrawBuffer = new GrInOrderDrawBuffer(fTextVBAllocPool,

-                                              fTextIBAllocPool);

+#if DEFER_TEXT_RENDERING || BATCH_RECT_TO_RECT

+    fDrawBufferVBAllocPool = 

+        new GrVertexBufferAllocPool(gpu, false,

+                                    DRAW_BUFFER_VBPOOL_BUFFER_SIZE,

+                                    DRAW_BUFFER_VBPOOL_PREALLOC_BUFFERS);

+    fDrawBufferIBAllocPool = 

+        new GrIndexBufferAllocPool(gpu, false,

+                                   DRAW_BUFFER_IBPOOL_BUFFER_SIZE, 

+                                   DRAW_BUFFER_IBPOOL_PREALLOC_BUFFERS);

+

+    fDrawBuffer = new GrInOrderDrawBuffer(fDrawBufferVBAllocPool,

+                                          fDrawBufferIBAllocPool);

 #else

-    fTextDrawBuffer = NULL;

-    fTextVBAllocPool = NULL;

-    fTextIBAllocPool = NULL;

+    fDrawBuffer = NULL;

+    fDrawBufferVBAllocPool = NULL;

+    fDrawBufferIBAllocPool = NULL;

+#endif

+

+#if BATCH_RECT_TO_RECT

+    fDrawBuffer->setQuadIndexBuffer(this->getQuadIndexBuffer());

 #endif

 

 }

@@ -1070,22 +1102,14 @@
 GrDrawTarget* GrContext::getTextTarget(const GrPaint& paint) {

     GrDrawTarget* target;

 #if DEFER_TEXT_RENDERING

-    fTextDrawBuffer->initializeDrawStateAndClip(*fGpu);

-    target = fTextDrawBuffer;

+    target = prepareToDraw(paint, kText_DrawCategory);

 #else

-    target = fGpu;

+    target = prepareToDraw(paint, kUnbuffered_DrawCategory);

 #endif

     SetPaint(paint, target);

     return target;

 }

 

-const GrIndexBuffer* GrContext::quadIndexBuffer() const {

-    return fGpu->quadIndexBuffer();

+const GrIndexBuffer* GrContext::getQuadIndexBuffer() const {

+    return fGpu->getQuadIndexBuffer();

 }

-

-int GrContext::maxQuadsInIndexBuffer() const {

-    return fGpu->maxQuadsInIndexBuffer();

-}

-

-

-

diff --git a/gpu/src/GrDrawTarget.cpp b/gpu/src/GrDrawTarget.cpp
index 5cfc6f4..06e5ab8b 100644
--- a/gpu/src/GrDrawTarget.cpp
+++ b/gpu/src/GrDrawTarget.cpp
@@ -17,6 +17,7 @@
 
 #include "GrDrawTarget.h"
 #include "GrGpuVertex.h"
+#include "GrTexture.h"
 
 // recursive helper for creating mask with all the tex coord bits set for
 // one stage
@@ -291,7 +292,7 @@
 }
 
 void GrDrawTarget::setClip(const GrClip& clip) {
-    clipWillChange(clip);
+    clipWillBeSet(clip);
     fClip = clip;
 }
 
@@ -484,7 +485,127 @@
     fGeometrySrc.fIndexBuffer  = buffer;
 }
 
-////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+
+bool GrDrawTarget::canDisableBlend() const { // reports whether the current draw state lets blending be turned off
+    if ((kOne_BlendCoeff == fCurrDrawState.fSrcBlend) &&
+        (kZero_BlendCoeff == fCurrDrawState.fDstBlend)) {
+            return true; // src * 1 + dst * 0: blending has no effect
+    }
+
+    // Per-vertex colors, or a constant color whose alpha is not 0xff, rule it out
+    if ((fGeometrySrc.fVertexLayout & kColor_VertexLayoutBit) ||
+         0xff != GrColorUnpackA(fCurrDrawState.fColor)) {
+        return false;
+    }
+
+    // If the src coef will always be 1...
+    if (kSA_BlendCoeff != fCurrDrawState.fSrcBlend &&
+        kOne_BlendCoeff != fCurrDrawState.fSrcBlend) {
+        return false;
+    }
+
+    // ...and the dst coef is always 0...
+    if (kISA_BlendCoeff != fCurrDrawState.fDstBlend &&
+        kZero_BlendCoeff != fCurrDrawState.fDstBlend) {
+        return false;
+    }
+
+    // ...and there isn't a texture with an alpha channel...
+    for (int s = 0; s < kNumStages; ++s) {
+        if (VertexUsesStage(s, fGeometrySrc.fVertexLayout)) {
+            GrAssert(NULL != fCurrDrawState.fTextures[s]);
+            GrTexture::PixelConfig config = fCurrDrawState.fTextures[s]->config();
+
+            if (GrTexture::kRGB_565_PixelConfig != config &&
+                GrTexture::kRGBX_8888_PixelConfig != config) {
+                return false; // any other config is treated as possibly carrying alpha
+            }
+        }
+    }
+
+    // ...then we disable blend.
+    return true;
+}
+///////////////////////////////////////////////////////////////////////////////
+void GrDrawTarget::drawRect(const GrRect& rect,
+                            const GrMatrix* matrix,
+                            int stageEnableMask,
+                            const GrRect* srcRects[],
+                            const GrMatrix* srcMatrices[]) {
+    // Draws rect as a 4-vertex triangle fan; srcRects give explicit tex coords per stage.
+    GR_STATIC_ASSERT(8*sizeof(int) >= kNumStages);
+    GrVertexLayout layout = GetRectVertexLayout(stageEnableMask, srcRects);
+    AutoReleaseGeometry geo(this, layout, 4, 0);
+    if (!geo.succeeded()) {
+        return; // no vertex space was reserved, so there is nothing to fill in
+    }
+    SetRectVertices(rect, matrix, srcRects,
+                    srcMatrices, layout, geo.vertices());
+    drawNonIndexed(kTriangleFan_PrimitiveType, 0, 4);
+}
+
+GrVertexLayout GrDrawTarget::GetRectVertexLayout(int stageEnableMask,
+                                                 const GrRect* srcRects[]) {
+    GrVertexLayout layout = 0;
+    int numTC = 0; // next unused tex coord set; lives outside the loop so stages never share one
+
+    for (int i = 0; i < kNumStages; ++i) {
+        if (stageEnableMask & (1 << i)) { // bit i of the mask enables stage i
+            if (NULL != srcRects && NULL != srcRects[i]) {
+                layout |= StageTexCoordVertexLayoutBit(i, numTC); // explicit coords from srcRects[i]
+                ++numTC;
+            } else {
+                layout |= StagePosAsTexCoordVertexLayoutBit(i); // no src rect: positions double as coords
+            }
+        }
+    }
+    return layout;
+}
+void GrDrawTarget::SetRectVertices(const GrRect& rect,
+                                   const GrMatrix* matrix, 
+                                   const GrRect* srcRects[], 
+                                   const GrMatrix* srcMatrices[],
+                                   GrVertexLayout layout, 
+                                   void* vertices) { // fills 4 fan vertices (positions + per-stage tex coords)
+#if GR_DEBUG
+    // check that the layout and srcRects agree
+    for (int i = 0; i < kNumStages; ++i) {
+        if (VertexTexCoordsForStage(i, layout) >= 0) {
+            GR_DEBUGASSERT(NULL != srcRects && NULL != srcRects[i]);
+        } else {
+            GR_DEBUGASSERT(NULL == srcRects || NULL == srcRects[i]);
+        }
+    }
+#endif
+
+    int stageOffsets[kNumStages];
+    int colorOffset;
+    int vsize = VertexSizeAndOffsetsByStage(layout, stageOffsets, &colorOffset);
+    GrAssert(-1 == colorOffset); // rect layouts built here carry no per-vertex color
+
+    GrTCast<GrPoint*>(vertices)->setRectFan(rect.fLeft, rect.fTop, 
+                                            rect.fRight, rect.fBottom,
+                                            vsize); // vsize is the stride between consecutive vertices
+    if (NULL != matrix) {
+        matrix->mapPointsWithStride(GrTCast<GrPoint*>(vertices), vsize, 4); // transform positions in place
+    }
+
+    for (int i = 0; i < kNumStages; ++i) {
+        if (stageOffsets[i] > 0) { // only stages with an offset > 0 get coords written here
+            GrPoint* coords = GrTCast<GrPoint*>(GrTCast<intptr_t>(vertices) + 
+                                                stageOffsets[i]);
+            coords->setRectFan(srcRects[i]->fLeft, srcRects[i]->fTop,
+                               srcRects[i]->fRight, srcRects[i]->fBottom, 
+                               vsize);
+            if (NULL != srcMatrices && NULL != srcMatrices[i]) {
+                srcMatrices[i]->mapPointsWithStride(coords, vsize, 4);
+            }
+        }
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
 
 GrDrawTarget::AutoStateRestore::AutoStateRestore(GrDrawTarget* target) {
     fDrawTarget = target;
diff --git a/gpu/src/GrGpu.cpp b/gpu/src/GrGpu.cpp
index e406f6f..5425b58 100644
--- a/gpu/src/GrGpu.cpp
+++ b/gpu/src/GrGpu.cpp
@@ -92,49 +92,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
-bool GrGpu::canDisableBlend() const {
-    if ((kOne_BlendCoeff == fCurrDrawState.fSrcBlend) &&
-        (kZero_BlendCoeff == fCurrDrawState.fDstBlend)) {
-            return true;
-    }
-
-    // If we have vertex color without alpha then we can't force blend off
-    if ((fGeometrySrc.fVertexLayout & kColor_VertexLayoutBit) ||
-         0xff != GrColorUnpackA(fCurrDrawState.fColor)) {
-        return false;
-    }
-
-    // If the src coef will always be 1...
-    if (kSA_BlendCoeff != fCurrDrawState.fSrcBlend &&
-        kOne_BlendCoeff != fCurrDrawState.fSrcBlend) {
-        return false;
-    }
-
-    // ...and the dst coef is always 0...
-    if (kISA_BlendCoeff != fCurrDrawState.fDstBlend &&
-        kZero_BlendCoeff != fCurrDrawState.fDstBlend) {
-        return false;
-    }
-
-    // ...and there isn't a texture with an alpha channel...
-    for (int s = 0; s < kNumStages; ++s) {
-        if (VertexUsesStage(s, fGeometrySrc.fVertexLayout)) {
-            GrAssert(NULL != fCurrDrawState.fTextures[s]);
-            GrTexture::PixelConfig config = fCurrDrawState.fTextures[s]->config();
-
-            if (GrTexture::kRGB_565_PixelConfig != config &&
-                GrTexture::kRGBX_8888_PixelConfig != config) {
-                return false;
-            }
-        }
-    }
-
-    // ...then we disable blend.
-    return true;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-
 static const int MAX_QUADS = 1 << 12; // max possible: (1 << 14) - 1;
 
 GR_STATIC_ASSERT(4 * MAX_QUADS <= 65535);
@@ -150,7 +107,7 @@
     }
 }
 
-const GrIndexBuffer* GrGpu::quadIndexBuffer() const {
+const GrIndexBuffer* GrGpu::getQuadIndexBuffer() const {
     if (NULL == fQuadIndexBuffer) {
         static const int SIZE = sizeof(uint16_t) * 6 * MAX_QUADS;
         GrGpu* me = const_cast<GrGpu*>(this);
@@ -176,7 +133,7 @@
     return fQuadIndexBuffer;
 }
 
-const GrVertexBuffer* GrGpu::unitSquareVertexBuffer() const {
+const GrVertexBuffer* GrGpu::getUnitSquareVertexBuffer() const {
     if (NULL == fUnitSquareVertexBuffer) {
 
         static const GrPoint DATA[] = {
@@ -201,14 +158,10 @@
     return fUnitSquareVertexBuffer;
 }
 
-int GrGpu::maxQuadsInIndexBuffer() const {
-    return (NULL == this->quadIndexBuffer()) ? 0 : MAX_QUADS;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 
-void GrGpu::clipWillChange(const GrClip& clip) {
-    if (clip != fClip) {
+void GrGpu::clipWillBeSet(const GrClip& newClip) {
+    if (newClip != fClip) {
         fClipState.fClipIsDirty = true;
     }
 }
@@ -260,7 +213,7 @@
             }
             fVertexPool->unlock();
             this->setVertexSourceToBuffer(0, vertexBuffer);
-            this->setIndexSourceToBuffer(quadIndexBuffer());
+            this->setIndexSourceToBuffer(getQuadIndexBuffer());
             this->setViewMatrix(GrMatrix::I());
             // don't clip the clip or recurse!
             this->disableState(kClip_StateBit);
@@ -268,7 +221,7 @@
             this->setStencilPass((GrDrawTarget::StencilPass)kSetClip_StencilPass);
             int currRect = 0;
             while (currRect < rectTotal) {
-                int rectCount = GrMin(this->maxQuadsInIndexBuffer(),
+                int rectCount = GrMin(MAX_QUADS,
                                       rectTotal - currRect);
                 this->drawIndexed(kTriangles_PrimitiveType,
                                   vStart + currRect * 4,
diff --git a/gpu/src/GrInOrderDrawBuffer.cpp b/gpu/src/GrInOrderDrawBuffer.cpp
index 49b8901..25e74e3 100644
--- a/gpu/src/GrInOrderDrawBuffer.cpp
+++ b/gpu/src/GrInOrderDrawBuffer.cpp
@@ -18,6 +18,8 @@
 #include "GrInOrderDrawBuffer.h"
 #include "GrTexture.h"
 #include "GrBufferAllocPool.h"
+#include "GrIndexBuffer.h"
+#include "GrVertexBuffer.h"
 #include "GrGpu.h"
 
 GrInOrderDrawBuffer::GrInOrderDrawBuffer(GrVertexBufferAllocPool* vertexPool,
@@ -25,7 +27,13 @@
         fDraws(DRAWS_BLOCK_SIZE, fDrawsStorage),
         fStates(STATES_BLOCK_SIZE, fStatesStorage),
         fClips(CLIPS_BLOCK_SIZE, fClipsStorage),
-        fClipChanged(true),
+        fClipSet(true),
+
+        fLastRectVertexLayout(0),
+        fQuadIndexBuffer(NULL),
+        fMaxQuads(0),
+        fCurrQuad(0),
+
         fVertexPool(*vertexPool),
         fCurrPoolVertexBuffer(NULL),
         fCurrPoolStartVertex(0),
@@ -41,7 +49,8 @@
 }
 
 GrInOrderDrawBuffer::~GrInOrderDrawBuffer() {
-    reset();
+    this->reset();
+    GrSafeUnref(fQuadIndexBuffer);
 }
 
 void GrInOrderDrawBuffer::initializeDrawStateAndClip(const GrDrawTarget& target) {
@@ -49,6 +58,121 @@
     this->setClip(target.getClip());
 }
 
+void GrInOrderDrawBuffer::setQuadIndexBuffer(const GrIndexBuffer* indexBuffer) {
+    if (fQuadIndexBuffer == indexBuffer) {
+        // Same buffer as before: nothing to swap, just check the cached quad limit.
+        GrAssert((NULL == indexBuffer && 0 == fMaxQuads) ||
+                 (indexBuffer->maxQuads() == fMaxQuads));
+        return;
+    }
+    GrSafeUnref(fQuadIndexBuffer);
+    fQuadIndexBuffer = indexBuffer;
+    GrSafeRef(fQuadIndexBuffer);
+    fCurrQuad = 0;
+    fMaxQuads = (NULL == indexBuffer) ? 0 : indexBuffer->maxQuads();
+}
+
+void GrInOrderDrawBuffer::drawRect(const GrRect& rect,
+                                   const GrMatrix* matrix,
+                                   int stageEnableMask,
+                                   const GrRect* srcRects[],
+                                   const GrMatrix* srcMatrices[]) {
+    // Records a rect draw; with a quad IB set, compatible consecutive rects share one draw.
+    GrAssert(!(NULL == fQuadIndexBuffer && fCurrQuad));
+    GrAssert(!(fDraws.empty() && fCurrQuad));
+    GrAssert(!(0 != fMaxQuads && NULL == fQuadIndexBuffer));
+
+    // if we have a quad IB then either append to the previous run of
+    // rects or start a new run
+    if (fMaxQuads) {
+        bool appendToPreviousDraw = false;
+        GrVertexLayout layout = GetRectVertexLayout(stageEnableMask, srcRects);
+        AutoReleaseGeometry geo(this, layout, 4, 0);
+        if (!geo.succeeded()) {
+            return; // no vertex space was reserved, so there is nothing to write into
+        }
+        AutoViewMatrixRestore avmr(this);
+        GrMatrix combinedMatrix = this->getViewMatrix();
+        this->setViewMatrix(GrMatrix::I());
+        if (NULL != matrix) {
+            combinedMatrix.preConcat(*matrix);
+        }
+        // combinedMatrix is applied to the vertices themselves; the view matrix stays identity.
+        SetRectVertices(rect, &combinedMatrix, srcRects, srcMatrices, layout, geo.vertices());
+
+        // we don't want to miss an opportunity to batch rects together
+        // simply because the clip has changed if the clip doesn't affect
+        // the rect.
+        bool disabledClip = false;
+        if (this->isClipState() && fClip.isRect()) {
+            GrRect clipRect = GrRect(*fClip.getRects());
+            // If the clip rect touches the edge of the viewport, extend it
+            // out (close) to infinity to avoid bogus intersections.
+            // We might consider a more exact clip to viewport if this
+            // conservative test fails.
+            const GrRenderTarget* target = this->getRenderTarget();
+            if (0 >= clipRect.fLeft) {
+                clipRect.fLeft = GR_ScalarMin;
+            }
+            if (target->width() <= clipRect.fRight) {
+                clipRect.fRight = GR_ScalarMax;
+            }
+            if (0 >= clipRect.top()) {
+                clipRect.fTop = GR_ScalarMin;
+            }
+            if (target->height() <= clipRect.fBottom) {
+                clipRect.fBottom = GR_ScalarMax;
+            }
+            int stride = VertexSize(layout);
+            bool insideClip = true;
+            for (int v = 0; v < 4; ++v) {
+                const GrPoint& p = *GetVertexPoint(geo.vertices(), v, stride);
+                if (!clipRect.contains(p)) {
+                    insideClip = false;
+                    break;
+                }
+            }
+            if (insideClip) {
+                this->disableState(kClip_StateBit);
+                disabledClip = true;
+            }
+        }
+        if (!needsNewClip() && !needsNewState() && fCurrQuad > 0 &&
+            fCurrQuad < fMaxQuads && layout == fLastRectVertexLayout) {
+            int vsize = VertexSize(layout);
+
+            Draw& lastDraw = fDraws.back();
+
+            GrAssert(lastDraw.fIndexBuffer == fQuadIndexBuffer);
+            GrAssert(kTriangles_PrimitiveType == lastDraw.fPrimitiveType);
+            GrAssert(0 == lastDraw.fVertexCount % 4);
+            GrAssert(0 == lastDraw.fIndexCount % 6);
+            GrAssert(0 == lastDraw.fStartIndex);
+
+            appendToPreviousDraw = lastDraw.fVertexBuffer == fCurrPoolVertexBuffer &&
+                                   (fCurrQuad * 4 + lastDraw.fStartVertex) == fCurrPoolStartVertex;
+            if (appendToPreviousDraw) {
+                lastDraw.fVertexCount += 4;
+                lastDraw.fIndexCount += 6;
+                fCurrQuad += 1;
+                GrAssert(0 == fUsedReservedVertexBytes);
+                fUsedReservedVertexBytes = 4 * vsize;
+            }
+        }
+        if (!appendToPreviousDraw) {
+            this->setIndexSourceToBuffer(fQuadIndexBuffer);
+            drawIndexed(kTriangles_PrimitiveType, 0, 0, 4, 6);
+            fCurrQuad = 1;
+            fLastRectVertexLayout = layout;
+        }
+        if (disabledClip) { // restore only what this call switched off; never force clipping on
+            this->enableState(kClip_StateBit);
+        }
+    } else {
+        INHERITED::drawRect(rect, matrix, stageEnableMask, srcRects, srcMatrices);
+    }
+}
+
 void GrInOrderDrawBuffer::drawIndexed(PrimitiveType primitiveType,
                                       int startVertex,
                                       int startIndex,
@@ -59,14 +183,24 @@
         return;
     }
 
+    fCurrQuad = 0;
+
     Draw& draw = fDraws.push_back();
     draw.fPrimitiveType = primitiveType;
     draw.fStartVertex   = startVertex;
     draw.fStartIndex    = startIndex;
     draw.fVertexCount   = vertexCount;
     draw.fIndexCount    = indexCount;
-    draw.fClipChanged   = grabClip();
-    draw.fStateChanged  = grabState();
+
+    draw.fClipChanged = this->needsNewClip();
+    if (draw.fClipChanged) {
+       this->pushClip();
+    }
+
+    draw.fStateChanged = this->needsNewState();
+    if (draw.fStateChanged) {
+        this->pushState();
+    }
 
     draw.fVertexLayout = fGeometrySrc.fVertexLayout;
     switch (fGeometrySrc.fVertexSrc) {
@@ -76,8 +210,7 @@
     case kReserved_GeometrySrcType: {
         size_t vertexBytes = (vertexCount + startVertex) *
         VertexSize(fGeometrySrc.fVertexLayout);
-        fUsedReservedVertexBytes = GrMax(fUsedReservedVertexBytes,
-                                         vertexBytes);
+        fUsedReservedVertexBytes = GrMax(fUsedReservedVertexBytes, vertexBytes);
     } // fallthrough
     case kArray_GeometrySrcType:
         draw.fVertexBuffer = fCurrPoolVertexBuffer;
@@ -86,6 +219,7 @@
     default:
         GrCrash("unknown geom src type");
     }
+    draw.fVertexBuffer->ref();
 
     switch (fGeometrySrc.fIndexSrc) {
     case kBuffer_GeometrySrcType:
@@ -102,6 +236,7 @@
     default:
         GrCrash("unknown geom src type");
     }
+    draw.fIndexBuffer->ref();
 }
 
 void GrInOrderDrawBuffer::drawNonIndexed(PrimitiveType primitiveType,
@@ -111,6 +246,8 @@
         return;
     }
 
+    fCurrQuad = 0;
+
     Draw& draw = fDraws.push_back();
     draw.fPrimitiveType = primitiveType;
     draw.fStartVertex   = startVertex;
@@ -118,8 +255,15 @@
     draw.fVertexCount   = vertexCount;
     draw.fIndexCount    = 0;
 
-    draw.fClipChanged   = grabClip();
-    draw.fStateChanged  = grabState();
+    draw.fClipChanged = this->needsNewClip();
+    if (draw.fClipChanged) {
+        this->pushClip();
+    }
+
+    draw.fStateChanged = this->needsNewState();
+    if (draw.fStateChanged) {
+        this->pushState();
+    }
 
     draw.fVertexLayout = fGeometrySrc.fVertexLayout;
     switch (fGeometrySrc.fVertexSrc) {
@@ -139,6 +283,8 @@
     default:
         GrCrash("unknown geom src type");
     }
+    draw.fVertexBuffer->ref();
+    draw.fIndexBuffer = NULL;
 }
 
 void GrInOrderDrawBuffer::reset() {
@@ -146,12 +292,19 @@
     uint32_t numStates = fStates.count();
     for (uint32_t i = 0; i < numStates; ++i) {
         for (int s = 0; s < kNumStages; ++s) {
-            GrTexture* tex = accessSavedDrawState(fStates[i]).fTextures[s];
+            GrTexture* tex = this->accessSavedDrawState(fStates[i]).fTextures[s];
             if (NULL != tex) {
                 tex->unref();
             }
         }
     }
+    int numDraws = fDraws.count();
+    for (int d = 0; d < numDraws; ++d) {
+        // we always have a VB, but not always an IB
+        GrAssert(NULL != fDraws[d].fVertexBuffer);
+        fDraws[d].fVertexBuffer->unref();
+        GrSafeUnref(fDraws[d].fIndexBuffer);
+    }
     fDraws.reset();
     fStates.reset();
 
@@ -159,6 +312,8 @@
     fIndexPool.reset();
 
     fClips.reset();
+
+    fCurrQuad = 0;
 }
 
 void GrInOrderDrawBuffer::playback(GrDrawTarget* target) {
@@ -331,37 +486,36 @@
     GR_DEBUGASSERT(success);
 }
 
-bool GrInOrderDrawBuffer::grabState() {
-    bool newState;
-    if (fStates.empty()) {
-        newState = true;
-    } else {
-        const DrState& old = accessSavedDrawState(fStates.back());
-        newState = old != fCurrDrawState;
-    }
-    if (newState) {
-        for (int s = 0; s < kNumStages; ++s) {
-            if (NULL != fCurrDrawState.fTextures[s]) {
-                fCurrDrawState.fTextures[s]->ref();
-            }
-        }
-        saveCurrentDrawState(&fStates.push_back());
-    }
-    return newState;
+bool GrInOrderDrawBuffer::needsNewState() const {
+    // A state must be recorded if none is saved yet or the current one differs.
+    if (fStates.empty()) {
+        return true;
+    }
+    const DrState& lastSaved = this->accessSavedDrawState(fStates.back());
+    return lastSaved != fCurrDrawState;
 }
 
-bool GrInOrderDrawBuffer::grabClip() {
-    if ((fCurrDrawState.fFlagBits & kClip_StateBit) &&
-        (fClipChanged || fClips.empty())) {
-
-        fClips.push_back() = fClip;
-        fClipChanged = false;
-        return true;
+void GrInOrderDrawBuffer::pushState() {
+    for (int stage = 0; stage < kNumStages; ++stage) {
+        GrSafeRef(fCurrDrawState.fTextures[stage]);  // ref is dropped again in reset()
+    }
+    this->saveCurrentDrawState(&fStates.push_back());
+}
+ 
+bool GrInOrderDrawBuffer::needsNewClip() const {  // true when the next draw must record fClip via pushClip()
+   if (fCurrDrawState.fFlagBits & kClip_StateBit) {  // a clip is only recorded while the clip state bit is set
+       if (fClips.empty() || (fClipSet && fClips.back() != fClip)) {  // nothing recorded yet, or clip was set since the last push and differs
+           return true;
+       }
     }
     return false;
 }
-
-void GrInOrderDrawBuffer::clipWillChange(const GrClip& clip)  {
-    fClipChanged = true;
+ 
+void GrInOrderDrawBuffer::pushClip() {  // appends a copy of the current clip to fClips
+    fClips.push_back() = fClip;
+    fClipSet = false;  // cleared until clipWillBeSet() flags another clip change
 }
-
+ 
+void GrInOrderDrawBuffer::clipWillBeSet(const GrClip& newClip)  {  // newClip itself is not inspected here
+    fClipSet = true;  // makes needsNewClip() compare fClip against the last recorded clip
+}
diff --git a/gpu/src/GrTextContext.cpp b/gpu/src/GrTextContext.cpp
index d59f4ce..7341005 100644
--- a/gpu/src/GrTextContext.cpp
+++ b/gpu/src/GrTextContext.cpp
@@ -15,13 +15,14 @@
  */
 
 
-#include "GrAtlas.h"
-#include "GrClipIterator.h"
-#include "GrContext.h"
 #include "GrTextContext.h"
+#include "GrAtlas.h"
+#include "GrContext.h"
 #include "GrTextStrike.h"
 #include "GrTextStrike_impl.h"
 #include "GrFontScaler.h"
+#include "GrIndexBuffer.h"
+#include "GrGpuVertex.h"
 
 static const int TEXT_STAGE = 1;
 
@@ -45,7 +46,7 @@
         GrAssert(fCurrTexture);
         fDrawTarget->setTexture(TEXT_STAGE, fCurrTexture);
         fDrawTarget->setTextureMatrix(TEXT_STAGE, GrMatrix::I());
-        fDrawTarget->setIndexSourceToBuffer(fContext->quadIndexBuffer());
+        fDrawTarget->setIndexSourceToBuffer(fContext->getQuadIndexBuffer());
 
         fDrawTarget->drawIndexed(GrDrawTarget::kTriangles_PrimitiveType,
                                  0, 0, fCurrVertex, nIndices);
@@ -217,11 +218,12 @@
                                        NULL);
         }
 
+        int maxQuadVertices = 4 * fContext->getQuadIndexBuffer()->size() / (6 * sizeof(uint16_t));
         if (fMaxVertices < kMinRequestedVerts) {
             fMaxVertices = kDefaultRequestedVerts;
-        } else if (fMaxVertices > (fContext->maxQuadsInIndexBuffer() * 4)) {
+        } else if (fMaxVertices > maxQuadVertices) {
             // don't exceed the limit of the index buffer
-            fMaxVertices = (fContext->maxQuadsInIndexBuffer() * 4);
+            fMaxVertices = maxQuadVertices;
         }
         bool success = fDrawTarget->reserveAndLockGeometry(fVertexLayout,
                                                            fMaxVertices, 0,