Add SkBitmap::deepCopyTo(), which for a texture-backed bitmap does a deep
copy of the texels in VRAM rather than the readback and re-upload done by
copyTo().  This gives a 3-10X speedup on recursive canvas-to-canvas draws.

N.B.:  This introduces a new GM test, which will need new baselines.



git-svn-id: http://skia.googlecode.com/svn/trunk@2790 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/src/core/SkBitmap.cpp b/src/core/SkBitmap.cpp
index 760bab7..24a1839 100644
--- a/src/core/SkBitmap.cpp
+++ b/src/core/SkBitmap.cpp
@@ -938,6 +938,29 @@
     return true;
 }
 
+bool SkBitmap::deepCopyTo(SkBitmap* dst, Config dstConfig) const {
+    // Bail out on any config conversion that canCopyTo() rejects.
+    if (!this->canCopyTo(dstConfig)) {
+        return false;
+    }
+
+    // Let the PixelRef, when there is one, attempt the deep copy itself.
+    // A NULL result means no deep copy was produced.
+    SkPixelRef* copiedRef = fPixelRef ? fPixelRef->deepCopy(dstConfig) : NULL;
+    if (NULL != copiedRef) {
+        dst->setConfig(dstConfig, fWidth, fHeight);
+        // Install the copy, then unref what setPixelRef() hands back.
+        dst->setPixelRef(copiedRef)->unref();
+        return true;
+    }
+
+    // No deep copy: refuse when getTexture() is non-NULL, else use copyTo().
+    if (this->getTexture()) {
+        return false;
+    }
+    return this->copyTo(dst, dstConfig, NULL);
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index 70c9f6d..e4b1150 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -1813,6 +1813,27 @@
                             config, buffer, rowBytes, flipY);
 }
 
+void GrContext::copyTexture(GrTexture* src, GrRenderTarget* dst) {
+    if (NULL == src || NULL == dst) {  // no-op unless both ends are given
+        return;
+    }
+    ASSERT_OWNED_RESOURCE(src);
+    GrDrawTarget::AutoStateRestore asr(fGpu);
+    reset_target_state(fGpu);
+    fGpu->setRenderTarget(dst);  // the copy is performed as a draw into dst
+    GrSamplerState sampler(GrSamplerState::kClamp_WrapMode,
+                           GrSamplerState::kClamp_WrapMode,
+                           GrSamplerState::kNearest_Filter);
+    GrMatrix sampleM;
+    sampleM.setIDiv(src->width(), src->height());
+    sampler.setMatrix(sampleM);
+    fGpu->setTexture(0, src);
+    fGpu->setSamplerState(0, sampler);
+    SkRect rect = SkRect::MakeXYWH(0, 0,  // ints must be converted to SkScalar
+        SkIntToScalar(src->width()), SkIntToScalar(src->height()));
+    fGpu->drawSimpleRect(rect, NULL, 1 << 0);
+}
+
 void GrContext::internalWriteRenderTargetPixels(GrRenderTarget* target, 
                                                 int left, int top,
                                                 int width, int height,
diff --git a/src/gpu/SkGrTexturePixelRef.cpp b/src/gpu/SkGrTexturePixelRef.cpp
index 8d0eb89..045ddab 100644
--- a/src/gpu/SkGrTexturePixelRef.cpp
+++ b/src/gpu/SkGrTexturePixelRef.cpp
@@ -9,7 +9,9 @@
 
 
 #include "SkGrTexturePixelRef.h"
+#include "GrContext.h"
 #include "GrTexture.h"
+#include "SkGr.h"
 #include "SkRect.h"
 
 // since we call lockPixels recursively on fBitmap, we need a distinct mutex,
@@ -46,6 +48,36 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
+static SkGrTexturePixelRef* copyToTexturePixelRef(GrTexture* texture,
+                                                  SkBitmap::Config dstConfig) {
+    // A copy needs both a source texture and the context it reports.
+    GrContext* ctx = (NULL != texture) ? texture->getContext() : NULL;
+    if (NULL == ctx) {
+        return NULL;
+    }
+
+    // Describe a render-target texture with the source's dimensions.
+    GrTextureDesc spec;
+    spec.fFlags = kRenderTarget_GrTextureFlagBit | kNoStencil_GrTextureFlagBit;
+    spec.fAALevel = kNone_GrAALevel;
+    spec.fWidth = texture->width();
+    spec.fHeight = texture->height();
+    spec.fConfig = SkGr::BitmapConfig2PixelConfig(dstConfig, false);
+
+    GrTexture* copy = ctx->createUncachedTexture(spec, NULL, 0);
+    if (NULL == copy) {
+        return NULL;
+    }
+    ctx->copyTexture(texture, copy->asRenderTarget());
+
+    // Wrap the copy in a pixel ref, then release our own reference to it.
+    SkGrTexturePixelRef* result = new SkGrTexturePixelRef(copy);
+    GrSafeUnref(copy);
+    return result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
 SkGrTexturePixelRef::SkGrTexturePixelRef(GrTexture* tex) {
     fTexture = tex;
     GrSafeRef(tex);
@@ -59,6 +91,10 @@
     return (SkGpuTexture*)fTexture;
 }
 
+SkPixelRef* SkGrTexturePixelRef::deepCopy(SkBitmap::Config dstConfig) {
+    return copyToTexturePixelRef(fTexture, dstConfig);  // NULL on failure
+}
+
 bool SkGrTexturePixelRef::onReadPixels(SkBitmap* dst, const SkIRect* subset) {
     if (NULL != fTexture && fTexture->isValid()) {
         int left, top, width, height;
@@ -103,6 +139,19 @@
     return NULL;
 }
 
+SkPixelRef* SkGrRenderTargetPixelRef::deepCopy(SkBitmap::Config dstConfig) {
+    // The result is deliberately an SkGrTexturePixelRef, not another
+    // SkGrRenderTargetPixelRef.  A render-target pixel ref is usually paired
+    // with a GrTexture owned elsewhere (e.g., by SkGpuDevice) and cannot
+    // live independently of it, whereas a texture pixel ref owns its
+    // GrTexture and is therefore self-contained.
+    if (NULL != fRenderTarget) {
+        return copyToTexturePixelRef(fRenderTarget->asTexture(), dstConfig);
+    }
+    // No render target to copy from.
+    return NULL;
+}
+
 bool SkGrRenderTargetPixelRef::onReadPixels(SkBitmap* dst, const SkIRect* subset) {
     if (NULL != fRenderTarget && fRenderTarget->isValid()) {
         int left, top, width, height;