When a bitmap is texture-backed, let it be deep-copied in VRAM (via the new
SkBitmap::deepCopyTo()) rather than through a readback and re-upload.  This
gives a 3-10X speedup on recursive canvas-to-canvas draws.

N.B.:  This introduces a new GM test, which will need new baselines.



git-svn-id: http://skia.googlecode.com/svn/trunk@2790 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/gm/bitmapcopy.cpp b/gm/bitmapcopy.cpp
new file mode 100644
index 0000000..249ec43
--- /dev/null
+++ b/gm/bitmapcopy.cpp
@@ -0,0 +1,121 @@
+
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#include "gm.h"
+
+namespace skiagm {
+// Labels for SkBitmap::Config values, indexed by the value config() returns.
+static const char* gConfigNames[] = {
+    "unknown config",
+    "A1",
+    "A8",
+    "Index8",
+    "565",
+    "4444",
+    "8888"
+};
+
+SkBitmap::Config gConfigs[] = {  // destination configs the GM copies into
+  SkBitmap::kRGB_565_Config,
+  SkBitmap::kARGB_4444_Config,
+  SkBitmap::kARGB_8888_Config,
+};
+
+#define NUM_CONFIGS (sizeof(gConfigs) / sizeof(SkBitmap::Config))
+// Fills a width x height area at the origin with four colored quadrants.
+static void draw_checks(SkCanvas* canvas, int width, int height) {
+    SkPaint paint;
+    paint.setColor(SK_ColorRED);  // presumably the top-left quadrant
+    canvas->drawRectCoords(0, 0, width / 2, height / 2, paint);
+    paint.setColor(SK_ColorGREEN);  // presumably the top-right quadrant
+    canvas->drawRectCoords(width / 2, 0, width, height / 2, paint);
+    paint.setColor(SK_ColorBLUE);  // presumably the bottom-left quadrant
+    canvas->drawRectCoords(0, height / 2, width / 2, height, paint);
+    paint.setColor(SK_ColorYELLOW);  // presumably the bottom-right quadrant
+    canvas->drawRectCoords(width / 2, height / 2, width, height, paint);
+}
+// GM: copies the device bitmap to each gConfigs entry and draws the copies.
+class BitmapCopyGM : public GM {
+public:
+    SkBitmap    fDst[NUM_CONFIGS];  // fDst[i] is the copy made in gConfigs[i]
+
+    BitmapCopyGM() {
+        this->setBGColor(0xFFDDDDDD);
+    }
+
+protected:
+    virtual SkString onShortName() {
+        return SkString("bitmapcopy");
+    }
+
+    virtual SkISize onISize() {
+        return make_isize(540, 330);
+    }
+    // Makes the copies, then draws each one under its config's name.
+    virtual void onDraw(SkCanvas* canvas) {
+        SkPaint paint;
+        SkScalar horizMargin(SkIntToScalar(10));
+        SkScalar vertMargin(SkIntToScalar(10));
+        // Paint the source pattern and grab the device bitmap holding it.
+        draw_checks(canvas, 40, 40);
+        SkBitmap src = canvas->getDevice()->accessBitmap(false);
+        // Prefer deepCopyTo(); fall back to copyTo() when it fails.
+        for (unsigned i = 0; i < NUM_CONFIGS; ++i) {
+            if (!src.deepCopyTo(&fDst[i], gConfigs[i])) {
+                src.copyTo(&fDst[i], gConfigs[i]);
+            }
+        }
+        // Clear the pattern; widen columns to fit the widest label drawn.
+        canvas->clear(0xFFDDDDDD);
+        paint.setAntiAlias(true);
+        SkScalar width = SkIntToScalar(40);
+        SkScalar height = SkIntToScalar(40);
+        if (paint.getFontSpacing() > height) {
+            height = paint.getFontSpacing();
+        }
+        for (unsigned i = 0; i < NUM_CONFIGS; i++) {
+            const char* name = gConfigNames[fDst[i].config()];
+            SkScalar textWidth = paint.measureText(name, strlen(name));
+            if (textWidth > width) {
+                width = textWidth;
+            }
+        }
+        SkScalar horizOffset = width + horizMargin;
+        SkScalar vertOffset = height + vertMargin;
+        canvas->translate(SkIntToScalar(20), SkIntToScalar(20));
+
+        for (unsigned i = 0; i < NUM_CONFIGS; i++) {
+            canvas->save();
+            // Draw destination config name
+            const char* name = gConfigNames[fDst[i].config()];
+            SkScalar textWidth = paint.measureText(name, strlen(name));
+            SkScalar x = (width - textWidth) / SkScalar(2);
+            SkScalar y = paint.getFontSpacing() / SkScalar(2);
+            canvas->drawText(name, strlen(name), x, y, paint);
+
+            // Draw destination bitmap
+            canvas->translate(0, vertOffset);
+            x = (width - 40) / SkScalar(2);
+            canvas->drawBitmap(fDst[i], x, 0, &paint);
+            canvas->restore();
+
+            canvas->translate(horizOffset, 0);
+        }
+    }
+
+    virtual uint32_t onGetFlags() const { return kSkipPicture_Flag; }
+
+private:
+    typedef GM INHERITED;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// Factory for BitmapCopyGM, handed to a file-static GMRegistry.
+static GM* MyFactory(void*) { return new BitmapCopyGM; }
+static GMRegistry reg(MyFactory);
+
+}
diff --git a/gyp/gmslides.gypi b/gyp/gmslides.gypi
index 4c8f4ab..0aaef36 100644
--- a/gyp/gmslides.gypi
+++ b/gyp/gmslides.gypi
@@ -2,6 +2,7 @@
 {
   'sources': [
     '../gm/aarectmodes.cpp',
+    '../gm/bitmapcopy.cpp',
     '../gm/bitmapfilters.cpp',
     '../gm/bitmapscroll.cpp',
     '../gm/blurs.cpp',
diff --git a/include/core/SkBitmap.h b/include/core/SkBitmap.h
index ae56739..f280a24 100644
--- a/include/core/SkBitmap.h
+++ b/include/core/SkBitmap.h
@@ -462,19 +462,29 @@
     */
     bool extractSubset(SkBitmap* dst, const SkIRect& subset) const;
 
-    /** Makes a deep copy of this bitmap, respecting the requested config.
-        Returns false if either there is an error (i.e. the src does not have
-        pixels) or the request cannot be satisfied (e.g. the src has per-pixel
-        alpha, and the requested config does not support alpha).
-        @param dst The bitmap to be sized and allocated
-        @param c The desired config for dst
-        @param allocator Allocator used to allocate the pixelref for the dst
-                         bitmap. If this is null, the standard HeapAllocator
-                         will be used.
-        @return true if the copy could be made.
-    */
+    /** Makes a deep copy of this bitmap, respecting the requested config,
+     *  and allocating the dst pixels on the cpu.
+     *  Returns false if either there is an error (i.e. the src does not have
+     *  pixels) or the request cannot be satisfied (e.g. the src has per-pixel
+     *  alpha, and the requested config does not support alpha).
+     *  @param dst The bitmap to be sized and allocated
+     *  @param c The desired config for dst
+     *  @param allocator Allocator used to allocate the pixelref for the dst
+     *                   bitmap. If this is null, the standard HeapAllocator
+     *                   will be used.
+     *  @return true if the copy could be made.
+     */
     bool copyTo(SkBitmap* dst, Config c, Allocator* allocator = NULL) const;
 
+    /** Makes a deep copy of this bitmap, respecting the requested config, and
+     *  with custom allocation logic that will keep the copied pixels
+     *  in the same domain as the source: If the src pixels are allocated for
+     *  the cpu, then so will the dst. If the src pixels are allocated on the
+     *  gpu (typically as a texture), then it will do the same for the dst.
+     *  If the request cannot be fulfilled, returns false and dst is unmodified.
+     */
+    bool deepCopyTo(SkBitmap* dst, Config c) const;
+
     /** Returns true if this bitmap can be deep copied into the requested config
         by calling copyTo().
      */
diff --git a/include/core/SkPixelRef.h b/include/core/SkPixelRef.h
index 7e21a43..374868e 100644
--- a/include/core/SkPixelRef.h
+++ b/include/core/SkPixelRef.h
@@ -10,10 +10,10 @@
 #ifndef SkPixelRef_DEFINED
 #define SkPixelRef_DEFINED
 
+#include "SkBitmap.h"
 #include "SkRefCnt.h"
 #include "SkString.h"
 
-class SkBitmap;
 class SkColorTable;
 struct SkIRect;
 class SkMutex;
@@ -117,6 +117,12 @@
 
     bool readPixels(SkBitmap* dst, const SkIRect* subset = NULL);
 
+    /** Makes a deep copy of this PixelRef, respecting the requested config.
+        Returns NULL if either there is an error (e.g. the destination could
+        not be created with the given config), or this PixelRef does not
+        support deep copies.  The caller must unref() a non-NULL result.  */
+    virtual SkPixelRef* deepCopy(SkBitmap::Config config) { return NULL; }
+
     // serialization
 
     typedef SkPixelRef* (*Factory)(SkFlattenableReadBuffer&);
diff --git a/include/gpu/GrContext.h b/include/gpu/GrContext.h
index bc52159..5b48c0b 100644
--- a/include/gpu/GrContext.h
+++ b/include/gpu/GrContext.h
@@ -548,6 +548,12 @@
                                          config, buffer, rowBytes, 0);
     }
     /**
+     * Copies all texels from one texture to another.
+     * @param src           the texture to copy from.
+     * @param dst           the render target to copy to.
+     */
+    void copyTexture(GrTexture* src, GrRenderTarget* dst);
+    /**
      * Applies a 1D convolution kernel in the X direction to a rectangle of
      * pixels from a given texture.
      * @param texture         the texture to read from
diff --git a/include/gpu/SkGrTexturePixelRef.h b/include/gpu/SkGrTexturePixelRef.h
index 1df10cb..720f130 100644
--- a/include/gpu/SkGrTexturePixelRef.h
+++ b/include/gpu/SkGrTexturePixelRef.h
@@ -52,6 +52,9 @@
     // override from SkPixelRef
     virtual bool onReadPixels(SkBitmap* dst, const SkIRect* subset);
 
+    // override from SkPixelRef
+    virtual SkPixelRef* deepCopy(SkBitmap::Config dstConfig) SK_OVERRIDE;
+
 private:
     GrTexture*  fTexture;
     typedef SkROLockPixelsPixelRef INHERITED;
@@ -72,6 +75,9 @@
     // override from SkPixelRef
     virtual bool onReadPixels(SkBitmap* dst, const SkIRect* subset);
 
+    // override from SkPixelRef
+    virtual SkPixelRef* deepCopy(SkBitmap::Config dstConfig) SK_OVERRIDE;
+
 private:
     GrRenderTarget*  fRenderTarget;
     typedef SkROLockPixelsPixelRef INHERITED;
diff --git a/src/core/SkBitmap.cpp b/src/core/SkBitmap.cpp
index 760bab7..24a1839 100644
--- a/src/core/SkBitmap.cpp
+++ b/src/core/SkBitmap.cpp
@@ -938,6 +938,29 @@
     return true;
 }
 
+bool SkBitmap::deepCopyTo(SkBitmap* dst, Config dstConfig) const {
+    if (!this->canCopyTo(dstConfig)) {  // unsupported config: dst untouched
+        return false;
+    }
+    // Try to copy within the pixels' own domain first.
+    // If we have a PixelRef, and it supports deep copy, use it.
+    // Currently supported only by texture-backed bitmaps.
+    if (fPixelRef) {
+        SkPixelRef* pixelRef = fPixelRef->deepCopy(dstConfig);
+        if (pixelRef) {
+            dst->setConfig(dstConfig, fWidth, fHeight);
+            dst->setPixelRef(pixelRef)->unref();  // drop deepCopy()'s ref
+            return true;
+        }
+    }
+    // Texture-backed sources never fall back to the cpu-side copyTo().
+    if (this->getTexture()) {
+        return false;
+    } else {
+        return this->copyTo(dst, dstConfig, NULL);
+    }
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index 70c9f6d..e4b1150 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -1813,6 +1813,27 @@
                             config, buffer, rowBytes, flipY);
 }
 
+void GrContext::copyTexture(GrTexture* src, GrRenderTarget* dst) {
+    if (NULL == src || NULL == dst) {
+        return;  // nothing to copy from, or nowhere to copy to
+    }
+    ASSERT_OWNED_RESOURCE(src);
+    // Draw src as a textured rect covering its full size into dst.
+    GrDrawTarget::AutoStateRestore asr(fGpu);
+    reset_target_state(fGpu);
+    fGpu->setRenderTarget(dst);
+    GrSamplerState sampler(GrSamplerState::kClamp_WrapMode, 
+                           GrSamplerState::kClamp_WrapMode,
+                           GrSamplerState::kNearest_Filter);
+    GrMatrix sampleM;
+    sampleM.setIDiv(src->width(), src->height());  // presumably 1/w, 1/h scale
+    sampler.setMatrix(sampleM);
+    fGpu->setTexture(0, src);
+    fGpu->setSamplerState(0, sampler);
+    SkRect rect = SkRect::MakeXYWH(0, 0, src->width(), src->height());
+    fGpu->drawSimpleRect(rect, NULL, 1 << 0);  // mask presumably selects stage 0
+}
+
 void GrContext::internalWriteRenderTargetPixels(GrRenderTarget* target, 
                                                 int left, int top,
                                                 int width, int height,
diff --git a/src/gpu/SkGrTexturePixelRef.cpp b/src/gpu/SkGrTexturePixelRef.cpp
index 8d0eb89..045ddab 100644
--- a/src/gpu/SkGrTexturePixelRef.cpp
+++ b/src/gpu/SkGrTexturePixelRef.cpp
@@ -9,7 +9,9 @@
 
 
 #include "SkGrTexturePixelRef.h"
+#include "GrContext.h"
 #include "GrTexture.h"
+#include "SkGr.h"
 #include "SkRect.h"
 
 // since we call lockPixels recursively on fBitmap, we need a distinct mutex,
@@ -46,6 +48,36 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
+static SkGrTexturePixelRef* copyToTexturePixelRef(GrTexture* texture,
+                                                  SkBitmap::Config dstConfig) {
+    if (NULL == texture) {
+        return NULL;
+    }
+    GrContext* context = texture->getContext();
+    if (NULL == context) {
+        return NULL;
+    }
+    GrTextureDesc desc;
+    // The copy matches the source's size and is created as a render target.
+    desc.fWidth  = texture->width();
+    desc.fHeight = texture->height();
+    desc.fFlags = kRenderTarget_GrTextureFlagBit | kNoStencil_GrTextureFlagBit;
+    desc.fConfig = SkGr::BitmapConfig2PixelConfig(dstConfig, false);
+    desc.fAALevel = kNone_GrAALevel;
+    // NOTE(review): (NULL, 0) presumably means no initial pixel data.
+    GrTexture* dst = context->createUncachedTexture(desc, NULL, 0);
+    if (NULL == dst) {
+        return NULL;
+    }
+    // Fill dst from the source, then wrap it in a new pixel ref.
+    context->copyTexture(texture, dst->asRenderTarget());
+    SkGrTexturePixelRef* pixelRef = new SkGrTexturePixelRef(dst);
+    GrSafeUnref(dst);  // the pixel ref took its own ref in its constructor
+    return pixelRef;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
 SkGrTexturePixelRef::SkGrTexturePixelRef(GrTexture* tex) {
     fTexture = tex;
     GrSafeRef(tex);
@@ -59,6 +91,10 @@
     return (SkGpuTexture*)fTexture;
 }
 
+SkPixelRef* SkGrTexturePixelRef::deepCopy(SkBitmap::Config dstConfig) {
+    return copyToTexturePixelRef(fTexture, dstConfig);  // NULL on failure
+}
+
 bool SkGrTexturePixelRef::onReadPixels(SkBitmap* dst, const SkIRect* subset) {
     if (NULL != fTexture && fTexture->isValid()) {
         int left, top, width, height;
@@ -103,6 +139,19 @@
     return NULL;
 }
 
+SkPixelRef* SkGrRenderTargetPixelRef::deepCopy(SkBitmap::Config dstConfig) {
+    if (NULL == fRenderTarget) {
+        return NULL;
+    }
+    // Note that when copying an SkGrRenderTargetPixelRef, we actually
+    // return an SkGrTexturePixelRef instead.  This is because
+    // SkGrRenderTargetPixelRef is usually created in conjunction with a
+    // GrTexture owned elsewhere (e.g., SkGpuDevice), and cannot live
+    // independently of that texture.  SkGrTexturePixelRef, on the other
+    // hand, owns its own GrTexture, and is thus self-contained.
+    return copyToTexturePixelRef(fRenderTarget->asTexture(), dstConfig);
+}
+
 bool SkGrRenderTargetPixelRef::onReadPixels(SkBitmap* dst, const SkIRect* subset) {
     if (NULL != fRenderTarget && fRenderTarget->isValid()) {
         int left, top, width, height;