Add support for GL_ANGLE_pack_reverse_row_order

Review URL: http://codereview.appspot.com/5448063/


git-svn-id: http://skia.googlecode.com/svn/trunk@2774 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/include/gpu/GrGLConfig.h b/include/gpu/GrGLConfig.h
index cfb5141..c9aaec5 100644
--- a/include/gpu/GrGLConfig.h
+++ b/include/gpu/GrGLConfig.h
@@ -81,6 +81,11 @@
  * GR_GL_RGBA_8888_PIXEL_OPS_SLOW: Set this to 1 if it is known that performing
  * glReadPixels / glTex(Sub)Image with format=GL_RGBA, type=GL_UNISIGNED_BYTE is
  * significantly slower than format=GL_BGRA, type=GL_UNISIGNED_BYTE.
+ *
+ * GR_GL_FULL_READPIXELS_FASTER_THAN_PARTIAL: Set this to 1 if calling
+ * glReadPixels to read the entire framebuffer is faster than calling it with
+ * the same sized rectangle but with a framebuffer bound that is larger than
+ * the rectangle read.
  */
 
 #if !defined(GR_GL_LOG_CALLS)
@@ -119,6 +124,10 @@
     #define GR_GL_RGBA_8888_PIXEL_OPS_SLOW      0
 #endif
 
+#if !defined(GR_GL_FULL_READPIXELS_FASTER_THAN_PARTIAL)
+    #define GR_GL_FULL_READPIXELS_FASTER_THAN_PARTIAL 0
+#endif
+
 #if(GR_GL_NO_CONSTANT_ATTRIBUTES) && (GR_GL_ATTRIBUTE_MATRICES)
     #error "Cannot combine GR_GL_NO_CONSTANT_ATTRIBUTES and GR_GL_ATTRIBUTE_MATRICES"
 #endif
diff --git a/include/gpu/GrGLConfig_chrome.h b/include/gpu/GrGLConfig_chrome.h
index 72d330a..ee3c991 100644
--- a/include/gpu/GrGLConfig_chrome.h
+++ b/include/gpu/GrGLConfig_chrome.h
@@ -17,6 +17,9 @@
 // For RGBA teximage/readpixels ANGLE will sw-convert to/from BGRA.
 #define GR_GL_RGBA_8888_PIXEL_OPS_SLOW  GR_WIN32_BUILD
 
+// ANGLE can go faster if the entire fbo is read rather than a subrect
+#define GR_GL_FULL_READPIXELS_FASTER_THAN_PARTIAL GR_WIN32_BUILD
+
 // cmd buffer allocates memory and memsets it to zero when it sees glBufferData
 // with NULL.
 #define GR_GL_USE_BUFFER_DATA_NULL_HINT 0
diff --git a/include/gpu/GrGLDefines.h b/include/gpu/GrGLDefines.h
index 650646d..4dc6278 100644
--- a/include/gpu/GrGLDefines.h
+++ b/include/gpu/GrGLDefines.h
@@ -176,6 +176,7 @@
 #define GR_GL_UNPACK_ALIGNMENT               0x0CF5
 #define GR_GL_UNPACK_FLIP_Y                  0x9240
 #define GR_GL_PACK_ALIGNMENT                 0x0D05
+#define GR_GL_PACK_REVERSE_ROW_ORDER         0x93A4
 #define GR_GL_MAX_TEXTURE_SIZE               0x0D33
 #define GR_GL_MAX_VIEWPORT_DIMS              0x0D3A
 #define GR_GL_SUBPIXEL_BITS                  0x0D50
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index 57f9a31..70c9f6d 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -1761,7 +1761,20 @@
             { config }
         };
 
-        ast.set(this, desc);
+        // When a full readback is faster than a partial we could always make
+        // the scratch exactly match the passed rect. However, if we see many
+        // different size rectangles we will thrash our texture cache and pay
+        // the cost of creating and destroying many textures. So, we only
+        // request an exact match when the caller is reading an entire RT.
+        ScratchTexMatch match = kApprox_ScratchTexMatch;
+        if (0 == left &&
+            0 == top &&
+            target->width() == width &&
+            target->height() == height &&
+            fGpu->fullReadPixelsIsFasterThanPartial()) {
+            match = kExact_ScratchTexMatch;
+        }
+        ast.set(this, desc, match);
         GrTexture* texture = ast.texture();
         if (!texture) {
             return false;
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index 6741aec..11bf153 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -190,14 +190,16 @@
      * pre- and unpremultiplied alpha. The caller is free to ignore the result
      * and call readPixels with the original config.
      */
-    virtual GrPixelConfig preferredReadPixelsConfig(GrPixelConfig config) {
+    virtual GrPixelConfig preferredReadPixelsConfig(GrPixelConfig config)
+                                                                        const {
         return config;
     }
 
     /**
      * Same as above but applies to writeTexturePixels
      */
-    virtual GrPixelConfig preferredWritePixelsConfig(GrPixelConfig config) {
+    virtual GrPixelConfig preferredWritePixelsConfig(GrPixelConfig config)
+                                                                        const {
         return config;
     }
 
@@ -222,7 +224,13 @@
                                             int left, int top,
                                             int width, int height,
                                             GrPixelConfig config,
-                                            size_t rowBytes) = 0;
+                                            size_t rowBytes) const = 0;
+    /**
+     * This should return true if reading a NxM rectangle of pixels from a
+     * render target is faster if the target has dimensions N and M and the read
+     * rectangle has its top-left at 0,0.
+     */
+    virtual bool fullReadPixelsIsFasterThanPartial() const { return false; }
 
     /**
      * Reads a rectangle of pixels from a render target. Fails if read requires
diff --git a/src/gpu/GrGpuGL.cpp b/src/gpu/GrGpuGL.cpp
index 9025c67..9f7e4ec 100644
--- a/src/gpu/GrGpuGL.cpp
+++ b/src/gpu/GrGpuGL.cpp
@@ -307,11 +307,14 @@
         fGLCaps.fUnpackRowLengthSupport = true;
         fGLCaps.fUnpackFlipYSupport = false;
         fGLCaps.fPackRowLengthSupport = true;
+        fGLCaps.fPackFlipYSupport = false;
     } else {
-        fGLCaps.fUnpackRowLengthSupport = this->hasExtension("GL_EXT_unpack_subimage");
+        fGLCaps.fUnpackRowLengthSupport =this->hasExtension("GL_EXT_unpack_subimage");
         fGLCaps.fUnpackFlipYSupport = this->hasExtension("GL_CHROMIUM_flipy");
         // no extension for pack row length
         fGLCaps.fPackRowLengthSupport = false;
+        fGLCaps.fPackFlipYSupport =
+            this->hasExtension("GL_ANGLE_pack_reverse_row_order");
     }
 
     if (kDesktop_GrGLBinding == this->glBinding()) {
@@ -440,7 +443,7 @@
     }
 }
 
-GrPixelConfig GrGpuGL::preferredReadPixelsConfig(GrPixelConfig config) {
+GrPixelConfig GrGpuGL::preferredReadPixelsConfig(GrPixelConfig config) const {
     if (GR_GL_RGBA_8888_PIXEL_OPS_SLOW && GrPixelConfigIsRGBA8888(config)) {
         return GrPixelConfigSwapRAndB(config);
     } else {
@@ -448,7 +451,7 @@
     }
 }
 
-GrPixelConfig GrGpuGL::preferredWritePixelsConfig(GrPixelConfig config) {
+GrPixelConfig GrGpuGL::preferredWritePixelsConfig(GrPixelConfig config) const {
     if (GR_GL_RGBA_8888_PIXEL_OPS_SLOW && GrPixelConfigIsRGBA8888(config)) {
         return GrPixelConfigSwapRAndB(config);
     } else {
@@ -456,6 +459,10 @@
     }
 }
 
+bool GrGpuGL::fullReadPixelsIsFasterThanPartial() const {
+    return SkToBool(GR_GL_FULL_READPIXELS_FASTER_THAN_PARTIAL);
+}
+
 void GrGpuGL::onResetContext() {
     if (gPrintStartupSpew && !fPrintedCaps) {
         fPrintedCaps = true;
@@ -541,6 +548,9 @@
     if (this->glCaps().fUnpackFlipYSupport) {
         GL_CALL(PixelStorei(GR_GL_UNPACK_FLIP_Y, GR_GL_FALSE));
     }
+    if (this->glCaps().fPackFlipYSupport) {
+        GL_CALL(PixelStorei(GR_GL_PACK_REVERSE_ROW_ORDER, GR_GL_FALSE));
+    }
 }
 
 GrTexture* GrGpuGL::onCreatePlatformTexture(const GrPlatformTextureDesc& desc) {
@@ -1390,8 +1400,13 @@
                                         int left, int top,
                                         int width, int height,
                                         GrPixelConfig config,
-                                        size_t rowBytes) {
-    // if we have to do memcpy to handle non-trim rowBytes then we
+                                        size_t rowBytes) const {
+    // If GL can do the flip then we'll never pay for it.
+    if (this->glCaps().fPackFlipYSupport) {
+        return false;
+    }
+
+    // If we have to do memcpy to handle non-trim rowBytes then we
     // get the flip for free. Otherwise it costs.
     if (this->glCaps().fPackRowLengthSupport) {
         return true;
@@ -1475,6 +1490,9 @@
             readDst = scratch.get();
         }
     }
+    if (!invertY && this->glCaps().fPackFlipYSupport) {
+        GL_CALL(PixelStorei(GR_GL_PACK_REVERSE_ROW_ORDER, 1));
+    }
     GL_CALL(ReadPixels(readRect.fLeft, readRect.fBottom,
                        readRect.fWidth, readRect.fHeight,
                        format, type, readDst));
@@ -1482,6 +1500,10 @@
         GrAssert(this->glCaps().fPackRowLengthSupport);
         GL_CALL(PixelStorei(GR_GL_PACK_ROW_LENGTH, 0));
     }
+    if (!invertY && this->glCaps().fPackFlipYSupport) {
+        GL_CALL(PixelStorei(GR_GL_PACK_REVERSE_ROW_ORDER, 0));
+        invertY = true;
+    }
 
     // now reverse the order of the rows, since GL's are bottom-to-top, but our
     // API presents top-to-bottom. We must preserve the padding contents. Note
@@ -2430,4 +2452,6 @@
              (fUnpackFlipYSupport ? "YES": "NO"));
     GrPrintf("Pack Row length support: %s\n",
              (fPackRowLengthSupport ? "YES": "NO"));
+    GrPrintf("Pack Flip Y support: %s\n",
+             (fPackFlipYSupport ? "YES": "NO"));
 }
diff --git a/src/gpu/GrGpuGL.h b/src/gpu/GrGpuGL.h
index e3c7203..57a4d97 100644
--- a/src/gpu/GrGpuGL.h
+++ b/src/gpu/GrGpuGL.h
@@ -29,18 +29,18 @@
     GrGLBinding glBinding() const { return fGLBinding; }
     GrGLVersion glVersion() const { return fGLVersion; }
 
+    // GrGpu overrides
     virtual GrPixelConfig preferredReadPixelsConfig(GrPixelConfig config)
-                                                                    SK_OVERRIDE;
+                                                            const SK_OVERRIDE;
     virtual GrPixelConfig preferredWritePixelsConfig(GrPixelConfig config)
-                                                                    SK_OVERRIDE;
-
+                                                            const SK_OVERRIDE;
     virtual bool readPixelsWillPayForYFlip(
                                     GrRenderTarget* renderTarget,
                                     int left, int top,
                                     int width, int height,
                                     GrPixelConfig config,
-                                    size_t rowBytes) SK_OVERRIDE;
-
+                                    size_t rowBytes) const SK_OVERRIDE;
+    virtual bool fullReadPixelsIsFasterThanPartial() const SK_OVERRIDE;
 protected:
     GrGpuGL(const GrGLInterface* glInterface, GrGLBinding glBinding);
 
@@ -56,7 +56,8 @@
             , fTextureSwizzleSupport(false)
             , fUnpackRowLengthSupport(false)
             , fUnpackFlipYSupport(false)
-            , fPackRowLengthSupport(false) {
+            , fPackRowLengthSupport(false)
+            , fPackFlipYSupport(false) {
             memset(fAASamples, 0, sizeof(fAASamples));
         }
         SkTArray<GrGLStencilBuffer::Format, true> fStencilFormats;
@@ -108,7 +109,10 @@
 
         // Is there support for GL_PACK_ROW_LENGTH
         bool fPackRowLengthSupport;
-        
+
+        // Is there support for GL_PACK_REVERSE_ROW_ORDER
+        bool fPackFlipYSupport;
+
         void print() const;
     } fGLCaps;