diff --git a/include/gpu/GrRenderTarget.h b/include/gpu/GrRenderTarget.h
index 19a37a5..fcb4c3d 100644
--- a/include/gpu/GrRenderTarget.h
+++ b/include/gpu/GrRenderTarget.h
@@ -141,9 +141,8 @@
     GrRenderTarget(GrGpu* gpu,
                    bool isWrapped,
                    GrTexture* texture,
-                   const GrTextureDesc& desc,
-                   GrSurfaceOrigin origin)
-        : INHERITED(gpu, isWrapped, desc, origin)
+                   const GrTextureDesc& desc)
+        : INHERITED(gpu, isWrapped, desc)
         , fStencilBuffer(NULL)
         , fTexture(texture) {
         fResolveRect.setLargestInverted();
diff --git a/include/gpu/GrSurface.h b/include/gpu/GrSurface.h
index 3429cc6..02fc0d5 100644
--- a/include/gpu/GrSurface.h
+++ b/include/gpu/GrSurface.h
@@ -34,8 +34,8 @@
     int height() const { return fDesc.fHeight; }
 
     GrSurfaceOrigin origin() const {
-        GrAssert(kTopLeft_GrSurfaceOrigin == fOrigin || kBottomLeft_GrSurfaceOrigin == fOrigin);
-        return fOrigin;
+        GrAssert(kTopLeft_GrSurfaceOrigin == fDesc.fOrigin || kBottomLeft_GrSurfaceOrigin == fDesc.fOrigin);
+        return fDesc.fOrigin;
     }
 
     /**
@@ -104,17 +104,14 @@
                              uint32_t pixelOpsFlags = 0) = 0;
 
 protected:
-    GrSurface(GrGpu* gpu, bool isWrapped, const GrTextureDesc& desc, GrSurfaceOrigin origin)
+    GrSurface(GrGpu* gpu, bool isWrapped, const GrTextureDesc& desc)
     : INHERITED(gpu, isWrapped)
-    , fDesc(desc)
-    , fOrigin(origin) {
+    , fDesc(desc) {
     }
 
     GrTextureDesc fDesc;
 
 private:
-    GrSurfaceOrigin fOrigin;
-
     typedef GrResource INHERITED;
 };
 
diff --git a/include/gpu/GrTexture.h b/include/gpu/GrTexture.h
index 94d5788..c088bdd 100644
--- a/include/gpu/GrTexture.h
+++ b/include/gpu/GrTexture.h
@@ -140,8 +140,8 @@
                                    // base class cons sets to NULL
                                    // subclass cons can create and set
 
-    GrTexture(GrGpu* gpu, bool isWrapped, const GrTextureDesc& desc, GrSurfaceOrigin origin)
-    : INHERITED(gpu, isWrapped, desc, origin)
+    GrTexture(GrGpu* gpu, bool isWrapped, const GrTextureDesc& desc)
+    : INHERITED(gpu, isWrapped, desc)
     , fRenderTarget(NULL) {
 
         // only make sense if alloc size is pow2
diff --git a/include/gpu/GrTypes.h b/include/gpu/GrTypes.h
index 055750d..88c5771 100644
--- a/include/gpu/GrTypes.h
+++ b/include/gpu/GrTypes.h
@@ -428,13 +428,14 @@
 /**
  * Some textures will be stored such that the upper and left edges of the content meet at the
  * the origin (in texture coord space) and for other textures the lower and left edges meet at
- * the origin. Render-targets are always consistent with the convention of the underlying
- * backend API to make it easier to mix native backend rendering with Skia rendering.
+ * the origin. kDefault_GrSurfaceOrigin sets textures to TopLeft, and render targets
+ * to BottomLeft.
  */
 
 enum GrSurfaceOrigin {
-    kBottomLeft_GrSurfaceOrigin,
+    kDefault_GrSurfaceOrigin,
     kTopLeft_GrSurfaceOrigin,
+    kBottomLeft_GrSurfaceOrigin,
 };
 
 /**
@@ -443,6 +444,7 @@
 struct GrTextureDesc {
     GrTextureDesc()
     : fFlags(kNone_GrTextureFlags)
+    , fOrigin(kDefault_GrSurfaceOrigin)
     , fWidth(0)
     , fHeight(0)
     , fConfig(kUnknown_GrPixelConfig)
@@ -450,6 +452,7 @@
     }
 
     GrTextureFlags         fFlags;  //!< bitfield of TextureFlags
+    GrSurfaceOrigin        fOrigin; //!< origin of the texture
     int                    fWidth;  //!< Width of the texture
     int                    fHeight; //!< Height of the texture
 
@@ -640,6 +643,7 @@
     int                             fWidth;         //<! width in pixels
     int                             fHeight;        //<! height in pixels
     GrPixelConfig                   fConfig;        //<! color format
+    GrSurfaceOrigin                 fOrigin;        //<! pixel origin
     /**
      * The number of samples per pixel. Gr uses this to influence decisions
      * about applying other forms of anti-aliasing.
diff --git a/samplecode/SampleApp.cpp b/samplecode/SampleApp.cpp
index dbc779f..7657813 100644
--- a/samplecode/SampleApp.cpp
+++ b/samplecode/SampleApp.cpp
@@ -295,6 +295,7 @@
             desc.fWidth = SkScalarRound(win->width());
             desc.fHeight = SkScalarRound(win->height());
             desc.fConfig = kSkia8888_PM_GrPixelConfig;
+            desc.fOrigin = kBottomLeft_GrSurfaceOrigin;
             GR_GL_GetIntegerv(fCurIntf, GR_GL_SAMPLES, &desc.fSampleCnt);
             GR_GL_GetIntegerv(fCurIntf, GR_GL_STENCIL_BITS, &desc.fStencilBits);
             GrGLint buffer;
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index b729d69..52e2003 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -1328,10 +1328,6 @@
 
     bool unpremul = SkToBool(kUnpremul_PixelOpsFlag & flags);
 
-    // flipY will get set to false when it is handled below using a scratch. However, in that case
-    // we still want to do the read upside down.
-    bool readUpsideDown = flipY;
-
     if (unpremul && kRGBA_8888_GrPixelConfig != config && kBGRA_8888_GrPixelConfig != config) {
         // The unpremul flag is only allowed for these two configs.
         return false;
@@ -1359,6 +1355,7 @@
         desc.fWidth = width;
         desc.fHeight = height;
         desc.fConfig = readConfig;
+        desc.fOrigin = kTopLeft_GrSurfaceOrigin;
 
         // When a full readback is faster than a partial we could always make the scratch exactly
         // match the passed rect. However, if we see many different size rectangles we will trash
@@ -1377,13 +1374,7 @@
         if (texture) {
             // compute a matrix to perform the draw
             SkMatrix textureMatrix;
-            if (flipY) {
-                textureMatrix.setTranslate(SK_Scalar1 * left,
-                                    SK_Scalar1 * (top + height));
-                textureMatrix.set(SkMatrix::kMScaleY, -SK_Scalar1);
-            } else {
-                textureMatrix.setTranslate(SK_Scalar1 *left, SK_Scalar1 *top);
-            }
+            textureMatrix.setTranslate(SK_Scalar1 *left, SK_Scalar1 *top);
             textureMatrix.postIDiv(src->width(), src->height());
 
             SkAutoTUnref<const GrEffectRef> effect;
@@ -1404,7 +1395,6 @@
                                                     textureMatrix));
                 }
                 swapRAndB = false; // we will handle the swap in the draw.
-                flipY = false; // we already incorporated the y flip in the matrix
 
                 GrDrawTarget::AutoStateRestore asr(fGpu, GrDrawTarget::kReset_ASRInit);
                 GrDrawState* drawState = fGpu->drawState();
@@ -1423,11 +1413,11 @@
     }
     if (!fGpu->readPixels(target,
                           left, top, width, height,
-                          readConfig, buffer, rowBytes, readUpsideDown)) {
+                          readConfig, buffer, rowBytes)) {
         return false;
     }
     // Perform any conversions we weren't able to perform using a scratch texture.
-    if (unpremul || swapRAndB || flipY) {
+    if (unpremul || swapRAndB) {
         // These are initialized to suppress a warning
         SkCanvas::Config8888 srcC8888 = SkCanvas::kNative_Premul_Config8888;
         SkCanvas::Config8888 dstC8888 = SkCanvas::kNative_Premul_Config8888;
@@ -1439,47 +1429,11 @@
             GrAssert(c8888IsValid); // we should only do r/b swap on 8888 configs
             srcC8888 = swap_config8888_red_and_blue(srcC8888);
         }
-        if (flipY) {
-            size_t tightRB = width * GrBytesPerPixel(config);
-            if (0 == rowBytes) {
-                rowBytes = tightRB;
-            }
-            SkAutoSTMalloc<256, uint8_t> tempRow(tightRB);
-            intptr_t top = reinterpret_cast<intptr_t>(buffer);
-            intptr_t bot = top + (height - 1) * rowBytes;
-            while (top < bot) {
-                uint32_t* t = reinterpret_cast<uint32_t*>(top);
-                uint32_t* b = reinterpret_cast<uint32_t*>(bot);
-                uint32_t* temp = reinterpret_cast<uint32_t*>(tempRow.get());
-                memcpy(temp, t, tightRB);
-                if (c8888IsValid) {
-                    SkConvertConfig8888Pixels(t, tightRB, dstC8888,
-                                              b, tightRB, srcC8888,
-                                              width, 1);
-                    SkConvertConfig8888Pixels(b, tightRB, dstC8888,
-                                              temp, tightRB, srcC8888,
-                                              width, 1);
-                } else {
-                    memcpy(t, b, tightRB);
-                    memcpy(b, temp, tightRB);
-                }
-                top += rowBytes;
-                bot -= rowBytes;
-            }
-            // The above loop does nothing on the middle row when height is odd.
-            if (top == bot && c8888IsValid && dstC8888 != srcC8888) {
-                uint32_t* mid = reinterpret_cast<uint32_t*>(top);
-                SkConvertConfig8888Pixels(mid, tightRB, dstC8888, mid, tightRB, srcC8888, width, 1);
-            }
-        } else {
-            // if we aren't flipping Y then we have no reason to be here other than doing
-            // conversions for 8888 (r/b swap or upm).
-            GrAssert(c8888IsValid);
-            uint32_t* b32 = reinterpret_cast<uint32_t*>(buffer);
-            SkConvertConfig8888Pixels(b32, rowBytes, dstC8888,
-                                      b32, rowBytes, srcC8888,
-                                      width, height);
-        }
+        GrAssert(c8888IsValid);
+        uint32_t* b32 = reinterpret_cast<uint32_t*>(buffer);
+        SkConvertConfig8888Pixels(b32, rowBytes, dstC8888,
+                                  b32, rowBytes, srcC8888,
+                                  width, height);
     }
     return true;
 }
diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp
index 3da8219..47d4069 100644
--- a/src/gpu/GrGpu.cpp
+++ b/src/gpu/GrGpu.cpp
@@ -231,10 +231,10 @@
 bool GrGpu::readPixels(GrRenderTarget* target,
                        int left, int top, int width, int height,
                        GrPixelConfig config, void* buffer,
-                       size_t rowBytes, bool invertY) {
+                       size_t rowBytes) {
     this->handleDirtyContext();
     return this->onReadPixels(target, left, top, width, height,
-                              config, buffer, rowBytes, invertY);
+                              config, buffer, rowBytes);
 }
 
 void GrGpu::writeTexturePixels(GrTexture* texture,
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index bcda257..85cbd3f 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -234,8 +234,7 @@
      */
     bool readPixels(GrRenderTarget* renderTarget,
                     int left, int top, int width, int height,
-                    GrPixelConfig config, void* buffer, size_t rowBytes,
-                    bool invertY);
+                    GrPixelConfig config, void* buffer, size_t rowBytes);
 
     /**
      * Updates the pixels in a rectangle of a texture.
@@ -473,8 +472,7 @@
                               int left, int top, int width, int height,
                               GrPixelConfig,
                               void* buffer,
-                              size_t rowBytes,
-                              bool invertY) = 0;
+                              size_t rowBytes) = 0;
 
     // overridden by backend-specific derived class to perform the texture update
     virtual void onWriteTexturePixels(GrTexture* texture,
diff --git a/src/gpu/GrTexture.cpp b/src/gpu/GrTexture.cpp
index 614d771..4dea2a6 100644
--- a/src/gpu/GrTexture.cpp
+++ b/src/gpu/GrTexture.cpp
@@ -165,14 +165,16 @@
     GrCacheID::Key idKey;
     // Instead of a client-provided key of the texture contents we create a key from the
     // descriptor.
-    GR_STATIC_ASSERT(sizeof(idKey) >= 12);
+    GR_STATIC_ASSERT(sizeof(idKey) >= 16);
     GrAssert(desc.fHeight < (1 << 16));
     GrAssert(desc.fWidth < (1 << 16));
     idKey.fData32[0] = (desc.fWidth) | (desc.fHeight << 16);
     idKey.fData32[1] = desc.fConfig | desc.fSampleCnt << 16;
     idKey.fData32[2] = desc.fFlags;
-    static const int kPadSize = sizeof(idKey) - 12;
-    memset(idKey.fData8 + 12, 0, kPadSize);
+    idKey.fData32[3] = desc.fOrigin;    // Only needs 2 bits actually
+    static const int kPadSize = sizeof(idKey) - 16;
+    GR_STATIC_ASSERT(kPadSize >= 0);
+    memset(idKey.fData8 + 16, 0, kPadSize);
 
     GrCacheID cacheID(GrResourceKey::ScratchDomain(), idKey);
     return GrResourceKey(cacheID, texture_resource_type(), 0);
diff --git a/src/gpu/gl/GrGLIRect.h b/src/gpu/gl/GrGLIRect.h
index 038520d..cbc4cb8 100644
--- a/src/gpu/gl/GrGLIRect.h
+++ b/src/gpu/gl/GrGLIRect.h
@@ -38,15 +38,20 @@
 
     // sometimes we have a GrIRect from the client that we
     // want to simultaneously make relative to GL's viewport
-    // and convert from top-down to bottom-up.
+    // and (optionally) convert from top-down to bottom-up.
     void setRelativeTo(const GrGLIRect& glRect,
                        int leftOffset,
                        int topOffset,
                        int width,
-                       int height) {
+                       int height,
+                       GrSurfaceOrigin origin) {
         fLeft = glRect.fLeft + leftOffset;
         fWidth = width;
-        fBottom = glRect.fBottom + (glRect.fHeight - topOffset - height);
+        if (kBottomLeft_GrSurfaceOrigin == origin) {
+            fBottom = glRect.fBottom + (glRect.fHeight - topOffset - height);
+        } else {
+            fBottom = glRect.fBottom + topOffset;
+        }
         fHeight = height;
 
         GrAssert(fLeft >= 0);
diff --git a/src/gpu/gl/GrGLProgram.cpp b/src/gpu/gl/GrGLProgram.cpp
index 2aa7236..a4f0df2 100644
--- a/src/gpu/gl/GrGLProgram.cpp
+++ b/src/gpu/gl/GrGLProgram.cpp
@@ -64,6 +64,7 @@
 
     fViewMatrix = SkMatrix::InvalidMatrix();
     fViewportSize.set(-1, -1);
+    fOrigin = (GrSurfaceOrigin) -1;
     fColor = GrColor_ILLEGAL;
     fColorFilterColor = GrColor_ILLEGAL;
     fRTHeight = -1;
diff --git a/src/gpu/gl/GrGLProgram.h b/src/gpu/gl/GrGLProgram.h
index 513bf75..8b494c8 100644
--- a/src/gpu/gl/GrGLProgram.h
+++ b/src/gpu/gl/GrGLProgram.h
@@ -199,10 +199,11 @@
     GrGLuint                    fGShaderID;
     GrGLuint                    fFShaderID;
     GrGLuint                    fProgramID;
-    // The matrix sent to GL is determined by both the client's matrix and
-    // the size of the viewport.
+    // The matrix sent to GL is determined by the client's matrix,
+    // the size of the viewport, and the origin of the render target.
     SkMatrix                    fViewMatrix;
     SkISize                     fViewportSize;
+    GrSurfaceOrigin             fOrigin;
 
     // these reflect the current values of uniforms (GL uniform values travel with program)
     GrColor                     fColor;
diff --git a/src/gpu/gl/GrGLRenderTarget.cpp b/src/gpu/gl/GrGLRenderTarget.cpp
index 47128e7..69d7b9c 100644
--- a/src/gpu/gl/GrGLRenderTarget.cpp
+++ b/src/gpu/gl/GrGLRenderTarget.cpp
@@ -27,13 +27,15 @@
 namespace {
 GrTextureDesc MakeDesc(GrTextureFlags flags,
                        int width, int height,
-                       GrPixelConfig config, int sampleCnt) {
+                       GrPixelConfig config, int sampleCnt,
+                       GrSurfaceOrigin origin) {
     GrTextureDesc temp;
     temp.fFlags = flags;
     temp.fWidth = width;
     temp.fHeight = height;
     temp.fConfig = config;
     temp.fSampleCnt = sampleCnt;
+    temp.fOrigin = origin;
     return temp;
 }
 
@@ -49,9 +51,8 @@
                 texture,
                 MakeDesc(kNone_GrTextureFlags,
                          viewport.fWidth, viewport.fHeight,
-                         desc.fConfig, desc.fSampleCnt),
-                texture->origin()) {
-    GrAssert(kBottomLeft_GrSurfaceOrigin == texture->origin());
+                         desc.fConfig, desc.fSampleCnt,
+                         desc.fOrigin)) {
     GrAssert(NULL != texID);
     GrAssert(NULL != texture);
     // FBO 0 can't also be a texture, right?
@@ -73,8 +74,8 @@
                 NULL,
                 MakeDesc(kNone_GrTextureFlags,
                          viewport.fWidth, viewport.fHeight,
-                         desc.fConfig, desc.fSampleCnt),
-                kBottomLeft_GrSurfaceOrigin) {
+                         desc.fConfig, desc.fSampleCnt,
+                         desc.fOrigin)) {
     this->init(desc, viewport, NULL);
 }
 
diff --git a/src/gpu/gl/GrGLRenderTarget.h b/src/gpu/gl/GrGLRenderTarget.h
index 9a39ca1..ea6ae87 100644
--- a/src/gpu/gl/GrGLRenderTarget.h
+++ b/src/gpu/gl/GrGLRenderTarget.h
@@ -25,12 +25,13 @@
     enum { kUnresolvableFBOID = 0 };
 
     struct Desc {
-        GrGLuint      fRTFBOID;
-        GrGLuint      fTexFBOID;
-        GrGLuint      fMSColorRenderbufferID;
-        bool          fIsWrapped;
-        GrPixelConfig fConfig;
-        int           fSampleCnt;
+        GrGLuint         fRTFBOID;
+        GrGLuint         fTexFBOID;
+        GrGLuint         fMSColorRenderbufferID;
+        bool             fIsWrapped;
+        GrPixelConfig    fConfig;
+        int              fSampleCnt;
+        GrSurfaceOrigin  fOrigin;
     };
 
     // creates a GrGLRenderTarget associated with a texture
diff --git a/src/gpu/gl/GrGLShaderBuilder.cpp b/src/gpu/gl/GrGLShaderBuilder.cpp
index a8514ad..1618fe5 100644
--- a/src/gpu/gl/GrGLShaderBuilder.cpp
+++ b/src/gpu/gl/GrGLShaderBuilder.cpp
@@ -260,6 +260,7 @@
 }
 
 const char* GrGLShaderBuilder::fragmentPosition() {
+#if 1
     if (fContext.caps().fragCoordConventionsSupport()) {
         if (!fSetupFragPosition) {
             fFSHeader.append("#extension GL_ARB_fragment_coord_conventions: require\n");
@@ -294,6 +295,18 @@
         GrAssert(GrGLUniformManager::kInvalidUniformHandle != fRTHeightUniform);
         return kCoordName;
     }
+#else
+    // This is the path we'll need to use once we have support for TopLeft
+    // render targets.
+    if (!fSetupFragPosition) {
+        fFSInputs.push_back().set(kVec4f_GrSLType,
+                                  GrGLShaderVar::kIn_TypeModifier,
+                                  "gl_FragCoord",
+                                  GrGLShaderVar::kDefault_Precision);
+        fSetupFragPosition = true;
+    }
+    return "gl_FragCoord";
+#endif
 }
 
 
diff --git a/src/gpu/gl/GrGLTexture.cpp b/src/gpu/gl/GrGLTexture.cpp
index f798b31..66d6371 100644
--- a/src/gpu/gl/GrGLTexture.cpp
+++ b/src/gpu/gl/GrGLTexture.cpp
@@ -28,7 +28,6 @@
                                       textureDesc.fIsWrapped));
 
     if (NULL != rtDesc) {
-        GrAssert(kBottomLeft_GrSurfaceOrigin == textureDesc.fOrigin);
         GrGLIRect vp;
         vp.fLeft   = 0;
         vp.fWidth  = textureDesc.fWidth;
@@ -42,14 +41,14 @@
 
 GrGLTexture::GrGLTexture(GrGpuGL* gpu,
                          const Desc& textureDesc)
-    : INHERITED(gpu, textureDesc.fIsWrapped, textureDesc, textureDesc.fOrigin) {
+    : INHERITED(gpu, textureDesc.fIsWrapped, textureDesc) {
     this->init(gpu, textureDesc, NULL);
 }
 
 GrGLTexture::GrGLTexture(GrGpuGL* gpu,
                          const Desc& textureDesc,
                          const GrGLRenderTarget::Desc& rtDesc)
-    : INHERITED(gpu, textureDesc.fIsWrapped, textureDesc, textureDesc.fOrigin) {
+    : INHERITED(gpu, textureDesc.fIsWrapped, textureDesc) {
     this->init(gpu, textureDesc, &rtDesc);
 }
 
diff --git a/src/gpu/gl/GrGLTexture.h b/src/gpu/gl/GrGLTexture.h
index 2314821..79cea08 100644
--- a/src/gpu/gl/GrGLTexture.h
+++ b/src/gpu/gl/GrGLTexture.h
@@ -59,7 +59,6 @@
     struct Desc : public GrTextureDesc {
         GrGLuint        fTextureID;
         bool            fIsWrapped;
-        GrSurfaceOrigin fOrigin;
     };
 
     // creates a texture that is also an RT
diff --git a/src/gpu/gl/GrGpuGL.cpp b/src/gpu/gl/GrGpuGL.cpp
index 7ca07a0..7f5f999 100644
--- a/src/gpu/gl/GrGpuGL.cpp
+++ b/src/gpu/gl/GrGpuGL.cpp
@@ -147,6 +147,17 @@
     return status == GR_GL_FRAMEBUFFER_COMPLETE;
 }
 
+// Turns kDefault_GrSurfaceOrigin into a concrete origin; explicit origins pass through as-is.
+// Render targets default to GL's normal (bottom-left) orientation so that the outside world
+// can draw to them without the client having to render upside down; every other texture
+// defaults to top-left.
+static GrSurfaceOrigin resolve_origin(GrSurfaceOrigin origin, bool renderTarget) {
+    if (kDefault_GrSurfaceOrigin != origin) {
+        return origin;
+    }
+    return renderTarget ? kBottomLeft_GrSurfaceOrigin : kTopLeft_GrSurfaceOrigin;
+}
+
 GrGpuGL::GrGpuGL(const GrGLContextInfo& ctxInfo) : fGLContextInfo(ctxInfo) {
 
     GrAssert(ctxInfo.isInitialized());
@@ -485,12 +496,6 @@
         return NULL;
     }
 
-    // FIXME:  add support for TopLeft RT's by flipping all draws.
-    if (desc.fFlags & kRenderTarget_GrBackendTextureFlag &&
-        kBottomLeft_GrSurfaceOrigin != desc.fOrigin) {
-        return NULL;
-    }
-
     int maxSize = this->getCaps().maxTextureSize();
     if (desc.fWidth > maxSize || desc.fHeight > maxSize) {
         return NULL;
@@ -505,16 +510,18 @@
     glTexDesc.fSampleCnt = desc.fSampleCnt;
     glTexDesc.fTextureID = static_cast<GrGLuint>(desc.fTextureHandle);
     glTexDesc.fIsWrapped = true;
-    glTexDesc.fOrigin = desc.fOrigin;
+    bool renderTarget = 0 != (desc.fFlags & kRenderTarget_GrBackendTextureFlag);
+    glTexDesc.fOrigin = resolve_origin(desc.fOrigin, renderTarget);
 
     GrGLTexture* texture = NULL;
-    if (desc.fFlags & kRenderTarget_GrBackendTextureFlag) {
+    if (renderTarget) {
         GrGLRenderTarget::Desc glRTDesc;
         glRTDesc.fRTFBOID = 0;
         glRTDesc.fTexFBOID = 0;
         glRTDesc.fMSColorRenderbufferID = 0;
         glRTDesc.fConfig = desc.fConfig;
         glRTDesc.fSampleCnt = desc.fSampleCnt;
+        glRTDesc.fOrigin = glTexDesc.fOrigin;
         if (!this->createRenderTargetObjects(glTexDesc.fWidth,
                                              glTexDesc.fHeight,
                                              glTexDesc.fTextureID,
@@ -541,6 +548,12 @@
     glDesc.fTexFBOID = GrGLRenderTarget::kUnresolvableFBOID;
     glDesc.fSampleCnt = desc.fSampleCnt;
     glDesc.fIsWrapped = true;
+    // Resolve kDefault first; FBO 0 is GL's window-system framebuffer and must be bottom-left.
+    glDesc.fOrigin = resolve_origin(desc.fOrigin, true);
+    if (0 == glDesc.fRTFBOID) {
+        GrAssert(kBottomLeft_GrSurfaceOrigin == glDesc.fOrigin);
+    }
+
     GrGLIRect viewport;
     viewport.fLeft   = 0;
     viewport.fBottom = 0;
@@ -960,10 +973,8 @@
 
     const Caps& caps = this->getCaps();
 
-    // We keep GrRenderTargets in GL's normal orientation so that they
-    // can be drawn to by the outside world without the client having
-    // to render upside down.
-    glTexDesc.fOrigin = renderTarget ? kBottomLeft_GrSurfaceOrigin : kTopLeft_GrSurfaceOrigin;
+    glTexDesc.fOrigin = resolve_origin(desc.fOrigin, renderTarget);
+    glRTDesc.fOrigin = glTexDesc.fOrigin;
 
     glRTDesc.fSampleCnt = desc.fSampleCnt;
     if (GrGLCaps::kNone_MSFBOType == this->glCaps().msFBOType() &&
@@ -1278,7 +1289,8 @@
                               fScissorState.fRect.fLeft,
                               fScissorState.fRect.fTop,
                               fScissorState.fRect.width(),
-                              fScissorState.fRect.height());
+                              fScissorState.fRect.height(),
+                              rt->origin());
         // if the scissor fully contains the viewport then we fall through and
         // disable the scissor test.
         if (!scissor.contains(vp)) {
@@ -1404,6 +1416,11 @@
                                         int width, int height,
                                         GrPixelConfig config,
                                         size_t rowBytes) const {
+    // If this rendertarget is already TopLeft, we don't need to flip.
+    if (kTopLeft_GrSurfaceOrigin == renderTarget->origin()) {
+        return false;
+    }
+
     // if GL can do the flip then we'll never pay for it.
     if (this->glCaps().packFlipYSupport()) {
         return false;
@@ -1430,10 +1447,10 @@
                            int width, int height,
                            GrPixelConfig config,
                            void* buffer,
-                           size_t rowBytes,
-                           bool invertY) {
+                           size_t rowBytes) {
     GrGLenum format;
     GrGLenum type;
+    bool flipY = kBottomLeft_GrSurfaceOrigin == target->origin();
     if (!this->configToGLFormats(config, false, NULL, &format, &type)) {
         return false;
     }
@@ -1469,7 +1486,7 @@
 
     // the read rect is viewport-relative
     GrGLIRect readRect;
-    readRect.setRelativeTo(glvp, left, top, width, height);
+    readRect.setRelativeTo(glvp, left, top, width, height, target->origin());
 
     size_t tightRowBytes = bpp * width;
     if (0 == rowBytes) {
@@ -1491,7 +1508,7 @@
             readDst = scratch.get();
         }
     }
-    if (!invertY && this->glCaps().packFlipYSupport()) {
+    if (flipY && this->glCaps().packFlipYSupport()) {
         GL_CALL(PixelStorei(GR_GL_PACK_REVERSE_ROW_ORDER, 1));
     }
     GL_CALL(ReadPixels(readRect.fLeft, readRect.fBottom,
@@ -1501,9 +1518,9 @@
         GrAssert(this->glCaps().packRowLengthSupport());
         GL_CALL(PixelStorei(GR_GL_PACK_ROW_LENGTH, 0));
     }
-    if (!invertY && this->glCaps().packFlipYSupport()) {
+    if (flipY && this->glCaps().packFlipYSupport()) {
         GL_CALL(PixelStorei(GR_GL_PACK_REVERSE_ROW_ORDER, 0));
-        invertY = true;
+        flipY = false;
     }
 
     // now reverse the order of the rows, since GL's are bottom-to-top, but our
@@ -1511,7 +1528,7 @@
     // that the above readPixels did not overwrite the padding.
     if (readDst == buffer) {
         GrAssert(rowBytes == readDstRowBytes);
-        if (!invertY) {
+        if (flipY) {
             scratch.reset(tightRowBytes);
             void* tmpRow = scratch.get();
             // flip y in-place by rows
@@ -1532,13 +1549,13 @@
         // const int halfY = height >> 1;
         const char* src = reinterpret_cast<const char*>(readDst);
         char* dst = reinterpret_cast<char*>(buffer);
-        if (!invertY) {
+        if (flipY) {
             dst += (height-1) * rowBytes;
         }
         for (int y = 0; y < height; y++) {
             memcpy(dst, src, tightRowBytes);
             src += readDstRowBytes;
-            if (invertY) {
+            if (!flipY) {
                 dst += rowBytes;
             } else {
                 dst -= rowBytes;
@@ -1735,7 +1752,7 @@
         const GrIRect dirtyRect = rt->getResolveRect();
         GrGLIRect r;
         r.setRelativeTo(vp, dirtyRect.fLeft, dirtyRect.fTop,
-                        dirtyRect.width(), dirtyRect.height());
+                        dirtyRect.width(), dirtyRect.height(), target->origin());
 
         GrAutoTRestore<ScissorState> asr;
         if (GrGLCaps::kAppleES_MSFBOType == this->glCaps().msFBOType()) {
diff --git a/src/gpu/gl/GrGpuGL.h b/src/gpu/gl/GrGpuGL.h
index 3b8c16f..5b24d40 100644
--- a/src/gpu/gl/GrGpuGL.h
+++ b/src/gpu/gl/GrGpuGL.h
@@ -86,8 +86,7 @@
                               int width, int height,
                               GrPixelConfig,
                               void* buffer,
-                              size_t rowBytes,
-                              bool invertY) SK_OVERRIDE;
+                              size_t rowBytes) SK_OVERRIDE;
 
     virtual void onWriteTexturePixels(GrTexture* texture,
                                       int left, int top, int width, int height,
@@ -322,11 +321,13 @@
     } fHWAAState;
 
     struct {
-        SkMatrix    fViewMatrix;
-        SkISize     fRTSize;
+        SkMatrix            fViewMatrix;
+        SkISize             fRTSize;
+        GrSurfaceOrigin     fLastOrigin;
         void invalidate() {
             fViewMatrix = SkMatrix::InvalidMatrix();
             fRTSize.fWidth = -1; // just make the first value compared illegal.
+            fLastOrigin = (GrSurfaceOrigin) -1;
         }
     } fHWPathMatrixState;
 
diff --git a/src/gpu/gl/GrGpuGL_program.cpp b/src/gpu/gl/GrGpuGL_program.cpp
index d5e8fbd..28f76de 100644
--- a/src/gpu/gl/GrGpuGL_program.cpp
+++ b/src/gpu/gl/GrGpuGL_program.cpp
@@ -92,13 +92,19 @@
     const SkMatrix& vm = this->getDrawState().getViewMatrix();
 
     if (kStencilPath_DrawType == type) {
-        if (fHWPathMatrixState.fViewMatrix != vm ||
+        if (fHWPathMatrixState.fLastOrigin != rt->origin() ||
+            fHWPathMatrixState.fViewMatrix != vm ||
             fHWPathMatrixState.fRTSize != viewportSize) {
             // rescale the coords from skia's "device" coords to GL's normalized coords,
-            // and perform a y-flip.
+            // and perform a y-flip if required.
             SkMatrix m;
-            m.setScale(SkIntToScalar(2) / rt->width(), SkIntToScalar(-2) / rt->height());
-            m.postTranslate(-SK_Scalar1, SK_Scalar1);
+            if (kBottomLeft_GrSurfaceOrigin == rt->origin()) {
+                m.setScale(SkIntToScalar(2) / rt->width(), SkIntToScalar(-2) / rt->height());
+                m.postTranslate(-SK_Scalar1, SK_Scalar1);
+            } else {
+                m.setScale(SkIntToScalar(2) / rt->width(), SkIntToScalar(2) / rt->height());
+                m.postTranslate(-SK_Scalar1, -SK_Scalar1);
+            }
             m.preConcat(vm);
 
             // GL wants a column-major 4x4.
@@ -128,14 +134,23 @@
             GL_CALL(LoadMatrixf(mv));
             fHWPathMatrixState.fViewMatrix = vm;
             fHWPathMatrixState.fRTSize = viewportSize;
+            fHWPathMatrixState.fLastOrigin = rt->origin();
         }
-    } else if (!fCurrentProgram->fViewMatrix.cheapEqualTo(vm) ||
+    } else if (fCurrentProgram->fOrigin != rt->origin() ||
+               !fCurrentProgram->fViewMatrix.cheapEqualTo(vm) ||
                fCurrentProgram->fViewportSize != viewportSize) {
         SkMatrix m;
-        m.setAll(
-            SkIntToScalar(2) / viewportSize.fWidth, 0, -SK_Scalar1,
-            0,-SkIntToScalar(2) / viewportSize.fHeight, SK_Scalar1,
+        if (kBottomLeft_GrSurfaceOrigin == rt->origin()) {
+            m.setAll(
+                SkIntToScalar(2) / viewportSize.fWidth, 0, -SK_Scalar1,
+                0,-SkIntToScalar(2) / viewportSize.fHeight, SK_Scalar1,
             0, 0, SkMatrix::I()[8]);
+        } else {
+            m.setAll(
+                SkIntToScalar(2) / viewportSize.fWidth, 0, -SK_Scalar1,
+                0, SkIntToScalar(2) / viewportSize.fHeight,-SK_Scalar1,
+            0, 0, SkMatrix::I()[8]);
+        }
         m.setConcat(m, vm);
 
         // ES doesn't allow you to pass true to the transpose param,
@@ -156,6 +171,7 @@
                                             mt);
         fCurrentProgram->fViewMatrix = vm;
         fCurrentProgram->fViewportSize = viewportSize;
+        fCurrentProgram->fOrigin = rt->origin();
     }
 }
 
diff --git a/tests/ReadPixelsTest.cpp b/tests/ReadPixelsTest.cpp
index 41ecccf..df51a19 100644
--- a/tests/ReadPixelsTest.cpp
+++ b/tests/ReadPixelsTest.cpp
@@ -303,7 +303,7 @@
         SkIRect::MakeLTRB(3 * DEV_W / 4, -10, DEV_W + 10, DEV_H + 10),
     };
 
-    for (int dtype = 0; dtype < 2; ++dtype) {
+    for (int dtype = 0; dtype < 3; ++dtype) {
         int glCtxTypeCnt = 1;
 #if SK_SUPPORT_GPU
         if (0 != dtype)  {
@@ -325,7 +325,16 @@
                 if (NULL == context) {
                     continue;
                 }
-                device.reset(new SkGpuDevice(context, SkBitmap::kARGB_8888_Config, DEV_W, DEV_H));
+                GrTextureDesc desc;
+                desc.fFlags = kRenderTarget_GrTextureFlagBit | kNoStencil_GrTextureFlagBit;
+                desc.fWidth = DEV_W;
+                desc.fHeight = DEV_H;
+                desc.fConfig = kSkia8888_PM_GrPixelConfig;
+                desc.fOrigin = 1 == dtype ? kBottomLeft_GrSurfaceOrigin
+                                          : kTopLeft_GrSurfaceOrigin;
+                GrAutoScratchTexture ast(context, desc, GrContext::kExact_ScratchTexMatch);
+                SkAutoTUnref<GrTexture> tex(ast.detach());
+                device.reset(new SkGpuDevice(context, tex));
 #else
                 continue;
 #endif
diff --git a/tests/WritePixelsTest.cpp b/tests/WritePixelsTest.cpp
index 14b2c51..83e0784 100644
--- a/tests/WritePixelsTest.cpp
+++ b/tests/WritePixelsTest.cpp
@@ -286,7 +286,8 @@
 enum DevType {
     kRaster_DevType,
 #if SK_SUPPORT_GPU
-    kGpu_DevType,
+    kGpu_BottomLeft_DevType,
+    kGpu_TopLeft_DevType,
 #endif
 };
 
@@ -299,7 +300,8 @@
     {kRaster_DevType, true},
     {kRaster_DevType, false},
 #if SK_SUPPORT_GPU && defined(SK_SCALAR_IS_FLOAT)
-    {kGpu_DevType, true}, // row bytes has no meaning on gpu devices
+    {kGpu_BottomLeft_DevType, true}, // row bytes has no meaning on gpu devices
+    {kGpu_TopLeft_DevType, true}, // row bytes has no meaning on gpu devices
 #endif
 };
 
@@ -321,8 +323,18 @@
             return new SkDevice(bmp);
         }
 #if SK_SUPPORT_GPU
-        case kGpu_DevType:
-            return new SkGpuDevice(grCtx, SkBitmap::kARGB_8888_Config, DEV_W, DEV_H);
+        case kGpu_BottomLeft_DevType:
+        case kGpu_TopLeft_DevType:
+            GrTextureDesc desc;
+            desc.fFlags = kRenderTarget_GrTextureFlagBit;
+            desc.fWidth = DEV_W;
+            desc.fHeight = DEV_H;
+            desc.fConfig = kSkia8888_PM_GrPixelConfig;
+            desc.fOrigin = kGpu_TopLeft_DevType == c.fDevType ?
+                kTopLeft_GrSurfaceOrigin : kBottomLeft_GrSurfaceOrigin;
+            GrAutoScratchTexture ast(grCtx, desc, GrContext::kExact_ScratchTexMatch);
+            SkAutoTUnref<GrTexture> tex(ast.detach());
+            return new SkGpuDevice(grCtx, tex);
 #endif
     }
     return NULL;
@@ -401,14 +413,16 @@
     for (size_t i = 0; i < SK_ARRAY_COUNT(gCanvasConfigs); ++i) {
         int glCtxTypeCnt = 1;
 #if SK_SUPPORT_GPU
-        if (kGpu_DevType == gCanvasConfigs[i].fDevType)  {
+        bool isGPUDevice = kGpu_TopLeft_DevType == gCanvasConfigs[i].fDevType ||
+                           kGpu_BottomLeft_DevType == gCanvasConfigs[i].fDevType;
+        if (isGPUDevice) {
             glCtxTypeCnt = GrContextFactory::kGLContextTypeCnt;
         }
 #endif
         for (int glCtxType = 0; glCtxType < glCtxTypeCnt; ++glCtxType) {
             GrContext* context = NULL;
 #if SK_SUPPORT_GPU
-            if (kGpu_DevType == gCanvasConfigs[i].fDevType) {
+            if (isGPUDevice) {
                 GrContextFactory::GLContextType type =
                     static_cast<GrContextFactory::GLContextType>(glCtxType);
 #if SK_ANGLE // This test breaks ANGLE with GL errors in texsubimage2D. Disable until debugged.
