GPU-based Gaussian blur.

This is a first pass at a GPU-based Gaussian blur in Ganesh.  The
convolution shader is implemented as a new sampler filtering mode
(kConvolution_Filter).  There are several known issues:

- no support for blur types other than "normal"
- FBO truncation problem at high zoom values
- uses bilinear for upsampling instead of Mitchell

Review URL:  http://codereview.appspot.com/4645082/



git-svn-id: http://skia.googlecode.com/svn/trunk@1830 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/gpu/src/GrContext.cpp b/gpu/src/GrContext.cpp
index b75c917..092b0ba 100644
--- a/gpu/src/GrContext.cpp
+++ b/gpu/src/GrContext.cpp
@@ -1699,3 +1699,22 @@
     }
 }
 
+// Draws 'rect' with srcTexture bound to stage 0 and sampled through a clamped
+// convolution-filter sampler configured from kernel/kernelWidth/imageIncrement.
+void GrContext::convolveRect(GrTexture* srcTexture, const SkRect& rect,
+                             float imageIncrement[2], const float* kernel,
+                             int kernelWidth) {
+    GrDrawTarget::AutoStateRestore restoreState(fGpu);
+    GrMatrix texelToUnit;
+    texelToUnit.setScale(GR_Scalar1 / srcTexture->width(),
+                         GR_Scalar1 / srcTexture->height());
+    GrSamplerState convSampler(GrSamplerState::kClamp_WrapMode,
+                               GrSamplerState::kClamp_WrapMode,
+                               GrSamplerState::kConvolution_Filter);
+    convSampler.setMatrix(texelToUnit);
+    convSampler.setConvolutionParams(kernelWidth, kernel, imageIncrement);
+    fGpu->setSamplerState(0, convSampler);
+    fGpu->setViewMatrix(GrMatrix::I());
+    fGpu->setTexture(0, srcTexture);
+    fGpu->drawSimpleRect(rect, NULL, 1 << 0);
+}
diff --git a/gpu/src/GrGLProgram.cpp b/gpu/src/GrGLProgram.cpp
index d6a832c..4290c96 100644
--- a/gpu/src/GrGLProgram.cpp
+++ b/gpu/src/GrGLProgram.cpp
@@ -136,6 +136,13 @@
     s->appendS32(stage);
 }
 
+static void convolve_param_names(int stage, GrStringBuilder* k, GrStringBuilder* i) {
+    *i = "uImageIncrement";  // image-increment uniform: base name...
+    i->appendS32(stage);     // ...followed by the stage number
+    *k = "uKernel";          // kernel uniform: base name...
+    k->appendS32(stage);     // ...followed by the same stage number
+}
+
 static void tex_domain_name(int stage, GrStringBuilder* s) {
     *s = "uTexDom";
     s->appendS32(stage);
@@ -941,6 +948,22 @@
                                              texDomName.c_str()));
                 GrAssert(kUnusedUniform != locations.fTexDomUni);
             }
+
+            GrStringBuilder kernelName, imageIncrementName;
+            convolve_param_names(s, &kernelName, &imageIncrementName);
+            if (kUseUniform == locations.fKernelUni) {
+                locations.fKernelUni = GR_GL(GetUniformLocation(
+                                             progID,
+                                             kernelName.c_str()));
+                GrAssert(kUnusedUniform != locations.fKernelUni);
+            }
+
+            if (kUseUniform == locations.fImageIncrementUni) {
+                locations.fImageIncrementUni = GR_GL(GetUniformLocation(
+                                                     progID,
+                                                     imageIncrementName.c_str()));
+                GrAssert(kUnusedUniform != locations.fImageIncrementUni);
+            }
         }
     }
     GR_GL(UseProgram(progID));
@@ -1058,6 +1081,24 @@
         }
     }
 
+    GrStringBuilder kernelName, kernelWidthName, imageIncrementName;
+    convolve_param_names(stageNum, &kernelName, &imageIncrementName);
+
+    if (ProgramDesc::StageDesc::kConvolution_FetchMode == desc.fFetchMode) {
+        segments->fFSUnis.appendf("uniform float %s[%d];\n",
+                                  kernelName.c_str(), desc.fKernelWidth);
+        segments->fFSUnis.appendf("uniform vec2 %s;\n",
+                                  imageIncrementName.c_str());
+        segments->fVSUnis.appendf("uniform vec2 %s;\n",
+                                  imageIncrementName.c_str());
+        locations->fKernelUni = kUseUniform;
+        locations->fImageIncrementUni = kUseUniform;
+        float scale = (desc.fKernelWidth - 1) * 0.5f;
+        segments->fVSCode.appendf("\t%s -= vec2(%g, %g) * %s;\n",
+                                  varyingName.c_str(), scale, scale,
+                                  imageIncrementName.c_str());
+}
+
     /// Fragment Shader Stuff
     GrStringBuilder fsCoordName;
     // function used to access the shader, may be made projective
@@ -1229,6 +1270,17 @@
         segments->fFSCode.appendf("\t%s += %s(%s, %s + vec2(-%s.x,+%s.y))%s;\n", accumVar.c_str(), texFunc.c_str(), samplerName.c_str(), sampleCoords.c_str(), texelSizeName.c_str(), texelSizeName.c_str(), smear);
         segments->fFSCode.appendf("\t%s += %s(%s, %s + vec2(+%s.x,+%s.y))%s;\n", accumVar.c_str(), texFunc.c_str(), samplerName.c_str(), sampleCoords.c_str(), texelSizeName.c_str(), texelSizeName.c_str(), smear);
         segments->fFSCode.appendf("\t%s = .25 * %s%s;\n", fsOutColor, accumVar.c_str(), modulate.c_str());
+    } else if (ProgramDesc::StageDesc::kConvolution_FetchMode == desc.fFetchMode) {
+        segments->fFSCode.append("\tvec4 sum = vec4(0, 0, 0, 0);\n");
+        segments->fFSCode.appendf("\tvec2 coord = %s;\n", sampleCoords.c_str());
+        segments->fFSCode.appendf("\tfor (int i = 0; i < %d; i++) {\n", desc.fKernelWidth);
+        segments->fFSCode.appendf("\t\tsum += %s(%s, coord)%s * %s[i];\n",
+                                  texFunc.c_str(), samplerName.c_str(),
+                                  smear, kernelName.c_str());
+        segments->fFSCode.appendf("\t\tcoord += %s;\n",
+                                  imageIncrementName.c_str());
+        segments->fFSCode.appendf("\t}\n");
+        segments->fFSCode.appendf("\t%s = sum%s;\n", fsOutColor, modulate.c_str());
     } else {
         segments->fFSCode.appendf("\t%s = %s(%s, %s)%s%s;\n", fsOutColor, texFunc.c_str(), samplerName.c_str(), sampleCoords.c_str(), smear, modulate.c_str());
     }
diff --git a/gpu/src/GrGLProgram.h b/gpu/src/GrGLProgram.h
index edd4737..60a7177 100644
--- a/gpu/src/GrGLProgram.h
+++ b/gpu/src/GrGLProgram.h
@@ -114,6 +114,7 @@
             enum FetchMode {
                 kSingle_FetchMode,
                 k2x2_FetchMode,
+                kConvolution_FetchMode,
 
                 kFetchModeCnt,
             };
@@ -132,6 +133,7 @@
             uint8_t fModulation;  // casts to enum Modulation
             uint8_t fFetchMode;  // casts to enum FetchMode
             uint8_t fCoordMapping;  // casts to enum CoordMapping
+            uint8_t fKernelWidth;
 
             inline bool isEnabled() const {
                 return fOptFlags & kIsEnabled_OptFlagBit;
@@ -197,12 +199,16 @@
         GrGLint fSamplerUni;
         GrGLint fRadial2Uni;
         GrGLint fTexDomUni;
+        GrGLint fKernelUni;
+        GrGLint fImageIncrementUni;
         void reset() {
             fTextureMatrixUni = kUnusedUniform;
             fNormalizedTexelSizeUni = kUnusedUniform;
             fSamplerUni = kUnusedUniform;
             fRadial2Uni = kUnusedUniform;
             fTexDomUni = kUnusedUniform;
+            fKernelUni = kUnusedUniform;
+            fImageIncrementUni = kUnusedUniform;
         }
     };
 
diff --git a/gpu/src/GrGpuGL.cpp b/gpu/src/GrGpuGL.cpp
index 1f9afdc..1a9d7fe 100644
--- a/gpu/src/GrGpuGL.cpp
+++ b/gpu/src/GrGpuGL.cpp
@@ -574,8 +574,8 @@
         fHWDrawState.fSamplerStates[s].setRadial2Params(-GR_ScalarMax,
                                                         -GR_ScalarMax,
                                                         true);
-
         fHWDrawState.fSamplerStates[s].setMatrix(GrMatrix::InvalidMatrix());
+        fHWDrawState.fSamplerStates[s].setConvolutionParams(0, NULL, NULL);
     }
 
     fHWBounds.fScissorRect.invalidate();
@@ -586,6 +586,7 @@
     fHWDrawState.fStencilSettings.invalidate();
     fHWStencilClip = false;
     fClipState.fClipIsDirty = true;
+    fClipState.fClipInStencil = false;
 
     fHWGeometryState.fIndexBuffer = NULL;
     fHWGeometryState.fVertexBuffer = NULL;
@@ -1786,6 +1787,20 @@
     }
 }
 
+// Maps a GrSamplerState filter mode to the GL texture filter to use for it.
+static unsigned grToGLFilter(GrSamplerState::Filter filter) {
+    if (GrSamplerState::kNearest_Filter == filter ||
+        GrSamplerState::kConvolution_Filter == filter) {
+        return GR_GL_NEAREST;
+    }
+    // Only bilinear and 4x4 downsample are expected past this point.
+    if (GrSamplerState::kBilinear_Filter != filter &&
+        GrSamplerState::k4x4Downsample_Filter != filter) {
+        GrAssert(!"Unknown filter type");
+    }
+    return GR_GL_LINEAR;
+}
+
 bool GrGpuGL::flushGLStateCommon(GrPrimitiveType type) {
 
     // GrGpu::setupClipAndFlushState should have already checked this
@@ -1827,11 +1842,7 @@
                                                 nextTexture->getTexParams();
             GrGLTexture::TexParams newTexParams;
 
-            if (GrSamplerState::kNearest_Filter == sampler.getFilter()) {
-                newTexParams.fFilter = GR_GL_NEAREST;
-            } else {
-                newTexParams.fFilter = GR_GL_LINEAR;
-            }
+            newTexParams.fFilter = grToGLFilter(sampler.getFilter());
 
             newTexParams.fWrapS =
                         GrGLTexture::WrapMode2GLWrap()[sampler.getWrapX()];
diff --git a/gpu/src/GrGpuGLShaders.cpp b/gpu/src/GrGpuGLShaders.cpp
index 1309802..a52501b 100644
--- a/gpu/src/GrGpuGLShaders.cpp
+++ b/gpu/src/GrGpuGLShaders.cpp
@@ -453,6 +453,18 @@
     }
 }
 
+void GrGpuGLShaders::flushConvolution(int s) {
+    const GrSamplerState& sampler = fCurrDrawState.fSamplerStates[s];
+    const int kernelUni = fProgramData->fUniLocations.fStages[s].fKernelUni;
+    const int imageIncrementUni = fProgramData->fUniLocations.fStages[s].fImageIncrementUni;
+    if (kernelUni != GrGLProgram::kUnusedUniform) {  // kernel weights array
+        GR_GL(Uniform1fv(kernelUni, sampler.getKernelWidth(), sampler.getKernel()));
+    }
+    if (imageIncrementUni != GrGLProgram::kUnusedUniform) {  // one vec2
+        GR_GL(Uniform2fv(imageIncrementUni, 1, sampler.getImageIncrement()));
+    }
+}
+
 void GrGpuGLShaders::flushTexelSize(int s) {
     const int& uni = fProgramData->fUniLocations.fStages[s].fNormalizedTexelSizeUni;
     if (GrGLProgram::kUnusedUniform != uni) {
@@ -587,6 +599,8 @@
 
         this->flushRadial2(s);
 
+        this->flushConvolution(s);
+
         this->flushTexelSize(s);
 
         this->flushTextureDomain(s);
@@ -784,6 +798,10 @@
                 case GrSamplerState::k4x4Downsample_Filter:
                     stage.fFetchMode = StageDesc::k2x2_FetchMode;
                     break;
+                // performs fKernelWidth texture2D()s
+                case GrSamplerState::kConvolution_Filter:
+                    stage.fFetchMode = StageDesc::kConvolution_FetchMode;
+                    break;
                 default:
                     GrCrash("Unexpected filter!");
                     break;
@@ -802,6 +820,11 @@
             } else {
                 stage.fModulation = StageDesc::kColor_Modulation;
             }
+            if (sampler.getFilter() == GrSamplerState::kConvolution_Filter) {
+                stage.fKernelWidth = sampler.getKernelWidth();
+            } else {
+                stage.fKernelWidth = 0;
+            }
         } else {
             stage.fOptFlags     = 0;
             stage.fCoordMapping = (StageDesc::CoordMapping)0;
diff --git a/gpu/src/GrGpuGLShaders.h b/gpu/src/GrGpuGLShaders.h
index 17811c8..43ff5ea 100644
--- a/gpu/src/GrGpuGLShaders.h
+++ b/gpu/src/GrGpuGLShaders.h
@@ -70,6 +70,9 @@
     // flushes the parameters to two point radial gradient
     void flushRadial2(int stage);
 
+    // flushes the parameters for convolution
+    void flushConvolution(int stage);
+
     // flushes the normalized texel size
     void flushTexelSize(int stage);