diff --git a/gpu/include/GrMemory.h b/gpu/include/GrMemory.h
deleted file mode 100644
index be8b1d7..0000000
--- a/gpu/include/GrMemory.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
-    Copyright 2010 Google Inc.
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-         http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
- */
-
-
-#ifndef GrMemory_DEFINED
-#define GrMemory_DEFINED
-
-#include "GrNoncopyable.h"
-
-class GrAutoMalloc : GrNoncopyable {
-public:
-    GrAutoMalloc() : fPtr(NULL), fAllocatedBytes(0){
-    }
-
-    GrAutoMalloc(size_t bytes) : fPtr(GrMalloc(bytes)), fAllocatedBytes(bytes) {}
-    ~GrAutoMalloc() { GrFree(fPtr); }
-
-    /**
-     *  Return the allocated memory, or NULL if it has already been freed or
-     *  detached.
-     */
-    void* get() const { return fPtr; }
-
-    size_t size() const { return fAllocatedBytes; }
-
-    /**
-     *  transfer ownership of the memory to the caller. It must be freed with
-     *  a call to GrFree()
-     */
-    void* detach() {
-        void* ptr = fPtr;
-        fPtr = NULL;    // we no longer own the block
-        fAllocatedBytes = 0;
-        return ptr;
-    }
-
-    /**
-     *  Reallocates to a new size. May or may not call malloc. The contents
-     *  are not preserved. If growOnly is true it will never reduce the
-     *  allocated size.
-     */
-    void* realloc(size_t newSize, bool growOnly = false) {
-        bool alloc;
-        if (growOnly) {
-            alloc = newSize > fAllocatedBytes;
-        } else {
-            alloc = newSize != fAllocatedBytes;
-        }
-        if (alloc) {
-            GrFree(fPtr);
-            fPtr = newSize ? GrMalloc(newSize) : NULL;
-            fAllocatedBytes = newSize;
-        }
-        GrAssert(fAllocatedBytes >= newSize);
-        GR_DEBUGCODE(memset(fPtr, 0xEF, fAllocatedBytes));
-        return fPtr;
-    }
-
-    /**
-     *  free the block now. get() will now return NULL
-     */
-    void free() {
-        GrFree(fPtr);
-        fPtr = NULL;
-        fAllocatedBytes = 0;
-    }
-
-private:
-    void* fPtr;
-    size_t fAllocatedBytes;
-};
-
-/**
- *  Variant of GrAutoMalloc with a compile-time specified byte size that is
- *  pre-allocated in the class object, avoiding a call to to GrMalloc if
- *  possible.
- */
-template <size_t SIZE> class GrAutoSMalloc : GrNoncopyable {
-public:
-    GrAutoSMalloc() {
-        fPtr = fStorage;
-        fAllocatedBytes = SIZE;
-    }
-
-    explicit GrAutoSMalloc(size_t bytes) {
-        if (bytes > SIZE) {
-            fPtr = GrMalloc(bytes);
-            fAllocatedBytes = bytes;
-        } else {
-            fPtr = fStorage;
-            fAllocatedBytes = SIZE;
-        }
-    }
-
-    ~GrAutoSMalloc() {
-        if (fPtr != (void*)fStorage) {
-            GrFree(fPtr);
-        }
-    }
-
-    /**
-     *  Return the allocated memory, or NULL if it has already been freed or
-     *  detached.
-     */
-    void* get() const { return fPtr; }
-
-    /**
-     *  Reallocates to a new size. May or may not call malloc. The contents
-     *  are not preserved. If growOnly is true it will never reduce the
-     *  allocated size.
-     */
-    void* realloc(size_t newSize, bool growOnly = false) {
-        if (newSize <= SIZE) {
-            if (NULL == fPtr) {
-                fPtr = fStorage;
-                fAllocatedBytes = SIZE;
-            } else if (!growOnly && fPtr != (void*)fStorage) {
-                GrFree(fPtr);
-                fPtr = fStorage;
-                fAllocatedBytes = SIZE;
-            }
-        } else if ((newSize > fAllocatedBytes) ||
-                   (!growOnly && newSize < (fAllocatedBytes >> 1))) {
-            if (NULL != fPtr && fPtr != (void*)fStorage) {
-                GrFree(fPtr);
-            }
-            fPtr = GrMalloc(newSize);
-            fAllocatedBytes = newSize;
-        }
-        GrAssert(fAllocatedBytes >= newSize);
-        GrAssert((fPtr == fStorage) == (fAllocatedBytes == SIZE));
-        GR_DEBUGCODE(memset(fPtr, 0xEF, fAllocatedBytes));
-        return fPtr;
-    }
-
-    /**
-     *  free the block now. get() will now return NULL
-     */
-    void free() {
-        if (fPtr != (void*)fStorage) {
-            GrFree(fPtr);
-        }
-        fAllocatedBytes = 0;
-        fPtr = NULL;
-    }
-
-private:
-    void*    fPtr;
-    uint32_t fAllocatedBytes;
-    uint32_t fStorage[GrALIGN4(SIZE) >> 2];
-};
-
-/**
- *  Variant of GrAutoMalloc with a compile-time specified byte size that is
- *  pre-allocated in the class object, avoiding a call to to GrMalloc if
- *  possible.
- */
-template <int COUNT, typename T>
-class GrAutoSTMalloc : public GrAutoSMalloc<COUNT * sizeof(T)> {
-public:
-    GrAutoSTMalloc(int count) : GrAutoSMalloc<COUNT * sizeof(T)>(count * sizeof(T)) {}
-
-    operator T*() { return (T*)this->get(); }
-};
-
-
-#endif
-
diff --git a/gpu/src/GrAtlas.cpp b/gpu/src/GrAtlas.cpp
index c623952..6f2ed9e 100644
--- a/gpu/src/GrAtlas.cpp
+++ b/gpu/src/GrAtlas.cpp
@@ -17,7 +17,6 @@
 
 #include "GrAtlas.h"
 #include "GrGpu.h"
-#include "GrMemory.h"
 #include "GrRectanizer.h"
 #include "GrPlotMgr.h"
 
@@ -97,7 +96,7 @@
         return false;
     }
 
-    GrAutoSMalloc<1024> storage;
+    SkAutoSMalloc<1024> storage;
     int dstW = width + 2*BORDER;
     int dstH = height + 2*BORDER;
     if (BORDER) {
diff --git a/gpu/src/GrBufferAllocPool.cpp b/gpu/src/GrBufferAllocPool.cpp
index 73d707f..5e9dbea 100644
--- a/gpu/src/GrBufferAllocPool.cpp
+++ b/gpu/src/GrBufferAllocPool.cpp
@@ -98,7 +98,7 @@
         fFirstPreallocBuffer = (fFirstPreallocBuffer + fPreallocBuffersInUse) %
                                fPreallocBuffers.count();
     }
-    fCpuData.realloc(fGpu->supportsBufferLocking() ? 0 : fMinBlockSize);
+    fCpuData.alloc(fGpu->supportsBufferLocking() ? 0 : fMinBlockSize);
     GrAssert(0 == fPreallocBuffersInUse);
     VALIDATE();
 }
@@ -128,7 +128,6 @@
             GrAssert(buf->lockPtr() == fBufferPtr);
         } else {
             GrAssert(fCpuData.get() == fBufferPtr);
-            GrAssert(fCpuData.size() == fBlocks.back().fBuffer->size());
         }
     } else {
         GrAssert(fBlocks.empty() || !fBlocks.back().fBuffer->isLocked());
@@ -286,7 +285,7 @@
     }
 
     if (NULL == fBufferPtr) {
-        fBufferPtr = fCpuData.realloc(size);
+        fBufferPtr = fCpuData.alloc(size);
     }
 
     VALIDATE(true);
@@ -318,7 +317,6 @@
     GrAssert(NULL != buffer);
     GrAssert(!buffer->isLocked());
     GrAssert(fCpuData.get() == fBufferPtr);
-    GrAssert(fCpuData.size() == buffer->size());
     GrAssert(flushSize <= buffer->size());
 
     bool updated = false;
diff --git a/gpu/src/GrBufferAllocPool.h b/gpu/src/GrBufferAllocPool.h
index c18e36b..79036f2 100644
--- a/gpu/src/GrBufferAllocPool.h
+++ b/gpu/src/GrBufferAllocPool.h
@@ -21,7 +21,6 @@
 #include "GrNoncopyable.h"
 #include "GrTDArray.h"
 #include "GrTArray.h"
-#include "GrMemory.h"
 
 class GrGeometryBuffer;
 class GrGpu;
@@ -184,8 +183,8 @@
     GrTArray<BufferBlock>           fBlocks;
     int                             fPreallocBuffersInUse;
     int                             fFirstPreallocBuffer;
-    GrAutoMalloc                    fCpuData;
-    void*                       	fBufferPtr;
+    SkAutoMalloc                    fCpuData;
+    void*                           fBufferPtr;
 };
 
 class GrVertexBuffer;
diff --git a/gpu/src/GrContext.cpp b/gpu/src/GrContext.cpp
index d97974a..4982704 100644
--- a/gpu/src/GrContext.cpp
+++ b/gpu/src/GrContext.cpp
@@ -20,7 +20,6 @@
 #include "GrGpu.h"
 #include "GrIndexBuffer.h"
 #include "GrInOrderDrawBuffer.h"
-#include "GrMemory.h"
 #include "GrPathRenderer.h"
 #include "GrPathUtils.h"
 #include "GrTextureCache.h"
@@ -291,7 +290,7 @@
             rtDesc.fWidth  = GrNextPow2(desc.fWidth);
             rtDesc.fHeight = GrNextPow2(desc.fHeight);
             int bpp = GrBytesPerPixel(desc.fFormat);
-            GrAutoSMalloc<128*128*4> stretchedPixels(bpp *
+            SkAutoSMalloc<128*128*4> stretchedPixels(bpp *
                                                      rtDesc.fWidth *
                                                      rtDesc.fHeight);
             stretchImage(stretchedPixels.get(), rtDesc.fWidth, rtDesc.fHeight,
diff --git a/gpu/src/GrGLProgram.cpp b/gpu/src/GrGLProgram.cpp
index 319995c..733e705 100644
--- a/gpu/src/GrGLProgram.cpp
+++ b/gpu/src/GrGLProgram.cpp
@@ -18,7 +18,6 @@
 
 #include "GrBinHashKey.h"
 #include "GrGLConfig.h"
-#include "GrMemory.h"
 
 #include "SkXfermode.h"
 #include SK_USER_TRACE_INCLUDE_FILE
@@ -775,7 +774,7 @@
     if (!compiled) {
         GrGLint infoLen = GR_GL_INIT_ZERO;
         GR_GL(GetShaderiv(shader, GR_GL_INFO_LOG_LENGTH, &infoLen));
-        GrAutoMalloc log(sizeof(char)*(infoLen+1)); // outside if for debugger
+        SkAutoMalloc log(sizeof(char)*(infoLen+1)); // outside if for debugger
         if (infoLen > 0) {
             GR_GL(GetShaderInfoLog(shader, infoLen+1, NULL, (char*)log.get()));
             for (int i = 0; i < stringCnt; ++i) {
@@ -853,7 +852,7 @@
     if (!linked) {
         GrGLint infoLen = GR_GL_INIT_ZERO;
         GR_GL(GetProgramiv(progID, GR_GL_INFO_LOG_LENGTH, &infoLen));
-        GrAutoMalloc log(sizeof(char)*(infoLen+1));  // outside if for debugger
+        SkAutoMalloc log(sizeof(char)*(infoLen+1));  // outside if for debugger
         if (infoLen > 0) {
             GR_GL(GetProgramInfoLog(progID,
                                     infoLen+1,
diff --git a/gpu/src/GrGLTexture.cpp b/gpu/src/GrGLTexture.cpp
index d36e21b..207246b 100644
--- a/gpu/src/GrGLTexture.cpp
+++ b/gpu/src/GrGLTexture.cpp
@@ -17,7 +17,6 @@
 
 #include "GrGLTexture.h"
 #include "GrGpuGL.h"
-#include "GrMemory.h"
 
 #define GPUGL static_cast<GrGpuGL*>(getGpu())
 
@@ -168,7 +167,7 @@
     GrAssert(fUploadFormat != GR_GL_PALETTE8_RGBA8);
 
     // in case we need a temporary, trimmed copy of the src pixels
-    GrAutoSMalloc<128 * 128> trimStorage;
+    SkAutoSMalloc<128 * 128> trimStorage;
 
     /*
      *  check if our srcData has extra bytes past each row. If so, we need
diff --git a/gpu/src/GrGpu.cpp b/gpu/src/GrGpu.cpp
index db7de2c..192d29b 100644
--- a/gpu/src/GrGpu.cpp
+++ b/gpu/src/GrGpu.cpp
@@ -15,7 +15,6 @@
  */
 
 #include "GrGpu.h"
-#include "GrMemory.h"
 #include "GrTextStrike.h"
 #include "GrTextureCache.h"
 #include "GrClipIterator.h"
diff --git a/gpu/src/GrGpuGL.cpp b/gpu/src/GrGpuGL.cpp
index ffdf496..fe51875 100644
--- a/gpu/src/GrGpuGL.cpp
+++ b/gpu/src/GrGpuGL.cpp
@@ -15,8 +15,8 @@
  */
 
 #include "GrGpuGL.h"
-#include "GrMemory.h"
 #include "GrTypes.h"
+#include "SkTemplates.h"
 
 static const GrGLuint GR_MAX_GLUINT = ~0;
 static const GrGLint  GR_INVAL_GLINT = ~0;
@@ -312,7 +312,7 @@
 
     GrGLint numFormats;
     GR_GL_GetIntegerv(GR_GL_NUM_COMPRESSED_TEXTURE_FORMATS, &numFormats);
-    GrAutoSTMalloc<10, GrGLint> formats(numFormats);
+    SkAutoSTMalloc<10, GrGLint> formats(numFormats);
     GR_GL_GetIntegerv(GR_GL_COMPRESSED_TEXTURE_FORMATS, formats);
     for (int i = 0; i < numFormats; ++i) {
         if (formats[i] == GR_GL_PALETTE8_RGBA8) {
@@ -780,7 +780,7 @@
     glDesc.fUploadByteCount = GrBytesPerPixel(desc.fFormat);
 
     // in case we need a temporary, trimmed copy of the src pixels
-    GrAutoSMalloc<128 * 128> trimStorage;
+    SkAutoSMalloc<128 * 128> trimStorage;
 
     /*
      *  check if our srcData has extra bytes past each row. If so, we need
@@ -875,7 +875,7 @@
             maxTexels = GrMax(extraW * desc.fHeight, maxTexels);
             maxTexels = GrMax(desc.fWidth * extraH, maxTexels);
 
-            GrAutoSMalloc<128*128> texels(glDesc.fUploadByteCount * maxTexels);
+            SkAutoSMalloc<128*128> texels(glDesc.fUploadByteCount * maxTexels);
 
             uint32_t rowSize = desc.fWidth * glDesc.fUploadByteCount;
             if (extraH) {
@@ -1325,7 +1325,7 @@
     // API presents top-to-bottom
     {
         size_t stride = width * GrBytesPerPixel(config);
-        GrAutoMalloc rowStorage(stride);
+        SkAutoMalloc rowStorage(stride);
         void* tmp = rowStorage.get();
 
         const int halfY = height >> 1;
diff --git a/gpu/src/GrGpuGLShaders.cpp b/gpu/src/GrGpuGLShaders.cpp
index 0b8aad7..b474be2 100644
--- a/gpu/src/GrGpuGLShaders.cpp
+++ b/gpu/src/GrGpuGLShaders.cpp
@@ -18,7 +18,6 @@
 #include "GrGLProgram.h"
 #include "GrGpuGLShaders.h"
 #include "GrGpuVertex.h"
-#include "GrMemory.h"
 #include "GrNoncopyable.h"
 #include "GrStringBuilder.h"
 #include "GrRandom.h"
diff --git a/gpu/src/GrMemory.cpp b/gpu/src/GrMemory.cpp
index 3da924a..02ac025 100644
--- a/gpu/src/GrMemory.cpp
+++ b/gpu/src/GrMemory.cpp
@@ -15,8 +15,6 @@
  */
 
 
-#include "GrMemory.h"
-
 #include <stdlib.h>
 
 void* GrMalloc(size_t bytes) {
diff --git a/gpu/src/GrPathRenderer.cpp b/gpu/src/GrPathRenderer.cpp
index b565838..1e3c645 100644
--- a/gpu/src/GrPathRenderer.cpp
+++ b/gpu/src/GrPathRenderer.cpp
@@ -3,10 +3,11 @@
 #include "GrPoint.h"
 #include "GrDrawTarget.h"
 #include "GrPathUtils.h"
-#include "GrMemory.h"
 #include "GrTexture.h"
 
 #include "SkString.h"
+#include "SkTemplates.h"
+
 #include SK_USER_TRACE_INCLUDE_FILE
 
 GrPathRenderer::GrPathRenderer()
@@ -234,7 +235,7 @@
     GrPoint* vert = base;
     GrPoint* subpathBase = base;
 
-    GrAutoSTMalloc<8, uint16_t> subpathVertCount(subpathCnt);
+    SkAutoSTMalloc<8, uint16_t> subpathVertCount(subpathCnt);
 
     // TODO: use primitve restart if available rather than multiple draws
     GrPrimitiveType             type;
diff --git a/gpu/src/GrTesselatedPathRenderer.cpp b/gpu/src/GrTesselatedPathRenderer.cpp
index c1d5ac0..5ddba99 100644
--- a/gpu/src/GrTesselatedPathRenderer.cpp
+++ b/gpu/src/GrTesselatedPathRenderer.cpp
@@ -16,11 +16,12 @@
 
 #include "GrTesselatedPathRenderer.h"
 
-#include "GrMemory.h"
 #include "GrPathUtils.h"
 #include "GrPoint.h"
 #include "GrTDArray.h"
 
+#include "SkTemplates.h"
+
 #include <limits.h>
 #include <sk_glu.h>
 
@@ -393,12 +394,12 @@
     if (maxPts > USHRT_MAX) {
         return;
     }
-    GrAutoSTMalloc<8, GrPoint> baseMem(maxPts);
-    GrPoint* base = (GrPoint*) baseMem;
+    SkAutoSTMalloc<8, GrPoint> baseMem(maxPts);
+    GrPoint* base = baseMem;
     GrPoint* vert = base;
     GrPoint* subpathBase = base;
 
-    GrAutoSTMalloc<8, uint16_t> subpathVertCount(subpathCnt);
+    SkAutoSTMalloc<8, uint16_t> subpathVertCount(subpathCnt);
 
     GrPoint pts[4];
     SkPath::Iter iter(path, false);
diff --git a/gpu/src/GrTextStrike.cpp b/gpu/src/GrTextStrike.cpp
index 455f88a..c44ad38 100644
--- a/gpu/src/GrTextStrike.cpp
+++ b/gpu/src/GrTextStrike.cpp
@@ -17,7 +17,6 @@
 
 #include "GrAtlas.h"
 #include "GrGpu.h"
-#include "GrMemory.h"
 #include "GrRectanizer.h"
 #include "GrTextStrike.h"
 #include "GrTextStrike_impl.h"
@@ -189,7 +188,7 @@
 
     int bytesPerPixel = GrMaskFormatBytesPerPixel(fMaskFormat);
     size_t size = glyph->fBounds.area() * bytesPerPixel;
-    GrAutoSMalloc<1024> storage(size);
+    SkAutoSMalloc<1024> storage(size);
     if (!scaler->getPackedGlyphImage(glyph->fPackedID, glyph->width(),
                                      glyph->height(),
                                      glyph->width() * bytesPerPixel,
diff --git a/gyp/gpu.gyp b/gyp/gpu.gyp
index ccd3c7d..dc157c5 100644
--- a/gyp/gpu.gyp
+++ b/gyp/gpu.gyp
@@ -107,7 +107,6 @@
         '../gpu/include/GrIPoint.h',
         '../gpu/include/GrKey.h',
         '../gpu/include/GrMatrix.h',
-        '../gpu/include/GrMemory.h',
         '../gpu/include/GrMesh.h',
         '../gpu/include/GrNoncopyable.h',
         '../gpu/include/GrPaint.h',
diff --git a/include/core/SkTemplates.h b/include/core/SkTemplates.h
index 55109bf..996e0b7 100644
--- a/include/core/SkTemplates.h
+++ b/include/core/SkTemplates.h
@@ -179,36 +179,88 @@
 */
 template <typename T> class SkAutoTMalloc : SkNoncopyable {
 public:
-    SkAutoTMalloc(size_t count)
-    {
+    SkAutoTMalloc(size_t count) {
         fPtr = (T*)sk_malloc_flags(count * sizeof(T), SK_MALLOC_THROW | SK_MALLOC_TEMP);
     }
-    ~SkAutoTMalloc()
-    {
+
+    ~SkAutoTMalloc() {
         sk_free(fPtr);
     }
+
+    // Frees the current block and allocates room for count T's; old contents are NOT preserved.
+    void realloc (size_t count) {
+        sk_free(fPtr);
+        fPtr = (T*)sk_malloc_flags(count * sizeof(T), SK_MALLOC_THROW | SK_MALLOC_TEMP);
+    }
+
     T* get() const { return fPtr; }
 
+    operator T*() {
+        return fPtr;
+    }
+
+    operator const T*() const {
+        return fPtr;
+    }
+
+    T& operator[](int index) {
+        return fPtr[index];
+    }
+
+    const T& operator[](int index) const {
+        return fPtr[index];
+    }
+
 private:
     T*  fPtr;
 };
 
 template <size_t N, typename T> class SkAutoSTMalloc : SkNoncopyable {
 public:
-    SkAutoSTMalloc(size_t count)
-    {
-        if (count <= N)
+    SkAutoSTMalloc(size_t count) {
+        if (count <= N) {
             fPtr = fTStorage;
-        else
+        } else {
             fPtr = (T*)sk_malloc_flags(count * sizeof(T), SK_MALLOC_THROW | SK_MALLOC_TEMP);
+        }
     }
-    ~SkAutoSTMalloc()
-    {
-        if (fPtr != fTStorage)
+
+    ~SkAutoSTMalloc() {
+        if (fPtr != fTStorage) {
             sk_free(fPtr);
+        }
     }
+
+    // Re-points fPtr for count T's: frees any heap block, then uses fTStorage if count <= N, else mallocs. Old contents are NOT preserved.
+    void realloc (size_t count) {
+        if (fPtr != fTStorage) {
+            sk_free(fPtr);
+        }
+        if (count <= N) {
+            fPtr = fTStorage;
+        } else {
+            fPtr = (T*)sk_malloc_flags(count * sizeof(T), SK_MALLOC_THROW | SK_MALLOC_TEMP);
+        }
+    }
+
     T* get() const { return fPtr; }
 
+    operator T*() {
+        return fPtr;
+    }
+
+    operator const T*() const {
+        return fPtr;
+    }
+
+    T& operator[](int index) {
+        return fPtr[index];
+    }
+
+    const T& operator[](int index) const {
+        return fPtr[index];
+    }
+
 private:
     T*          fPtr;
     union {
