Merge "libqdutils: Add generic metadata structure for VFM"
diff --git a/libgralloc/alloc_controller.cpp b/libgralloc/alloc_controller.cpp
index 06ce156..33c4f32 100644
--- a/libgralloc/alloc_controller.cpp
+++ b/libgralloc/alloc_controller.cpp
@@ -86,8 +86,13 @@
 //-------------- AdrenoMemInfo-----------------------//
 AdrenoMemInfo::AdrenoMemInfo()
 {
+    LINK_adreno_compute_aligned_width_and_height = NULL;
+    LINK_adreno_compute_padding = NULL;
+
     libadreno_utils = ::dlopen("libadreno_utils.so", RTLD_NOW);
     if (libadreno_utils) {
+        *(void **)&LINK_adreno_compute_aligned_width_and_height =
+            ::dlsym(libadreno_utils, "compute_aligned_width_and_height");
         *(void **)&LINK_adreno_compute_padding = ::dlsym(libadreno_utils,
                                            "compute_surface_padding");
     }
@@ -100,9 +105,11 @@
     }
 }
 
-int AdrenoMemInfo::getStride(int width, int format)
+void AdrenoMemInfo::getAlignedWidthAndHeight(int width, int height, int format,
+                              int& aligned_w, int& aligned_h)
 {
-    int stride = ALIGN(width, 32);
+    aligned_w = ALIGN(width, 32);
+    aligned_h = ALIGN(height, 32);
     // Currently surface padding is only computed for RGB* surfaces.
     if (format <= HAL_PIXEL_FORMAT_sRGB_X_8888) {
         // Don't add any additional padding if debug.gralloc.map_fb_memory
@@ -111,7 +118,7 @@
         if((property_get("debug.gralloc.map_fb_memory", property, NULL) > 0) &&
            (!strncmp(property, "1", PROPERTY_VALUE_MAX ) ||
            (!strncasecmp(property,"true", PROPERTY_VALUE_MAX )))) {
-              return stride;
+              return;
         }
 
         int bpp = 4;
@@ -125,25 +132,39 @@
                 break;
             default: break;
         }
-        if ((libadreno_utils) && (LINK_adreno_compute_padding)) {
-            int surface_tile_height = 1;   // Linear surface
+        if (libadreno_utils) {
             int raster_mode         = 0;   // Adreno unknown raster mode.
             int padding_threshold   = 512; // Threshold for padding surfaces.
-            // the function below expects the width to be a multiple of
-            // 32 pixels, hence we pass stride instead of width.
-            stride = LINK_adreno_compute_padding(stride, bpp,
-                                      surface_tile_height, raster_mode,
-                                      padding_threshold);
+            // Prefer the new API: it computes aligned width and aligned height
+            // based on linear or macro tile mode selected.
+            if(LINK_adreno_compute_aligned_width_and_height) {
+               int tile_mode = 0;   // Linear surface
+               LINK_adreno_compute_aligned_width_and_height(width,
+                                     height, bpp, tile_mode,
+                                     raster_mode, padding_threshold,
+                                     &aligned_w, &aligned_h);
+            } else if(LINK_adreno_compute_padding) {
+                // Old API expects a 32-pixel-aligned width: pass aligned_w.
+                int surface_tile_height = 1;   // Linear surface
+                aligned_w = LINK_adreno_compute_padding(aligned_w, bpp,
+                                     surface_tile_height, raster_mode,
+                                     padding_threshold);
+                ALOGW("%s: Warning!! Old GFX API is used to calculate stride",
+                                                            __FUNCTION__);
+            } else {
+                ALOGW("%s: Warning!! Symbols compute_surface_padding and " \
+                    "compute_aligned_width_and_height not found", __FUNCTION__);
+            }
         }
     } else {
         switch (format)
         {
             case HAL_PIXEL_FORMAT_YCrCb_420_SP_ADRENO:
             case HAL_PIXEL_FORMAT_RAW_SENSOR:
-                stride = ALIGN(width, 32);
+                aligned_w = ALIGN(width, 32);
                 break;
             case HAL_PIXEL_FORMAT_YCbCr_420_SP_TILED:
-                stride = ALIGN(width, 128);
+                aligned_w = ALIGN(width, 128);
                 break;
             case HAL_PIXEL_FORMAT_YCbCr_420_SP:
             case HAL_PIXEL_FORMAT_YCrCb_420_SP:
@@ -152,22 +173,21 @@
             case HAL_PIXEL_FORMAT_YCrCb_422_SP:
             case HAL_PIXEL_FORMAT_YCbCr_422_I:
             case HAL_PIXEL_FORMAT_YCrCb_422_I:
-                stride = ALIGN(width, 16);
+                aligned_w = ALIGN(width, 16);
                 break;
             case HAL_PIXEL_FORMAT_YCbCr_420_SP_VENUS:
             case HAL_PIXEL_FORMAT_NV12_ENCODEABLE:
-                stride = VENUS_Y_STRIDE(COLOR_FMT_NV12, width);
+                aligned_w = VENUS_Y_STRIDE(COLOR_FMT_NV12, width);
                 break;
             case HAL_PIXEL_FORMAT_BLOB:
-                stride = width;
+                aligned_w = width;
                 break;
             case HAL_PIXEL_FORMAT_NV21_ZSL:
-                stride = ALIGN(width, 64);
+                aligned_w = ALIGN(width, 64);
                 break;
             default: break;
         }
     }
-    return stride;
 }
 
 //-------------- IAllocController-----------------------//
@@ -274,8 +294,11 @@
 {
     size_t size;
 
-    alignedw = AdrenoMemInfo::getInstance().getStride(width, format);
-    alignedh = ALIGN(height, 32);
+    AdrenoMemInfo::getInstance().getAlignedWidthAndHeight(width,
+                                                          height,
+                                                          format,
+                                                          alignedw,
+                                                          alignedh);
     switch (format) {
         case HAL_PIXEL_FORMAT_RGBA_8888:
         case HAL_PIXEL_FORMAT_RGBX_8888:
diff --git a/libgralloc/gr.h b/libgralloc/gr.h
index 5343c35..1949f45 100644
--- a/libgralloc/gr.h
+++ b/libgralloc/gr.h
@@ -89,12 +89,13 @@
     ~AdrenoMemInfo();
 
     /*
-     * Function to compute the adreno stride based on the width and format.
+     * Function to compute the adreno aligned width and aligned height
+     * based on the width, height and format.
      *
-     * @return stride.
+     * @return nothing; results are written to alignedw and alignedh.
      */
-    int getStride(int width, int format);
-
+    void getAlignedWidthAndHeight(int width, int height, int format,
+                                  int& alignedw, int &alignedh);
     private:
         // Pointer to the padding library.
         void *libadreno_utils;
@@ -104,5 +105,15 @@
                                                 int surface_tile_height,
                                                 int screen_tile_height,
                                                 int padding_threshold);
+        // link to compute_aligned_width_and_height in the padding library.
+        void (*LINK_adreno_compute_aligned_width_and_height) (int width,
+                                                int height,
+                                                int bpp,
+                                                int tile_mode,
+                                                int raster_mode,
+                                                int padding_threshold,
+                                                int *aligned_w,
+                                                int *aligned_h);
+
 };
 #endif /* GR_H_ */
diff --git a/libgralloc/mapper.cpp b/libgralloc/mapper.cpp
index ca0db00..a07bdc3 100644
--- a/libgralloc/mapper.cpp
+++ b/libgralloc/mapper.cpp
@@ -329,7 +329,10 @@
                 int width   = va_arg(args, int);
                 int format  = va_arg(args, int);
                 int *stride = va_arg(args, int *);
-                *stride = AdrenoMemInfo::getInstance().getStride(width, format);
+                int alignedw = 0, alignedh = 0;
+                AdrenoMemInfo::getInstance().getAlignedWidthAndHeight(width,
+                                     0, format, alignedw, alignedh);
+                *stride = alignedw;
                 res = 0;
             } break;
         case GRALLOC_MODULE_PERFORM_GET_CUSTOM_STRIDE_FROM_HANDLE:
diff --git a/libhwcomposer/hwc_copybit.cpp b/libhwcomposer/hwc_copybit.cpp
index 2104bff..bb1b032 100644
--- a/libhwcomposer/hwc_copybit.cpp
+++ b/libhwcomposer/hwc_copybit.cpp
@@ -170,11 +170,32 @@
     // Following are MDP3 limitations for which we
     // need to fallback to GPU composition:
     // 1. Plane alpha is not supported by MDP3.
+    // 2. Scale factors outside [MIN_SCALE_FACTOR, MAX_SCALE_FACTOR].
     if (qdutils::MDPVersion::getInstance().getMDPVersion() < 400) {
         for (int i = ctx->listStats[dpy].numAppLayers-1; i >= 0 ; i--) {
+            int dst_h, dst_w, src_h, src_w;
+            float dx, dy;
             hwc_layer_1_t *layer = (hwc_layer_1_t *) &list->hwLayers[i];
             if (layer->planeAlpha != 0xFF)
                 return true;
+
+            if (layer->transform & HAL_TRANSFORM_ROT_90) {
+                src_h = layer->sourceCrop.right - layer->sourceCrop.left;
+                src_w = layer->sourceCrop.bottom - layer->sourceCrop.top;
+            } else {
+                src_h = layer->sourceCrop.bottom - layer->sourceCrop.top;
+                src_w = layer->sourceCrop.right - layer->sourceCrop.left;
+            }
+            dst_h = layer->displayFrame.bottom - layer->displayFrame.top;
+            dst_w = layer->displayFrame.right - layer->displayFrame.left;
+
+            dx = (float)dst_w/src_w;
+            dy = (float)dst_h/src_h;
+            if (dx > MAX_SCALE_FACTOR || dx < MIN_SCALE_FACTOR)
+                return false;
+
+            if (dy > MAX_SCALE_FACTOR || dy < MIN_SCALE_FACTOR)
+                return false;
         }
     }
 
diff --git a/libhwcomposer/hwc_copybit.h b/libhwcomposer/hwc_copybit.h
index 8278ff3..fd5c939 100644
--- a/libhwcomposer/hwc_copybit.h
+++ b/libhwcomposer/hwc_copybit.h
@@ -22,6 +22,11 @@
 #include "hwc_utils.h"
 
 #define NUM_RENDER_BUFFERS 3
+//These scaling factors are specific to MDP3. Normally the scaling factor
+//is limited to 4, but copybit creates a temp buffer and runs the scaling
+//twice, which extends the usable range to 16x (and 1/16x).
+#define MAX_SCALE_FACTOR 16
+#define MIN_SCALE_FACTOR 0.0625
 
 namespace qhwc {
 
diff --git a/libhwcomposer/hwc_mdpcomp.cpp b/libhwcomposer/hwc_mdpcomp.cpp
index ab11bc4..2a1b123 100644
--- a/libhwcomposer/hwc_mdpcomp.cpp
+++ b/libhwcomposer/hwc_mdpcomp.cpp
@@ -44,7 +44,7 @@
 bool MDPComp::sEnablePartialFrameUpdate = false;
 int MDPComp::sMaxPipesPerMixer = MAX_PIPES_PER_MIXER;
 float MDPComp::sMaxBw = 2.3f;
-uint32_t MDPComp::sCompBytesClaimed = 0;
+double MDPComp::sBwClaimed = 0.0;
 
 MDPComp* MDPComp::getObject(hwc_context_t *ctx, const int& dpy) {
     if(isDisplaySplit(ctx, dpy)) {
@@ -260,7 +260,8 @@
     memcpy(&drop, &curFrame.drop, sizeof(drop));
 }
 
-bool MDPComp::LayerCache::isSameFrame(const FrameInfo& curFrame) {
+bool MDPComp::LayerCache::isSameFrame(const FrameInfo& curFrame,
+                                      hwc_display_contents_1_t* list) {
     if(layerCount != curFrame.layerCount)
         return false;
     for(int i = 0; i < curFrame.layerCount; i++) {
@@ -268,6 +269,10 @@
                 (curFrame.drop[i] != drop[i])) {
             return false;
         }
+        if(curFrame.isFBComposed[i] &&
+           (hnd[i] != list->hwLayers[i].handle)){
+            return false;
+        }
     }
     return true;
 }
@@ -301,12 +306,6 @@
 
     hwc_rect_t crop = integerizeSourceCrop(layer->sourceCropf);
     hwc_rect_t dst = layer->displayFrame;
-
-    if(dst.left < 0 || dst.top < 0 || dst.right > hw_w || dst.bottom > hw_h) {
-       hwc_rect_t scissor = {0, 0, hw_w, hw_h };
-       qhwc::calculate_crop_rects(crop, dst, scissor, layer->transform);
-    }
-
     int crop_w = crop.right - crop.left;
     int crop_h = crop.bottom - crop.top;
     int dst_w = dst.right - dst.left;
@@ -429,7 +428,6 @@
         hwc_rect_t dstRect = layer->displayFrame;
         hwc_rect_t srcRect = integerizeSourceCrop(layer->sourceCropf);
         int transform = layer->transform;
-        trimLayer(ctx, mDpy, transform, srcRect, dstRect);
 
         hwc_rect_t res  = getIntersection(visibleRect, dstRect);
 
@@ -489,7 +487,6 @@
             int transform = list->hwLayers[index].transform;
 
             /* Intersect against display boundaries */
-            trimLayer(ctx, mDpy, transform, srcRect, dstRect);
             roi = getUnion(roi, dstRect);
         }
     }
@@ -591,18 +588,8 @@
     mCurrentFrame.mdpCount = mCurrentFrame.layerCount - mCurrentFrame.fbCount -
         mCurrentFrame.dropCount;
 
-    if(mCurrentFrame.mdpCount > sMaxPipesPerMixer) {
-        ALOGD_IF(isDebug(), "%s: Exceeds MAX_PIPES_PER_MIXER",__FUNCTION__);
-        return false;
-    }
-
-    if(!arePipesAvailable(ctx, list)) {
-        return false;
-    }
-
-    uint32_t size = calcMDPBytesRead(ctx, list);
-    if(!bandwidthCheck(ctx, size)) {
-        ALOGD_IF(isDebug(), "%s: Exceeds bandwidth",__FUNCTION__);
+    if(!resourceCheck(ctx, list)) {
+        ALOGD_IF(isDebug(), "%s: resource check failed", __FUNCTION__);
         return false;
     }
 
@@ -618,7 +605,8 @@
 
     bool ret = false;
     if(isLoadBasedCompDoable(ctx, list)) {
-        ret = loadBasedComp(ctx, list);
+        ret = loadBasedCompPreferGPU(ctx, list) ||
+                loadBasedCompPreferMDP(ctx, list);
     }
 
     if(!ret) {
@@ -662,32 +650,24 @@
         return false;
     }
 
-    if(mdpCount > (sMaxPipesPerMixer - 1)) { // -1 since FB is used
-        ALOGD_IF(isDebug(), "%s: Exceeds MAX_PIPES_PER_MIXER",__FUNCTION__);
-        return false;
-    }
-
-    if(!arePipesAvailable(ctx, list)) {
-        return false;
-    }
-
-    uint32_t size = calcMDPBytesRead(ctx, list);
-    if(!bandwidthCheck(ctx, size)) {
-        ALOGD_IF(isDebug(), "%s: Exceeds bandwidth",__FUNCTION__);
+    if(!resourceCheck(ctx, list)) {
+        ALOGD_IF(isDebug(), "%s: resource check failed", __FUNCTION__);
         return false;
     }
 
     return true;
 }
 
-bool MDPComp::loadBasedComp(hwc_context_t *ctx,
+bool MDPComp::loadBasedCompPreferGPU(hwc_context_t *ctx,
         hwc_display_contents_1_t* list) {
     int numAppLayers = ctx->listStats[mDpy].numAppLayers;
     mCurrentFrame.reset(numAppLayers);
 
-    //TODO BatchSize could be optimized further based on available pipes, split
-    //displays etc.
-    const int batchSize = numAppLayers - (sMaxPipesPerMixer - 1);
+    int stagesForMDP = min(sMaxPipesPerMixer, ctx->mOverlay->availablePipes(
+            mDpy, Overlay::MIXER_DEFAULT));
+    //If MDP has X possible stages, it can take X layers.
+    const int batchSize = numAppLayers - (stagesForMDP - 1); //1 for FB
+
     if(batchSize <= 0) {
         ALOGD_IF(isDebug(), "%s: Not attempting", __FUNCTION__);
         return false;
@@ -733,7 +713,8 @@
     mCurrentFrame.fbCount = batchSize;
     mCurrentFrame.mdpCount = mCurrentFrame.layerCount - batchSize;
 
-    if(!arePipesAvailable(ctx, list)) {
+    if(!resourceCheck(ctx, list)) {
+        ALOGD_IF(isDebug(), "%s: resource check failed", __FUNCTION__);
         return false;
     }
 
@@ -742,6 +723,59 @@
     return true;
 }
 
+/* Load-based partial comp that prefers MDP: the bottom-most layers go to MDP
+ * and the top-most ones form the FB batch, sized from the bandwidth left.
+ * Returns false if not attempted or if the resulting split is not feasible. */
+bool MDPComp::loadBasedCompPreferMDP(hwc_context_t *ctx,
+        hwc_display_contents_1_t* list) {
+    const int numAppLayers = ctx->listStats[mDpy].numAppLayers;
+    //Start from a clean frame, as loadBasedCompPreferGPU does
+    mCurrentFrame.reset(numAppLayers);
+    //TODO get the ib from sysfs node.
+    //Full screen is from ib perspective, not actual full screen
+    const int bpp = 4;
+    const double panelRefRate = 1000000000.0 / ctx->dpyAttr[mDpy].vsync_period;
+    const double bwLeft = sMaxBw - sBwClaimed;
+    const int fullScreenLayers = bwLeft * 1000000000 / (ctx->dpyAttr[mDpy].xres
+            * ctx->dpyAttr[mDpy].yres * bpp * panelRefRate);
+
+    const int fbBatchSize = numAppLayers - (fullScreenLayers - 1);
+    //If batch size is not at least 2, we aren't really preferring MDP, since
+    //only 1 layer going to GPU could actually translate into an entire FB
+    //needed to be fetched by MDP, thus needing more b/w rather than less.
+    if(fbBatchSize < 2 || fbBatchSize > numAppLayers) {
+        ALOGD_IF(isDebug(), "%s: Not attempting", __FUNCTION__);
+        return false;
+    }
+
+    //Top-most layers constitute FB batch
+    const int fbBatchStart = numAppLayers - fbBatchSize;
+    //Bottom-most layers constitute MDP batch
+    for(int i = 0; i < fbBatchStart; i++) {
+        hwc_layer_1_t* layer = &list->hwLayers[i];
+        if(not isSupportedForMDPComp(ctx, layer)) {
+            ALOGD_IF(isDebug(), "%s: MDP unsupported layer found at %d",
+                    __FUNCTION__, i);
+            return false;
+        }
+        mCurrentFrame.isFBComposed[i] = false;
+    }
+
+    mCurrentFrame.fbZ = fbBatchStart;
+    mCurrentFrame.fbCount = fbBatchSize;
+    mCurrentFrame.mdpCount = mCurrentFrame.layerCount - fbBatchSize;
+
+    if(!resourceCheck(ctx, list)) {
+        ALOGD_IF(isDebug(), "%s: resource check failed", __FUNCTION__);
+        return false;
+    }
+
+    ALOGD_IF(isDebug(), "%s: FB Z %d, num app layers %d, MDP Batch Size %d",
+                __FUNCTION__, mCurrentFrame.fbZ, numAppLayers,
+                numAppLayers - fbBatchSize);
+    return true;
+}
+
 bool MDPComp::isLoadBasedCompDoable(hwc_context_t *ctx,
         hwc_display_contents_1_t* list) {
     if(mDpy or isSecurePresent(ctx, mDpy) or
@@ -758,7 +792,6 @@
     mCurrentFrame.reset(numAppLayers);
     updateYUV(ctx, list, secureOnly);
     int mdpCount = mCurrentFrame.mdpCount;
-    int fbNeeded = (mCurrentFrame.fbCount != 0);
 
     if(!isYuvPresent(ctx, mDpy)) {
         return false;
@@ -773,18 +806,8 @@
     if(!mdpCount)
         return false;
 
-    if(mdpCount > (sMaxPipesPerMixer - fbNeeded)) {
-        ALOGD_IF(isDebug(), "%s: Exceeds MAX_PIPES_PER_MIXER",__FUNCTION__);
-        return false;
-    }
-
-    if(!arePipesAvailable(ctx, list)) {
-        return false;
-    }
-
-    uint32_t size = calcMDPBytesRead(ctx, list);
-    if(!bandwidthCheck(ctx, size)) {
-        ALOGD_IF(isDebug(), "%s: Exceeds bandwidth",__FUNCTION__);
+    if(!resourceCheck(ctx, list)) {
+        ALOGD_IF(isDebug(), "%s: resource check failed", __FUNCTION__);
         return false;
     }
 
@@ -1000,7 +1023,6 @@
             mCurrentFrame.isFBComposed[i] = true;
         } else {
             mCurrentFrame.isFBComposed[i] = false;
-            mCachedFrame.hnd[i] = list->hwLayers[i].handle;
         }
     }
 
@@ -1108,6 +1130,27 @@
     return true;
 }
 
+/* Shared resource gate for the current frame: checks the mixer stage count,
+ * pipe availability and the bandwidth needed by the MDP layers. */
+bool MDPComp::resourceCheck(hwc_context_t *ctx,
+        hwc_display_contents_1_t *list) {
+    //One stage is set aside whenever any layer is FB composed
+    const int stagesForFB = (mCurrentFrame.fbCount != 0) ? 1 : 0;
+    if(mCurrentFrame.mdpCount > sMaxPipesPerMixer - stagesForFB) {
+        ALOGD_IF(isDebug(), "%s: Exceeds MAX_PIPES_PER_MIXER",__FUNCTION__);
+        return false;
+    }
+    if(!arePipesAvailable(ctx, list)) {
+        return false;
+    }
+    const uint32_t bytesRead = calcMDPBytesRead(ctx, list);
+    if(!bandwidthCheck(ctx, bytesRead)) {
+        ALOGD_IF(isDebug(), "%s: Exceeds bandwidth",__FUNCTION__);
+        return false;
+    }
+    return true;
+}
+
 uint32_t MDPComp::calcMDPBytesRead(hwc_context_t *ctx,
         hwc_display_contents_1_t* list) {
     uint32_t size = 0;
@@ -1122,7 +1165,6 @@
             if (hnd) {
                 hwc_rect_t crop = integerizeSourceCrop(layer->sourceCropf);
                 hwc_rect_t dst = layer->displayFrame;
-                trimLayer(ctx, mDpy, layer->transform, crop, dst);
                 float bpp = ((float)hnd->size) / (hnd->width * hnd->height);
                 size += bpp * (crop.right - crop.left) *
                     (crop.bottom - crop.top) *
@@ -1145,10 +1187,10 @@
     //Will be added for other targets if we run into bandwidth issues and when
     //we have profiling data to set an upper limit.
     if(qdutils::MDPVersion::getInstance().is8x74v2()) {
-        const uint32_t ONE_GIG = 1024 * 1024 * 1024;
+        const uint32_t ONE_GIG = 1000 * 1000 * 1000;
         double panelRefRate =
                 1000000000.0 / ctx->dpyAttr[mDpy].vsync_period;
-        if((size + sCompBytesClaimed) > ((sMaxBw / panelRefRate) * ONE_GIG)) {
+        if((size * panelRefRate) > ((sMaxBw - sBwClaimed) * ONE_GIG)) {
             return false;
         }
     }
@@ -1208,7 +1250,7 @@
         } else { //Success
             //Any change in composition types needs an FB refresh
             mCurrentFrame.needsRedraw = false;
-            if(!mCachedFrame.isSameFrame(mCurrentFrame) ||
+            if(!mCachedFrame.isSameFrame(mCurrentFrame, list) ||
                      (list->flags & HWC_GEOMETRY_CHANGED) ||
                      isSkipPresent(ctx, mDpy)) {
                 mCurrentFrame.needsRedraw = true;
@@ -1263,7 +1305,9 @@
     }
 
 exit:
-    sCompBytesClaimed += calcMDPBytesRead(ctx, list);
+    //gbps (bytes / nanosec = gigabytes / sec)
+    sBwClaimed += calcMDPBytesRead(ctx, list) /
+            (double)ctx->dpyAttr[mDpy].vsync_period;
     return ret;
 }
 
diff --git a/libhwcomposer/hwc_mdpcomp.h b/libhwcomposer/hwc_mdpcomp.h
index 1d5d715..adf74bb 100644
--- a/libhwcomposer/hwc_mdpcomp.h
+++ b/libhwcomposer/hwc_mdpcomp.h
@@ -52,7 +52,7 @@
     /* Initialize MDP comp*/
     static bool init(hwc_context_t *ctx);
     static void resetIdleFallBack() { sIdleFallBack = false; }
-    static void reset() { sCompBytesClaimed = 0; };
+    static void reset() { sBwClaimed = 0.0; };
 
 protected:
     enum { MAX_SEC_LAYERS = 1 }; //TODO add property support
@@ -118,7 +118,8 @@
         void reset();
         void cacheAll(hwc_display_contents_1_t* list);
         void updateCounts(const FrameInfo&);
-        bool isSameFrame(const FrameInfo& curFrame);
+        bool isSameFrame(const FrameInfo& curFrame,
+                         hwc_display_contents_1_t* list);
     };
 
     /* allocates pipe from pipe book */
@@ -146,8 +147,16 @@
     bool partialMDPComp(hwc_context_t *ctx, hwc_display_contents_1_t* list);
     /* Partial MDP comp that uses caching to save power as primary goal */
     bool cacheBasedComp(hwc_context_t *ctx, hwc_display_contents_1_t* list);
-    /* Partial MDP comp that uses number of pixels to optimize perf goal */
-    bool loadBasedComp(hwc_context_t *ctx, hwc_display_contents_1_t* list);
+    /* Partial MDP comp that prefers GPU perf-wise. Since the GPU's
+     * perf is proportional to the pixels it processes, we use the number of
+     * pixels as a heuristic */
+    bool loadBasedCompPreferGPU(hwc_context_t *ctx,
+            hwc_display_contents_1_t* list);
+    /* Partial MDP comp that prefers MDP perf-wise. Since the MDP's perf is
+     * proportional to the bandwidth, overlaps it sees, we use that as a
+     * heuristic */
+    bool loadBasedCompPreferMDP(hwc_context_t *ctx,
+            hwc_display_contents_1_t* list);
     /* Checks if its worth doing load based partial comp */
     bool isLoadBasedCompDoable(hwc_context_t *ctx,
             hwc_display_contents_1_t* list);
@@ -192,6 +201,7 @@
     bool programYUV(hwc_context_t *ctx, hwc_display_contents_1_t* list);
     void reset(const int& numAppLayers, hwc_display_contents_1_t* list);
     bool isSupportedForMDPComp(hwc_context_t *ctx, hwc_layer_1_t* layer);
+    bool resourceCheck(hwc_context_t *ctx, hwc_display_contents_1_t *list);
 
     int mDpy;
     static bool sEnabled;
@@ -203,9 +213,9 @@
     static int sMaxPipesPerMixer;
     //Max bandwidth. Value is in GBPS. For ex: 2.3 means 2.3GBPS
     static float sMaxBw;
-    //Tracks composition bytes claimed. Represented as the total w*h*bpp
-    //going to MDP mixers
-    static uint32_t sCompBytesClaimed;
+    //Tracks composition bandwidth claimed. Represented as the total
+    //w*h*bpp*fps (gigabytes-per-second) going to MDP mixers.
+    static double sBwClaimed;
     static IdleInvalidator *idleInvalidator;
     struct FrameInfo mCurrentFrame;
     struct LayerCache mCachedFrame;
diff --git a/libhwcomposer/hwc_utils.cpp b/libhwcomposer/hwc_utils.cpp
index 9d1e30c..f32ad34 100644
--- a/libhwcomposer/hwc_utils.cpp
+++ b/libhwcomposer/hwc_utils.cpp
@@ -612,7 +612,6 @@
 
     hwc_rect_t displayFrame  = layer->displayFrame;
     hwc_rect_t sourceCrop = integerizeSourceCrop(layer->sourceCropf);
-    trimLayer(ctx, dpy, layer->transform, sourceCrop, displayFrame);
 
     dst_w = displayFrame.right - displayFrame.left;
     dst_h = displayFrame.bottom - displayFrame.top;
@@ -640,7 +639,6 @@
     hwc_rect_t sourceCrop = integerizeSourceCrop(layer->sourceCropf);
     hwc_rect_t displayFrame  = layer->displayFrame;
     private_handle_t *hnd = (private_handle_t *)layer->handle;
-    trimLayer(ctx, dpy, layer->transform, sourceCrop, displayFrame);
 
     cropL = sourceCrop;
     dstL = displayFrame;
@@ -701,8 +699,35 @@
     return false;
 }
 
+//Trims crop/dst against the display bounds when dst extends past them
+static void trimLayer(hwc_context_t *ctx, const int& dpy, const int& transform,
+        hwc_rect_t& crop, hwc_rect_t& dst) {
+    const int hw_w = ctx->dpyAttr[dpy].xres;
+    const int hw_h = ctx->dpyAttr[dpy].yres;
+    const bool inside = (dst.left >= 0) && (dst.top >= 0) &&
+            (dst.right <= hw_w) && (dst.bottom <= hw_h);
+    hwc_rect_t scissor = {0, 0, hw_w, hw_h };
+    if(!inside) qhwc::calculate_crop_rects(crop, dst, scissor, transform);
+}
+
+//Trims every layer except the last list entry against the display bounds
+//and stores the integerized, trimmed crop back into the layer's sourceCropf.
+static void trimList(hwc_context_t *ctx, hwc_display_contents_1_t *list,
+        const int& dpy) {
+    //"i + 1 < n" cannot wrap the way "n - 1" would if the list were empty
+    for(uint32_t i = 0; i + 1 < list->numHwLayers; i++) {
+        hwc_layer_1_t *layer = &list->hwLayers[i];
+        hwc_rect_t crop = integerizeSourceCrop(layer->sourceCropf);
+        trimLayer(ctx, dpy, layer->transform, crop, layer->displayFrame);
+        layer->sourceCropf.left = crop.left;
+        layer->sourceCropf.right = crop.right;
+        layer->sourceCropf.top = crop.top;
+        layer->sourceCropf.bottom = crop.bottom;
+    }
+}
+
 void setListStats(hwc_context_t *ctx,
-        const hwc_display_contents_1_t *list, int dpy) {
+        hwc_display_contents_1_t *list, int dpy) {
     const int prevYuvCount = ctx->listStats[dpy].yuvCount;
     memset(&ctx->listStats[dpy], 0, sizeof(ListStats));
     ctx->listStats[dpy].numAppLayers = list->numHwLayers - 1;
@@ -719,6 +744,7 @@
                       (int)ctx->dpyAttr[dpy].xres, (int)ctx->dpyAttr[dpy].yres);
     ctx->listStats[dpy].secureUI = false;
 
+    trimList(ctx, list, dpy);
     optimizeLayerRects(ctx, list, dpy);
 
     for (size_t i = 0; i < (size_t)ctx->listStats[dpy].numAppLayers; i++) {
@@ -1223,17 +1249,6 @@
     return ret;
 }
 
-void trimLayer(hwc_context_t *ctx, const int& dpy, const int& transform,
-        hwc_rect_t& crop, hwc_rect_t& dst) {
-    int hw_w = ctx->dpyAttr[dpy].xres;
-    int hw_h = ctx->dpyAttr[dpy].yres;
-    if(dst.left < 0 || dst.top < 0 ||
-            dst.right > hw_w || dst.bottom > hw_h) {
-        hwc_rect_t scissor = {0, 0, hw_w, hw_h };
-        qhwc::calculate_crop_rects(crop, dst, scissor, transform);
-    }
-}
-
 void setMdpFlags(hwc_layer_1_t *layer,
         ovutils::eMdpFlags &mdpFlags,
         int rotDownscale, int transform) {
@@ -1442,7 +1457,6 @@
     }
 
     setMdpFlags(layer, mdpFlags, downscale, transform);
-    trimLayer(ctx, dpy, transform, crop, dst);
 
     if(isYuvBuffer(hnd) && //if 90 component or downscale, use rot
             ((transform & HWC_TRANSFORM_ROT_90) || downscale)) {
@@ -1559,7 +1573,6 @@
         }
     }
 
-
     setMdpFlags(layer, mdpFlagsL, 0, transform);
 
     if(lDest != OV_INVALID && rDest != OV_INVALID) {
@@ -1567,8 +1580,6 @@
         setMdpFlags(mdpFlagsL, OV_MDSS_MDP_DUAL_PIPE);
     }
 
-    trimLayer(ctx, dpy, transform, crop, dst);
-
     //Will do something only if feature enabled and conditions suitable
     //hollow call otherwise
     if(ctx->mAD->prepare(ctx, crop, whf, hnd)) {
diff --git a/libhwcomposer/hwc_utils.h b/libhwcomposer/hwc_utils.h
index 3dc327f..66a02d6 100644
--- a/libhwcomposer/hwc_utils.h
+++ b/libhwcomposer/hwc_utils.h
@@ -187,7 +187,7 @@
 // -----------------------------------------------------------------------------
 // Utility functions - implemented in hwc_utils.cpp
 void dumpLayer(hwc_layer_1_t const* l);
-void setListStats(hwc_context_t *ctx, const hwc_display_contents_1_t *list,
+void setListStats(hwc_context_t *ctx, hwc_display_contents_1_t *list,
         int dpy);
 void initContext(hwc_context_t *ctx);
 void closeContext(hwc_context_t *ctx);
@@ -254,10 +254,6 @@
 int hwc_sync(hwc_context_t *ctx, hwc_display_contents_1_t* list, int dpy,
         int fd);
 
-//Trims a layer's source crop which is outside of screen boundary.
-void trimLayer(hwc_context_t *ctx, const int& dpy, const int& transform,
-        hwc_rect_t& crop, hwc_rect_t& dst);
-
 //Sets appropriate mdp flags for a layer.
 void setMdpFlags(hwc_layer_1_t *layer,
         ovutils::eMdpFlags &mdpFlags,
diff --git a/liboverlay/Android.mk b/liboverlay/Android.mk
index 560b57f..d8c2ab5 100644
--- a/liboverlay/Android.mk
+++ b/liboverlay/Android.mk
@@ -6,8 +6,12 @@
 LOCAL_MODULE_PATH             := $(TARGET_OUT_SHARED_LIBRARIES)
 LOCAL_MODULE_TAGS             := optional
 LOCAL_C_INCLUDES              := $(common_includes) $(kernel_includes)
-LOCAL_SHARED_LIBRARIES        := $(common_libs) libqdutils libmemalloc libsync
+LOCAL_SHARED_LIBRARIES        := $(common_libs) libqdutils libmemalloc \
+                                 libsync libdl
 LOCAL_CFLAGS                  := $(common_flags) -DLOG_TAG=\"qdoverlay\"
+ifeq ($(TARGET_USES_QSEED_SCALAR),true)
+    LOCAL_CFLAGS += -DUSES_QSEED_SCALAR
+endif
 LOCAL_ADDITIONAL_DEPENDENCIES := $(common_deps)
 LOCAL_SRC_FILES := \
       overlay.cpp \
diff --git a/liboverlay/overlay.cpp b/liboverlay/overlay.cpp
index b095e9e..9222af5 100644
--- a/liboverlay/overlay.cpp
+++ b/liboverlay/overlay.cpp
@@ -27,16 +27,23 @@
 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#include <dlfcn.h>
 #include "overlay.h"
 #include "pipes/overlayGenPipe.h"
 #include "mdp_version.h"
 #include "qdMetaData.h"
 
+#ifdef USES_QSEED_SCALAR
+#include <scale/scale.h>
+using namespace scale;
+#endif
+
 #define PIPE_DEBUG 0
 
 namespace overlay {
 using namespace utils;
 
+
 Overlay::Overlay() {
     PipeBook::NUM_PIPES = qdutils::MDPVersion::getInstance().getTotalPipes();
     for(int i = 0; i < PipeBook::NUM_PIPES; i++) {
@@ -44,12 +51,14 @@
     }
 
     mDumpStr[0] = '\0';
+    initScalar();
 }
 
 Overlay::~Overlay() {
     for(int i = 0; i < PipeBook::NUM_PIPES; i++) {
         mPipeBook[i].destroy();
     }
+    destroyScalar();
 }
 
 void Overlay::configBegin() {
@@ -60,6 +69,13 @@
     }
     sForceSetBitmap = 0;
     mDumpStr[0] = '\0';
+
+#ifdef USES_QSEED_SCALAR
+    Scale *scalar = getScalar();
+    if(scalar) {
+        scalar->configBegin();
+    }
+#endif
 }
 
 void Overlay::configDone() {
@@ -81,6 +97,13 @@
     }
     dump();
     PipeBook::save();
+
+#ifdef USES_QSEED_SCALAR
+    Scale *scalar = getScalar();
+    if(scalar) {
+        scalar->configDone();
+    }
+#endif
 }
 
 eDest Overlay::nextPipe(eMdpPipeType type, int dpy, int mixer) {
@@ -381,6 +404,42 @@
     }
 }
 
+/* Loads libscale.so if not yet loaded and obtains the scalar object through
+ * its "getInstance" symbol. Does nothing without USES_QSEED_SCALAR. */
+void Overlay::initScalar() {
+#ifdef USES_QSEED_SCALAR
+    if(sLibScaleHandle == NULL) {
+        sLibScaleHandle = dlopen("libscale.so", RTLD_NOW);
+    }
+    //Nothing more to do without the library or once the scalar exists
+    if(sLibScaleHandle == NULL || sScale != NULL) return;
+
+    Scale* (*getInstance)();
+    *(void **) &getInstance = dlsym(sLibScaleHandle, "getInstance");
+    if(getInstance) {
+        sScale = getInstance();
+    }
+#endif
+}
+
+/* Destroys the scalar object and unloads libscale.so. sScale is always
+ * cleared before dlclose() so getScalar() never returns a stale pointer. */
+void Overlay::destroyScalar() {
+#ifdef USES_QSEED_SCALAR
+    if(sLibScaleHandle) {
+        if(sScale) {
+            void (*destroyInstance)(Scale*);
+            *(void **) &destroyInstance = dlsym(sLibScaleHandle,
+                    "destroyInstance");
+            if(destroyInstance) destroyInstance(sScale);
+            sScale = NULL;
+        }
+        dlclose(sLibScaleHandle);
+        sLibScaleHandle = NULL;
+    }
+#endif
+}
+
 void Overlay::PipeBook::init() {
     mPipe = NULL;
     mDisplay = DPY_UNUSED;
@@ -406,5 +465,7 @@
 int Overlay::PipeBook::sAllocatedBitmap = 0;
 utils::eMdpPipeType Overlay::PipeBook::pipeTypeLUT[utils::OV_MAX] =
     {utils::OV_MDP_PIPE_ANY};
+void *Overlay::sLibScaleHandle = NULL;
+scale::Scale *Overlay::sScale = NULL;
 
 }; // namespace overlay
diff --git a/liboverlay/overlay.h b/liboverlay/overlay.h
index c16f6e6..854fa30 100644
--- a/liboverlay/overlay.h
+++ b/liboverlay/overlay.h
@@ -34,6 +34,9 @@
 #include "utils/threads.h"
 
 struct MetaData_t;
+namespace scale {
+class Scale;
+};
 
 namespace overlay {
 class GenericPipe;
@@ -111,6 +114,8 @@
     static int getFbForDpy(const int& dpy);
     static bool displayCommit(const int& fd, const utils::Dim& roi);
     static bool displayCommit(const int& fd);
+    /* Returns the scalar object */
+    static scale::Scale *getScalar();
 
 private:
     /* Ctor setup */
@@ -118,6 +123,10 @@
     /*Validate index range, abort if invalid */
     void validate(int index);
     void dump() const;
+    /* Creates a scalar object using libscale.so */
+    static void initScalar();
+    /* Destroys the scalar object using libscale.so */
+    static void destroyScalar();
 
     /* Just like a Facebook for pipes, but much less profile info */
     struct PipeBook {
@@ -177,6 +186,8 @@
     static int sDpyFbMap[DPY_MAX];
     static int sDMAMode;
     static int sForceSetBitmap;
+    static void *sLibScaleHandle;
+    static scale::Scale *sScale;
 };
 
 inline void Overlay::validate(int index) {
@@ -249,6 +260,10 @@
     sForceSetBitmap |= (1 << dpy);
 }
 
+inline scale::Scale *Overlay::getScalar() {
+    return sScale; // NULL unless initScalar() created a scalar
+}
+
 inline bool Overlay::PipeBook::valid() {
     return (mPipe != NULL);
 }
diff --git a/liboverlay/overlayCtrlData.h b/liboverlay/overlayCtrlData.h
index c3a7aa3..6746792 100644
--- a/liboverlay/overlayCtrlData.h
+++ b/liboverlay/overlayCtrlData.h
@@ -53,7 +53,7 @@
     /* dtor close */
     ~Ctrl();
     /* init fd etc*/
-    bool init(uint32_t fbnum);
+    bool init(uint32_t dpy);
     /* close underlying mdp */
     bool close();
 
@@ -100,7 +100,7 @@
     /* calls close */
     ~Data();
     /* init fd etc */
-    bool init(uint32_t fbnum);
+    bool init(uint32_t dpy);
     /* calls underlying mdp close */
     bool close();
     /* set overlay pipe id in the mdp struct */
@@ -145,10 +145,10 @@
     return true;
 }
 
-inline bool Ctrl::init(uint32_t fbnum) {
+inline bool Ctrl::init(uint32_t dpy) {
     // MDP/FD init
-    if(!mMdp.init(fbnum)) {
-        ALOGE("Ctrl failed to init fbnum=%d", fbnum);
+    if(!mMdp.init(dpy)) {
+        ALOGE("Ctrl failed to init dpy=%u", dpy);
         return false;
     }
     return true;
 }
@@ -239,8 +239,8 @@
 
 inline int Data::getPipeId() const { return mMdp.getPipeId(); }
 
-inline bool Data::init(uint32_t fbnum) {
-    if(!mMdp.init(fbnum)) {
+inline bool Data::init(uint32_t dpy) {
+    if(!mMdp.init(dpy)) {
         ALOGE("Data cannot init mdp");
         return false;
     }
diff --git a/liboverlay/overlayMdp.cpp b/liboverlay/overlayMdp.cpp
index 674e62d..b4058bd 100644
--- a/liboverlay/overlayMdp.cpp
+++ b/liboverlay/overlayMdp.cpp
@@ -20,6 +20,12 @@
 #include "overlayUtils.h"
 #include "overlayMdp.h"
 #include "mdp_version.h"
+#include <overlay.h>
+
+#ifdef USES_QSEED_SCALAR
+#include <scale/scale.h>
+using namespace scale;
+#endif
 
 #define HSIC_SETTINGS_DEBUG 0
 
@@ -39,13 +45,20 @@
 namespace ovutils = overlay::utils;
 namespace overlay {
 
-bool MdpCtrl::init(uint32_t fbnum) {
+bool MdpCtrl::init(uint32_t dpy) {
+    int fbnum = Overlay::getFbForDpy(dpy); // negative means no FB for dpy
+    if( fbnum < 0 ) {
+        ALOGE("%s: Invalid FB for the display: %u",__FUNCTION__, dpy);
+        return false;
+    }
+
     // FD init
     if(!utils::openDev(mFd, fbnum,
                 Res::fbPath, O_RDWR)){
         ALOGE("Ctrl failed to init fbnum=%d", fbnum);
         return false;
     }
+    mDpy = dpy; // kept for the scalar configSet/configAbort calls
     return true;
 }
 
@@ -57,6 +70,7 @@
     mOrientation = utils::OVERLAY_TRANSFORM_0;
     mDownscale = 0;
     mForceSet = false;
+    mDpy = 0;
 #ifdef USES_POST_PROCESSING
     mPPChanged = false;
     memset(&mParams, 0, sizeof(struct compute_params));
@@ -201,11 +215,22 @@
             mdp_wrapper::dump("== Bad OVInfo is: ", mOVInfo);
             mdp_wrapper::dump("== Last good known OVInfo is: ", mLkgo);
             this->restore();
+#ifdef USES_QSEED_SCALAR
+            if(Overlay::getScalar()) {
+                Overlay::getScalar()->configAbort(mDpy);
+            }
+#endif
             return false;
         }
         this->save();
     }
 
+#ifdef USES_QSEED_SCALAR
+    if(Overlay::getScalar()) {
+        Overlay::getScalar()->configSet(mOVInfo, mDpy, mFd.getFD());
+    }
+#endif
+
     return true;
 }
 
@@ -371,4 +396,21 @@
     return true;
 }
 
+
+//// MdpData ////////////
+bool MdpData::init(uint32_t dpy) {
+    // Look up the FB number for dpy; a negative result is treated as invalid.
+    const int fbnum = Overlay::getFbForDpy(dpy);
+    if( fbnum < 0 ) {
+        ALOGE("%s: Invalid FB for the display: %u",__FUNCTION__, dpy);
+        return false;
+    }
+    // FD init
+    if(!utils::openDev(mFd, fbnum, Res::fbPath, O_RDWR)){
+        ALOGE("MdpData failed to init fbnum=%d", fbnum);
+        return false;
+    }
+    return true;
+}
+
 } // overlay
diff --git a/liboverlay/overlayMdp.h b/liboverlay/overlayMdp.h
index 5bfec6b..fe4ad69 100644
--- a/liboverlay/overlayMdp.h
+++ b/liboverlay/overlayMdp.h
@@ -39,8 +39,8 @@
     explicit MdpCtrl();
     /* dtor close */
     ~MdpCtrl();
-    /* init underlying device using fbnum */
-    bool init(uint32_t fbnum);
+    /* init underlying device using fbnum for dpy */
+    bool init(uint32_t dpy);
     /* unset overlay, reset and close fd */
     bool close();
     /* reset and set ov id to -1 / MSMFB_NEW_REQUEST */
@@ -132,6 +132,7 @@
     OvFD          mFd;
     int mDownscale;
     bool mForceSet;
+    int mDpy;
 
 #ifdef USES_POST_PROCESSING
     /* PP Compute Params */
@@ -174,7 +175,7 @@
     /* dtor close*/
     ~MdpData();
     /* init FD */
-    bool init(uint32_t fbnum);
+    bool init(uint32_t dpy);
     /* memset0 the underlying mdp object */
     void reset();
     /* close fd, and reset */
@@ -395,15 +396,6 @@
 
 inline MdpData::~MdpData() { close(); }
 
-inline bool MdpData::init(uint32_t fbnum) {
-    // FD init
-    if(!utils::openDev(mFd, fbnum, Res::fbPath, O_RDWR)){
-        ALOGE("Ctrl failed to init fbnum=%d", fbnum);
-        return false;
-    }
-    return true;
-}
-
 inline void MdpData::reset() {
     overlay::utils::memset0(mOvData);
     mOvData.data.memory_id = -1;
diff --git a/liboverlay/pipes/overlayGenPipe.cpp b/liboverlay/pipes/overlayGenPipe.cpp
index 35f686c..06e8257 100644
--- a/liboverlay/pipes/overlayGenPipe.cpp
+++ b/liboverlay/pipes/overlayGenPipe.cpp
@@ -28,7 +28,6 @@
 */
 
 #include "overlayGenPipe.h"
-#include "overlay.h"
 #include "mdp_version.h"
 
 namespace overlay {
@@ -47,20 +46,12 @@
     ALOGE_IF(DEBUG_OVERLAY, "GenericPipe init");
     mRotDownscaleOpt = false;
 
-    int fbNum = Overlay::getFbForDpy(mDpy);
-    if( fbNum < 0 ) {
-        ALOGE("%s: Invalid FB for the display: %d",__FUNCTION__, mDpy);
-        return false;
-    }
-
-    ALOGD_IF(DEBUG_OVERLAY,"%s: mFbNum:%d",__FUNCTION__, fbNum);
-
-    if(!mCtrlData.ctrl.init(fbNum)) {
+    if(!mCtrlData.ctrl.init(mDpy)) {
         ALOGE("GenericPipe failed to init ctrl");
         return false;
     }
 
-    if(!mCtrlData.data.init(fbNum)) {
+    if(!mCtrlData.data.init(mDpy)) {
         ALOGE("GenericPipe failed to init data");
         return false;
     }
diff --git a/libqdutils/mdp_version.cpp b/libqdutils/mdp_version.cpp
index 259d078..1850801 100644
--- a/libqdutils/mdp_version.cpp
+++ b/libqdutils/mdp_version.cpp
@@ -241,11 +241,17 @@
 }
 
 bool MDPVersion::is8x74v2() {
-    if( mMdpRev >= MDSS_MDP_HW_REV_102 && mMdpRev < MDSS_MDP_HW_REV_103) {
+    if( mMdpRev >= MDSS_MDP_HW_REV_102 && mMdpRev < MDSS_MDP_HW_REV_200) {
         return true;
     }
     return false;
 }
 
+// True when mMdpRev lies in [MDSS_MDP_HW_REV_200, MDSS_MDP_HW_REV_206).
+bool MDPVersion::is8x92() {
+    const bool atLeastRev200 = (mMdpRev >= MDSS_MDP_HW_REV_200);
+    const bool belowRev206 = (mMdpRev < MDSS_MDP_HW_REV_206);
+    return atLeastRev200 && belowRev206;
+}
 }; //namespace qdutils
 
diff --git a/libqdutils/mdp_version.h b/libqdutils/mdp_version.h
index a14592b..b995582 100644
--- a/libqdutils/mdp_version.h
+++ b/libqdutils/mdp_version.h
@@ -57,7 +57,8 @@
     MDSS_MDP_HW_REV_100 = 0x10000000, //8974 v1
     MDSS_MDP_HW_REV_101 = 0x10010000, //8x26
     MDSS_MDP_HW_REV_102 = 0x10020000, //8974 v2
-    MDSS_MDP_HW_REV_103 = 0x10030000, //Future
+    MDSS_MDP_HW_REV_200 = 0x20000000, //8092
+    MDSS_MDP_HW_REV_206 = 0x20060000, //Future
 };
 
 enum {
@@ -103,6 +104,7 @@
     bool supportsBWC();
     bool is8x26();
     bool is8x74v2();
+    bool is8x92();
     int getLeftSplit() { return mSplit.left(); }
     int getRightSplit() { return mSplit.right(); }
 private: