hwc: Iterative load based comp
While performing load based mixed composition, iterate from the
maximum number of mixer stages down to a minimum of 2.
Remove the modes that preferred GPU or MDP. The new mode covers both
of those, plus the batch sizes they previously missed.
Change-Id: Ideb06a2332fe2f59968367d28df848be01f4aedc
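
For illustration, below is a minimal, self-contained sketch of the batch-size
iteration this change introduces. The function name candidateSplits and the
example numbers (6 app layers on a 4-stage mixer) are hypothetical and only
mirror the loop in loadBasedComp(); they are not part of the HWC code.

    // Sketch of the MDP/FB split iteration: start with the largest MDP batch
    // the mixer stages allow, keep at least 2 layers for the FB, and shrink
    // the MDP batch one layer at a time, yielding each candidate split.
    #include <algorithm>
    #include <cstdio>
    #include <utility>
    #include <vector>

    static std::vector<std::pair<int, int>> candidateSplits(int numLayers,
                                                            int stagesForMDP) {
        std::vector<std::pair<int, int>> splits;
        int mdpBatch = std::min(stagesForMDP - 1, numLayers); // 1 stage for FB
        int fbBatch = numLayers - mdpBatch;
        while (fbBatch < 2) {            // FB batch must hold at least 2 layers
            ++fbBatch;
            --mdpBatch;
        }
        for (; mdpBatch > 0; --mdpBatch, ++fbBatch)
            splits.emplace_back(mdpBatch, fbBatch);
        return splits;
    }

    int main() {
        // 6 app layers on a 4-stage mixer -> (3,3), (2,4), (1,5)
        for (const auto& s : candidateSplits(6, 4))
            std::printf("mdp %d fb %d\n", s.first, s.second);
        return 0;
    }

In the actual patch, each candidate split is handed to postHeuristicsHandling();
the first split that passes is used, and if none does loadBasedComp() returns
false so the caller can fall back to other composition modes.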
diff --git a/libhwcomposer/hwc_mdpcomp.cpp b/libhwcomposer/hwc_mdpcomp.cpp
index 469ffba..2356c9c 100644
--- a/libhwcomposer/hwc_mdpcomp.cpp
+++ b/libhwcomposer/hwc_mdpcomp.cpp
@@ -44,8 +44,6 @@
bool MDPComp::sEnableMixedMode = true;
bool MDPComp::sEnablePartialFrameUpdate = false;
int MDPComp::sMaxPipesPerMixer = MAX_PIPES_PER_MIXER;
-double MDPComp::sMaxBw = 0.0;
-double MDPComp::sBwClaimed = 0.0;
bool MDPComp::sEnable4k2kYUVSplit = false;
MDPComp* MDPComp::getObject(hwc_context_t *ctx, const int& dpy) {
@@ -663,13 +661,11 @@
bool ret = false;
if(list->flags & HWC_GEOMETRY_CHANGED) { //Try load based first
- ret = loadBasedCompPreferGPU(ctx, list) or
- loadBasedCompPreferMDP(ctx, list) or
+ ret = loadBasedComp(ctx, list) or
cacheBasedComp(ctx, list);
} else {
ret = cacheBasedComp(ctx, list) or
- loadBasedCompPreferGPU(ctx, list) or
- loadBasedCompPreferMDP(ctx, list);
+ loadBasedComp(ctx, list);
}
return ret;
@@ -725,196 +721,89 @@
return true;
}
-bool MDPComp::loadBasedCompPreferGPU(hwc_context_t *ctx,
- hwc_display_contents_1_t* list) {
- if(not isLoadBasedCompDoable(ctx)) {
- return false;
- }
-
- int numAppLayers = ctx->listStats[mDpy].numAppLayers;
- mCurrentFrame.reset(numAppLayers);
-
- int stagesForMDP = min(sMaxPipesPerMixer, ctx->mOverlay->availablePipes(
- mDpy, Overlay::MIXER_DEFAULT));
- //If MDP has X possible stages, it can take X layers.
- const int batchSize = (numAppLayers - mCurrentFrame.dropCount) -
- (stagesForMDP - 1); //1 for FB
-
- if(batchSize <= 0) {
- ALOGD_IF(isDebug(), "%s: Not attempting", __FUNCTION__);
- return false;
- }
-
- int minBatchStart = -1;
- int minBatchEnd = -1;
- size_t minBatchPixelCount = SIZE_MAX;
-
- /* Iterate through the layer list to find out a contigous batch of batchSize
- * non-dropped layers with loweest pixel count */
- for(int i = 0; i <= (numAppLayers - batchSize); i++) {
- if(mCurrentFrame.drop[i])
- continue;
-
- int batchCount = batchSize;
- uint32_t batchPixelCount = 0;
- int j = i;
- for(; j < numAppLayers && batchCount; j++){
- if(!mCurrentFrame.drop[j]) {
- hwc_layer_1_t* layer = &list->hwLayers[j];
- hwc_rect_t crop = integerizeSourceCrop(layer->sourceCropf);
- hwc_rect_t dst = layer->displayFrame;
-
- /* If we have a valid ROI, count pixels only for the MDP fetched
- * region of the buffer */
- if((ctx->listStats[mDpy].roi.w != ctx->dpyAttr[mDpy].xres) ||
- (ctx->listStats[mDpy].roi.h != ctx->dpyAttr[mDpy].yres)) {
- hwc_rect_t roi;
- roi.left = ctx->listStats[mDpy].roi.x;
- roi.top = ctx->listStats[mDpy].roi.y;
- roi.right = roi.left + ctx->listStats[mDpy].roi.w;
- roi.bottom = roi.top + ctx->listStats[mDpy].roi.h;
-
- /* valid ROI means no scaling layer is composed. So check
- * only intersection to find actual fetched pixels */
- crop = getIntersection(roi, dst);
- }
-
- batchPixelCount += (crop.right - crop.left) *
- (crop.bottom - crop.top);
- batchCount--;
- }
- }
-
- /* we dont want to program any batch of size lesser than batchSize */
- if(!batchCount && (batchPixelCount < minBatchPixelCount)) {
- minBatchPixelCount = batchPixelCount;
- minBatchStart = i;
- minBatchEnd = j-1;
- }
- }
-
- if(minBatchStart < 0) {
- ALOGD_IF(isDebug(), "%s: No batch found batchSize %d numAppLayers %d",
- __FUNCTION__, batchSize, numAppLayers);
- return false;
- }
-
- /* non-dropped layers falling ouside the selected batch will be marked for
- * MDP */
- for(int i = 0; i < numAppLayers; i++) {
- if((i < minBatchStart || i > minBatchEnd) && !mCurrentFrame.drop[i] ) {
- hwc_layer_1_t* layer = &list->hwLayers[i];
- if(not isSupportedForMDPComp(ctx, layer)) {
- ALOGD_IF(isDebug(), "%s: MDP unsupported layer found at %d",
- __FUNCTION__, i);
- reset(ctx);
- return false;
- }
- mCurrentFrame.isFBComposed[i] = false;
- }
- }
-
- mCurrentFrame.fbZ = minBatchStart;
- mCurrentFrame.fbCount = batchSize;
- mCurrentFrame.mdpCount = mCurrentFrame.layerCount - mCurrentFrame.fbCount -
- mCurrentFrame.dropCount;
-
- ALOGD_IF(isDebug(), "%s: fbZ %d batchSize %d fbStart: %d fbEnd: %d",
- __FUNCTION__, mCurrentFrame.fbZ, batchSize, minBatchStart,
- minBatchEnd);
-
- if(sEnable4k2kYUVSplit){
- adjustForSourceSplit(ctx, list);
- }
-
- if(!postHeuristicsHandling(ctx, list)) {
- ALOGD_IF(isDebug(), "post heuristic handling failed");
- reset(ctx);
- return false;
- }
-
- return true;
-}
-
-bool MDPComp::loadBasedCompPreferMDP(hwc_context_t *ctx,
+bool MDPComp::loadBasedComp(hwc_context_t *ctx,
hwc_display_contents_1_t* list) {
if(not isLoadBasedCompDoable(ctx)) {
return false;
}
const int numAppLayers = ctx->listStats[mDpy].numAppLayers;
- mCurrentFrame.reset(numAppLayers);
+ const int numNonDroppedLayers = numAppLayers - mCurrentFrame.dropCount;
+ const int stagesForMDP = min(sMaxPipesPerMixer,
+ ctx->mOverlay->availablePipes(mDpy, Overlay::MIXER_DEFAULT));
- //Full screen is from ib perspective, not actual full screen
- const int bpp = 4;
- double panelRefRate =
- 1000000000.0 / ctx->dpyAttr[mDpy].vsync_period;
+ int mdpBatchSize = stagesForMDP - 1; //1 stage for FB
+ int fbBatchSize = numNonDroppedLayers - mdpBatchSize;
+ int lastMDPSupportedIndex = numAppLayers;
+ int dropCount = 0;
- double bwLeft = sMaxBw - sBwClaimed;
-
- const int fullScreenLayers = bwLeft * 1000000000 / (ctx->dpyAttr[mDpy].xres
- * ctx->dpyAttr[mDpy].yres * bpp * panelRefRate);
-
- const int fbBatchSize = (numAppLayers - mCurrentFrame.dropCount)
- - (fullScreenLayers - 1);
-
- //If batch size is not at least 2, we aren't really preferring MDP, since
- //only 1 layer going to GPU could actually translate into an entire FB
- //needed to be fetched by MDP, thus needing more b/w rather than less.
- if(fbBatchSize < 2 || fbBatchSize > numAppLayers) {
- ALOGD_IF(isDebug(), "%s: Not attempting", __FUNCTION__);
- return false;
- }
-
- //Find top fbBatchSize non-dropped layers to get your batch
- int fbStart = -1, fbEnd = -1, batchCount = fbBatchSize;
- for(int i = numAppLayers - 1; i >= 0; i--) {
- if(mCurrentFrame.drop[i])
+ //Find the minimum MDP batch size
+ for(int i = 0; i < numAppLayers; i++) {
+ if(mCurrentFrame.drop[i]) {
+ dropCount++;
continue;
-
- if(fbEnd < 0)
- fbEnd = i;
-
- if(!(--batchCount)) {
- fbStart = i;
+ }
+ hwc_layer_1_t* layer = &list->hwLayers[i];
+ if(not isSupportedForMDPComp(ctx, layer)) {
+ lastMDPSupportedIndex = i;
+ mdpBatchSize = min(i - dropCount, stagesForMDP - 1);
+ fbBatchSize = numNonDroppedLayers - mdpBatchSize;
break;
}
}
- //Bottom layers constitute MDP batch
- for(int i = 0; i < fbStart; i++) {
- if((i < fbStart || i > fbEnd) && !mCurrentFrame.drop[i] ) {
- hwc_layer_1_t* layer = &list->hwLayers[i];
- if(not isSupportedForMDPComp(ctx, layer)) {
- ALOGD_IF(isDebug(), "%s: MDP unsupported layer found at %d",
- __FUNCTION__, i);
- reset(ctx);
- return false;
- }
- mCurrentFrame.isFBComposed[i] = false;
- }
+ ALOGD_IF(isDebug(), "%s:Before optimizing fbBatch, mdpbatch %d, fbbatch %d "
+ "dropped %d", __FUNCTION__, mdpBatchSize, fbBatchSize,
+ mCurrentFrame.dropCount);
+
+ //Start at a point where the fb batch should at least have 2 layers, for
+ //this mode to be justified.
+ while(fbBatchSize < 2) {
+ ++fbBatchSize;
+ --mdpBatchSize;
}
- mCurrentFrame.fbZ = fbStart;
- mCurrentFrame.fbCount = fbBatchSize;
- mCurrentFrame.mdpCount = mCurrentFrame.layerCount - mCurrentFrame.fbCount
- - mCurrentFrame.dropCount;
-
- ALOGD_IF(isDebug(), "%s: FB Z %d, app layers %d, non-dropped layers: %d, "
- "MDP Batch Size %d",__FUNCTION__, mCurrentFrame.fbZ, numAppLayers,
- numAppLayers - mCurrentFrame.dropCount, mCurrentFrame.mdpCount);
-
- if(sEnable4k2kYUVSplit){
- adjustForSourceSplit(ctx, list);
- }
-
- if(!postHeuristicsHandling(ctx, list)) {
- ALOGD_IF(isDebug(), "post heuristic handling failed");
- reset(ctx);
+ //If there are no layers for MDP, this mode doesn't make sense.
+ if(mdpBatchSize < 1) {
+ ALOGD_IF(isDebug(), "%s: No MDP layers after optimizing for fbBatch",
+ __FUNCTION__);
return false;
}
- return true;
+ mCurrentFrame.reset(numAppLayers);
+
+ //Try with successively smaller mdp batch sizes until we succeed or reach 1
+ while(mdpBatchSize > 0) {
+ //Mark layers for MDP comp
+ int mdpBatchLeft = mdpBatchSize;
+ for(int i = 0; i < lastMDPSupportedIndex and mdpBatchLeft; i++) {
+ if(mCurrentFrame.drop[i]) {
+ continue;
+ }
+ mCurrentFrame.isFBComposed[i] = false;
+ --mdpBatchLeft;
+ }
+
+ mCurrentFrame.fbZ = mdpBatchSize;
+ mCurrentFrame.fbCount = fbBatchSize;
+ mCurrentFrame.mdpCount = mdpBatchSize;
+
+ ALOGD_IF(isDebug(), "%s:Trying with: mdpbatch %d fbbatch %d dropped %d",
+ __FUNCTION__, mdpBatchSize, fbBatchSize,
+ mCurrentFrame.dropCount);
+
+ if(postHeuristicsHandling(ctx, list)) {
+ ALOGD_IF(isDebug(), "%s: Postheuristics handling succeeded",
+ __FUNCTION__);
+ return true;
+ }
+
+ reset(ctx);
+ --mdpBatchSize;
+ ++fbBatchSize;
+ }
+
+ return false;
}
bool MDPComp::isLoadBasedCompDoable(hwc_context_t *ctx) {
@@ -1322,45 +1211,6 @@
return true;
}
-double MDPComp::calcMDPBytesRead(hwc_context_t *ctx,
- hwc_display_contents_1_t* list) {
- double size = 0;
- const double GIG = 1000000000.0;
-
- //Skip for targets where no device tree value for bw is supplied
- if(sMaxBw <= 0.0) {
- return 0.0;
- }
-
- for (uint32_t i = 0; i < list->numHwLayers - 1; i++) {
- if(!mCurrentFrame.isFBComposed[i]) {
- hwc_layer_1_t* layer = &list->hwLayers[i];
- private_handle_t *hnd = (private_handle_t *)layer->handle;
- if (hnd) {
- hwc_rect_t crop = integerizeSourceCrop(layer->sourceCropf);
- hwc_rect_t dst = layer->displayFrame;
- float bpp = ((float)hnd->size) / (hnd->width * hnd->height);
- size += (bpp * (crop.right - crop.left) *
- (crop.bottom - crop.top) *
- ctx->dpyAttr[mDpy].yres / (dst.bottom - dst.top)) /
- GIG;
- }
- }
- }
-
- if(mCurrentFrame.fbCount) {
- hwc_layer_1_t* layer = &list->hwLayers[list->numHwLayers - 1];
- int tempw, temph;
- size += (getBufferSizeAndDimensions(
- layer->displayFrame.right - layer->displayFrame.left,
- layer->displayFrame.bottom - layer->displayFrame.top,
- HAL_PIXEL_FORMAT_RGBA_8888,
- tempw, temph)) / GIG;
- }
-
- return size;
-}
-
bool MDPComp::hwLimitationsCheck(hwc_context_t* ctx,
hwc_display_contents_1_t* list) {
@@ -1441,13 +1291,6 @@
if(isFrameDoable(ctx)) {
generateROI(ctx, list);
- //Convert from kbps to gbps
- sMaxBw = mdpVersion.getHighBw() / 1000000.0;
- if (ctx->mExtDisplay->isConnected() ||
- ctx->mMDP.panel != MIPI_CMD_PANEL) {
- sMaxBw = mdpVersion.getLowBw() / 1000000.0;
- }
-
if(tryFullFrame(ctx, list) || tryVideoOnly(ctx, list)) {
setMDPCompLayerFlags(ctx, list);
} else {
@@ -1472,9 +1315,6 @@
mCachedFrame.cacheAll(list);
mCachedFrame.updateCounts(mCurrentFrame);
- double panelRefRate =
- 1000000000.0 / ctx->dpyAttr[mDpy].vsync_period;
- sBwClaimed += calcMDPBytesRead(ctx, list) * panelRefRate;
return ret;
}
diff --git a/libhwcomposer/hwc_mdpcomp.h b/libhwcomposer/hwc_mdpcomp.h
index 4215881..4e4bea4 100644
--- a/libhwcomposer/hwc_mdpcomp.h
+++ b/libhwcomposer/hwc_mdpcomp.h
@@ -52,7 +52,7 @@
/* Initialize MDP comp*/
static bool init(hwc_context_t *ctx);
static void resetIdleFallBack() { sIdleFallBack = false; }
- static void reset() { sBwClaimed = 0.0; sHandleTimeout = false; };
+ static void reset() { sHandleTimeout = false; };
protected:
enum { MAX_SEC_LAYERS = 1 }; //TODO add property support
@@ -158,16 +158,11 @@
bool partialMDPComp(hwc_context_t *ctx, hwc_display_contents_1_t* list);
/* Partial MDP comp that uses caching to save power as primary goal */
bool cacheBasedComp(hwc_context_t *ctx, hwc_display_contents_1_t* list);
- /* Partial MDP comp that prefers GPU perf-wise. Since the GPU's
- * perf is proportional to the pixels it processes, we use the number of
- * pixels as a heuristic */
- bool loadBasedCompPreferGPU(hwc_context_t *ctx,
- hwc_display_contents_1_t* list);
- /* Partial MDP comp that prefers MDP perf-wise. Since the MDP's perf is
- * proportional to the bandwidth, overlaps it sees, we use that as a
- * heuristic */
- bool loadBasedCompPreferMDP(hwc_context_t *ctx,
- hwc_display_contents_1_t* list);
+ /* Partial MDP comp that balances the load between MDP and GPU such that
+ * MDP is loaded to the max of its capacity. The lower z-order layers are
+ * fed to MDP, while the upper ones go to the GPU, since the upper ones
+ * cover fewer pixels and thus reduce GPU processing time */
+ bool loadBasedComp(hwc_context_t *ctx, hwc_display_contents_1_t* list);
/* Checks if its worth doing load based partial comp */
bool isLoadBasedCompDoable(hwc_context_t *ctx);
/* checks for conditions where only video can be bypassed */
@@ -176,9 +171,6 @@
bool secureOnly);
/* checks for conditions where YUV layers cannot be bypassed */
bool isYUVDoable(hwc_context_t* ctx, hwc_layer_1_t* layer);
- /* calcs bytes read by MDP in gigs for a given frame */
- double calcMDPBytesRead(hwc_context_t *ctx,
- hwc_display_contents_1_t* list);
/* checks if MDP/MDSS can process current list w.r.to HW limitations
* All peculiar HW limitations should go here */
bool hwLimitationsCheck(hwc_context_t* ctx, hwc_display_contents_1_t* list);
@@ -234,11 +226,6 @@
/* Handles the timeout event from kernel, if the value is set to true */
static bool sHandleTimeout;
static int sMaxPipesPerMixer;
- //Max bandwidth. Value is in GBPS. For ex: 2.3 means 2.3GBPS
- static double sMaxBw;
- //Tracks composition bandwidth claimed. Represented as the total
- //w*h*bpp*fps (gigabytes-per-second) going to MDP mixers.
- static double sBwClaimed;
static IdleInvalidator *idleInvalidator;
struct FrameInfo mCurrentFrame;
struct LayerCache mCachedFrame;