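MPEG4Writer: Add AVIF in HEIF container

Treat image/avif tracks the same way as HEIC image tracks by grouping
both under a new Track::isHeif() check. AVIF items are written with
item type 'av01' and an 'av1C' property holding the codec specific
data. When an AVIF track is present under OUTPUT_FORMAT_HEIF, the ftyp
box uses 'avif' as the major brand and lists 'mif1', 'miaf' and 'avif'
as compatible brands.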

Bug: b/246835215
Test: treehugger, test in ag/20088992

Change-Id: I5407de348d8d9b8f421edfa1ce97ab04798d6100
diff --git a/media/libstagefright/MPEG4Writer.cpp b/media/libstagefright/MPEG4Writer.cpp
index a78fb30..c93d033 100644
--- a/media/libstagefright/MPEG4Writer.cpp
+++ b/media/libstagefright/MPEG4Writer.cpp
@@ -157,6 +157,8 @@
     bool isHevc() const { return mIsHevc; }
     bool isAv1() const { return mIsAv1; }
     bool isHeic() const { return mIsHeic; }
+    bool isAvif() const { return mIsAvif; }
+    bool isHeif() const { return mIsHeif; }
     bool isAudio() const { return mIsAudio; }
     bool isMPEG4() const { return mIsMPEG4; }
     bool usePrefix() const { return mIsAvc || mIsHevc || mIsHeic || mIsDovi; }
@@ -325,6 +327,8 @@
     bool mIsAudio;
     bool mIsVideo;
     bool mIsHeic;
+    bool mIsAvif;
+    bool mIsHeif;
     bool mIsMPEG4;
     bool mGotStartKeyFrame;
     bool mIsMalformed;
@@ -550,6 +554,7 @@
     mStreamableFile = false;
     mTimeScale = -1;
     mHasFileLevelMeta = false;
+    mIsAvif = false;
     mFileLevelMetaDataSize = 0;
     mPrimaryItemId = 0;
     mAssociationEntryCount = 0;
@@ -670,6 +675,8 @@
         return "mett";
     } else if (!strcasecmp(MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC, mime)) {
         return "heic";
+    } else if (!strcasecmp(MEDIA_MIMETYPE_IMAGE_AVIF, mime)) {
+        return "avif";
     } else {
         ALOGE("Track (%s) other than video/audio/metadata is not supported", mime);
     }
@@ -714,8 +721,9 @@
     Track *track = new Track(this, source, 1 + mTracks.size());
     mTracks.push_back(track);
 
-    mHasMoovBox |= !track->isHeic();
-    mHasFileLevelMeta |= track->isHeic();
+    mHasMoovBox |= !track->isHeif();
+    mHasFileLevelMeta |= track->isHeif();
+    mIsAvif |= track->isAvif();
 
     return OK;
 }
@@ -797,7 +805,7 @@
 
     for (List<Track *>::iterator it = mTracks.begin();
          it != mTracks.end(); ++it) {
-        if ((*it)->isHeic()) {
+        if ((*it)->isHeif()) {
             metaSize += (*it)->getMetaSizeIncrease(rotation, mTracks.size());
         }
     }
@@ -999,8 +1007,8 @@
         return err;
     }
 
-    ALOGV("muxer starting: mHasMoovBox %d, mHasFileLevelMeta %d",
-            mHasMoovBox, mHasFileLevelMeta);
+    ALOGV("muxer starting: mHasMoovBox %d, mHasFileLevelMeta %d, mIsAvif %d",
+            mHasMoovBox, mHasFileLevelMeta, mIsAvif);
 
     err = startWriterThread();
     if (err != OK) {
@@ -1316,7 +1324,7 @@
         }
 
         // skip image tracks
-        if ((*it)->isHeic()) continue;
+        if ((*it)->isHeif()) continue;
         nonImageTrackCount++;
 
         int64_t durationUs = (*it)->getDurationUs();
@@ -1494,7 +1502,7 @@
     int64_t minCttsOffsetTimeUs = kMaxCttsOffsetTimeUs;
     for (List<Track *>::iterator it = mTracks.begin();
         it != mTracks.end(); ++it) {
-        if (!(*it)->isHeic()) {
+        if (!(*it)->isHeif()) {
             minCttsOffsetTimeUs =
                 std::min(minCttsOffsetTimeUs, (*it)->getMinCttsOffsetTimeUs());
         }
@@ -1510,7 +1518,7 @@
 
     for (List<Track *>::iterator it = mTracks.begin();
         it != mTracks.end(); ++it) {
-        if (!(*it)->isHeic()) {
+        if (!(*it)->isHeif()) {
             (*it)->writeTrackHeader();
         }
     }
@@ -1530,17 +1538,27 @@
         writeFourcc("isom");
         writeFourcc("3gp4");
     } else {
-        // Only write "heic" as major brand if the client specified HEIF
-        // AND we indeed receive some image heic tracks.
+        // Only write "heic"/"avif" as major brand if the client specified
+        // OUTPUT_FORMAT_HEIF AND we indeed received some heic/avif image tracks.
         if (fileType == OUTPUT_FORMAT_HEIF && mHasFileLevelMeta) {
-            writeFourcc("heic");
+            if (mIsAvif) {
+                writeFourcc("avif");
+            } else {
+                writeFourcc("heic");
+            }
         } else {
             writeFourcc("mp42");
         }
         writeInt32(0);
         if (mHasFileLevelMeta) {
-            writeFourcc("mif1");
-            writeFourcc("heic");
+            if (mIsAvif) {
+                writeFourcc("mif1");
+                writeFourcc("miaf");
+                writeFourcc("avif");
+            } else {
+                writeFourcc("mif1");
+                writeFourcc("heic");
+            }
         }
         if (mHasMoovBox) {
             writeFourcc("isom");
@@ -2117,7 +2135,8 @@
 
     for (List<Track *>::iterator it = mTracks.begin();
          it != mTracks.end(); ++it) {
-        if (!(*it)->isHeic() && (*it)->getDurationUs() >= mMaxFileDurationLimitUs) {
+        if (!(*it)->isHeif() &&
+                (*it)->getDurationUs() >= mMaxFileDurationLimitUs) {
             return true;
         }
     }
@@ -2224,6 +2243,8 @@
     mIsAudio = !strncasecmp(mime, "audio/", 6);
     mIsVideo = !strncasecmp(mime, "video/", 6);
     mIsHeic = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
+    mIsAvif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF);
+    mIsHeif = mIsHeic || mIsAvif;
     mIsMPEG4 = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) ||
                !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC);
 
@@ -2235,7 +2256,7 @@
         }
     }
 
-    if (!mIsHeic) {
+    if (!mIsHeif) {
         setTimeScale();
     } else {
         CHECK(mMeta->findInt32(kKeyWidth, &mWidth) && (mWidth > 0));
@@ -2316,7 +2337,7 @@
 
 void MPEG4Writer::Track::updateTrackSizeEstimate() {
     mEstimatedTrackSizeBytes = mMdatSizeBytes;  // media data size
-    if (!isHeic() && !mOwner->isFileStreamable()) {
+    if (!isHeif() && !mOwner->isFileStreamable()) {
         mEstimatedTrackSizeBytes += trackMetaDataSize();
     }
 }
@@ -2399,7 +2420,7 @@
 
 bool MPEG4Writer::Track::isExifData(
         MediaBufferBase *buffer, uint32_t *tiffHdrOffset) const {
-    if (!mIsHeic) {
+    if (!mIsHeif) {
         return false;
     }
 
@@ -2428,12 +2449,12 @@
 }
 
 void MPEG4Writer::Track::addChunkOffset(off64_t offset) {
-    CHECK(!mIsHeic);
+    CHECK(!mIsHeif);
     mCo64TableEntries->add(hton64(offset));
 }
 
 void MPEG4Writer::Track::addItemOffsetAndSize(off64_t offset, size_t size, bool isExif) {
-    CHECK(mIsHeic);
+    CHECK(mIsHeif);
 
     if (offset > UINT32_MAX || size > UINT32_MAX) {
         ALOGE("offset or size is out of range: %lld, %lld",
@@ -2479,8 +2500,10 @@
 
     if (mProperties.empty()) {
         mProperties.push_back(mOwner->addProperty_l({
-            .type = FOURCC('h', 'v', 'c', 'C'),
-            .hvcc = ABuffer::CreateAsCopy(mCodecSpecificData, mCodecSpecificDataSize)
+            .type = static_cast<uint32_t>(mIsAvif ?
+                  FOURCC('a', 'v', '1', 'C') :
+                  FOURCC('h', 'v', 'c', 'C')),
+            .data = ABuffer::CreateAsCopy(mCodecSpecificData, mCodecSpecificDataSize)
         }));
 
         mProperties.push_back(mOwner->addProperty_l({
@@ -2500,7 +2523,7 @@
     mTileIndex++;
     if (hasGrid) {
         mDimgRefs.value.push_back(mOwner->addItem_l({
-            .itemType = "hvc1",
+            .itemType = mIsAvif ? "av01" : "hvc1",
             .itemId = mItemIdBase++,
             .isPrimary = false,
             .isHidden = true,
@@ -2536,7 +2559,7 @@
         }
     } else {
         mImageItemId = mOwner->addItem_l({
-            .itemType = "hvc1",
+            .itemType = mIsAvif ? "av01" : "hvc1",
             .itemId = mItemIdBase++,
             .isPrimary = (mIsPrimary != 0),
             .isHidden = false,
@@ -2553,7 +2576,7 @@
 // it affects the 'dimg' refs for tiled image, as we only have the refs after the
 // last tile sample is written.
 void MPEG4Writer::Track::flushItemRefs() {
-    CHECK(mIsHeic);
+    CHECK(mIsHeif);
 
     if (mImageItemId > 0) {
         mOwner->addRefs_l(mImageItemId, mDimgRefs);
@@ -2654,7 +2677,8 @@
     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
                !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
         mMeta->findData(kKeyHVCC, &type, &data, &size);
-    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
+    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1) ||
+               !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
         mMeta->findData(kKeyAV1C, &type, &data, &size);
     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
         getDolbyVisionProfile();
@@ -2766,7 +2790,7 @@
         size_t bytesWritten;
         off64_t offset = addSample_l(*it, usePrefix, tiffHdrOffset, &bytesWritten);
 
-        if (chunk->mTrack->isHeic()) {
+        if (chunk->mTrack->isHeif()) {
             chunk->mTrack->addItemOffsetAndSize(offset, bytesWritten, isExif);
         } else if (isFirstSample) {
             chunk->mTrack->addChunkOffset(offset);
@@ -2918,11 +2942,11 @@
     mStartTimeRealUs = startTimeUs;
 
     int32_t rotationDegrees;
-    if ((mIsVideo || mIsHeic) && params &&
+    if ((mIsVideo || mIsHeif) && params &&
             params->findInt32(kKeyRotation, &rotationDegrees)) {
         mRotation = rotationDegrees;
     }
-    if (mIsHeic) {
+    if (mIsHeif) {
         // Reserve the item ids, so that the item ids are ordered in the same
         // order that the image tracks are added.
         // If we leave the item ids to be assigned when the sample is written out,
@@ -3598,7 +3622,7 @@
         }
 
         // Per-frame metadata sample's size must be smaller than max allowed.
-        if (!mIsVideo && !mIsAudio && !mIsHeic &&
+        if (!mIsVideo && !mIsAudio && !mIsHeif &&
                 buffer->range_length() >= kMaxMetadataSize) {
             ALOGW("Buffer size is %zu. Maximum metadata buffer size is %lld for %s track",
                     buffer->range_length(), (long long)kMaxMetadataSize, trackName);
@@ -3722,7 +3746,7 @@
             mGotStartKeyFrame = true;
         }
 ////////////////////////////////////////////////////////////////////////////////
-        if (!mIsHeic) {
+        if (!mIsHeif) {
             if (mStszTableEntries->count() == 0) {
                 mFirstSampleTimeRealUs = systemTime() / 1000;
                 if (timestampUs < 0 && mFirstSampleStartOffsetUs == 0) {
@@ -3942,7 +3966,7 @@
             off64_t offset = mOwner->addSample_l(
                     copy, usePrefix, tiffHdrOffset, &bytesWritten);
 
-            if (mIsHeic) {
+            if (mIsHeif) {
                 addItemOffsetAndSize(offset, bytesWritten, isExif);
             } else {
                 if (mCo64TableEntries->count() == 0) {
@@ -3955,7 +3979,7 @@
         }
 
         mChunkSamples.push_back(copy);
-        if (mIsHeic) {
+        if (mIsHeif) {
             bufferChunk(0 /*timestampUs*/);
             ++nChunks;
         } else if (interleaveDurationUs == 0) {
@@ -3993,7 +4017,7 @@
 
     // Add final entries only for non-empty tracks.
     if (mStszTableEntries->count() > 0) {
-        if (mIsHeic) {
+        if (mIsHeif) {
             if (!mChunkSamples.empty()) {
                 bufferChunk(0);
                 ++nChunks;
@@ -4066,7 +4090,7 @@
         mOwner->mStartMeta->findInt32(kKeyEmptyTrackMalFormed, &emptyTrackMalformed) &&
         emptyTrackMalformed) {
         // MediaRecorder(sets kKeyEmptyTrackMalFormed by default) report empty tracks as malformed.
-        if (!mIsHeic && mStszTableEntries->count() == 0) {  // no samples written
+        if (!mIsHeif && mStszTableEntries->count() == 0) {  // no samples written
             ALOGE("The number of recorded samples is 0");
             mIsMalformed = true;
             return true;
@@ -4229,7 +4253,7 @@
 
 int32_t MPEG4Writer::Track::getMetaSizeIncrease(
         int32_t angle, int32_t trackCount) const {
-    CHECK(mIsHeic);
+    CHECK(mIsHeif);
 
     int32_t grid = (mTileWidth > 0);
     int32_t rotate = (angle > 0);
@@ -4281,7 +4305,8 @@
         !strcasecmp(MEDIA_MIMETYPE_VIDEO_HEVC, mime) ||
         !strcasecmp(MEDIA_MIMETYPE_VIDEO_AV1, mime) ||
         !strcasecmp(MEDIA_MIMETYPE_VIDEO_DOLBY_VISION, mime) ||
-        !strcasecmp(MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC, mime)) {
+        !strcasecmp(MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC, mime) ||
+        !strcasecmp(MEDIA_MIMETYPE_IMAGE_AVIF, mime)) {
         if (!mCodecSpecificData ||
             mCodecSpecificDataSize <= 0) {
             ALOGE("Missing codec specific data");
@@ -4300,7 +4325,7 @@
 const char *MPEG4Writer::Track::getTrackType() const {
     return mIsAudio ? "Audio" :
            mIsVideo ? "Video" :
-           mIsHeic  ? "Image" :
+           mIsHeif  ? "Image" :
                       "Metadata";
 }
 
@@ -5413,7 +5438,7 @@
             case FOURCC('h', 'v', 'c', 'C'):
             {
                 beginBox("hvcC");
-                sp<ABuffer> hvcc = mProperties[propIndex].hvcc;
+                sp<ABuffer> hvcc = mProperties[propIndex].data;
                 // Patch avcc's lengthSize field to match the number
                 // of bytes we use to indicate the size of a nal unit.
                 uint8_t *ptr = (uint8_t *)hvcc->data();
@@ -5422,6 +5447,14 @@
                 endBox();
                 break;
             }
+            case FOURCC('a', 'v', '1', 'C'):
+            {
+                beginBox("av1C");
+                sp<ABuffer> av1c = mProperties[propIndex].data;
+                write(av1c->data(), av1c->size());
+                endBox();
+                break;
+            }
             case FOURCC('i', 's', 'p', 'e'):
             {
                 beginBox("ispe");
@@ -5525,7 +5558,7 @@
 
     for (List<Track *>::iterator it = mTracks.begin();
         it != mTracks.end(); ++it) {
-        if ((*it)->isHeic()) {
+        if ((*it)->isHeif()) {
             (*it)->flushItemRefs();
         }
     }
diff --git a/media/libstagefright/MediaMuxer.cpp b/media/libstagefright/MediaMuxer.cpp
index a946f71..80f705b 100644
--- a/media/libstagefright/MediaMuxer.cpp
+++ b/media/libstagefright/MediaMuxer.cpp
@@ -138,7 +138,7 @@
         return INVALID_OPERATION;
     }
     if (!isMp4Format(mFormat)) {
-        ALOGE("setLocation() is only supported for .mp4, .3gp or .heic output.");
+        ALOGE("setLocation() is only supported for .mp4, .3gp, .heic or .avif output.");
         return INVALID_OPERATION;
     }
 
diff --git a/media/libstagefright/include/media/stagefright/MPEG4Writer.h b/media/libstagefright/include/media/stagefright/MPEG4Writer.h
index 7c3eca6..cf76606 100644
--- a/media/libstagefright/include/media/stagefright/MPEG4Writer.h
+++ b/media/libstagefright/include/media/stagefright/MPEG4Writer.h
@@ -196,7 +196,9 @@
     typedef key_value_pair_t< const char *, Vector<uint16_t> > ItemRefs;
     typedef struct _ItemInfo {
         bool isGrid() const { return !strcmp("grid", itemType); }
-        bool isImage() const { return !strcmp("hvc1", itemType) || isGrid(); }
+        bool isImage() const {
+            return !strcmp("hvc1", itemType) || !strcmp("av01", itemType) || isGrid();
+        }
         const char *itemType;
         uint16_t itemId;
         bool isPrimary;
@@ -224,10 +226,11 @@
         int32_t width;
         int32_t height;
         int32_t rotation;
-        sp<ABuffer> hvcc;
+        sp<ABuffer> data;
     } ItemProperty;
 
     bool mHasFileLevelMeta;
+    bool mIsAvif; // used to differentiate HEIC and AVIF under the same OUTPUT_FORMAT_HEIF
     uint64_t mFileLevelMetaDataSize;
     bool mHasMoovBox;
     uint32_t mPrimaryItemId;