1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19 
#include <ctype.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <algorithm>
#include <map>
#include <memory>
28 
29 #include <utils/Log.h>
30 
31 #include "AC4Parser.h"
32 #include "MPEG4Extractor.h"
33 #include "SampleTable.h"
34 #include "ItemTable.h"
35 
36 #include <media/esds/ESDS.h>
37 #include <ID3.h>
38 #include <media/stagefright/DataSourceBase.h>
39 #include <media/ExtractorUtils.h>
40 #include <media/stagefright/foundation/ABitReader.h>
41 #include <media/stagefright/foundation/ABuffer.h>
42 #include <media/stagefright/foundation/ADebug.h>
43 #include <media/stagefright/foundation/AMessage.h>
44 #include <media/stagefright/foundation/AudioPresentationInfo.h>
45 #include <media/stagefright/foundation/AUtils.h>
46 #include <media/stagefright/foundation/ByteUtils.h>
47 #include <media/stagefright/foundation/ColorUtils.h>
48 #include <media/stagefright/foundation/avc_utils.h>
49 #include <media/stagefright/foundation/hexdump.h>
50 #include <media/stagefright/foundation/OpusHeader.h>
51 #include <media/stagefright/MediaBufferGroup.h>
52 #include <media/stagefright/MediaDefs.h>
53 #include <media/stagefright/MetaDataBase.h>
54 #include <utils/String8.h>
55 
56 #include <byteswap.h>
57 
58 #ifndef UINT32_MAX
59 #define UINT32_MAX       (4294967295U)
60 #endif
61 
62 #define ALAC_SPECIFIC_INFO_SIZE (36)
63 
64 // TODO : Remove the defines once mainline media is built against NDK >= 31.
65 // The mp4 extractor is part of mainline and builds against NDK 29 as of
66 // writing. These keys are available only from NDK 31:
67 #define AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION \
68   "mpegh-profile-level-indication"
69 #define AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT \
70   "mpegh-reference-channel-layout"
71 #define AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS \
72   "mpegh-compatible-sets"
73 
74 namespace android {
75 
enum {
    // Upper bound on the size of a single atom. The spec allows some atoms
    // to be larger, but nothing beyond this size is accepted.
    kMaxAtomSize = 64 * 1024 * 1024,

    // Largest track header chunk that is returned.
    kMaxTrackHeaderSize = 32,
};
84 
85 class MPEG4Source : public MediaTrackHelper {
86 static const size_t  kMaxPcmFrameSize = 8192;
87 public:
88     // Caller retains ownership of both "dataSource" and "sampleTable".
89     MPEG4Source(AMediaFormat *format,
90                 DataSourceHelper *dataSource,
91                 int32_t timeScale,
92                 const sp<SampleTable> &sampleTable,
93                 Vector<SidxEntry> &sidx,
94                 const Trex *trex,
95                 off64_t firstMoofOffset,
96                 const sp<ItemTable> &itemTable,
97                 uint64_t elstShiftStartTicks,
98                 uint64_t elstInitialEmptyEditTicks);
99     virtual status_t init();
100 
101     virtual media_status_t start();
102     virtual media_status_t stop();
103 
104     virtual media_status_t getFormat(AMediaFormat *);
105 
106     virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
supportsNonBlockingRead()107     bool supportsNonBlockingRead() override { return true; }
108     virtual media_status_t fragmentedRead(
109             MediaBufferHelper **buffer, const ReadOptions *options = NULL);
110 
111     virtual ~MPEG4Source();
112 
113 private:
114     Mutex mLock;
115 
116     AMediaFormat *mFormat;
117     DataSourceHelper *mDataSource;
118     int32_t mTimescale;
119     sp<SampleTable> mSampleTable;
120     uint32_t mCurrentSampleIndex;
121     uint32_t mCurrentFragmentIndex;
122     Vector<SidxEntry> &mSegments;
123     const Trex *mTrex;
124     off64_t mFirstMoofOffset;
125     off64_t mCurrentMoofOffset;
126     off64_t mCurrentMoofSize;
127     off64_t mNextMoofOffset;
128     uint32_t mCurrentTime; // in media timescale ticks
129     int32_t mLastParsedTrackId;
130     int32_t mTrackId;
131 
132     int32_t mCryptoMode;    // passed in from extractor
133     int32_t mDefaultIVSize; // passed in from extractor
134     uint8_t mCryptoKey[16]; // passed in from extractor
135     int32_t mDefaultEncryptedByteBlock;
136     int32_t mDefaultSkipByteBlock;
137     uint32_t mCurrentAuxInfoType;
138     uint32_t mCurrentAuxInfoTypeParameter;
139     int32_t mCurrentDefaultSampleInfoSize;
140     uint32_t mCurrentSampleInfoCount;
141     uint32_t mCurrentSampleInfoAllocSize;
142     uint8_t* mCurrentSampleInfoSizes;
143     uint32_t mCurrentSampleInfoOffsetCount;
144     uint32_t mCurrentSampleInfoOffsetsAllocSize;
145     uint64_t* mCurrentSampleInfoOffsets;
146 
147     bool mIsAVC;
148     bool mIsHEVC;
149     bool mIsDolbyVision;
150     bool mIsAC4;
151     bool mIsMpegH = false;
152     bool mIsPcm;
153     size_t mNALLengthSize;
154 
155     bool mStarted;
156 
157     MediaBufferHelper *mBuffer;
158 
159     size_t mSrcBufferSize;
160     uint8_t *mSrcBuffer;
161 
162     bool mIsHeif;
163     bool mIsAvif;
164     bool mIsAudio;
165     bool mIsUsac = false;
166     sp<ItemTable> mItemTable;
167 
168     /* Shift start offset (move to earlier time) when media_time > 0,
169      * in media time scale.
170      */
171     uint64_t mElstShiftStartTicks;
172     /* Initial start offset (move to later time), empty edit list entry
173      * in media time scale.
174      */
175     uint64_t mElstInitialEmptyEditTicks;
176 
177     size_t parseNALSize(const uint8_t *data) const;
178     status_t parseChunk(off64_t *offset);
179     status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
180     status_t parseTrackFragmentRun(off64_t offset, off64_t size);
181     status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
182     status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
183     status_t parseClearEncryptedSizes(off64_t offset, bool isSampleEncryption,
184             uint32_t flags, off64_t size);
185     status_t parseSampleEncryption(off64_t offset, off64_t size);
186     // returns -1 for invalid layer ID
187     int32_t parseHEVCLayerId(const uint8_t *data, size_t size);
188     size_t getNALLengthSizeFromAvcCsd(const uint8_t *data, const size_t size) const;
189     size_t getNALLengthSizeFromHevcCsd(const uint8_t *data, const size_t size) const;
190 
191     struct TrackFragmentHeaderInfo {
192         enum Flags {
193             kBaseDataOffsetPresent         = 0x01,
194             kSampleDescriptionIndexPresent = 0x02,
195             kDefaultSampleDurationPresent  = 0x08,
196             kDefaultSampleSizePresent      = 0x10,
197             kDefaultSampleFlagsPresent     = 0x20,
198             kDurationIsEmpty               = 0x10000,
199         };
200 
201         uint32_t mTrackID;
202         uint32_t mFlags;
203         uint64_t mBaseDataOffset;
204         uint32_t mSampleDescriptionIndex;
205         uint32_t mDefaultSampleDuration;
206         uint32_t mDefaultSampleSize;
207         uint32_t mDefaultSampleFlags;
208 
209         uint64_t mDataOffset;
210     };
211     TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
212 
213     struct Sample {
214         off64_t offset;
215         size_t size;
216         uint32_t duration;
217         int32_t compositionOffset;
218         uint8_t iv[16];
219         Vector<uint32_t> clearsizes;
220         Vector<uint32_t> encryptedsizes;
221     };
222     Vector<Sample> mCurrentSamples;
223     std::map<off64_t, uint32_t> mDrmOffsets;
224 
225     MPEG4Source(const MPEG4Source &);
226     MPEG4Source &operator=(const MPEG4Source &);
227 };
228 
// This custom data source wraps an existing one. Requests that fall entirely
// within the cached range are served from the cache; all other requests are
// forwarded to the wrapped data source.
// It is used to cache the full sample table metadata of a single track. To
// cover all tracks, data sources may be wrapped multiple times, i.e. each
// CachedRangedDataSource caches the sample table metadata for one track.
235 
236 class CachedRangedDataSource : public DataSourceHelper {
237 public:
238     explicit CachedRangedDataSource(DataSourceHelper *source);
239     virtual ~CachedRangedDataSource();
240 
241     ssize_t readAt(off64_t offset, void *data, size_t size) override;
242     status_t getSize(off64_t *size) override;
243     uint32_t flags() override;
244 
245     status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);
246 
247 
248 private:
249     Mutex mLock;
250 
251     DataSourceHelper *mSource;
252     bool mOwnsDataSource;
253     off64_t mCachedOffset;
254     size_t mCachedSize;
255     uint8_t *mCache;
256 
257     void clearCache();
258 
259     CachedRangedDataSource(const CachedRangedDataSource &);
260     CachedRangedDataSource &operator=(const CachedRangedDataSource &);
261 };
262 
CachedRangedDataSource(DataSourceHelper * source)263 CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
264     : DataSourceHelper(source),
265       mSource(source),
266       mOwnsDataSource(false),
267       mCachedOffset(0),
268       mCachedSize(0),
269       mCache(NULL) {
270 }
271 
~CachedRangedDataSource()272 CachedRangedDataSource::~CachedRangedDataSource() {
273     clearCache();
274     if (mOwnsDataSource) {
275         delete mSource;
276     }
277 }
278 
clearCache()279 void CachedRangedDataSource::clearCache() {
280     if (mCache) {
281         free(mCache);
282         mCache = NULL;
283     }
284 
285     mCachedOffset = 0;
286     mCachedSize = 0;
287 }
288 
readAt(off64_t offset,void * data,size_t size)289 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
290     Mutex::Autolock autoLock(mLock);
291 
292     if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
293         memcpy(data, &mCache[offset - mCachedOffset], size);
294         return size;
295     }
296 
297     return mSource->readAt(offset, data, size);
298 }
299 
getSize(off64_t * size)300 status_t CachedRangedDataSource::getSize(off64_t *size) {
301     return mSource->getSize(size);
302 }
303 
flags()304 uint32_t CachedRangedDataSource::flags() {
305     return mSource->flags();
306 }
307 
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)308 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
309         size_t size,
310         bool assumeSourceOwnershipOnSuccess) {
311     Mutex::Autolock autoLock(mLock);
312 
313     clearCache();
314 
315     mCache = (uint8_t *)malloc(size);
316 
317     if (mCache == NULL) {
318         return -ENOMEM;
319     }
320 
321     mCachedOffset = offset;
322     mCachedSize = size;
323 
324     ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
325 
326     if (err < (ssize_t)size) {
327         clearCache();
328 
329         return ERROR_IO;
330     }
331     mOwnsDataSource = assumeSourceOwnershipOnSuccess;
332     return OK;
333 }
334 
335 ////////////////////////////////////////////////////////////////////////////////
336 
// Compile-time switch, disabled here.
// NOTE(review): presumably enables hex dumps of parsed data at its use sites,
// which are outside this section - confirm.
static constexpr bool kUseHexDump = false;
338 
FourCC2MIME(uint32_t fourcc)339 static const char *FourCC2MIME(uint32_t fourcc) {
340     switch (fourcc) {
341         case FOURCC("mp4a"):
342             return MEDIA_MIMETYPE_AUDIO_AAC;
343 
344         case FOURCC("samr"):
345             return MEDIA_MIMETYPE_AUDIO_AMR_NB;
346 
347         case FOURCC("sawb"):
348             return MEDIA_MIMETYPE_AUDIO_AMR_WB;
349 
350         case FOURCC("ec-3"):
351             return MEDIA_MIMETYPE_AUDIO_EAC3;
352 
353         case FOURCC("mp4v"):
354             return MEDIA_MIMETYPE_VIDEO_MPEG4;
355 
356         case FOURCC("s263"):
357         case FOURCC("h263"):
358         case FOURCC("H263"):
359             return MEDIA_MIMETYPE_VIDEO_H263;
360 
361         case FOURCC("avc1"):
362             return MEDIA_MIMETYPE_VIDEO_AVC;
363 
364         case FOURCC("hvc1"):
365         case FOURCC("hev1"):
366             return MEDIA_MIMETYPE_VIDEO_HEVC;
367 
368         case FOURCC("dvav"):
369         case FOURCC("dva1"):
370         case FOURCC("dvhe"):
371         case FOURCC("dvh1"):
372         case FOURCC("dav1"):
373             return MEDIA_MIMETYPE_VIDEO_DOLBY_VISION;
374 
375         case FOURCC("ac-4"):
376             return MEDIA_MIMETYPE_AUDIO_AC4;
377         case FOURCC("Opus"):
378             return MEDIA_MIMETYPE_AUDIO_OPUS;
379 
380         case FOURCC("twos"):
381         case FOURCC("sowt"):
382             return MEDIA_MIMETYPE_AUDIO_RAW;
383         case FOURCC("alac"):
384             return MEDIA_MIMETYPE_AUDIO_ALAC;
385         case FOURCC("fLaC"):
386             return MEDIA_MIMETYPE_AUDIO_FLAC;
387         case FOURCC("av01"):
388             return MEDIA_MIMETYPE_VIDEO_AV1;
389         case FOURCC("vp09"):
390             return MEDIA_MIMETYPE_VIDEO_VP9;
391         case FOURCC(".mp3"):
392         case 0x6D730055: // "ms U" mp3 audio
393             return MEDIA_MIMETYPE_AUDIO_MPEG;
394         case FOURCC("mha1"):
395             return MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1;
396         case FOURCC("mhm1"):
397             return MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1;
398         case FOURCC("dtsc"):
399             return MEDIA_MIMETYPE_AUDIO_DTS;
400         case FOURCC("dtse"):
401         case FOURCC("dtsh"):
402             return MEDIA_MIMETYPE_AUDIO_DTS_HD;
403         case FOURCC("dtsl"):
404             return MEDIA_MIMETYPE_AUDIO_DTS_HD_MA;
405         case FOURCC("dtsx"):
406             return MEDIA_MIMETYPE_AUDIO_DTS_UHD_P2;
407         default:
408             ALOGW("Unknown fourcc: %c%c%c%c",
409                    (fourcc >> 24) & 0xff,
410                    (fourcc >> 16) & 0xff,
411                    (fourcc >> 8) & 0xff,
412                    fourcc & 0xff
413                    );
414             return "application/octet-stream";
415     }
416 }
417 
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)418 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
419     if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
420         // AMR NB audio is always mono, 8kHz
421         *channels = 1;
422         *rate = 8000;
423         return true;
424     } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
425         // AMR WB audio is always mono, 16kHz
426         *channels = 1;
427         *rate = 16000;
428         return true;
429     }
430     return false;
431 }
432 
MPEG4Extractor(DataSourceHelper * source,const char * mime)433 MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
434     : mMoofOffset(0),
435       mMoofFound(false),
436       mMdatFound(false),
437       mDataSource(source),
438       mInitCheck(NO_INIT),
439       mHeaderTimescale(0),
440       mIsQT(false),
441       mIsHeif(false),
442       mHasMoovBox(false),
443       mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
444       mIsAvif(false),
445       mFirstTrack(NULL),
446       mLastTrack(NULL) {
447     ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
448     mFileMetaData = AMediaFormat_new();
449 }
450 
~MPEG4Extractor()451 MPEG4Extractor::~MPEG4Extractor() {
452     Track *track = mFirstTrack;
453     while (track) {
454         Track *next = track->next;
455 
456         delete track;
457         track = next;
458     }
459     mFirstTrack = mLastTrack = NULL;
460 
461     for (size_t i = 0; i < mPssh.size(); i++) {
462         delete [] mPssh[i].data;
463     }
464     mPssh.clear();
465 
466     delete mDataSource;
467     AMediaFormat_delete(mFileMetaData);
468 }
469 
flags() const470 uint32_t MPEG4Extractor::flags() const {
471     return CAN_PAUSE |
472             ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
473                     (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
474 }
475 
getMetaData(AMediaFormat * meta)476 media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
477     status_t err;
478     if ((err = readMetaData()) != OK) {
479         return AMEDIA_ERROR_UNKNOWN;
480     }
481     AMediaFormat_copy(meta, mFileMetaData);
482     return AMEDIA_OK;
483 }
484 
countTracks()485 size_t MPEG4Extractor::countTracks() {
486     status_t err;
487     if ((err = readMetaData()) != OK) {
488         ALOGV("MPEG4Extractor::countTracks: no tracks");
489         return 0;
490     }
491 
492     size_t n = 0;
493     Track *track = mFirstTrack;
494     while (track) {
495         ++n;
496         track = track->next;
497     }
498 
499     ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
500     return n;
501 }
502 
getTrackMetaData(AMediaFormat * meta,size_t index,uint32_t flags)503 media_status_t MPEG4Extractor::getTrackMetaData(
504         AMediaFormat *meta,
505         size_t index, uint32_t flags) {
506     status_t err;
507     if ((err = readMetaData()) != OK) {
508         return AMEDIA_ERROR_UNKNOWN;
509     }
510 
511     Track *track = mFirstTrack;
512     while (index > 0) {
513         if (track == NULL) {
514             return AMEDIA_ERROR_UNKNOWN;
515         }
516 
517         track = track->next;
518         --index;
519     }
520 
521     if (track == NULL) {
522         return AMEDIA_ERROR_UNKNOWN;
523     }
524 
525     [this, &track] {
526         int64_t duration;
527         int32_t samplerate;
528         // Only for audio track.
529         if (track->elst_needs_processing && mHeaderTimescale != 0 &&
530             AMediaFormat_getInt64(track->meta, AMEDIAFORMAT_KEY_DURATION, &duration) &&
531             AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
532             // Elst has to be processed only the first time this function is called.
533             track->elst_needs_processing = false;
534 
535             if (track->elst_segment_duration > INT64_MAX) {
536                 return;
537             }
538             int64_t segment_duration = track->elst_segment_duration;
539             int64_t media_time = track->elst_media_time;
540             int64_t halfscale = track->timescale / 2;
541 
542             ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
543                   ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
544                   segment_duration, media_time,
545                   halfscale, mHeaderTimescale, track->timescale);
546 
547             if ((uint32_t)samplerate != track->timescale){
548                 ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
549                     samplerate);
550             }
551             // Both delay and paddingsamples have to be set inorder for either to be
552             // effective in the lower layers.
553             int64_t delay = 0;
554             if (media_time > 0) { // Gapless playback
555                 // delay = ((media_time * samplerate) + halfscale) / track->timescale;
556                 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
557                         __builtin_add_overflow(delay, halfscale, &delay) ||
558                         (delay /= track->timescale, false) ||
559                         delay > INT32_MAX ||
560                         delay < INT32_MIN) {
561                     ALOGW("ignoring edit list with bogus values");
562                     return;
563                 }
564             }
565             ALOGV("delay = %" PRId64, delay);
566             AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
567 
568             int64_t paddingsamples = 0;
569             if (segment_duration > 0) {
570                 int64_t scaled_duration;
571                 // scaled_duration = duration * mHeaderTimescale;
572                 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
573                     return;
574                 }
575                 ALOGV("scaled_duration = %" PRId64, scaled_duration);
576 
577                 int64_t segment_end;
578                 int64_t padding;
579                 int64_t segment_duration_e6;
580                 int64_t media_time_scaled_e6;
581                 int64_t media_time_scaled;
582                 // padding = scaled_duration - ((segment_duration * 1000000) +
583                 //                  ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
584                 // segment_duration is based on timescale in movie header box(mdhd)
585                 // media_time is based on timescale track header/media timescale
586                 if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
587                     __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
588                     __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
589                     return;
590                 }
591                 media_time_scaled_e6 /= track->timescale;
592                 if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
593                     || __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
594                     return;
595                 }
596                 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
597                 // track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
598                 // might be slightly shorter than the segment duration, which would make the
599                 // padding negative. Clamp to zero.
600                 if (padding > 0) {
601                     int64_t halfscale_mht = mHeaderTimescale / 2;
602                     int64_t halfscale_e6;
603                     int64_t timescale_e6;
604                     // paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
605                     //                / (mHeaderTimescale * 1000000);
606                     if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
607                             __builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
608                             __builtin_mul_overflow(mHeaderTimescale, 1000000, &timescale_e6) ||
609                             __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
610                             (paddingsamples /= timescale_e6, false) ||
611                             paddingsamples > INT32_MAX) {
612                         return;
613                     }
614                 }
615             }
616             ALOGV("paddingsamples = %" PRId64, paddingsamples);
617             AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
618         }
619     }();
620 
621     if ((flags & kIncludeExtensiveMetaData)
622             && !track->includes_expensive_metadata) {
623         track->includes_expensive_metadata = true;
624 
625         const char *mime;
626         CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
627         if (!strncasecmp("video/", mime, 6)) {
628             // MPEG2 tracks do not provide CSD, so read the stream header
629             if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
630                 off64_t offset;
631                 size_t size;
632                 if (track->sampleTable->getMetaDataForSample(
633                             0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
634                     if (size > kMaxTrackHeaderSize) {
635                         size = kMaxTrackHeaderSize;
636                     }
637                     uint8_t header[kMaxTrackHeaderSize];
638                     if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
639                         AMediaFormat_setBuffer(track->meta,
640                                 AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
641                     }
642                 }
643             }
644 
645             if (mMoofOffset > 0) {
646                 int64_t duration;
647                 if (AMediaFormat_getInt64(track->meta,
648                         AMEDIAFORMAT_KEY_DURATION, &duration)) {
649                     // nothing fancy, just pick a frame near 1/4th of the duration
650                     AMediaFormat_setInt64(track->meta,
651                             AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
652                 }
653             } else {
654                 uint32_t sampleIndex;
655                 uint64_t sampleTime;
656                 if (track->timescale != 0 &&
657                         track->sampleTable->findThumbnailSample(&sampleIndex) == OK
658                         && track->sampleTable->getMetaDataForSample(
659                             sampleIndex, NULL /* offset */, NULL /* size */,
660                             &sampleTime) == OK) {
661                         AMediaFormat_setInt64(track->meta,
662                                 AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
663                                 ((int64_t)sampleTime * 1000000) / track->timescale);
664                 }
665             }
666         }
667     }
668 
669     return AMediaFormat_copy(meta, track->meta);
670 }
671 
readMetaData()672 status_t MPEG4Extractor::readMetaData() {
673     if (mInitCheck != NO_INIT) {
674         return mInitCheck;
675     }
676 
677     off64_t offset = 0;
678     status_t err;
679     bool sawMoovOrSidx = false;
680 
681     while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
682              (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
683                      (mItemTable != NULL) && mItemTable->isValid()))) {
684         off64_t orig_offset = offset;
685         err = parseChunk(&offset, 0);
686 
687         if (err != OK && err != UNKNOWN_ERROR) {
688             break;
689         } else if (offset <= orig_offset) {
690             // only continue parsing if the offset was advanced,
691             // otherwise we might end up in an infinite loop
692             ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
693             err = ERROR_MALFORMED;
694             break;
695         } else if (err == UNKNOWN_ERROR) {
696             sawMoovOrSidx = true;
697         }
698     }
699 
700     if ((mIsAvif || mIsHeif) && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
701         off64_t exifOffset;
702         size_t exifSize;
703         if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
704             AMediaFormat_setInt64(mFileMetaData,
705                     AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
706             AMediaFormat_setInt64(mFileMetaData,
707                     AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
708         }
709         off64_t xmpOffset;
710         size_t xmpSize;
711         if (mItemTable->getXmpOffsetAndSize(&xmpOffset, &xmpSize) == OK) {
712             // TODO(chz): b/175717339
713             // Use a hard-coded string here instead of named keys. The keys are available
714             // only on API 31+. The mp4 extractor is part of mainline and has min_sdk_version
715             // of 29. This hard-coded string can be replaced with the named constant once
716             // the mp4 extractor is built against API 31+.
717             AMediaFormat_setInt64(mFileMetaData,
718                     "xmp-offset" /*AMEDIAFORMAT_KEY_XMP_OFFSET*/, (int64_t)xmpOffset);
719             AMediaFormat_setInt64(mFileMetaData,
720                     "xmp-size" /*AMEDIAFORMAT_KEY_XMP_SIZE*/, (int64_t)xmpSize);
721         }
722         for (uint32_t imageIndex = 0;
723                 imageIndex < mItemTable->countImages(); imageIndex++) {
724             AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
725             if (meta == NULL) {
726                 ALOGE("heif image %u has no meta!", imageIndex);
727                 continue;
728             }
729             // Some heif files advertise image sequence brands (eg. 'hevc') in
730             // ftyp box, but don't have any valid tracks in them. Instead of
731             // reporting the entire file as malformed, we override the error
732             // to allow still images to be extracted.
733             if (err != OK) {
734                 ALOGW("Extracting still images only");
735                 err = OK;
736             }
737             mInitCheck = OK;
738 
739             ALOGV("adding %s image track %u", mIsHeif ? "HEIF" : "AVIF", imageIndex);
740             Track *track = new Track;
741             if (mLastTrack != NULL) {
742                 mLastTrack->next = track;
743             } else {
744                 mFirstTrack = track;
745             }
746             mLastTrack = track;
747 
748             track->meta = meta;
749             AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
750             track->timescale = 1000000;
751         }
752     }
753 
754     if (mInitCheck == OK) {
755         if (findTrackByMimePrefix("video/") != NULL) {
756             AMediaFormat_setString(mFileMetaData,
757                     AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
758         } else if (findTrackByMimePrefix("audio/") != NULL) {
759             AMediaFormat_setString(mFileMetaData,
760                     AMEDIAFORMAT_KEY_MIME, "audio/mp4");
761         } else if (findTrackByMimePrefix(
762                 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
763             AMediaFormat_setString(mFileMetaData,
764                     AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
765         } else if (findTrackByMimePrefix(
766                 MEDIA_MIMETYPE_IMAGE_AVIF) != NULL) {
767             AMediaFormat_setString(mFileMetaData,
768                     AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_IMAGE_AVIF);
769         } else {
770             AMediaFormat_setString(mFileMetaData,
771                     AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
772         }
773     } else {
774         mInitCheck = err;
775     }
776 
777     CHECK_NE(err, (status_t)NO_INIT);
778 
779     // copy pssh data into file metadata
780     uint64_t psshsize = 0;
781     for (size_t i = 0; i < mPssh.size(); i++) {
782         psshsize += 20 + mPssh[i].datalen;
783     }
784     if (psshsize > 0 && psshsize <= UINT32_MAX) {
785         char *buf = (char*)malloc(psshsize);
786         if (!buf) {
787             ALOGE("b/28471206");
788             return NO_MEMORY;
789         }
790         char *ptr = buf;
791         for (size_t i = 0; i < mPssh.size(); i++) {
792             memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
793             memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
794             ptr += (20 + mPssh[i].datalen);
795         }
796         AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
797         free(buf);
798     }
799 
800     return mInitCheck;
801 }
802 
803 struct PathAdder {
PathAdderandroid::PathAdder804     PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
805         : mPath(path) {
806         mPath->push(chunkType);
807     }
808 
~PathAdderandroid::PathAdder809     ~PathAdder() {
810         mPath->pop();
811     }
812 
813 private:
814     Vector<uint32_t> *mPath;
815 
816     PathAdder(const PathAdder &);
817     PathAdder &operator=(const PathAdder &);
818 };
819 
underMetaDataPath(const Vector<uint32_t> & path)820 static bool underMetaDataPath(const Vector<uint32_t> &path) {
821     return path.size() >= 5
822         && path[0] == FOURCC("moov")
823         && path[1] == FOURCC("udta")
824         && path[2] == FOURCC("meta")
825         && path[3] == FOURCC("ilst");
826 }
827 
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)828 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
829     return path.size() >= 2
830             && path[0] == FOURCC("moov")
831             && path[1] == FOURCC("meta")
832             && (depth == 2
833             || (depth == 3
834                     && (path[2] == FOURCC("hdlr")
835                     ||  path[2] == FOURCC("ilst")
836                     ||  path[2] == FOURCC("keys"))));
837 }
838 
839 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)840 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
841     // delta between mpeg4 time and unix epoch time
842     static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
843     if (time_1904 < INT64_MIN + delta) {
844         return false;
845     }
846     time_t time_1970 = time_1904 - delta;
847 
848     char tmp[32];
849     struct tm* tm = gmtime(&time_1970);
850     if (tm != NULL &&
851             strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
852         *s = tmp;
853         return true;
854     }
855     return false;
856 }
857 
parseChunk(off64_t * offset,int depth)858 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
859     ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
860 
861     if (*offset < 0) {
862         ALOGE("b/23540914");
863         return ERROR_MALFORMED;
864     }
865     if (depth > 100) {
866         ALOGE("b/27456299");
867         return ERROR_MALFORMED;
868     }
869     uint32_t hdr[2];
870     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
871         return ERROR_IO;
872     }
873     uint64_t chunk_size = ntohl(hdr[0]);
874     int32_t chunk_type = ntohl(hdr[1]);
875     off64_t data_offset = *offset + 8;
876 
877     if (chunk_size == 1) {
878         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
879             return ERROR_IO;
880         }
881         chunk_size = ntoh64(chunk_size);
882         data_offset += 8;
883 
884         if (chunk_size < 16) {
885             // The smallest valid chunk is 16 bytes long in this case.
886             return ERROR_MALFORMED;
887         }
888     } else if (chunk_size == 0) {
889         if (depth == 0) {
890             // atom extends to end of file
891             off64_t sourceSize;
892             if (mDataSource->getSize(&sourceSize) == OK) {
893                 chunk_size = (sourceSize - *offset);
894             } else {
895                 // XXX could we just pick a "sufficiently large" value here?
896                 ALOGE("atom size is 0, and data source has no size");
897                 return ERROR_MALFORMED;
898             }
899         } else {
900             // not allowed for non-toplevel atoms, skip it
901             *offset += 4;
902             return OK;
903         }
904     } else if (chunk_size < 8) {
905         // The smallest valid chunk is 8 bytes long.
906         ALOGE("invalid chunk size: %" PRIu64, chunk_size);
907         return ERROR_MALFORMED;
908     }
909 
910     char chunk[5];
911     MakeFourCCString(chunk_type, chunk);
912     ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
913 
914     if (kUseHexDump) {
915         static const char kWhitespace[] = "                                        ";
916         const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
917         printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
918 
919         char buffer[256];
920         size_t n = chunk_size;
921         if (n > sizeof(buffer)) {
922             n = sizeof(buffer);
923         }
924         if (mDataSource->readAt(*offset, buffer, n)
925                 < (ssize_t)n) {
926             return ERROR_IO;
927         }
928 
929         hexdump(buffer, n);
930     }
931 
932     PathAdder autoAdder(&mPath, chunk_type);
933 
934     // (data_offset - *offset) is either 8 or 16
935     off64_t chunk_data_size = chunk_size - (data_offset - *offset);
936     if (chunk_data_size < 0) {
937         ALOGE("b/23540914");
938         return ERROR_MALFORMED;
939     }
940     if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
941         char errMsg[100];
942         sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
943         ALOGE("%s (b/28615448)", errMsg);
944         android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
945         return ERROR_MALFORMED;
946     }
947 
948     if (chunk_type != FOURCC("cprt")
949             && chunk_type != FOURCC("covr")
950             && mPath.size() == 5 && underMetaDataPath(mPath)) {
951         off64_t stop_offset = *offset + chunk_size;
952         *offset = data_offset;
953         while (*offset < stop_offset) {
954             status_t err = parseChunk(offset, depth + 1);
955             if (err != OK) {
956                 return err;
957             }
958         }
959 
960         if (*offset != stop_offset) {
961             return ERROR_MALFORMED;
962         }
963 
964         return OK;
965     }
966 
967     switch(chunk_type) {
968         case FOURCC("moov"):
969         case FOURCC("trak"):
970         case FOURCC("mdia"):
971         case FOURCC("minf"):
972         case FOURCC("dinf"):
973         case FOURCC("stbl"):
974         case FOURCC("mvex"):
975         case FOURCC("moof"):
976         case FOURCC("traf"):
977         case FOURCC("mfra"):
978         case FOURCC("udta"):
979         case FOURCC("ilst"):
980         case FOURCC("sinf"):
981         case FOURCC("schi"):
982         case FOURCC("edts"):
983         case FOURCC("wave"):
984         {
985             if (chunk_type == FOURCC("moov") && depth != 0) {
986                 ALOGE("moov: depth %d", depth);
987                 return ERROR_MALFORMED;
988             }
989 
990             if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
991                 ALOGE("duplicate moov");
992                 return ERROR_MALFORMED;
993             }
994 
995             if (chunk_type == FOURCC("moof") && !mMoofFound) {
996                 // store the offset of the first segment
997                 mMoofFound = true;
998                 mMoofOffset = *offset;
999             }
1000 
1001             if (chunk_type == FOURCC("stbl")) {
1002                 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
1003 
1004                 if (mDataSource->flags()
1005                         & (DataSourceBase::kWantsPrefetching
1006                             | DataSourceBase::kIsCachingDataSource)) {
1007                     CachedRangedDataSource *cachedSource =
1008                         new CachedRangedDataSource(mDataSource);
1009 
1010                     if (cachedSource->setCachedRange(
1011                             *offset, chunk_size,
1012                             true /* assume ownership on success */) == OK) {
1013                         mDataSource = cachedSource;
1014                     } else {
1015                         delete cachedSource;
1016                     }
1017                 }
1018 
1019                 if (mLastTrack == NULL) {
1020                     return ERROR_MALFORMED;
1021                 }
1022 
1023                 mLastTrack->sampleTable = new SampleTable(mDataSource);
1024             }
1025 
1026             bool isTrack = false;
1027             if (chunk_type == FOURCC("trak")) {
1028                 if (depth != 1) {
1029                     ALOGE("trak: depth %d", depth);
1030                     return ERROR_MALFORMED;
1031                 }
1032                 isTrack = true;
1033 
1034                 ALOGV("adding new track");
1035                 Track *track = new Track;
1036                 if (mLastTrack) {
1037                     mLastTrack->next = track;
1038                 } else {
1039                     mFirstTrack = track;
1040                 }
1041                 mLastTrack = track;
1042 
1043                 track->meta = AMediaFormat_new();
1044                 AMediaFormat_setString(track->meta,
1045                         AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
1046             }
1047 
1048             off64_t stop_offset = *offset + chunk_size;
1049             *offset = data_offset;
1050             while (*offset < stop_offset) {
1051 
1052                 // pass udata terminate
1053                 if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
1054                     // handle the case that udta terminates with terminate code x00000000
1055                     // note that 0 terminator is optional and we just handle this case.
1056                     uint32_t terminate_code = 1;
1057                     mDataSource->readAt(*offset, &terminate_code, 4);
1058                     if (0 == terminate_code) {
1059                         *offset += 4;
1060                         ALOGD("Terminal code for udta");
1061                         continue;
1062                     } else {
1063                         ALOGW("invalid udta Terminal code");
1064                     }
1065                 }
1066 
1067                 status_t err = parseChunk(offset, depth + 1);
1068                 if (err != OK) {
1069                     if (isTrack) {
1070                         mLastTrack->skipTrack = true;
1071                         break;
1072                     }
1073                     return err;
1074                 }
1075             }
1076 
1077             if (*offset != stop_offset) {
1078                 return ERROR_MALFORMED;
1079             }
1080 
1081             if (isTrack) {
1082                 int32_t trackId;
1083                 // There must be exactly one track header per track.
1084 
1085                 if (!AMediaFormat_getInt32(mLastTrack->meta,
1086                         AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
1087                     mLastTrack->skipTrack = true;
1088                 }
1089 
1090                 status_t err = verifyTrack(mLastTrack);
1091                 if (err != OK) {
1092                     mLastTrack->skipTrack = true;
1093                 }
1094 
1095 
1096                 if (mLastTrack->skipTrack) {
1097                     ALOGV("skipping this track...");
1098                     Track *cur = mFirstTrack;
1099 
1100                     if (cur == mLastTrack) {
1101                         delete cur;
1102                         mFirstTrack = mLastTrack = NULL;
1103                     } else {
1104                         while (cur && cur->next != mLastTrack) {
1105                             cur = cur->next;
1106                         }
1107                         if (cur) {
1108                             cur->next = NULL;
1109                         }
1110                         delete mLastTrack;
1111                         mLastTrack = cur;
1112                     }
1113 
1114                     return OK;
1115                 }
1116 
1117                 // place things we built elsewhere into their final locations
1118 
1119                 // put aggregated tx3g data into the metadata
1120                 if (mLastTrack->mTx3gFilled > 0) {
1121                     ALOGV("Putting %zu bytes of tx3g data into meta data",
1122                           mLastTrack->mTx3gFilled);
1123                     AMediaFormat_setBuffer(mLastTrack->meta,
1124                         AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
1125                         mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
1126                     // drop it now to reduce our footprint
1127                     free(mLastTrack->mTx3gBuffer);
1128                     mLastTrack->mTx3gBuffer = NULL;
1129                     mLastTrack->mTx3gFilled = 0;
1130                     mLastTrack->mTx3gSize = 0;
1131                 }
1132 
1133                 const char *mime;
1134                 AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime);
1135 
1136                 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
1137                     void *data;
1138                     size_t size;
1139 
1140                     if (AMediaFormat_getBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
1141                                                &data, &size)
1142                         && size >= 5) {
1143                         const uint8_t *ptr = (const uint8_t *)data;
1144                         const uint8_t profile = ptr[2] >> 1;
1145                         const uint8_t blCompatibilityId = (ptr[4]) >> 4;
1146                         bool create_two_tracks = false;
1147 
1148                         if (blCompatibilityId && blCompatibilityId != 15) {
1149                             create_two_tracks = true;
1150                         }
1151 
1152                         if (4 == profile || 7 == profile ||
1153                                 (profile >= 8 && profile < 11 && create_two_tracks)) {
1154                             // we need a backward compatible track
1155                             ALOGV("Adding new backward compatible track");
1156                             Track *track_b = new Track;
1157 
1158                             track_b->timescale = mLastTrack->timescale;
1159                             track_b->sampleTable = mLastTrack->sampleTable;
1160                             track_b->includes_expensive_metadata =
1161                                 mLastTrack->includes_expensive_metadata;
1162                             track_b->skipTrack = mLastTrack->skipTrack;
1163                             track_b->elst_needs_processing = mLastTrack->elst_needs_processing;
1164                             track_b->elst_media_time = mLastTrack->elst_media_time;
1165                             track_b->elst_segment_duration = mLastTrack->elst_segment_duration;
1166                             track_b->elst_shift_start_ticks = mLastTrack->elst_shift_start_ticks;
1167                             track_b->elst_initial_empty_edit_ticks =
1168                                 mLastTrack->elst_initial_empty_edit_ticks;
1169                             track_b->subsample_encryption = mLastTrack->subsample_encryption;
1170 
1171                             track_b->mTx3gBuffer = mLastTrack->mTx3gBuffer;
1172                             track_b->mTx3gSize = mLastTrack->mTx3gSize;
1173                             track_b->mTx3gFilled = mLastTrack->mTx3gFilled;
1174 
1175                             track_b->meta = AMediaFormat_new();
1176                             AMediaFormat_copy(track_b->meta, mLastTrack->meta);
1177 
1178                             mLastTrack->next = track_b;
1179                             track_b->next = NULL;
1180 
1181                             // we want to remove the csd-2 key from the metadata, but
1182                             // don't have an AMediaFormat_* function to do so. Settle
1183                             // for replacing this csd-2 with an empty csd-2.
1184                             uint8_t emptybuffer[8] = {};
1185                             AMediaFormat_setBuffer(track_b->meta, AMEDIAFORMAT_KEY_CSD_2,
1186                                                    emptybuffer, 0);
1187 
1188                             if (4 == profile || 7 == profile || 8 == profile ) {
1189                                 AMediaFormat_setString(track_b->meta,
1190                                         AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_HEVC);
1191                             } else if (9 == profile) {
1192                                 AMediaFormat_setString(track_b->meta,
1193                                         AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AVC);
1194                             } else if (10 == profile) {
1195                                 AMediaFormat_setString(track_b->meta,
1196                                         AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AV1);
1197                             } // Should never get to else part
1198 
1199                             mLastTrack = track_b;
1200                         }
1201                     }
1202                 }
1203             } else if (chunk_type == FOURCC("moov")) {
1204                 mInitCheck = OK;
1205 
1206                 return UNKNOWN_ERROR;  // Return a generic error.
1207             }
1208             break;
1209         }
1210 
1211         case FOURCC("schm"):
1212         {
1213 
1214             *offset += chunk_size;
1215             if (!mLastTrack) {
1216                 return ERROR_MALFORMED;
1217             }
1218 
1219             uint32_t scheme_type;
1220             if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
1221                 return ERROR_IO;
1222             }
1223             scheme_type = ntohl(scheme_type);
1224             int32_t mode = kCryptoModeUnencrypted;
1225             switch(scheme_type) {
1226                 case FOURCC("cbc1"):
1227                 {
1228                     mode = kCryptoModeAesCbc;
1229                     break;
1230                 }
1231                 case FOURCC("cbcs"):
1232                 {
1233                     mode = kCryptoModeAesCbc;
1234                     mLastTrack->subsample_encryption = true;
1235                     break;
1236                 }
1237                 case FOURCC("cenc"):
1238                 {
1239                     mode = kCryptoModeAesCtr;
1240                     break;
1241                 }
1242                 case FOURCC("cens"):
1243                 {
1244                     mode = kCryptoModeAesCtr;
1245                     mLastTrack->subsample_encryption = true;
1246                     break;
1247                 }
1248             }
1249             if (mode != kCryptoModeUnencrypted) {
1250                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
1251             }
1252             break;
1253         }
1254 
1255 
1256         case FOURCC("elst"):
1257         {
1258             *offset += chunk_size;
1259 
1260             if (!mLastTrack) {
1261                 return ERROR_MALFORMED;
1262             }
1263 
1264             // See 14496-12 8.6.6
1265             uint8_t version;
1266             if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1267                 return ERROR_IO;
1268             }
1269 
1270             uint32_t entry_count;
1271             if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1272                 return ERROR_IO;
1273             }
1274 
1275             if (entry_count > 2) {
1276                 /* We support a single entry for gapless playback or negating offset for
1277                  * reordering B frames, two entries (empty edit) for start offset at the moment.
1278                  */
1279                 ALOGW("ignoring edit list with %d entries", entry_count);
1280             } else {
1281                 off64_t entriesoffset = data_offset + 8;
1282                 uint64_t segment_duration;
1283                 int64_t media_time;
1284                 bool empty_edit_present = false;
1285                 for (int i = 0; i < entry_count; ++i) {
1286                     switch (version) {
1287                     case 0: {
1288                         uint32_t sd;
1289                         int32_t mt;
1290                         if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1291                             !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1292                             return ERROR_IO;
1293                         }
1294                         segment_duration = sd;
1295                         media_time = mt;
1296                         // 4(segment duration) + 4(media time) + 4(media rate)
1297                         entriesoffset += 12;
1298                         break;
1299                     }
1300                     case 1: {
1301                         if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1302                             !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1303                             return ERROR_IO;
1304                         }
1305                         // 8(segment duration) + 8(media time) + 4(media rate)
1306                         entriesoffset += 20;
1307                         break;
1308                     }
1309                     default:
1310                         return ERROR_IO;
1311                         break;
1312                     }
1313                     // Empty edit entry would have to be first entry.
1314                     if (media_time == -1 && i == 0) {
1315                         empty_edit_present = true;
1316                         ALOGV("initial empty edit ticks: %" PRIu64, segment_duration);
1317                         /* In movie header timescale, and needs to be converted to media timescale
1318                          * after we get that from a track's 'mdhd' atom,
1319                          * which at times come after 'elst'.
1320                          */
1321                         mLastTrack->elst_initial_empty_edit_ticks = segment_duration;
1322                     } else if (media_time >= 0 && i == 0) {
1323                         ALOGV("first edit list entry - from gapless playback files");
1324                         mLastTrack->elst_media_time = media_time;
1325                         mLastTrack->elst_segment_duration = segment_duration;
1326                         ALOGV("segment_duration: %" PRIu64 " media_time: %" PRId64,
1327                               segment_duration, media_time);
1328                         // media_time is in media timescale as are STTS/CTTS entries.
1329                         mLastTrack->elst_shift_start_ticks = media_time;
1330                     } else if (empty_edit_present && i == 1) {
1331                         // Process second entry only when the first entry was an empty edit entry.
1332                         ALOGV("second edit list entry");
1333                         mLastTrack->elst_shift_start_ticks = media_time;
1334                     } else {
1335                         ALOGW("for now, unsupported entry in edit list %" PRIu32, entry_count);
1336                     }
1337                 }
1338                 // save these for later, because the elst atom might precede
1339                 // the atoms that actually gives us the duration and sample rate
1340                 // needed to calculate the padding and delay values
1341                 mLastTrack->elst_needs_processing = true;
1342             }
1343             break;
1344         }
1345 
1346         case FOURCC("frma"):
1347         {
1348             *offset += chunk_size;
1349 
1350             uint32_t original_fourcc;
1351             if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1352                 return ERROR_IO;
1353             }
1354             original_fourcc = ntohl(original_fourcc);
1355             ALOGV("read original format: %d", original_fourcc);
1356 
1357             if (mLastTrack == NULL) {
1358                 return ERROR_MALFORMED;
1359             }
1360 
1361             AMediaFormat_setString(mLastTrack->meta,
1362                     AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
1363             uint32_t num_channels = 0;
1364             uint32_t sample_rate = 0;
1365             if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1366                 AMediaFormat_setInt32(mLastTrack->meta,
1367                         AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1368                 AMediaFormat_setInt32(mLastTrack->meta,
1369                         AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1370             }
1371 
1372             if (!mIsQT && original_fourcc == FOURCC("alac")) {
1373                 off64_t tmpOffset = *offset;
1374                 status_t err = parseALACSampleEntry(&tmpOffset);
1375                 if (err != OK) {
1376                     ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1377                     return err;
1378                 }
1379                 *offset = tmpOffset + 8;
1380             }
1381 
1382             break;
1383         }
1384 
1385         case FOURCC("tenc"):
1386         {
1387             *offset += chunk_size;
1388 
1389             if (chunk_size < 32) {
1390                 return ERROR_MALFORMED;
1391             }
1392 
1393             // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1394             // default IV size, 16 bytes default KeyID
1395             // (ISO 23001-7)
1396 
1397             uint8_t version;
1398             if (mDataSource->readAt(data_offset, &version, sizeof(version))
1399                     < (ssize_t)sizeof(version)) {
1400                 return ERROR_IO;
1401             }
1402 
1403             uint8_t buf[4];
1404             memset(buf, 0, 4);
1405             if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1406                 return ERROR_IO;
1407             }
1408 
1409             if (mLastTrack == NULL) {
1410                 return ERROR_MALFORMED;
1411             }
1412 
1413             uint8_t defaultEncryptedByteBlock = 0;
1414             uint8_t defaultSkipByteBlock = 0;
1415             uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1416             if (version == 1) {
1417                 uint32_t pattern = buf[2];
1418                 defaultEncryptedByteBlock = pattern >> 4;
1419                 defaultSkipByteBlock = pattern & 0xf;
1420                 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1421                     // use (1,0) to mean "encrypt everything"
1422                     defaultEncryptedByteBlock = 1;
1423                 }
1424             } else if (mLastTrack->subsample_encryption) {
1425                 ALOGW("subsample_encryption should be version 1");
1426             } else if (defaultAlgorithmId > 1) {
1427                 // only 0 (clear) and 1 (AES-128) are valid
1428                 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1429                 defaultAlgorithmId = 1;
1430             }
1431 
1432             memset(buf, 0, 4);
1433             if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1434                 return ERROR_IO;
1435             }
1436             uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1437 
1438             if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1439                 // only unencrypted data must have 0 IV size
1440                 return ERROR_MALFORMED;
1441             } else if (defaultIVSize != 0 &&
1442                     defaultIVSize != 8 &&
1443                     defaultIVSize != 16) {
1444                 return ERROR_MALFORMED;
1445             }
1446 
1447             uint8_t defaultKeyId[16];
1448 
1449             if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1450                 return ERROR_IO;
1451             }
1452 
1453             sp<ABuffer> defaultConstantIv;
1454             if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1455 
1456                 uint8_t ivlength;
1457                 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1458                         < (ssize_t)sizeof(ivlength)) {
1459                     return ERROR_IO;
1460                 }
1461 
1462                 if (ivlength != 8 && ivlength != 16) {
1463                     ALOGW("unsupported IV length: %u", ivlength);
1464                     return ERROR_MALFORMED;
1465                 }
1466 
1467                 defaultConstantIv = new ABuffer(ivlength);
1468                 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1469                         < (ssize_t)ivlength) {
1470                     return ERROR_IO;
1471                 }
1472 
1473                 defaultConstantIv->setRange(0, ivlength);
1474             }
1475 
1476             int32_t tmpAlgorithmId;
1477             if (!AMediaFormat_getInt32(mLastTrack->meta,
1478                     AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
1479                 AMediaFormat_setInt32(mLastTrack->meta,
1480                         AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
1481             }
1482 
1483             AMediaFormat_setInt32(mLastTrack->meta,
1484                     AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
1485             AMediaFormat_setBuffer(mLastTrack->meta,
1486                     AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
1487             AMediaFormat_setInt32(mLastTrack->meta,
1488                     AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
1489             AMediaFormat_setInt32(mLastTrack->meta,
1490                     AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
1491             if (defaultConstantIv != NULL) {
1492                 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
1493                         defaultConstantIv->data(), defaultConstantIv->size());
1494             }
1495             break;
1496         }
1497 
1498         case FOURCC("tkhd"):
1499         {
1500             *offset += chunk_size;
1501 
1502             status_t err;
1503             if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1504                 return err;
1505             }
1506 
1507             break;
1508         }
1509 
1510         case FOURCC("tref"):
1511         {
1512             off64_t stop_offset = *offset + chunk_size;
1513             *offset = data_offset;
1514             while (*offset < stop_offset) {
1515                 status_t err = parseChunk(offset, depth + 1);
1516                 if (err != OK) {
1517                     return err;
1518                 }
1519             }
1520             if (*offset != stop_offset) {
1521                 return ERROR_MALFORMED;
1522             }
1523             break;
1524         }
1525 
1526         case FOURCC("thmb"):
1527         {
1528             *offset += chunk_size;
1529 
1530             if (mLastTrack != NULL) {
1531                 // Skip thumbnail track for now since we don't have an
1532                 // API to retrieve it yet.
1533                 // The thumbnail track can't be accessed by negative index or time,
1534                 // because each timed sample has its own corresponding thumbnail
1535                 // in the thumbnail track. We'll need a dedicated API to retrieve
1536                 // thumbnail at time instead.
1537                 mLastTrack->skipTrack = true;
1538             }
1539 
1540             break;
1541         }
1542 
1543         case FOURCC("pssh"):
1544         {
1545             *offset += chunk_size;
1546 
1547             PsshInfo pssh;
1548 
1549             if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1550                 return ERROR_IO;
1551             }
1552 
1553             uint32_t psshdatalen = 0;
1554             if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1555                 return ERROR_IO;
1556             }
1557             pssh.datalen = ntohl(psshdatalen);
1558             ALOGV("pssh data size: %d", pssh.datalen);
1559             if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1560                 // pssh data length exceeds size of containing box
1561                 return ERROR_MALFORMED;
1562             }
1563 
1564             pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1565             if (pssh.data == NULL) {
1566                 return ERROR_MALFORMED;
1567             }
1568             ALOGV("allocated pssh @ %p", pssh.data);
1569             ssize_t requested = (ssize_t) pssh.datalen;
1570             if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1571                 delete[] pssh.data;
1572                 return ERROR_IO;
1573             }
1574             mPssh.push_back(pssh);
1575 
1576             break;
1577         }
1578 
1579         case FOURCC("mdhd"):
1580         {
1581             *offset += chunk_size;
1582 
1583             if (chunk_data_size < 4 || mLastTrack == NULL) {
1584                 return ERROR_MALFORMED;
1585             }
1586 
1587             uint8_t version;
1588             if (mDataSource->readAt(
1589                         data_offset, &version, sizeof(version))
1590                     < (ssize_t)sizeof(version)) {
1591                 return ERROR_IO;
1592             }
1593 
1594             off64_t timescale_offset;
1595 
1596             if (version == 1) {
1597                 timescale_offset = data_offset + 4 + 16;
1598             } else if (version == 0) {
1599                 timescale_offset = data_offset + 4 + 8;
1600             } else {
1601                 return ERROR_IO;
1602             }
1603 
1604             uint32_t timescale;
1605             if (mDataSource->readAt(
1606                         timescale_offset, &timescale, sizeof(timescale))
1607                     < (ssize_t)sizeof(timescale)) {
1608                 return ERROR_IO;
1609             }
1610 
1611             if (!timescale) {
1612                 ALOGE("timescale should not be ZERO.");
1613                 return ERROR_MALFORMED;
1614             }
1615 
1616             mLastTrack->timescale = ntohl(timescale);
1617 
1618             // 14496-12 says all ones means indeterminate, but some files seem to use
1619             // 0 instead. We treat both the same.
1620             int64_t duration = 0;
1621             if (version == 1) {
1622                 if (mDataSource->readAt(
1623                             timescale_offset + 4, &duration, sizeof(duration))
1624                         < (ssize_t)sizeof(duration)) {
1625                     return ERROR_IO;
1626                 }
1627                 if (duration != -1) {
1628                     duration = ntoh64(duration);
1629                 }
1630             } else {
1631                 uint32_t duration32;
1632                 if (mDataSource->readAt(
1633                             timescale_offset + 4, &duration32, sizeof(duration32))
1634                         < (ssize_t)sizeof(duration32)) {
1635                     return ERROR_IO;
1636                 }
1637                 if (duration32 != 0xffffffff) {
1638                     duration = ntohl(duration32);
1639                 }
1640             }
1641             if (duration != 0 && mLastTrack->timescale != 0) {
1642                 long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
1643                 if (durationUs < 0 || durationUs > INT64_MAX) {
1644                     ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
1645                           (long long) duration, (long long) mLastTrack->timescale);
1646                     return ERROR_MALFORMED;
1647                 }
1648                 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
1649             }
1650 
1651             uint8_t lang[2];
1652             off64_t lang_offset;
1653             if (version == 1) {
1654                 lang_offset = timescale_offset + 4 + 8;
1655             } else if (version == 0) {
1656                 lang_offset = timescale_offset + 4 + 4;
1657             } else {
1658                 return ERROR_IO;
1659             }
1660 
1661             if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1662                     < (ssize_t)sizeof(lang)) {
1663                 return ERROR_IO;
1664             }
1665 
1666             // To get the ISO-639-2/T three character language code
1667             // 1 bit pad followed by 3 5-bits characters. Each character
1668             // is packed as the difference between its ASCII value and 0x60.
1669             char lang_code[4];
1670             lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1671             lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1672             lang_code[2] = (lang[1] & 0x1f) + 0x60;
1673             lang_code[3] = '\0';
1674 
1675             AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
1676 
1677             break;
1678         }
1679 
1680         case FOURCC("stsd"):
1681         {
1682             uint8_t buffer[8];
1683             if (chunk_data_size < (off64_t)sizeof(buffer)) {
1684                 return ERROR_MALFORMED;
1685             }
1686 
1687             if (mDataSource->readAt(
1688                         data_offset, buffer, 8) < 8) {
1689                 return ERROR_IO;
1690             }
1691 
1692             if (U32_AT(buffer) != 0) {
1693                 // Should be version 0, flags 0.
1694                 return ERROR_MALFORMED;
1695             }
1696 
1697             uint32_t entry_count = U32_AT(&buffer[4]);
1698 
1699             if (entry_count > 1) {
1700                 // For 3GPP timed text, there could be multiple tx3g boxes contain
1701                 // multiple text display formats. These formats will be used to
1702                 // display the timed text.
1703                 // For encrypted files, there may also be more than one entry.
1704                 const char *mime;
1705 
1706                 if (mLastTrack == NULL)
1707                     return ERROR_MALFORMED;
1708 
1709                 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1710                 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1711                         strcasecmp(mime, "application/octet-stream")) {
1712                     // For now we only support a single type of media per track.
1713                     mLastTrack->skipTrack = true;
1714                     *offset += chunk_size;
1715                     break;
1716                 }
1717             }
1718             off64_t stop_offset = *offset + chunk_size;
1719             *offset = data_offset + 8;
1720             for (uint32_t i = 0; i < entry_count; ++i) {
1721                 status_t err = parseChunk(offset, depth + 1);
1722                 if (err != OK) {
1723                     return err;
1724                 }
1725             }
1726 
1727             if (*offset != stop_offset) {
1728                 return ERROR_MALFORMED;
1729             }
1730             break;
1731         }
        case FOURCC("mett"):
        {
            // Text metadata sample entry: reads the whole box payload and
            // derives the track's MIME type from it. Two on-disk layouts are
            // accepted (see the long comment below).
            *offset += chunk_size;

            // the absolute minimum size of a compliant mett box is 11 bytes:
            // 6 byte reserved, 2 byte index, null byte, one char mime_format, null byte
            // The resulting mime_format would be invalid at that size though.
            if (mLastTrack == NULL || chunk_data_size < 11) {
                return ERROR_MALFORMED;
            }

            // Heap-allocate the payload buffer; a NULL result is reported as
            // an out-of-memory condition.
            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
            if (buffer.get() == NULL) {
                return NO_MEMORY;
            }

            if (mDataSource->readAt(
                        data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
                return ERROR_IO;
            }

            // ISO-14496-12:
            // int8 reserved[6];               // should be all zeroes
            // int16_t data_reference_index;
            // char content_encoding[];        // null terminated, optional (= just the null byte)
            // char mime_format[];             // null terminated, mandatory
            // optional other boxes
            //
            // API < 29:
            // char mime_format[];             // null terminated
            //
            // API >= 29
            // char mime_format[];             // null terminated
            // char mime_format[];             // null terminated

            // Prior to API 29, the metadata track was not compliant with ISO/IEC
            // 14496-12-2015. This led to some ISO-compliant parsers failing to read the
            // metatrack. As of API 29 and onwards, a change was made to metadata track to
            // make it somewhat compatible with the standard. The workaround is to write the
            // null-terminated mime_format string twice. This allows compliant parsers to
            // read the missing reserved, data_reference_index, and content_encoding fields
            // from the first mime_type string. The actual mime_format field would then be
            // read correctly from the second string. The non-compliant Android frameworks
            // from API 28 and earlier would still be able to read the mime_format correctly
            // as it would only read the first null-terminated mime_format string. To enable
            // reading metadata tracks generated from both the non-compliant and compliant
            // formats, a check needs to be done to see which format is used.
            const char *str = (const char*) buffer.get();
            // strnlen is bounded by the payload size, so a payload without any
            // null byte yields string_length == chunk_data_size.
            size_t string_length = strnlen(str, chunk_data_size);

            if (string_length == chunk_data_size - 1) {
                // This is likely a pre API 29 file, since it's a single null terminated
                // string filling the entire box.
                AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, str);
            } else {
                // This might be a fully compliant metadata track, a "double mime" compatibility
                // track, or anything else, including a single non-terminated string, so we need
                // to determine the length of each string we want to parse out of the box.
                // The content_encoding string starts 8 bytes in, i.e. after the
                // 6 reserved bytes and the 2-byte data_reference_index listed above.
                size_t encoding_length = strnlen(str + 8, chunk_data_size - 8);
                if (encoding_length + 8 >= chunk_data_size - 2) {
                    // the encoding extends to the end of the box, so there's no mime_format
                    return ERROR_MALFORMED;
                }
                // NOTE(review): contentEncoding is constructed but not used below.
                String8 contentEncoding(str + 8, encoding_length);
                // mimeFormat is built from every byte after the encoding's
                // null terminator up to the end of the payload.
                String8 mimeFormat(str + 8 + encoding_length + 1,
                        chunk_data_size - 8 - encoding_length - 1);
                AMediaFormat_setString(mLastTrack->meta,
                        AMEDIAFORMAT_KEY_MIME, mimeFormat.c_str());
            }
            break;
        }
1803 
1804         case FOURCC("mp4a"):
1805         case FOURCC("enca"):
1806         case FOURCC("samr"):
1807         case FOURCC("sawb"):
1808         case FOURCC("Opus"):
1809         case FOURCC("twos"):
1810         case FOURCC("sowt"):
1811         case FOURCC("alac"):
1812         case FOURCC("fLaC"):
1813         case FOURCC(".mp3"):
1814         case 0x6D730055: // "ms U" mp3 audio
1815         case FOURCC("mha1"):
1816         case FOURCC("mhm1"):
1817         case FOURCC("dtsc"):
1818         case FOURCC("dtse"):
1819         case FOURCC("dtsh"):
1820         case FOURCC("dtsl"):
1821         case FOURCC("dtsx"):
1822         {
1823             if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
1824 
1825                 if (chunk_type == FOURCC("alac")) {
1826                     off64_t offsetTmp = *offset;
1827                     status_t err = parseALACSampleEntry(&offsetTmp);
1828                     if (err != OK) {
1829                         ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1830                         return err;
1831                     }
1832                 }
1833 
1834                 // Ignore all atoms embedded in QT wave atom
1835                 ALOGV("Ignore all atoms embedded in QT wave atom");
1836                 *offset += chunk_size;
1837                 break;
1838             }
1839 
1840             uint8_t buffer[8 + 20];
1841             if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1842                 // Basic AudioSampleEntry size.
1843                 return ERROR_MALFORMED;
1844             }
1845 
1846             if (mDataSource->readAt(
1847                         data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1848                 return ERROR_IO;
1849             }
1850 
1851             // we can get data_ref_index value from U16_AT(&buffer[6])
1852             uint16_t version = U16_AT(&buffer[8]);
1853             uint32_t num_channels = U16_AT(&buffer[16]);
1854 
1855             uint16_t sample_size = U16_AT(&buffer[18]);
1856             uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1857 
1858             if (mLastTrack == NULL)
1859                 return ERROR_MALFORMED;
1860 
1861             off64_t stop_offset = *offset + chunk_size;
1862             *offset = data_offset + sizeof(buffer);
1863 
1864             if (mIsQT) {
1865                 if (version == 1) {
1866                     if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1867                         return ERROR_IO;
1868                     }
1869 
1870 #if 0
1871                     U32_AT(buffer);  // samples per packet
1872                     U32_AT(&buffer[4]);  // bytes per packet
1873                     U32_AT(&buffer[8]);  // bytes per frame
1874                     U32_AT(&buffer[12]);  // bytes per sample
1875 #endif
1876                     *offset += 16;
1877                 } else if (version == 2) {
1878                     uint8_t v2buffer[36];
1879                     if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1880                         return ERROR_IO;
1881                     }
1882 
1883 #if 0
1884                     U32_AT(v2buffer);  // size of struct only
1885                     sample_rate = (uint32_t)U64_AT(&v2buffer[4]);  // audio sample rate
1886                     num_channels = U32_AT(&v2buffer[12]);  // num audio channels
1887                     U32_AT(&v2buffer[16]);  // always 0x7f000000
1888                     sample_size = (uint16_t)U32_AT(&v2buffer[20]);  // const bits per channel
1889                     U32_AT(&v2buffer[24]);  // format specifc flags
1890                     U32_AT(&v2buffer[28]);  // const bytes per audio packet
1891                     U32_AT(&v2buffer[32]);  // const LPCM frames per audio packet
1892 #endif
1893                     *offset += 36;
1894                 }
1895             }
1896 
1897             if (chunk_type != FOURCC("enca")) {
1898                 // if the chunk type is enca, we'll get the type from the frma box later
1899                 AMediaFormat_setString(mLastTrack->meta,
1900                         AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1901                 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1902 
1903                 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
1904                     AMediaFormat_setInt32(mLastTrack->meta,
1905                             AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
1906                     if (chunk_type == FOURCC("twos")) {
1907                         AMediaFormat_setInt32(mLastTrack->meta,
1908                                 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
1909                     }
1910                 }
1911             }
1912             ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1913                    chunk, num_channels, sample_size, sample_rate);
1914             AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1915             AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1916 
1917             if (chunk_type == FOURCC("Opus")) {
1918                 uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
1919                 data_offset += sizeof(buffer);
1920                 size_t opusInfoSize = chunk_data_size - sizeof(buffer);
1921 
1922                 if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
1923                     opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
1924                     return ERROR_MALFORMED;
1925                 }
1926                 // Read Opus Header
1927                 if (mDataSource->readAt(
1928                         data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
1929                     return ERROR_IO;
1930                 }
1931 
1932                 // OpusHeader must start with this magic sequence, overwrite first 8 bytes
1933                 // http://wiki.xiph.org/OggOpus#ID_Header
1934                 strncpy((char *)opusInfo, "OpusHead", 8);
1935 
1936                 // Version shall be 0 as per mp4 Opus Specific Box
1937                 // (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
1938                 if (opusInfo[8]) {
1939                     return ERROR_MALFORMED;
1940                 }
1941                 // Force version to 1 as per OpusHead definition
1942                 // (http://wiki.xiph.org/OggOpus#ID_Header)
1943                 opusInfo[8] = 1;
1944 
1945                 // Read Opus Specific Box values
1946                 size_t opusOffset = 10;
1947                 uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
1948                 uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
1949                 uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
1950 
1951                 // Convert Opus Specific Box values. ParseOpusHeader expects
1952                 // the values in LE, however MP4 stores these values as BE
1953                 // https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
1954                 memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
1955                 memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
1956                 memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
1957 
1958                 static const int64_t kSeekPreRollNs = 80000000;  // Fixed 80 msec
1959                 static const int32_t kOpusSampleRate = 48000;
1960                 int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
1961 
1962                 AMediaFormat_setBuffer(mLastTrack->meta,
1963                             AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
1964                 AMediaFormat_setBuffer(mLastTrack->meta,
1965                         AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
1966                 AMediaFormat_setBuffer(mLastTrack->meta,
1967                         AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
1968 
1969                 data_offset += opusInfoSize;
1970                 *offset = data_offset;
1971                 CHECK_EQ(*offset, stop_offset);
1972             }
1973 
1974             if (!mIsQT && chunk_type == FOURCC("alac")) {
1975                 data_offset += sizeof(buffer);
1976 
1977                 status_t err = parseALACSampleEntry(&data_offset);
1978                 if (err != OK) {
1979                     ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1980                     return err;
1981                 }
1982                 *offset = data_offset;
1983                 CHECK_EQ(*offset, stop_offset);
1984             }
1985 
1986             if (chunk_type == FOURCC("fLaC")) {
1987                 data_offset += sizeof(buffer);
1988                 *offset = data_offset;
1989             }
1990 
1991             while (*offset < stop_offset) {
1992                 status_t err = parseChunk(offset, depth + 1);
1993                 if (err != OK) {
1994                     return err;
1995                 }
1996             }
1997 
1998             if (*offset != stop_offset) {
1999                 return ERROR_MALFORMED;
2000             }
2001             break;
2002         }
2003         case FOURCC("mhaC"):
2004         {
2005             // See ISO_IEC_23008-3;2019 MHADecoderConfigurationRecord
2006             constexpr uint32_t mhac_header_size = 4 /* size */ + 4 /* boxtype 'mhaC' */
2007                     + 1 /* configurationVersion */ + 1 /* mpegh3daProfileLevelIndication */
2008                     + 1 /* referenceChannelLayout */ + 2 /* mpegh3daConfigLength */;
2009             uint8_t mhac_header[mhac_header_size];
2010             off64_t data_offset = *offset;
2011 
2012             if (mLastTrack == NULL || chunk_size < sizeof(mhac_header)) {
2013                 return ERROR_MALFORMED;
2014             }
2015 
2016             if (mDataSource->readAt(data_offset, mhac_header, sizeof(mhac_header))
2017                     < (ssize_t)sizeof(mhac_header)) {
2018                 return ERROR_IO;
2019             }
2020 
2021             //get mpegh3daProfileLevelIndication
2022             const uint32_t mpegh3daProfileLevelIndication = mhac_header[9];
2023             AMediaFormat_setInt32(mLastTrack->meta,
2024                     AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
2025                     mpegh3daProfileLevelIndication);
2026 
2027              //get referenceChannelLayout
2028             const uint32_t referenceChannelLayout = mhac_header[10];
2029             AMediaFormat_setInt32(mLastTrack->meta,
2030                     AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
2031                     referenceChannelLayout);
2032 
2033             // get mpegh3daConfigLength
2034             const uint32_t mhac_config_size = U16_AT(&mhac_header[11]);
2035             if (chunk_size != sizeof(mhac_header) + mhac_config_size) {
2036                 return ERROR_MALFORMED;
2037             }
2038 
2039             data_offset += sizeof(mhac_header);
2040             uint8_t mhac_config[mhac_config_size];
2041             if (mDataSource->readAt(data_offset, mhac_config, sizeof(mhac_config))
2042                     < (ssize_t)sizeof(mhac_config)) {
2043                 return ERROR_IO;
2044             }
2045 
2046             AMediaFormat_setBuffer(mLastTrack->meta,
2047                     AMEDIAFORMAT_KEY_CSD_0, mhac_config, sizeof(mhac_config));
2048             data_offset += sizeof(mhac_config);
2049             *offset = data_offset;
2050             break;
2051         }
2052         case FOURCC("mhaP"):
2053         {
2054             // FDAmd_2 of ISO_IEC_23008-3;2019 MHAProfileAndLevelCompatibilitySetBox
2055             constexpr uint32_t mhap_header_size = 4 /* size */ + 4 /* boxtype 'mhaP' */
2056                     + 1 /* numCompatibleSets */;
2057 
2058             uint8_t mhap_header[mhap_header_size];
2059             off64_t data_offset = *offset;
2060 
2061             if (chunk_size < (ssize_t)mhap_header_size) {
2062                 return ERROR_MALFORMED;
2063             }
2064 
2065             if (mDataSource->readAt(data_offset, mhap_header, sizeof(mhap_header))
2066                     < (ssize_t)sizeof(mhap_header)) {
2067                 return ERROR_IO;
2068             }
2069 
2070             // mhap_compatible_sets_size = numCompatibleSets * sizeof(uint8_t)
2071             const uint32_t mhap_compatible_sets_size = mhap_header[8];
2072             if (chunk_size != sizeof(mhap_header) + mhap_compatible_sets_size) {
2073                 return ERROR_MALFORMED;
2074             }
2075 
2076             data_offset += sizeof(mhap_header);
2077             uint8_t mhap_compatible_sets[mhap_compatible_sets_size];
2078             if (mDataSource->readAt(
2079                     data_offset, mhap_compatible_sets, sizeof(mhap_compatible_sets))
2080                             < (ssize_t)sizeof(mhap_compatible_sets)) {
2081                 return ERROR_IO;
2082             }
2083 
2084             AMediaFormat_setBuffer(mLastTrack->meta,
2085                     AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS,
2086                     mhap_compatible_sets, sizeof(mhap_compatible_sets));
2087             data_offset += sizeof(mhap_compatible_sets);
2088             *offset = data_offset;
2089             break;
2090         }
2091         case FOURCC("mp4v"):
2092         case FOURCC("encv"):
2093         case FOURCC("s263"):
2094         case FOURCC("H263"):
2095         case FOURCC("h263"):
2096         case FOURCC("avc1"):
2097         case FOURCC("hvc1"):
2098         case FOURCC("hev1"):
2099         case FOURCC("dvav"):
2100         case FOURCC("dva1"):
2101         case FOURCC("dvhe"):
2102         case FOURCC("dvh1"):
2103         case FOURCC("dav1"):
2104         case FOURCC("av01"):
2105         case FOURCC("vp09"):
2106         {
2107             uint8_t buffer[78];
2108             if (chunk_data_size < (ssize_t)sizeof(buffer)) {
2109                 // Basic VideoSampleEntry size.
2110                 return ERROR_MALFORMED;
2111             }
2112 
2113             if (mDataSource->readAt(
2114                         data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
2115                 return ERROR_IO;
2116             }
2117 
2118             // we can get data_ref_index value from U16_AT(&buffer[6])
2119             uint16_t width = U16_AT(&buffer[6 + 18]);
2120             uint16_t height = U16_AT(&buffer[6 + 20]);
2121 
2122             // The video sample is not standard-compliant if it has invalid dimension.
2123             // Use some default width and height value, and
2124             // let the decoder figure out the actual width and height (and thus
2125             // be prepared for INFO_FOMRAT_CHANGED event).
2126             if (width == 0)  width  = 352;
2127             if (height == 0) height = 288;
2128 
2129             // printf("*** coding='%s' width=%d height=%d\n",
2130             //        chunk, width, height);
2131 
2132             if (mLastTrack == NULL)
2133                 return ERROR_MALFORMED;
2134 
2135             if (chunk_type != FOURCC("encv")) {
2136                 // if the chunk type is encv, we'll get the type from the frma box later
2137                 AMediaFormat_setString(mLastTrack->meta,
2138                         AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
2139             }
2140             AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
2141             AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
2142 
2143             off64_t stop_offset = *offset + chunk_size;
2144             *offset = data_offset + sizeof(buffer);
2145             while (*offset < stop_offset) {
2146                 status_t err = parseChunk(offset, depth + 1);
2147                 if (err != OK) {
2148                     return err;
2149                 }
2150             }
2151 
2152             if (*offset != stop_offset) {
2153                 return ERROR_MALFORMED;
2154             }
2155             break;
2156         }
2157 
2158         case FOURCC("stco"):
2159         case FOURCC("co64"):
2160         {
2161             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2162                 return ERROR_MALFORMED;
2163             }
2164 
2165             status_t err =
2166                 mLastTrack->sampleTable->setChunkOffsetParams(
2167                         chunk_type, data_offset, chunk_data_size);
2168 
2169             *offset += chunk_size;
2170 
2171             if (err != OK) {
2172                 return err;
2173             }
2174 
2175             break;
2176         }
2177 
2178         case FOURCC("stsc"):
2179         {
2180             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2181                 return ERROR_MALFORMED;
2182 
2183             status_t err =
2184                 mLastTrack->sampleTable->setSampleToChunkParams(
2185                         data_offset, chunk_data_size);
2186 
2187             *offset += chunk_size;
2188 
2189             if (err != OK) {
2190                 return err;
2191             }
2192 
2193             break;
2194         }
2195 
2196         case FOURCC("stsz"):
2197         case FOURCC("stz2"):
2198         {
2199             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2200                 return ERROR_MALFORMED;
2201             }
2202 
2203             status_t err =
2204                 mLastTrack->sampleTable->setSampleSizeParams(
2205                         chunk_type, data_offset, chunk_data_size);
2206 
2207             *offset += chunk_size;
2208 
2209             if (err != OK) {
2210                 return err;
2211             }
2212 
2213             adjustRawDefaultFrameSize();
2214 
2215             size_t max_size;
2216             err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
2217 
2218             if (err != OK) {
2219                 return err;
2220             }
2221 
2222             if (max_size != 0) {
2223                 // Assume that a given buffer only contains at most 10 chunks,
2224                 // each chunk originally prefixed with a 2 byte length will
2225                 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
2226                 // and thus will grow by 2 bytes per chunk.
2227                 if (max_size > SIZE_MAX - 10 * 2) {
2228                     ALOGE("max sample size too big: %zu", max_size);
2229                     return ERROR_MALFORMED;
2230                 }
2231                 AMediaFormat_setInt32(mLastTrack->meta,
2232                         AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
2233             } else {
2234                 // No size was specified. Pick a conservatively large size.
2235                 uint32_t width, height;
2236                 if (!AMediaFormat_getInt32(mLastTrack->meta,
2237                         AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
2238                     !AMediaFormat_getInt32(mLastTrack->meta,
2239                             AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
2240                     ALOGE("No width or height, assuming worst case 1080p");
2241                     width = 1920;
2242                     height = 1080;
2243                 } else {
2244                     // A resolution was specified, check that it's not too big. The values below
2245                     // were chosen so that the calculations below don't cause overflows, they're
2246                     // not indicating that resolutions up to 32kx32k are actually supported.
2247                     if (width > 32768 || height > 32768) {
2248                         ALOGE("can't support %u x %u video", width, height);
2249                         return ERROR_MALFORMED;
2250                     }
2251                 }
2252 
2253                 const char *mime;
2254                 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2255                 if (!strncmp(mime, "audio/", 6)) {
2256                     // for audio, use 128KB
2257                     max_size = 1024 * 128;
2258                 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
2259                         || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
2260                         || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
2261                     // AVC & HEVC requires compression ratio of at least 2, and uses
2262                     // macroblocks
2263                     max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
2264                 } else {
2265                     // For all other formats there is no minimum compression
2266                     // ratio. Use compression ratio of 1.
2267                     max_size = width * height * 3 / 2;
2268                 }
2269                 // HACK: allow 10% overhead
2270                 // TODO: read sample size from traf atom for fragmented MPEG4.
2271                 max_size += max_size / 10;
2272                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
2273             }
2274 
2275             // NOTE: setting another piece of metadata invalidates any pointers (such as the
2276             // mimetype) previously obtained, so don't cache them.
2277             const char *mime;
2278             CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2279             // Calculate average frame rate.
2280             if (!strncasecmp("video/", mime, 6)) {
2281                 size_t nSamples = mLastTrack->sampleTable->countSamples();
2282                 if (nSamples == 0) {
2283                     int32_t trackId;
2284                     if (AMediaFormat_getInt32(mLastTrack->meta,
2285                             AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
2286                         for (size_t i = 0; i < mTrex.size(); i++) {
2287                             Trex *t = &mTrex.editItemAt(i);
2288                             if (t->track_ID == (uint32_t) trackId) {
2289                                 if (t->default_sample_duration > 0) {
2290                                     int32_t frameRate =
2291                                             mLastTrack->timescale / t->default_sample_duration;
2292                                     AMediaFormat_setInt32(mLastTrack->meta,
2293                                             AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2294                                 }
2295                                 break;
2296                             }
2297                         }
2298                     }
2299                 } else {
2300                     int64_t durationUs;
2301                     if (AMediaFormat_getInt64(mLastTrack->meta,
2302                             AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
2303                         if (durationUs > 0) {
2304                             int32_t frameRate = (nSamples * 1000000LL +
2305                                         (durationUs >> 1)) / durationUs;
2306                             AMediaFormat_setInt32(mLastTrack->meta,
2307                                     AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2308                         }
2309                     }
2310                     ALOGV("setting frame count %zu", nSamples);
2311                     AMediaFormat_setInt32(mLastTrack->meta,
2312                             AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
2313                 }
2314             }
2315 
2316             break;
2317         }
2318 
2319         case FOURCC("stts"):
2320         {
2321             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2322                 return ERROR_MALFORMED;
2323 
2324             *offset += chunk_size;
2325 
2326             if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
2327                 char chunk[5];
2328                 MakeFourCCString(mPath[depth - 1], chunk);
2329                 ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
2330                 break;
2331             }
2332 
2333             status_t err =
2334                 mLastTrack->sampleTable->setTimeToSampleParams(
2335                         data_offset, chunk_data_size);
2336 
2337             if (err != OK) {
2338                 return err;
2339             }
2340 
2341             break;
2342         }
2343 
2344         case FOURCC("ctts"):
2345         {
2346             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2347                 return ERROR_MALFORMED;
2348 
2349             *offset += chunk_size;
2350 
2351             status_t err =
2352                 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
2353                         data_offset, chunk_data_size);
2354 
2355             if (err != OK) {
2356                 return err;
2357             }
2358 
2359             break;
2360         }
2361 
2362         case FOURCC("stss"):
2363         {
2364             if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2365                 return ERROR_MALFORMED;
2366 
2367             *offset += chunk_size;
2368 
2369             status_t err =
2370                 mLastTrack->sampleTable->setSyncSampleParams(
2371                         data_offset, chunk_data_size);
2372 
2373             if (err != OK) {
2374                 return err;
2375             }
2376 
2377             break;
2378         }
2379 
2380         // \xA9xyz
2381         case FOURCC("\251xyz"):
2382         {
2383             *offset += chunk_size;
2384 
2385             // Best case the total data length inside "\xA9xyz" box would
2386             // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
2387             // where "\x00\x05" is the text string length with value = 5,
2388             // "\0x15\xc7" is the language code = en, and "+0+0/" is a
2389             // location (string) value with longitude = 0 and latitude = 0.
2390             // Since some devices encountered in the wild omit the trailing
2391             // slash, we'll allow that.
2392             if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
2393                 return ERROR_MALFORMED;
2394             }
2395 
2396             uint16_t len;
2397             if (!mDataSource->getUInt16(data_offset, &len)) {
2398                 return ERROR_IO;
2399             }
2400 
2401             // allow "+0+0" without trailing slash
2402             if (len < 4 || len > chunk_data_size - 4) {
2403                 return ERROR_MALFORMED;
2404             }
2405             // The location string following the language code is formatted
2406             // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
2407             // Allocate 2 extra bytes, in case we need to add a trailing slash,
2408             // and to add a terminating 0.
2409             std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
2410             if (!buffer) {
2411                 return NO_MEMORY;
2412             }
2413 
2414             if (mDataSource->readAt(
2415                         data_offset + 4, &buffer[0], len) < len) {
2416                 return ERROR_IO;
2417             }
2418 
2419             len = strlen(&buffer[0]);
2420             if (len < 4) {
2421                 return ERROR_MALFORMED;
2422             }
2423             // Add a trailing slash if there wasn't one.
2424             if (buffer[len - 1] != '/') {
2425                 buffer[len] = '/';
2426             }
2427             AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
2428             break;
2429         }
2430 
2431         case FOURCC("esds"):
2432         {
2433             *offset += chunk_size;
2434 
2435             if (chunk_data_size < 4) {
2436                 return ERROR_MALFORMED;
2437             }
2438 
2439             auto tmp = heapbuffer<uint8_t>(chunk_data_size);
2440             uint8_t *buffer = tmp.get();
2441             if (buffer == NULL) {
2442                 return -ENOMEM;
2443             }
2444 
2445             if (mDataSource->readAt(
2446                         data_offset, buffer, chunk_data_size) < chunk_data_size) {
2447                 return ERROR_IO;
2448             }
2449 
2450             if (U32_AT(buffer) != 0) {
2451                 // Should be version 0, flags 0.
2452                 return ERROR_MALFORMED;
2453             }
2454 
2455             if (mLastTrack == NULL)
2456                 return ERROR_MALFORMED;
2457 
2458             AMediaFormat_setBuffer(mLastTrack->meta,
2459                     AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
2460 
2461             if (mPath.size() >= 2
2462                     && mPath[mPath.size() - 2] == FOURCC("mp4a")) {
2463                 // Information from the ESDS must be relied on for proper
2464                 // setup of sample rate and channel count for MPEG4 Audio.
2465                 // The generic header appears to only contain generic
2466                 // information...
2467 
2468                 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
2469                         &buffer[4], chunk_data_size - 4);
2470 
2471                 if (err != OK) {
2472                     return err;
2473                 }
2474             }
2475             if (mPath.size() >= 2
2476                     && mPath[mPath.size() - 2] == FOURCC("mp4v")) {
2477                 // Check if the video is MPEG2
2478                 ESDS esds(&buffer[4], chunk_data_size - 4);
2479 
2480                 uint8_t objectTypeIndication;
2481                 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
2482                     if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
2483                         AMediaFormat_setString(mLastTrack->meta,
2484                                 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
2485                     }
2486                 }
2487             }
2488             break;
2489         }
2490 
2491         case FOURCC("btrt"):
2492         {
2493             *offset += chunk_size;
2494             if (mLastTrack == NULL) {
2495                 return ERROR_MALFORMED;
2496             }
2497 
2498             uint8_t buffer[12];
2499             if (chunk_data_size != sizeof(buffer)) {
2500                 return ERROR_MALFORMED;
2501             }
2502 
2503             if (mDataSource->readAt(
2504                     data_offset, buffer, chunk_data_size) < chunk_data_size) {
2505                 return ERROR_IO;
2506             }
2507 
2508             uint32_t maxBitrate = U32_AT(&buffer[4]);
2509             uint32_t avgBitrate = U32_AT(&buffer[8]);
2510             if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
2511                 AMediaFormat_setInt32(mLastTrack->meta,
2512                         AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
2513             }
2514             if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
2515                 AMediaFormat_setInt32(mLastTrack->meta,
2516                         AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
2517             }
2518             break;
2519         }
2520 
2521         case FOURCC("dfLa"):
2522         {
2523             *offset += chunk_size;
2524 
2525             // From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
2526             // 4 for mediaType, 4 for blockType and BlockLen, 34 for metadata
2527             uint8_t flacInfo[4 + 4 + 34];
2528 
2529             if (chunk_data_size != sizeof(flacInfo)) {
2530                 return ERROR_MALFORMED;
2531             }
2532 
2533             data_offset += 4;
2534             size_t flacOffset = 4;
2535             // Add flaC header mediaType to CSD
2536             strncpy((char *)flacInfo, "fLaC", 4);
2537 
2538             ssize_t bytesToRead = sizeof(flacInfo) - flacOffset;
2539             if (mDataSource->readAt(
2540                     data_offset, flacInfo + flacOffset, bytesToRead) < bytesToRead) {
2541                 return ERROR_IO;
2542             }
2543 
2544             data_offset += bytesToRead;
2545             AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
2546                                     sizeof(flacInfo));
2547             break;
2548         }
2549 
2550         case FOURCC("avcC"):
2551         {
2552             *offset += chunk_size;
2553 
2554             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2555 
2556             if (buffer.get() == NULL) {
2557                 ALOGE("b/28471206");
2558                 return NO_MEMORY;
2559             }
2560 
2561             if (mDataSource->readAt(
2562                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2563                 return ERROR_IO;
2564             }
2565 
2566             if (mLastTrack == NULL)
2567                 return ERROR_MALFORMED;
2568 
2569             AMediaFormat_setBuffer(mLastTrack->meta,
2570                     AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
2571 
2572             break;
2573         }
2574         case FOURCC("hvcC"):
2575         {
2576             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2577 
2578             if (buffer.get() == NULL) {
2579                 ALOGE("b/28471206");
2580                 return NO_MEMORY;
2581             }
2582 
2583             if (mDataSource->readAt(
2584                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2585                 return ERROR_IO;
2586             }
2587 
2588             if (mLastTrack == NULL)
2589                 return ERROR_MALFORMED;
2590 
2591             AMediaFormat_setBuffer(mLastTrack->meta,
2592                     AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
2593 
2594             *offset += chunk_size;
2595             break;
2596         }
2597 
2598         case FOURCC("vpcC"):
2599         case FOURCC("av1C"):
2600         {
2601             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2602 
2603             if (buffer.get() == NULL) {
2604                 ALOGE("b/28471206");
2605                 return NO_MEMORY;
2606             }
2607 
2608             if (mDataSource->readAt(
2609                         data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2610                 return ERROR_IO;
2611             }
2612 
2613             if (mLastTrack == NULL)
2614                 return ERROR_MALFORMED;
2615 
2616             AMediaFormat_setBuffer(mLastTrack->meta,
2617                    AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
2618 
2619             *offset += chunk_size;
2620             break;
2621         }
2622 
2623         case FOURCC("dvcC"):
2624         case FOURCC("dvvC"):
2625         case FOURCC("dvwC"):
2626         {
2627             if (chunk_data_size != 24) {
2628                 return ERROR_MALFORMED;
2629             }
2630 
2631             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2632 
2633             if (buffer.get() == NULL) {
2634                 ALOGE("b/28471206");
2635                 return NO_MEMORY;
2636             }
2637 
2638             if (mDataSource->readAt(data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2639                 return ERROR_IO;
2640             }
2641 
2642             if (mLastTrack == NULL)
2643                 return ERROR_MALFORMED;
2644 
2645             AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
2646                                     buffer.get(), chunk_data_size);
2647             AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME,
2648                                    MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
2649 
2650             *offset += chunk_size;
2651             break;
2652         }
2653 
2654         case FOURCC("d263"):
2655         {
2656             *offset += chunk_size;
2657             /*
2658              * d263 contains a fixed 7 bytes part:
2659              *   vendor - 4 bytes
2660              *   version - 1 byte
2661              *   level - 1 byte
2662              *   profile - 1 byte
2663              * optionally, "d263" box itself may contain a 16-byte
2664              * bit rate box (bitr)
2665              *   average bit rate - 4 bytes
2666              *   max bit rate - 4 bytes
2667              */
2668             char buffer[23];
2669             if (chunk_data_size != 7 &&
2670                 chunk_data_size != 23) {
2671                 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2672                 return ERROR_MALFORMED;
2673             }
2674 
2675             if (mDataSource->readAt(
2676                     data_offset, buffer, chunk_data_size) < chunk_data_size) {
2677                 return ERROR_IO;
2678             }
2679 
2680             if (mLastTrack == NULL)
2681                 return ERROR_MALFORMED;
2682 
2683             AMediaFormat_setBuffer(mLastTrack->meta,
2684                     AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
2685 
2686             break;
2687         }
2688 
        case FOURCC("meta"):
        {
            // 'meta' is a container: its children are parsed recursively
            // until stop_offset (the end of this chunk) is reached.
            off64_t stop_offset = *offset + chunk_size;
            *offset = data_offset;
            // When underQTMetaPath() matches, the payload is treated as
            // starting directly with child boxes; otherwise a 4-byte
            // version/flags word is expected first and is validated below.
            bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
            if (!isParsingMetaKeys) {
                uint8_t buffer[4];
                if (chunk_data_size < (off64_t)sizeof(buffer)) {
                    // Every early exit first moves *offset past the whole chunk.
                    *offset = stop_offset;
                    return ERROR_MALFORMED;
                }

                if (mDataSource->readAt(
                            data_offset, buffer, 4) < 4) {
                    *offset = stop_offset;
                    return ERROR_IO;
                }

                if (U32_AT(buffer) != 0) {
                    // Should be version 0, flags 0.

                    // If it's not, let's assume this is one of those
                    // apparently malformed chunks that don't have flags
                    // and completely different semantics than what's
                    // in the MPEG4 specs and skip it.
                    *offset = stop_offset;
                    return OK;
                }
                // Step over the version/flags word to the first child box.
                *offset +=  sizeof(buffer);
            }

            // Each recursive call advances *offset past one child box.
            while (*offset < stop_offset) {
                status_t err = parseChunk(offset, depth + 1);
                if (err != OK) {
                    return err;
                }
            }

            // The children must end exactly at the end of this chunk;
            // overshooting means a child claimed more data than was available.
            if (*offset != stop_offset) {
                return ERROR_MALFORMED;
            }
            break;
        }
2732 
2733         case FOURCC("iloc"):
2734         case FOURCC("iinf"):
2735         case FOURCC("iprp"):
2736         case FOURCC("pitm"):
2737         case FOURCC("idat"):
2738         case FOURCC("iref"):
2739         case FOURCC("ipro"):
2740         {
2741             if (mIsHeif || mIsAvif) {
2742                 if (mItemTable == NULL) {
2743                     mItemTable = new ItemTable(mDataSource, mIsHeif);
2744                 }
2745                 status_t err = mItemTable->parse(
2746                         chunk_type, data_offset, chunk_data_size);
2747                 if (err != OK) {
2748                     return err;
2749                 }
2750             }
2751             *offset += chunk_size;
2752             break;
2753         }
2754 
2755         case FOURCC("mean"):
2756         case FOURCC("name"):
2757         case FOURCC("data"):
2758         {
2759             *offset += chunk_size;
2760 
2761             if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2762                 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2763 
2764                 if (err != OK) {
2765                     return err;
2766                 }
2767             }
2768 
2769             break;
2770         }
2771 
2772         case FOURCC("mvhd"):
2773         {
2774             *offset += chunk_size;
2775 
2776             if (depth != 1) {
2777                 ALOGE("mvhd: depth %d", depth);
2778                 return ERROR_MALFORMED;
2779             }
2780             if (chunk_data_size < 32) {
2781                 return ERROR_MALFORMED;
2782             }
2783 
2784             uint8_t header[32];
2785             if (mDataSource->readAt(
2786                         data_offset, header, sizeof(header))
2787                     < (ssize_t)sizeof(header)) {
2788                 return ERROR_IO;
2789             }
2790 
2791             uint64_t creationTime;
2792             uint64_t duration = 0;
2793             if (header[0] == 1) {
2794                 creationTime = U64_AT(&header[4]);
2795                 mHeaderTimescale = U32_AT(&header[20]);
2796                 duration = U64_AT(&header[24]);
2797                 if (duration == 0xffffffffffffffff) {
2798                     duration = 0;
2799                 }
2800             } else if (header[0] != 0) {
2801                 return ERROR_MALFORMED;
2802             } else {
2803                 creationTime = U32_AT(&header[4]);
2804                 mHeaderTimescale = U32_AT(&header[12]);
2805                 uint32_t d32 = U32_AT(&header[16]);
2806                 if (d32 == 0xffffffff) {
2807                     d32 = 0;
2808                 }
2809                 duration = d32;
2810             }
2811             if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2812                 AMediaFormat_setInt64(mFileMetaData,
2813                         AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2814             }
2815 
2816             String8 s;
2817             if (convertTimeToDate(creationTime, &s)) {
2818                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.c_str());
2819             }
2820 
2821             break;
2822         }
2823 
2824         case FOURCC("mehd"):
2825         {
2826             *offset += chunk_size;
2827 
2828             if (chunk_data_size < 8) {
2829                 return ERROR_MALFORMED;
2830             }
2831 
2832             uint8_t flags[4];
2833             if (mDataSource->readAt(
2834                         data_offset, flags, sizeof(flags))
2835                     < (ssize_t)sizeof(flags)) {
2836                 return ERROR_IO;
2837             }
2838 
2839             uint64_t duration = 0;
2840             if (flags[0] == 1) {
2841                 // 64 bit
2842                 if (chunk_data_size < 12) {
2843                     return ERROR_MALFORMED;
2844                 }
2845                 mDataSource->getUInt64(data_offset + 4, &duration);
2846                 if (duration == 0xffffffffffffffff) {
2847                     duration = 0;
2848                 }
2849             } else if (flags[0] == 0) {
2850                 // 32 bit
2851                 uint32_t d32;
2852                 mDataSource->getUInt32(data_offset + 4, &d32);
2853                 if (d32 == 0xffffffff) {
2854                     d32 = 0;
2855                 }
2856                 duration = d32;
2857             } else {
2858                 return ERROR_MALFORMED;
2859             }
2860 
2861             if (duration != 0 && mHeaderTimescale != 0) {
2862                 AMediaFormat_setInt64(mFileMetaData,
2863                         AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2864             }
2865 
2866             break;
2867         }
2868 
2869         case FOURCC("mdat"):
2870         {
2871             mMdatFound = true;
2872 
2873             *offset += chunk_size;
2874             break;
2875         }
2876 
2877         case FOURCC("hdlr"):
2878         {
2879             *offset += chunk_size;
2880 
2881             if (underQTMetaPath(mPath, 3)) {
2882                 break;
2883             }
2884 
2885             uint32_t buffer;
2886             if (mDataSource->readAt(
2887                         data_offset + 8, &buffer, 4) < 4) {
2888                 return ERROR_IO;
2889             }
2890 
2891             uint32_t type = ntohl(buffer);
2892             // For the 3GPP file format, the handler-type within the 'hdlr' box
2893             // shall be 'text'. We also want to support 'sbtl' handler type
2894             // for a practical reason as various MPEG4 containers use it.
2895             if (type == FOURCC("text") || type == FOURCC("sbtl")) {
2896                 if (mLastTrack != NULL) {
2897                     AMediaFormat_setString(mLastTrack->meta,
2898                             AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
2899                 }
2900             }
2901 
2902             break;
2903         }
2904 
2905         case FOURCC("keys"):
2906         {
2907             *offset += chunk_size;
2908 
2909             if (underQTMetaPath(mPath, 3)) {
2910                 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2911                 if (err != OK) {
2912                     return err;
2913                 }
2914             }
2915             break;
2916         }
2917 
2918         case FOURCC("trex"):
2919         {
2920             *offset += chunk_size;
2921 
2922             if (chunk_data_size < 24) {
2923                 return ERROR_IO;
2924             }
2925             Trex trex;
2926             if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2927                 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2928                 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2929                 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2930                 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2931                 return ERROR_IO;
2932             }
2933             mTrex.add(trex);
2934             break;
2935         }
2936 
2937         case FOURCC("tx3g"):
2938         {
2939             if (mLastTrack == NULL)
2940                 return ERROR_MALFORMED;
2941 
2942             // complain about ridiculous chunks
2943             if (chunk_size > kMaxAtomSize) {
2944                 return ERROR_MALFORMED;
2945             }
2946 
2947             // complain about empty atoms
2948             if (chunk_data_size <= 0) {
2949                 ALOGE("b/124330204");
2950                 android_errorWriteLog(0x534e4554, "124330204");
2951                 return ERROR_MALFORMED;
2952             }
2953 
2954             // should fill buffer based on "data_offset" and "chunk_data_size"
2955             // instead of *offset and chunk_size;
2956             // but we've been feeding the extra data to consumers for multiple releases and
2957             // if those apps are compensating for it, we'd break them with such a change
2958             //
2959 
2960             if (mLastTrack->mTx3gBuffer == NULL) {
2961                 mLastTrack->mTx3gSize = 0;
2962                 mLastTrack->mTx3gFilled = 0;
2963             }
2964             if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
2965                 size_t growth = kTx3gGrowth;
2966                 if (growth < chunk_size) {
2967                     growth = chunk_size;
2968                 }
2969                 // although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
2970                 if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
2971                     ALOGE("b/124330204 - too much space");
2972                     android_errorWriteLog(0x534e4554, "124330204");
2973                     return ERROR_MALFORMED;
2974                 }
2975                 uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
2976                                                 mLastTrack->mTx3gSize + growth);
2977                 if (updated == NULL) {
2978                     return ERROR_MALFORMED;
2979                 }
2980                 mLastTrack->mTx3gBuffer = updated;
2981                 mLastTrack->mTx3gSize += growth;
2982             }
2983 
2984             if ((size_t)(mDataSource->readAt(*offset,
2985                                              mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
2986                                              chunk_size))
2987                     < chunk_size) {
2988 
2989                 // advance read pointer so we don't end up reading this again
2990                 *offset += chunk_size;
2991                 return ERROR_IO;
2992             }
2993 
2994             mLastTrack->mTx3gFilled += chunk_size;
2995             *offset += chunk_size;
2996             break;
2997         }
2998 
2999         case FOURCC("covr"):
3000         {
3001             *offset += chunk_size;
3002 
3003             ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
3004                   chunk_data_size, data_offset);
3005 
3006             if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
3007                 return ERROR_MALFORMED;
3008             }
3009             auto buffer = heapbuffer<uint8_t>(chunk_data_size);
3010             if (buffer.get() == NULL) {
3011                 ALOGE("b/28471206");
3012                 return NO_MEMORY;
3013             }
3014             if (mDataSource->readAt(
3015                 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
3016                 return ERROR_IO;
3017             }
3018             const int kSkipBytesOfDataBox = 16;
3019             if (chunk_data_size <= kSkipBytesOfDataBox) {
3020                 return ERROR_MALFORMED;
3021             }
3022 
3023             AMediaFormat_setBuffer(mFileMetaData,
3024                 AMEDIAFORMAT_KEY_ALBUMART,
3025                 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
3026 
3027             break;
3028         }
3029 
3030         case FOURCC("colr"):
3031         {
3032             *offset += chunk_size;
3033             // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3034             // ignore otherwise
3035             if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3036                 status_t err = parseColorInfo(data_offset, chunk_data_size);
3037                 if (err != OK) {
3038                     return err;
3039                 }
3040             }
3041 
3042             break;
3043         }
3044 
3045         case FOURCC("pasp"):
3046         {
3047             *offset += chunk_size;
3048             // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3049             // ignore otherwise
3050             if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3051                 status_t err = parsePaspBox(data_offset, chunk_data_size);
3052                 if (err != OK) {
3053                     return err;
3054                 }
3055             }
3056 
3057             break;
3058         }
3059 
3060         case FOURCC("titl"):
3061         case FOURCC("perf"):
3062         case FOURCC("auth"):
3063         case FOURCC("gnre"):
3064         case FOURCC("albm"):
3065         case FOURCC("yrrc"):
3066         {
3067             *offset += chunk_size;
3068 
3069             status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
3070 
3071             if (err != OK) {
3072                 return err;
3073             }
3074 
3075             break;
3076         }
3077 
3078         case FOURCC("ID32"):
3079         {
3080             *offset += chunk_size;
3081 
3082             if (chunk_data_size < 6) {
3083                 return ERROR_MALFORMED;
3084             }
3085 
3086             parseID3v2MetaData(data_offset + 6, chunk_data_size - 6);
3087 
3088             break;
3089         }
3090 
3091         case FOURCC("----"):
3092         {
3093             mLastCommentMean.clear();
3094             mLastCommentName.clear();
3095             mLastCommentData.clear();
3096             *offset += chunk_size;
3097             break;
3098         }
3099 
3100         case FOURCC("sidx"):
3101         {
3102             status_t err = parseSegmentIndex(data_offset, chunk_data_size);
3103             if (err != OK) {
3104                 return err;
3105             }
3106             *offset += chunk_size;
3107             return UNKNOWN_ERROR; // stop parsing after sidx
3108         }
3109 
3110         case FOURCC("ac-3"):
3111         {
3112             *offset += chunk_size;
3113             // bypass ac-3 if parse fail
3114             if (parseAC3SpecificBox(data_offset) != OK) {
3115                 if (mLastTrack != NULL) {
3116                     ALOGW("Fail to parse ac-3");
3117                     mLastTrack->skipTrack = true;
3118                 }
3119             }
3120             return OK;
3121         }
3122 
3123         case FOURCC("ec-3"):
3124         {
3125             *offset += chunk_size;
3126             // bypass ec-3 if parse fail
3127             if (parseEAC3SpecificBox(data_offset) != OK) {
3128                 if (mLastTrack != NULL) {
3129                     ALOGW("Fail to parse ec-3");
3130                     mLastTrack->skipTrack = true;
3131                 }
3132             }
3133             return OK;
3134         }
3135 
3136         case FOURCC("ac-4"):
3137         {
3138             *offset += chunk_size;
3139             // bypass ac-4 if parse fail
3140             if (parseAC4SpecificBox(data_offset) != OK) {
3141                 if (mLastTrack != NULL) {
3142                     ALOGW("Fail to parse ac-4");
3143                     mLastTrack->skipTrack = true;
3144                 }
3145             }
3146             return OK;
3147         }
3148 
3149         case FOURCC("ftyp"):
3150         {
3151             if (chunk_data_size < 8 || depth != 0) {
3152                 return ERROR_MALFORMED;
3153             }
3154 
3155             off64_t stop_offset = *offset + chunk_size;
3156             uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
3157             std::set<uint32_t> brandSet;
3158             for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
3159                 if (i == 1) {
3160                     // Skip this index, it refers to the minorVersion,
3161                     // not a brand.
3162                     continue;
3163                 }
3164 
3165                 uint32_t brand;
3166                 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
3167                     return ERROR_MALFORMED;
3168                 }
3169 
3170                 brand = ntohl(brand);
3171                 brandSet.insert(brand);
3172             }
3173 
3174             if (brandSet.count(FOURCC("qt  ")) > 0) {
3175                 mIsQT = true;
3176             } else {
3177                 if (brandSet.count(FOURCC("mif1")) > 0
3178                  && brandSet.count(FOURCC("heic")) > 0) {
3179                     ALOGV("identified HEIF image");
3180 
3181                     mIsHeif = true;
3182                     brandSet.erase(FOURCC("mif1"));
3183                     brandSet.erase(FOURCC("heic"));
3184                 } else if (brandSet.count(FOURCC("avif")) > 0 ||
3185                        brandSet.count(FOURCC("avis")) > 0) {
3186                     ALOGV("identified AVIF image");
3187                     mIsAvif = true;
3188                     brandSet.erase(FOURCC("avif"));
3189                     brandSet.erase(FOURCC("avis"));
3190                 }
3191 
3192                 if (!brandSet.empty()) {
3193                     // This means that the file should have moov box.
3194                     // It could be any iso files (mp4, heifs, etc.)
3195                     mHasMoovBox = true;
3196                     if (mIsHeif || mIsAvif) {
3197                         ALOGV("identified %s image with other tracks", mIsHeif ? "HEIF" : "AVIF");
3198                     }
3199                 }
3200             }
3201 
3202             *offset = stop_offset;
3203 
3204             break;
3205         }
3206 
3207         default:
3208         {
3209             // check if we're parsing 'ilst' for meta keys
3210             // if so, treat type as a number (key-id).
3211             if (underQTMetaPath(mPath, 3)) {
3212                 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
3213                 if (err != OK) {
3214                     return err;
3215                 }
3216             }
3217 
3218             *offset += chunk_size;
3219             break;
3220         }
3221     }
3222 
3223     return OK;
3224 }
3225 
parseChannelCountSampleRate(off64_t * offset,uint16_t * channelCount,uint16_t * sampleRate)3226 status_t MPEG4Extractor::parseChannelCountSampleRate(
3227         off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
3228     // skip 16 bytes:
3229     //  + 6-byte reserved,
3230     //  + 2-byte data reference index,
3231     //  + 8-byte reserved
3232     *offset += 16;
3233     if (!mDataSource->getUInt16(*offset, channelCount)) {
3234         ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
3235         return ERROR_MALFORMED;
3236     }
3237     // skip 8 bytes:
3238     //  + 2-byte channelCount,
3239     //  + 2-byte sample size,
3240     //  + 4-byte reserved
3241     *offset += 8;
3242     if (!mDataSource->getUInt16(*offset, sampleRate)) {
3243         ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
3244         return ERROR_MALFORMED;
3245     }
3246     // skip 4 bytes:
3247     //  + 2-byte sampleRate,
3248     //  + 2-byte reserved
3249     *offset += 4;
3250     return OK;
3251 }
3252 
parseAC4SpecificBox(off64_t offset)3253 status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
3254     if (mLastTrack == NULL) {
3255         return ERROR_MALFORMED;
3256     }
3257 
3258     uint16_t sampleRate, channelCount;
3259     status_t status;
3260     if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
3261         return status;
3262     }
3263     uint32_t size;
3264     // + 4-byte size
3265     // + 4-byte type
3266     // + 3-byte payload
3267     const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
3268     if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
3269         ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");
3270         return ERROR_MALFORMED;
3271     }
3272 
3273     // + 4-byte size
3274     offset += 4;
3275     uint32_t type;
3276     if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac4")) {
3277         ALOGE("MPEG4Extractor: error while reading ac-4 specific block: header not dac4");
3278         return ERROR_MALFORMED;
3279     }
3280 
3281     // + 4-byte type
3282     offset += 4;
3283     const uint32_t kAC4SpecificBoxPayloadSize = 1176;
3284     uint8_t chunk[kAC4SpecificBoxPayloadSize];
3285     ssize_t dsiSize = size - 8; // size of box - size and type fields
3286     if (dsiSize >= (ssize_t)kAC4SpecificBoxPayloadSize ||
3287         mDataSource->readAt(offset, chunk, dsiSize) != dsiSize) {
3288         ALOGE("MPEG4Extractor: error while reading ac-4 specific block: bitstream fields");
3289         return ERROR_MALFORMED;
3290     }
3291     // + size-byte payload
3292     offset += dsiSize;
3293     ABitReader br(chunk, dsiSize);
3294     AC4DSIParser parser(br);
3295     if (!parser.parse()){
3296         ALOGE("MPEG4Extractor: error while parsing ac-4 specific block");
3297         return ERROR_MALFORMED;
3298     }
3299 
3300     AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC4);
3301     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3302     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3303 
3304     AudioPresentationCollection presentations;
3305     // translate the AC4 presentation information to audio presentations for this track
3306     AC4DSIParser::AC4Presentations ac4Presentations = parser.getPresentations();
3307     if (!ac4Presentations.empty()) {
3308         for (const auto& ac4Presentation : ac4Presentations) {
3309             auto& presentation = ac4Presentation.second;
3310             if (!presentation.mEnabled) {
3311                 continue;
3312             }
3313             AudioPresentationV1 ap;
3314             ap.mPresentationId = presentation.mGroupIndex;
3315             ap.mProgramId = presentation.mProgramID;
3316             ap.mLanguage = presentation.mLanguage;
3317             if (presentation.mPreVirtualized) {
3318                 ap.mMasteringIndication = MASTERED_FOR_HEADPHONE;
3319             } else {
3320                 switch (presentation.mChannelMode) {
3321                     case AC4Parser::AC4Presentation::kChannelMode_Mono:
3322                     case AC4Parser::AC4Presentation::kChannelMode_Stereo:
3323                         ap.mMasteringIndication = MASTERED_FOR_STEREO;
3324                         break;
3325                     case AC4Parser::AC4Presentation::kChannelMode_3_0:
3326                     case AC4Parser::AC4Presentation::kChannelMode_5_0:
3327                     case AC4Parser::AC4Presentation::kChannelMode_5_1:
3328                     case AC4Parser::AC4Presentation::kChannelMode_7_0_34:
3329                     case AC4Parser::AC4Presentation::kChannelMode_7_1_34:
3330                     case AC4Parser::AC4Presentation::kChannelMode_7_0_52:
3331                     case AC4Parser::AC4Presentation::kChannelMode_7_1_52:
3332                         ap.mMasteringIndication = MASTERED_FOR_SURROUND;
3333                         break;
3334                     case AC4Parser::AC4Presentation::kChannelMode_7_0_322:
3335                     case AC4Parser::AC4Presentation::kChannelMode_7_1_322:
3336                     case AC4Parser::AC4Presentation::kChannelMode_7_0_4:
3337                     case AC4Parser::AC4Presentation::kChannelMode_7_1_4:
3338                     case AC4Parser::AC4Presentation::kChannelMode_9_0_4:
3339                     case AC4Parser::AC4Presentation::kChannelMode_9_1_4:
3340                     case AC4Parser::AC4Presentation::kChannelMode_22_2:
3341                         ap.mMasteringIndication = MASTERED_FOR_3D;
3342                         break;
3343                     default:
3344                         ALOGE("Invalid channel mode in AC4 presentation");
3345                         return ERROR_MALFORMED;
3346                 }
3347             }
3348 
3349             ap.mAudioDescriptionAvailable = (presentation.mContentClassifier ==
3350                     AC4Parser::AC4Presentation::kVisuallyImpaired);
3351             ap.mSpokenSubtitlesAvailable = (presentation.mContentClassifier ==
3352                     AC4Parser::AC4Presentation::kVoiceOver);
3353             ap.mDialogueEnhancementAvailable = presentation.mHasDialogEnhancements;
3354             if (!ap.mLanguage.empty()) {
3355                 ap.mLabels.emplace(ap.mLanguage, presentation.mDescription);
3356             }
3357             presentations.push_back(std::move(ap));
3358         }
3359     }
3360 
3361     if (presentations.empty()) {
3362         // Clear audio presentation info in metadata.
3363         AMediaFormat_setBuffer(
3364                 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO, nullptr, 0);
3365     } else {
3366         std::ostringstream outStream(std::ios::out);
3367         serializeAudioPresentations(presentations, &outStream);
3368         AMediaFormat_setBuffer(
3369                 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
3370                 outStream.str().data(), outStream.str().size());
3371     }
3372     return OK;
3373 }
3374 
parseEAC3SpecificBox(off64_t offset)3375 status_t MPEG4Extractor::parseEAC3SpecificBox(off64_t offset) {
3376     if (mLastTrack == NULL) {
3377         return ERROR_MALFORMED;
3378     }
3379 
3380     uint16_t sampleRate, channels;
3381     status_t status;
3382     if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3383         return status;
3384     }
3385     uint32_t size;
3386     // + 4-byte size
3387     // + 4-byte type
3388     // + 3-byte payload
3389     const uint32_t kEAC3SpecificBoxMinSize = 11;
3390     // 13 + 3 + (8 * (2 + 5 + 5 + 3 + 1 + 3 + 4 + (14 * 9 + 1))) bits == 152 bytes theoretical max
3391     // calculated from the required bits read below as well as the maximum number of independent
3392     // and dependant sub streams you can have
3393     const uint32_t kEAC3SpecificBoxMaxSize = 152;
3394     if (!mDataSource->getUInt32(offset, &size) ||
3395         size < kEAC3SpecificBoxMinSize ||
3396         size > kEAC3SpecificBoxMaxSize) {
3397         ALOGE("MPEG4Extractor: error while reading eac-3 block: cannot read specific box size");
3398         return ERROR_MALFORMED;
3399     }
3400 
3401     offset += 4;
3402     uint32_t type;
3403     if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dec3")) {
3404         ALOGE("MPEG4Extractor: error while reading eac-3 specific block: header not dec3");
3405         return ERROR_MALFORMED;
3406     }
3407 
3408     offset += 4;
3409     uint8_t* chunk = new (std::nothrow) uint8_t[size];
3410     if (chunk == NULL) {
3411         return ERROR_MALFORMED;
3412     }
3413 
3414     if (mDataSource->readAt(offset, chunk, size) != (ssize_t)size) {
3415         ALOGE("MPEG4Extractor: error while reading eac-3 specific block: bitstream fields");
3416         delete[] chunk;
3417         return ERROR_MALFORMED;
3418     }
3419 
3420     ABitReader br(chunk, size);
3421     static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3422     static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3423 
3424     if (br.numBitsLeft() < 16) {
3425         delete[] chunk;
3426         return ERROR_MALFORMED;
3427     }
3428     unsigned data_rate = br.getBits(13);
3429     ALOGV("EAC3 data rate = %d", data_rate);
3430 
3431     unsigned num_ind_sub = br.getBits(3) + 1;
3432     ALOGV("EAC3 independant substreams = %d", num_ind_sub);
3433     if (br.numBitsLeft() < (num_ind_sub * 23)) {
3434         delete[] chunk;
3435         return ERROR_MALFORMED;
3436     }
3437 
3438     unsigned channelCount = 0;
3439     for (unsigned i = 0; i < num_ind_sub; i++) {
3440         unsigned fscod = br.getBits(2);
3441         if (fscod == 3) {
3442             ALOGE("Incorrect fscod (3) in EAC3 header");
3443             delete[] chunk;
3444             return ERROR_MALFORMED;
3445         }
3446         unsigned boxSampleRate = sampleRateTable[fscod];
3447         if (boxSampleRate != sampleRate) {
3448             ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3449                 boxSampleRate, sampleRate);
3450             delete[] chunk;
3451             return ERROR_MALFORMED;
3452         }
3453 
3454         unsigned bsid = br.getBits(5);
3455         if (bsid == 9 || bsid == 10) {
3456             ALOGW("EAC3 stream (bsid=%d) may be silenced by the decoder", bsid);
3457         } else if (bsid > 16) {
3458             ALOGE("EAC3 stream (bsid=%d) is not compatible with ETSI TS 102 366 v1.4.1", bsid);
3459             delete[] chunk;
3460             return ERROR_MALFORMED;
3461         }
3462 
3463         // skip
3464         br.skipBits(2);
3465         unsigned bsmod = br.getBits(3);
3466         unsigned acmod = br.getBits(3);
3467         unsigned lfeon = br.getBits(1);
3468         // we currently only support the first stream
3469         if (i == 0)
3470             channelCount = channelCountTable[acmod] + lfeon;
3471         ALOGV("bsmod = %d, acmod = %d, lfeon = %d", bsmod, acmod, lfeon);
3472 
3473         br.skipBits(3);
3474         unsigned num_dep_sub = br.getBits(4);
3475         ALOGV("EAC3 dependant substreams = %d", num_dep_sub);
3476         if (num_dep_sub != 0) {
3477             if (br.numBitsLeft() < 9) {
3478                 delete[] chunk;
3479                 return ERROR_MALFORMED;
3480             }
3481             static const char* chan_loc_tbl[] = { "Lc/Rc","Lrs/Rrs","Cs","Ts","Lsd/Rsd",
3482                 "Lw/Rw","Lvh/Rvh","Cvh","Lfe2" };
3483             unsigned chan_loc = br.getBits(9);
3484             unsigned mask = 1;
3485             for (unsigned j = 0; j < 9; j++, mask <<= 1) {
3486                 if ((chan_loc & mask) != 0) {
3487                     // we currently only support the first stream
3488                     if (i == 0) {
3489                         channelCount++;
3490                         // these are 2 channels in the mask
3491                         if (j == 0 || j == 1 || j == 4 || j == 5 || j == 6) {
3492                             channelCount++;
3493                         }
3494                     }
3495                     ALOGV(" %s", chan_loc_tbl[j]);
3496                 }
3497             }
3498         } else {
3499             if (br.numBitsLeft() == 0) {
3500                 delete[] chunk;
3501                 return ERROR_MALFORMED;
3502             }
3503             br.skipBits(1);
3504         }
3505     }
3506 
3507     if (br.numBitsLeft() != 0) {
3508         if (br.numBitsLeft() < 8) {
3509             delete[] chunk;
3510             return ERROR_MALFORMED;
3511         }
3512         unsigned mask = br.getBits(8);
3513         for (unsigned i = 0; i < 8; i++) {
3514             if (((0x1 << i) & mask) == 0)
3515                 continue;
3516 
3517             if (br.numBitsLeft() < 8) {
3518                 delete[] chunk;
3519                 return ERROR_MALFORMED;
3520             }
3521             switch (i) {
3522                 case 0: {
3523                     unsigned complexity = br.getBits(8);
3524                     ALOGV("Found a JOC stream with complexity = %d", complexity);
3525                 }break;
3526                 default: {
3527                     br.skipBits(8);
3528                 }break;
3529             }
3530         }
3531     }
3532     AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_EAC3);
3533     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3534     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3535 
3536     delete[] chunk;
3537     return OK;
3538 }
3539 
parseAC3SpecificBox(off64_t offset)3540 status_t MPEG4Extractor::parseAC3SpecificBox(off64_t offset) {
3541     if (mLastTrack == NULL) {
3542         return ERROR_MALFORMED;
3543     }
3544 
3545     uint16_t sampleRate, channels;
3546     status_t status;
3547     if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3548         return status;
3549     }
3550     uint32_t size;
3551     // + 4-byte size
3552     // + 4-byte type
3553     // + 3-byte payload
3554     const uint32_t kAC3SpecificBoxSize = 11;
3555     if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
3556         ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
3557         return ERROR_MALFORMED;
3558     }
3559 
3560     offset += 4;
3561     uint32_t type;
3562     if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac3")) {
3563         ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
3564         return ERROR_MALFORMED;
3565     }
3566 
3567     offset += 4;
3568     const uint32_t kAC3SpecificBoxPayloadSize = 3;
3569     uint8_t chunk[kAC3SpecificBoxPayloadSize];
3570     if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
3571         ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
3572         return ERROR_MALFORMED;
3573     }
3574 
3575     ABitReader br(chunk, sizeof(chunk));
3576     static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3577     static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3578 
3579     unsigned fscod = br.getBits(2);
3580     if (fscod == 3) {
3581         ALOGE("Incorrect fscod (3) in AC3 header");
3582         return ERROR_MALFORMED;
3583     }
3584     unsigned boxSampleRate = sampleRateTable[fscod];
3585     if (boxSampleRate != sampleRate) {
3586         ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3587             boxSampleRate, sampleRate);
3588         return ERROR_MALFORMED;
3589     }
3590 
3591     unsigned bsid = br.getBits(5);
3592     if (bsid > 8) {
3593         ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
3594         return ERROR_MALFORMED;
3595     }
3596 
3597     // skip
3598     br.skipBits(3); // bsmod
3599 
3600     unsigned acmod = br.getBits(3);
3601     unsigned lfeon = br.getBits(1);
3602     unsigned channelCount = channelCountTable[acmod] + lfeon;
3603 
3604     AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC3);
3605     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3606     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3607     return OK;
3608 }
3609 
parseALACSampleEntry(off64_t * offset)3610 status_t MPEG4Extractor::parseALACSampleEntry(off64_t *offset) {
3611     // See 'external/alac/ALACMagicCookieDescription.txt for the detail'.
3612     // Store ALAC magic cookie (decoder needs it).
3613     uint8_t alacInfo[12];
3614     off64_t data_offset = *offset;
3615 
3616     if (mDataSource->readAt(
3617             data_offset, alacInfo, sizeof(alacInfo)) < (ssize_t)sizeof(alacInfo)) {
3618         return ERROR_IO;
3619     }
3620     uint32_t size = U32_AT(&alacInfo[0]);
3621     if ((size != ALAC_SPECIFIC_INFO_SIZE) ||
3622             (U32_AT(&alacInfo[4]) != FOURCC("alac")) ||
3623             (U32_AT(&alacInfo[8]) != 0)) {
3624         ALOGV("Size:%u, U32_AT(&alacInfo[4]):%u, U32_AT(&alacInfo[8]):%u",
3625             size, U32_AT(&alacInfo[4]), U32_AT(&alacInfo[8]));
3626         return ERROR_MALFORMED;
3627     }
3628     data_offset += sizeof(alacInfo);
3629     uint8_t cookie[size - sizeof(alacInfo)];
3630     if (mDataSource->readAt(
3631             data_offset, cookie, sizeof(cookie)) < (ssize_t)sizeof(cookie)) {
3632         return ERROR_IO;
3633     }
3634 
3635     uint8_t bitsPerSample = cookie[5];
3636     AMediaFormat_setInt32(mLastTrack->meta,
3637             AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, bitsPerSample);
3638     AMediaFormat_setInt32(mLastTrack->meta,
3639             AMEDIAFORMAT_KEY_CHANNEL_COUNT, cookie[9]);
3640     AMediaFormat_setInt32(mLastTrack->meta,
3641             AMEDIAFORMAT_KEY_SAMPLE_RATE, U32_AT(&cookie[20]));
3642     AMediaFormat_setBuffer(mLastTrack->meta,
3643             AMEDIAFORMAT_KEY_CSD_0, cookie, sizeof(cookie));
3644     data_offset += sizeof(cookie);
3645     *offset = data_offset;
3646     return OK;
3647 }
3648 
parseSegmentIndex(off64_t offset,size_t size)3649 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
3650   ALOGV("MPEG4Extractor::parseSegmentIndex");
3651 
3652     if (size < 12) {
3653       return -EINVAL;
3654     }
3655 
3656     uint32_t flags;
3657     if (!mDataSource->getUInt32(offset, &flags)) {
3658         return ERROR_MALFORMED;
3659     }
3660 
3661     uint32_t version = flags >> 24;
3662     flags &= 0xffffff;
3663 
3664     ALOGV("sidx version %d", version);
3665 
3666     uint32_t referenceId;
3667     if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
3668         return ERROR_MALFORMED;
3669     }
3670 
3671     uint32_t timeScale;
3672     if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
3673         return ERROR_MALFORMED;
3674     }
3675     ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
3676     if (timeScale == 0)
3677         return ERROR_MALFORMED;
3678 
3679     uint64_t earliestPresentationTime;
3680     uint64_t firstOffset;
3681 
3682     offset += 12;
3683     size -= 12;
3684 
3685     if (version == 0) {
3686         if (size < 8) {
3687             return -EINVAL;
3688         }
3689         uint32_t tmp;
3690         if (!mDataSource->getUInt32(offset, &tmp)) {
3691             return ERROR_MALFORMED;
3692         }
3693         earliestPresentationTime = tmp;
3694         if (!mDataSource->getUInt32(offset + 4, &tmp)) {
3695             return ERROR_MALFORMED;
3696         }
3697         firstOffset = tmp;
3698         offset += 8;
3699         size -= 8;
3700     } else {
3701         if (size < 16) {
3702             return -EINVAL;
3703         }
3704         if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
3705             return ERROR_MALFORMED;
3706         }
3707         if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
3708             return ERROR_MALFORMED;
3709         }
3710         offset += 16;
3711         size -= 16;
3712     }
3713     ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
3714 
3715     if (size < 4) {
3716         return -EINVAL;
3717     }
3718 
3719     uint16_t referenceCount;
3720     if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
3721         return ERROR_MALFORMED;
3722     }
3723     offset += 4;
3724     size -= 4;
3725     ALOGV("refcount: %d", referenceCount);
3726 
3727     if (size < referenceCount * 12) {
3728         return -EINVAL;
3729     }
3730 
3731     uint64_t total_duration = 0;
3732     for (unsigned int i = 0; i < referenceCount; i++) {
3733         uint32_t d1, d2, d3;
3734 
3735         if (!mDataSource->getUInt32(offset, &d1) ||     // size
3736             !mDataSource->getUInt32(offset + 4, &d2) || // duration
3737             !mDataSource->getUInt32(offset + 8, &d3)) { // flags
3738             return ERROR_MALFORMED;
3739         }
3740 
3741         if (d1 & 0x80000000) {
3742             ALOGW("sub-sidx boxes not supported yet");
3743         }
3744         bool sap = d3 & 0x80000000;
3745         uint32_t saptype = (d3 >> 28) & 7;
3746         if (!sap || (saptype != 1 && saptype != 2)) {
3747             // type 1 and 2 are sync samples
3748             ALOGW("not a stream access point, or unsupported type: %08x", d3);
3749         }
3750         total_duration += d2;
3751         offset += 12;
3752         ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
3753         SidxEntry se;
3754         se.mSize = d1 & 0x7fffffff;
3755         se.mDurationUs = 1000000LL * d2 / timeScale;
3756         mSidxEntries.add(se);
3757     }
3758 
3759     uint64_t sidxDuration = total_duration * 1000000 / timeScale;
3760 
3761     if (mLastTrack == NULL)
3762         return ERROR_MALFORMED;
3763 
3764     int64_t metaDuration;
3765     if (!AMediaFormat_getInt64(mLastTrack->meta,
3766                 AMEDIAFORMAT_KEY_DURATION, &metaDuration) || metaDuration == 0) {
3767         AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, sidxDuration);
3768     }
3769     return OK;
3770 }
3771 
parseQTMetaKey(off64_t offset,size_t size)3772 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
3773     if (size < 8) {
3774         return ERROR_MALFORMED;
3775     }
3776 
3777     uint32_t count;
3778     if (!mDataSource->getUInt32(offset + 4, &count)) {
3779         return ERROR_MALFORMED;
3780     }
3781 
3782     if (mMetaKeyMap.size() > 0) {
3783         ALOGW("'keys' atom seen again, discarding existing entries");
3784         mMetaKeyMap.clear();
3785     }
3786 
3787     off64_t keyOffset = offset + 8;
3788     off64_t stopOffset = offset + size;
3789     for (size_t i = 1; i <= count; i++) {
3790         if (keyOffset + 8 > stopOffset) {
3791             return ERROR_MALFORMED;
3792         }
3793 
3794         uint32_t keySize;
3795         if (!mDataSource->getUInt32(keyOffset, &keySize)
3796                 || keySize < 8
3797                 || keyOffset + keySize > stopOffset) {
3798             return ERROR_MALFORMED;
3799         }
3800 
3801         uint32_t type;
3802         if (!mDataSource->getUInt32(keyOffset + 4, &type)
3803                 || type != FOURCC("mdta")) {
3804             return ERROR_MALFORMED;
3805         }
3806 
3807         keySize -= 8;
3808         keyOffset += 8;
3809 
3810         auto keyData = heapbuffer<uint8_t>(keySize);
3811         if (keyData.get() == NULL) {
3812             return ERROR_MALFORMED;
3813         }
3814         if (mDataSource->readAt(
3815                 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
3816             return ERROR_MALFORMED;
3817         }
3818 
3819         AString key((const char *)keyData.get(), keySize);
3820         mMetaKeyMap.add(i, key);
3821 
3822         keyOffset += keySize;
3823     }
3824     return OK;
3825 }
3826 
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)3827 status_t MPEG4Extractor::parseQTMetaVal(
3828         int32_t keyId, off64_t offset, size_t size) {
3829     ssize_t index = mMetaKeyMap.indexOfKey(keyId);
3830     if (index < 0) {
3831         // corresponding key is not present, ignore
3832         return ERROR_MALFORMED;
3833     }
3834 
3835     if (size <= 16) {
3836         return ERROR_MALFORMED;
3837     }
3838     uint32_t dataSize;
3839     if (!mDataSource->getUInt32(offset, &dataSize)
3840             || dataSize > size || dataSize <= 16) {
3841         return ERROR_MALFORMED;
3842     }
3843     uint32_t atomFourCC;
3844     if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
3845             || atomFourCC != FOURCC("data")) {
3846         return ERROR_MALFORMED;
3847     }
3848     uint32_t dataType;
3849     if (!mDataSource->getUInt32(offset + 8, &dataType)
3850             || ((dataType & 0xff000000) != 0)) {
3851         // not well-known type
3852         return ERROR_MALFORMED;
3853     }
3854 
3855     dataSize -= 16;
3856     offset += 16;
3857 
3858     if (dataType == 23 && dataSize >= 4) {
3859         // BE Float32
3860         uint32_t val;
3861         if (!mDataSource->getUInt32(offset, &val)) {
3862             return ERROR_MALFORMED;
3863         }
3864         if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
3865             AMediaFormat_setFloat(mFileMetaData, AMEDIAFORMAT_KEY_CAPTURE_RATE, *(float *)&val);
3866         }
3867     } else if (dataType == 67 && dataSize >= 4) {
3868         // BE signed int32
3869         uint32_t val;
3870         if (!mDataSource->getUInt32(offset, &val)) {
3871             return ERROR_MALFORMED;
3872         }
3873         if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
3874             AMediaFormat_setInt32(mFileMetaData,
3875                     AMEDIAFORMAT_KEY_TEMPORAL_LAYER_COUNT, val);
3876         }
3877     } else {
3878         // add more keys if needed
3879         ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
3880     }
3881 
3882     return OK;
3883 }
3884 
parseTrackHeader(off64_t data_offset,off64_t data_size)3885 status_t MPEG4Extractor::parseTrackHeader(
3886         off64_t data_offset, off64_t data_size) {
3887     if (data_size < 4) {
3888         return ERROR_MALFORMED;
3889     }
3890 
3891     uint8_t version;
3892     if (mDataSource->readAt(data_offset, &version, 1) < 1) {
3893         return ERROR_IO;
3894     }
3895 
3896     size_t dynSize = (version == 1) ? 36 : 24;
3897 
3898     uint8_t buffer[36 + 60];
3899 
3900     if (data_size != (off64_t)dynSize + 60) {
3901         return ERROR_MALFORMED;
3902     }
3903 
3904     if (mDataSource->readAt(
3905                 data_offset, buffer, data_size) < (ssize_t)data_size) {
3906         return ERROR_IO;
3907     }
3908 
3909     int32_t id;
3910 
3911     if (version == 1) {
3912         // we can get ctime value from U64_AT(&buffer[4])
3913         // we can get mtime value from U64_AT(&buffer[12])
3914         id = U32_AT(&buffer[20]);
3915         // we can get duration value from U64_AT(&buffer[28])
3916     } else if (version == 0) {
3917         // we can get ctime value from U32_AT(&buffer[4])
3918         // we can get mtime value from U32_AT(&buffer[8])
3919         id = U32_AT(&buffer[12]);
3920         // we can get duration value from U32_AT(&buffer[20])
3921     } else {
3922         return ERROR_UNSUPPORTED;
3923     }
3924 
3925     if (mLastTrack == NULL)
3926         return ERROR_MALFORMED;
3927 
3928     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_TRACK_ID, id);
3929 
3930     size_t matrixOffset = dynSize + 16;
3931     int32_t a00 = U32_AT(&buffer[matrixOffset]);
3932     int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
3933     int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
3934     int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
3935 
3936 #if 0
3937     int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
3938     int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
3939 
3940     ALOGI("x' = %.2f * x + %.2f * y + %.2f",
3941          a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
3942     ALOGI("y' = %.2f * x + %.2f * y + %.2f",
3943          a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
3944 #endif
3945 
3946     uint32_t rotationDegrees;
3947 
3948     static const int32_t kFixedOne = 0x10000;
3949     if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
3950         // Identity, no rotation
3951         rotationDegrees = 0;
3952     } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
3953         rotationDegrees = 90;
3954     } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
3955         rotationDegrees = 270;
3956     } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
3957         rotationDegrees = 180;
3958     } else {
3959         ALOGW("We only support 0,90,180,270 degree rotation matrices");
3960         rotationDegrees = 0;
3961     }
3962 
3963     if (rotationDegrees != 0) {
3964         AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_ROTATION, rotationDegrees);
3965     }
3966 
3967     // Handle presentation display size, which could be different
3968     // from the image size indicated by AMEDIAFORMAT_KEY_WIDTH and AMEDIAFORMAT_KEY_HEIGHT.
3969     uint32_t width = U32_AT(&buffer[dynSize + 52]);
3970     uint32_t height = U32_AT(&buffer[dynSize + 56]);
3971     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_WIDTH, width >> 16);
3972     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_HEIGHT, height >> 16);
3973 
3974     return OK;
3975 }
3976 
parseITunesMetaData(off64_t offset,size_t size)3977 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
3978     if (size == 0) {
3979         return OK;
3980     }
3981 
3982     if (size < 4 || size == SIZE_MAX) {
3983         return ERROR_MALFORMED;
3984     }
3985 
3986     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3987     if (buffer == NULL) {
3988         return ERROR_MALFORMED;
3989     }
3990     if (mDataSource->readAt(
3991                 offset, buffer, size) != (ssize_t)size) {
3992         delete[] buffer;
3993         buffer = NULL;
3994 
3995         return ERROR_IO;
3996     }
3997 
3998     uint32_t flags = U32_AT(buffer);
3999 
4000     const char *metadataKey = nullptr;
4001     char chunk[5];
4002     MakeFourCCString(mPath[4], chunk);
4003     ALOGV("meta: %s @ %lld", chunk, (long long)offset);
4004     switch ((int32_t)mPath[4]) {
4005         case FOURCC("\251alb"):
4006         {
4007             metadataKey = AMEDIAFORMAT_KEY_ALBUM;
4008             break;
4009         }
4010         case FOURCC("\251ART"):
4011         {
4012             metadataKey = AMEDIAFORMAT_KEY_ARTIST;
4013             break;
4014         }
4015         case FOURCC("aART"):
4016         {
4017             metadataKey = AMEDIAFORMAT_KEY_ALBUMARTIST;
4018             break;
4019         }
4020         case FOURCC("\251day"):
4021         {
4022             metadataKey = AMEDIAFORMAT_KEY_YEAR;
4023             break;
4024         }
4025         case FOURCC("\251nam"):
4026         {
4027             metadataKey = AMEDIAFORMAT_KEY_TITLE;
4028             break;
4029         }
4030         case FOURCC("\251wrt"):
4031         {
4032             // various open source taggers agree that the "©wrt" tag is for composer, not writer
4033             metadataKey = AMEDIAFORMAT_KEY_COMPOSER;
4034             break;
4035         }
4036         case FOURCC("covr"):
4037         {
4038             metadataKey = AMEDIAFORMAT_KEY_ALBUMART;
4039             break;
4040         }
4041         case FOURCC("gnre"):
4042         case FOURCC("\251gen"):
4043         {
4044             metadataKey = AMEDIAFORMAT_KEY_GENRE;
4045             break;
4046         }
4047         case FOURCC("cpil"):
4048         {
4049             if (size == 9 && flags == 21) {
4050                 char tmp[16];
4051                 sprintf(tmp, "%d",
4052                         (int)buffer[size - 1]);
4053 
4054                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_COMPILATION, tmp);
4055             }
4056             break;
4057         }
4058         case FOURCC("trkn"):
4059         {
4060             if (size == 16 && flags == 0) {
4061                 char tmp[16];
4062                 uint16_t* pTrack = (uint16_t*)&buffer[10];
4063                 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
4064                 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
4065 
4066                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4067             }
4068             break;
4069         }
4070         case FOURCC("disk"):
4071         {
4072             if ((size == 14 || size == 16) && flags == 0) {
4073                 char tmp[16];
4074                 uint16_t* pDisc = (uint16_t*)&buffer[10];
4075                 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
4076                 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
4077 
4078                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DISCNUMBER, tmp);
4079             }
4080             break;
4081         }
4082         case FOURCC("----"):
4083         {
4084             buffer[size] = '\0';
4085             switch (mPath[5]) {
4086                 case FOURCC("mean"):
4087                     mLastCommentMean = ((const char *)buffer + 4);
4088                     break;
4089                 case FOURCC("name"):
4090                     mLastCommentName = ((const char *)buffer + 4);
4091                     break;
4092                 case FOURCC("data"):
4093                     if (size < 8) {
4094                         delete[] buffer;
4095                         buffer = NULL;
4096                         ALOGE("b/24346430");
4097                         return ERROR_MALFORMED;
4098                     }
4099                     mLastCommentData = ((const char *)buffer + 8);
4100                     break;
4101             }
4102 
4103             // Once we have a set of mean/name/data info, go ahead and process
4104             // it to see if its something we are interested in.  Whether or not
4105             // were are interested in the specific tag, make sure to clear out
4106             // the set so we can be ready to process another tuple should one
4107             // show up later in the file.
4108             if ((mLastCommentMean.length() != 0) &&
4109                 (mLastCommentName.length() != 0) &&
4110                 (mLastCommentData.length() != 0)) {
4111 
4112                 if (mLastCommentMean == "com.apple.iTunes"
4113                         && mLastCommentName == "iTunSMPB") {
4114                     int32_t delay, padding;
4115                     if (sscanf(mLastCommentData,
4116                                " %*x %x %x %*x", &delay, &padding) == 2) {
4117                         if (mLastTrack == NULL) {
4118                             delete[] buffer;
4119                             return ERROR_MALFORMED;
4120                         }
4121 
4122                         AMediaFormat_setInt32(mLastTrack->meta,
4123                                 AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
4124                         AMediaFormat_setInt32(mLastTrack->meta,
4125                                 AMEDIAFORMAT_KEY_ENCODER_PADDING, padding);
4126                     }
4127                 }
4128 
4129                 mLastCommentMean.clear();
4130                 mLastCommentName.clear();
4131                 mLastCommentData.clear();
4132             }
4133             break;
4134         }
4135 
4136         default:
4137             break;
4138     }
4139 
4140     void *tmpData;
4141     size_t tmpDataSize;
4142     const char *s;
4143     if (size >= 8 && metadataKey &&
4144             !AMediaFormat_getBuffer(mFileMetaData, metadataKey, &tmpData, &tmpDataSize) &&
4145             !AMediaFormat_getString(mFileMetaData, metadataKey, &s)) {
4146         if (!strcmp(metadataKey, "albumart")) {
4147             AMediaFormat_setBuffer(mFileMetaData, metadataKey,
4148                     buffer + 8, size - 8);
4149         } else if (!strcmp(metadataKey, AMEDIAFORMAT_KEY_GENRE)) {
4150             if (flags == 0) {
4151                 // uint8_t genre code, iTunes genre codes are
4152                 // the standard id3 codes, except they start
4153                 // at 1 instead of 0 (e.g. Pop is 14, not 13)
4154                 // We use standard id3 numbering, so subtract 1.
4155                 int genrecode = (int)buffer[size - 1];
4156                 genrecode--;
4157                 if (genrecode < 0) {
4158                     genrecode = 255; // reserved for 'unknown genre'
4159                 }
4160                 char genre[10];
4161                 sprintf(genre, "%d", genrecode);
4162 
4163                 AMediaFormat_setString(mFileMetaData, metadataKey, genre);
4164             } else if (flags == 1) {
4165                 // custom genre string
4166                 buffer[size] = '\0';
4167 
4168                 AMediaFormat_setString(mFileMetaData,
4169                         metadataKey, (const char *)buffer + 8);
4170             }
4171         } else {
4172             buffer[size] = '\0';
4173 
4174             AMediaFormat_setString(mFileMetaData,
4175                     metadataKey, (const char *)buffer + 8);
4176         }
4177     }
4178 
4179     delete[] buffer;
4180     buffer = NULL;
4181 
4182     return OK;
4183 }
4184 
parseColorInfo(off64_t offset,size_t size)4185 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
4186     if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
4187         return ERROR_MALFORMED;
4188     }
4189 
4190     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4191     if (buffer == NULL) {
4192         return ERROR_MALFORMED;
4193     }
4194     if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4195         delete[] buffer;
4196         buffer = NULL;
4197 
4198         return ERROR_IO;
4199     }
4200 
4201     int32_t type = U32_AT(&buffer[0]);
4202     if ((type == FOURCC("nclx") && size >= 11)
4203             || (type == FOURCC("nclc") && size >= 10)) {
4204         // only store the first color specification
4205         int32_t existingColor;
4206         if (!AMediaFormat_getInt32(mLastTrack->meta,
4207                 AMEDIAFORMAT_KEY_COLOR_RANGE, &existingColor)) {
4208             int32_t primaries = U16_AT(&buffer[4]);
4209             int32_t isotransfer = U16_AT(&buffer[6]);
4210             int32_t coeffs = U16_AT(&buffer[8]);
4211             bool fullRange = (type == FOURCC("nclx")) && (buffer[10] & 128);
4212 
4213             int32_t range = 0;
4214             int32_t standard = 0;
4215             int32_t transfer = 0;
4216             ColorUtils::convertIsoColorAspectsToPlatformAspects(
4217                     primaries, isotransfer, coeffs, fullRange,
4218                     &range, &standard, &transfer);
4219 
4220             if (range != 0) {
4221                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_RANGE, range);
4222             }
4223             if (standard != 0) {
4224                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_STANDARD, standard);
4225             }
4226             if (transfer != 0) {
4227                 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_TRANSFER, transfer);
4228             }
4229         }
4230     }
4231 
4232     delete[] buffer;
4233     buffer = NULL;
4234 
4235     return OK;
4236 }
4237 
parsePaspBox(off64_t offset,size_t size)4238 status_t MPEG4Extractor::parsePaspBox(off64_t offset, size_t size) {
4239     if (size < 8 || size == SIZE_MAX || mLastTrack == NULL) {
4240         return ERROR_MALFORMED;
4241     }
4242 
4243     uint32_t data[2]; // hSpacing, vSpacing
4244     if (mDataSource->readAt(offset, data, 8) < 8) {
4245         return ERROR_IO;
4246     }
4247     uint32_t hSpacing = ntohl(data[0]);
4248     uint32_t vSpacing = ntohl(data[1]);
4249 
4250     if (hSpacing != 0 && vSpacing != 0) {
4251         AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_WIDTH, hSpacing);
4252         AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_HEIGHT, vSpacing);
4253     }
4254 
4255     return OK;
4256 }
4257 
parse3GPPMetaData(off64_t offset,size_t size,int depth)4258 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
4259     if (size < 4 || size == SIZE_MAX) {
4260         return ERROR_MALFORMED;
4261     }
4262 
4263     uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4264     if (buffer == NULL) {
4265         return ERROR_MALFORMED;
4266     }
4267     if (mDataSource->readAt(
4268                 offset, buffer, size) != (ssize_t)size) {
4269         delete[] buffer;
4270         buffer = NULL;
4271 
4272         return ERROR_IO;
4273     }
4274 
4275     const char *metadataKey = nullptr;
4276     switch (mPath[depth]) {
4277         case FOURCC("titl"):
4278         {
4279             metadataKey = "title";
4280             break;
4281         }
4282         case FOURCC("perf"):
4283         {
4284             metadataKey = "artist";
4285             break;
4286         }
4287         case FOURCC("auth"):
4288         {
4289             metadataKey = "writer";
4290             break;
4291         }
4292         case FOURCC("gnre"):
4293         {
4294             metadataKey = "genre";
4295             break;
4296         }
4297         case FOURCC("albm"):
4298         {
4299             if (buffer[size - 1] != '\0') {
4300               char tmp[4];
4301               sprintf(tmp, "%u", buffer[size - 1]);
4302 
4303               AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4304             }
4305 
4306             metadataKey = "album";
4307             break;
4308         }
4309         case FOURCC("yrrc"):
4310         {
4311             if (size < 6) {
4312                 delete[] buffer;
4313                 buffer = NULL;
4314                 ALOGE("b/62133227");
4315                 android_errorWriteLog(0x534e4554, "62133227");
4316                 return ERROR_MALFORMED;
4317             }
4318             char tmp[5];
4319             uint16_t year = U16_AT(&buffer[4]);
4320 
4321             if (year < 10000) {
4322                 sprintf(tmp, "%u", year);
4323 
4324                 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_YEAR, tmp);
4325             }
4326             break;
4327         }
4328 
4329         default:
4330             break;
4331     }
4332 
4333     if (metadataKey) {
4334         bool isUTF8 = true; // Common case
4335         char16_t *framedata = NULL;
4336         int len16 = 0; // Number of UTF-16 characters
4337 
4338         // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
4339         if (size < 6) {
4340             delete[] buffer;
4341             buffer = NULL;
4342             return ERROR_MALFORMED;
4343         }
4344 
4345         if (size - 6 >= 4) {
4346             len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
4347             framedata = (char16_t *)(buffer + 6);
4348             if (0xfffe == *framedata) {
4349                 // endianness marker (BOM) doesn't match host endianness
4350                 for (int i = 0; i < len16; i++) {
4351                     framedata[i] = bswap_16(framedata[i]);
4352                 }
4353                 // BOM is now swapped to 0xfeff, we will execute next block too
4354             }
4355 
4356             if (0xfeff == *framedata) {
4357                 // Remove the BOM
4358                 framedata++;
4359                 len16--;
4360                 isUTF8 = false;
4361             }
4362             // else normal non-zero-length UTF-8 string
4363             // we can't handle UTF-16 without BOM as there is no other
4364             // indication of encoding.
4365         }
4366 
4367         if (isUTF8) {
4368             buffer[size] = 0;
4369             AMediaFormat_setString(mFileMetaData, metadataKey, (const char *)buffer + 6);
4370         } else {
4371             // Convert from UTF-16 string to UTF-8 string.
4372             String8 tmpUTF8str(framedata, len16);
4373             AMediaFormat_setString(mFileMetaData, metadataKey, tmpUTF8str.c_str());
4374         }
4375     }
4376 
4377     delete[] buffer;
4378     buffer = NULL;
4379 
4380     return OK;
4381 }
4382 
parseID3v2MetaData(off64_t offset,uint64_t size)4383 void MPEG4Extractor::parseID3v2MetaData(off64_t offset, uint64_t size) {
4384     uint8_t *buffer = new (std::nothrow) uint8_t[size];
4385     if (buffer == NULL) {
4386         return;
4387     }
4388     if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4389         delete[] buffer;
4390         buffer = NULL;
4391         return;
4392     }
4393 
4394     ID3 id3(buffer, size, true /* ignorev1 */);
4395     delete[] buffer;
4396 
4397     if (id3.isValid()) {
4398         struct Map {
4399             const char *key;
4400             const char *tag1;
4401             const char *tag2;
4402         };
4403         static const Map kMap[] = {
4404             { AMEDIAFORMAT_KEY_ALBUM, "TALB", "TAL" },
4405             { AMEDIAFORMAT_KEY_ARTIST, "TPE1", "TP1" },
4406             { AMEDIAFORMAT_KEY_ALBUMARTIST, "TPE2", "TP2" },
4407             { AMEDIAFORMAT_KEY_COMPOSER, "TCOM", "TCM" },
4408             { AMEDIAFORMAT_KEY_GENRE, "TCON", "TCO" },
4409             { AMEDIAFORMAT_KEY_TITLE, "TIT2", "TT2" },
4410             { AMEDIAFORMAT_KEY_YEAR, "TYE", "TYER" },
4411             { AMEDIAFORMAT_KEY_AUTHOR, "TXT", "TEXT" },
4412             { AMEDIAFORMAT_KEY_CDTRACKNUMBER, "TRK", "TRCK" },
4413             { AMEDIAFORMAT_KEY_DISCNUMBER, "TPA", "TPOS" },
4414             { AMEDIAFORMAT_KEY_COMPILATION, "TCP", "TCMP" },
4415         };
4416         static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
4417 
4418         for (size_t i = 0; i < kNumMapEntries; ++i) {
4419             const char *ss;
4420             if (!AMediaFormat_getString(mFileMetaData, kMap[i].key, &ss)) {
4421                 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
4422                 if (it->done()) {
4423                     delete it;
4424                     it = new ID3::Iterator(id3, kMap[i].tag2);
4425                 }
4426 
4427                 if (it->done()) {
4428                     delete it;
4429                     continue;
4430                 }
4431 
4432                 String8 s;
4433                 it->getString(&s);
4434                 delete it;
4435 
4436                 AMediaFormat_setString(mFileMetaData, kMap[i].key, s);
4437             }
4438         }
4439 
4440         size_t dataSize;
4441         String8 mime;
4442         const void *data = id3.getAlbumArt(&dataSize, &mime);
4443 
4444         if (data) {
4445             AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_ALBUMART, data, dataSize);
4446         }
4447     }
4448 }
4449 
getTrack(size_t index)4450 MediaTrackHelper *MPEG4Extractor::getTrack(size_t index) {
4451     status_t err;
4452     if ((err = readMetaData()) != OK) {
4453         return NULL;
4454     }
4455 
4456     Track *track = mFirstTrack;
4457     while (index > 0) {
4458         if (track == NULL) {
4459             return NULL;
4460         }
4461 
4462         track = track->next;
4463         --index;
4464     }
4465 
4466     if (track == NULL) {
4467         return NULL;
4468     }
4469 
4470 
4471     Trex *trex = NULL;
4472     int32_t trackId;
4473     if (AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
4474         for (size_t i = 0; i < mTrex.size(); i++) {
4475             Trex *t = &mTrex.editItemAt(i);
4476             if (t->track_ID == (uint32_t) trackId) {
4477                 trex = t;
4478                 break;
4479             }
4480         }
4481     } else {
4482         ALOGE("b/21657957");
4483         return NULL;
4484     }
4485 
4486     ALOGV("getTrack called, pssh: %zu", mPssh.size());
4487 
4488     const char *mime;
4489     if (!AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)) {
4490         return NULL;
4491     }
4492     sp<ItemTable> itemTable;
4493     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4494         void *data;
4495         size_t size;
4496         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4497             return NULL;
4498         }
4499 
4500         const uint8_t *ptr = (const uint8_t *)data;
4501 
4502         if (size < 7 || ptr[0] != 1) {  // configurationVersion == 1
4503             return NULL;
4504         }
4505     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
4506             || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4507         void *data;
4508         size_t size;
4509         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4510             return NULL;
4511         }
4512 
4513         const uint8_t *ptr = (const uint8_t *)data;
4514 
4515         if (size < 22 || ptr[0] != 1) {  // configurationVersion == 1
4516             return NULL;
4517         }
4518         if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4519             itemTable = mItemTable;
4520         }
4521     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4522         void *data;
4523         size_t size;
4524         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)
4525                 || size != 24) {
4526             return NULL;
4527         }
4528 
4529         const uint8_t *ptr = (const uint8_t *)data;
4530         // dv_major.dv_minor Should be 1.0 or 2.1
4531         if ((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)) {
4532             return NULL;
4533         }
4534    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)
4535            || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4536         void *data;
4537         size_t size;
4538         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4539             return NULL;
4540         }
4541 
4542         const uint8_t *ptr = (const uint8_t *)data;
4543 
4544         if (size < 4 || ptr[0] != 0x81) {  // configurationVersion == 1
4545             return NULL;
4546         }
4547         if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4548             itemTable = mItemTable;
4549         }
4550     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4551         void *data;
4552         size_t size;
4553         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4554             return NULL;
4555         }
4556 
4557         const uint8_t *ptr = (const uint8_t *)data;
4558 
4559         if (size < 5 || ptr[0] != 0x01) {  // configurationVersion == 1
4560             return NULL;
4561         }
4562     }
4563 
4564     ALOGV("track->elst_shift_start_ticks :%" PRIu64, track->elst_shift_start_ticks);
4565 
4566     uint64_t elst_initial_empty_edit_ticks = 0;
4567     if (mHeaderTimescale != 0) {
4568         // Convert empty_edit_ticks from movie timescale to media timescale.
4569         uint64_t elst_initial_empty_edit_ticks_mul = 0, elst_initial_empty_edit_ticks_add = 0;
4570         if (__builtin_mul_overflow(track->elst_initial_empty_edit_ticks, track->timescale,
4571                                    &elst_initial_empty_edit_ticks_mul) ||
4572             __builtin_add_overflow(elst_initial_empty_edit_ticks_mul, (mHeaderTimescale / 2),
4573                                    &elst_initial_empty_edit_ticks_add)) {
4574             ALOGE("track->elst_initial_empty_edit_ticks overflow");
4575             return nullptr;
4576         }
4577         elst_initial_empty_edit_ticks = elst_initial_empty_edit_ticks_add / mHeaderTimescale;
4578     }
4579     ALOGV("elst_initial_empty_edit_ticks in MediaTimeScale :%" PRIu64,
4580           elst_initial_empty_edit_ticks);
4581 
4582     MPEG4Source* source =
4583             new MPEG4Source(track->meta, mDataSource, track->timescale, track->sampleTable,
4584                             mSidxEntries, trex, mMoofOffset, itemTable,
4585                             track->elst_shift_start_ticks, elst_initial_empty_edit_ticks);
4586     if (source->init() != OK) {
4587         delete source;
4588         return NULL;
4589     }
4590     return source;
4591 }
4592 
4593 // static
verifyTrack(Track * track)4594 status_t MPEG4Extractor::verifyTrack(Track *track) {
4595     const char *mime;
4596     CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
4597 
4598     void *data;
4599     size_t size;
4600     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4601         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4602             return ERROR_MALFORMED;
4603         }
4604     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
4605         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4606             return ERROR_MALFORMED;
4607         }
4608     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4609         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
4610             return ERROR_MALFORMED;
4611         }
4612     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4613         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4614             return ERROR_MALFORMED;
4615         }
4616     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4617         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4618             return ERROR_MALFORMED;
4619         }
4620     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
4621             || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
4622             || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
4623         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_ESDS, &data, &size)) {
4624             return ERROR_MALFORMED;
4625         }
4626     }
4627 
4628     if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
4629         // Make sure we have all the metadata we need.
4630         ALOGE("stbl atom missing/invalid.");
4631         return ERROR_MALFORMED;
4632     }
4633 
4634     if (track->timescale == 0) {
4635         ALOGE("timescale invalid.");
4636         return ERROR_MALFORMED;
4637     }
4638 
4639     return OK;
4640 }
4641 
// Audio object type (AOT) codes. Only the enumerators below are defined;
// the other codes are listed in comments purely for reference.
enum AUDIO_OBJECT_TYPE {
    // -1: AOT_NONE, 0: AOT_NULL_OBJECT, 1: AOT_AAC_MAIN (Main profile)
    AOT_AAC_LC      = 2,    // Low Complexity object
    // 3: AOT_AAC_SSR, 4: AOT_AAC_LTP
    AOT_SBR         = 5,
    // 6: AOT_AAC_SCAL, 7: AOT_TWIN_VQ, 8: AOT_CELP, 9: AOT_HVXC
    // 10: AOT_RSVD_10 (reserved), 11: AOT_RSVD_11 (reserved)
    // 12: AOT_TTSI (TTSI Object)
    // 13: AOT_MAIN_SYNTH (Main Synthetic object)
    // 14: AOT_WAV_TAB_SYNTH (Wavetable Synthesis object)
    // 15: AOT_GEN_MIDI (General MIDI object)
    // 16: AOT_ALG_SYNTH_AUD_FX (Algorithmic Synthesis and Audio FX object)
    AOT_ER_AAC_LC   = 17,   // Error Resilient (ER) AAC Low Complexity
    // 18: AOT_RSVD_18 (reserved)
    // 19: AOT_ER_AAC_LTP (ER AAC LTP object)
    AOT_ER_AAC_SCAL = 20,   // Error Resilient (ER) AAC Scalable object
    // 21: AOT_ER_TWIN_VQ (ER TwinVQ object)
    AOT_ER_BSAC     = 22,   // Error Resilient (ER) BSAC object
    AOT_ER_AAC_LD   = 23,   // Error Resilient (ER) AAC LowDelay object
    // 24: AOT_ER_CELP, 25: AOT_ER_HVXC, 26: AOT_ER_HILN,
    // 27: AOT_ER_PARA (ER Parametric object)
    // 28: AOT_RSVD_28 (might become SSC)
    AOT_PS          = 29,   // PS, Parametric Stereo (includes SBR)
    // 30: AOT_MPEGS (MPEG Surround)

    AOT_ESCAPE      = 31,   // Signal AOT uses more than 5 bits

    // 32: AOT_MP3ONMP4_L1, 33: AOT_MP3ONMP4_L2, 34: AOT_MP3ONMP4_L3
    //     (MPEG Layer 1/2/3 in mp4)
    // 35: AOT_RSVD_35 (might become DST), 36: AOT_RSVD_36 (might become ALS)
    // 37: AOT_AAC_SLS (AAC + SLS), 38: AOT_SLS (SLS)
    // 39: AOT_ER_AAC_ELD (AAC Enhanced Low Delay)

    AOT_USAC        = 42,   // USAC
    // 43: AOT_SAOC (SAOC), 44: AOT_LD_MPEGS (Low Delay MPEG Surround)

    // 50: AOT_RSVD50 (Interim AOT for Rsvd50)
};
4693 
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)4694 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
4695         const void *esds_data, size_t esds_size) {
4696     ESDS esds(esds_data, esds_size);
4697 
4698     uint8_t objectTypeIndication;
4699     if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
4700         return ERROR_MALFORMED;
4701     }
4702 
4703     if (objectTypeIndication == 0xe1) {
4704         // This isn't MPEG4 audio at all, it's QCELP 14k...
4705         if (mLastTrack == NULL)
4706             return ERROR_MALFORMED;
4707 
4708         AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_QCELP);
4709         return OK;
4710     }
4711 
4712     if (objectTypeIndication == 0x6B || objectTypeIndication == 0x69) {
4713         // mp3 audio
4714         if (mLastTrack == NULL)
4715             return ERROR_MALFORMED;
4716 
4717         AMediaFormat_setString(mLastTrack->meta,AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MPEG);
4718         return OK;
4719     }
4720 
4721     if (mLastTrack != NULL) {
4722         uint32_t maxBitrate = 0;
4723         uint32_t avgBitrate = 0;
4724         esds.getBitRate(&maxBitrate, &avgBitrate);
4725         if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
4726             AMediaFormat_setInt32(mLastTrack->meta,
4727                     AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
4728         }
4729         if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
4730             AMediaFormat_setInt32(mLastTrack->meta,
4731                     AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
4732         }
4733     }
4734 
4735     const uint8_t *csd;
4736     size_t csd_size;
4737     if (esds.getCodecSpecificInfo(
4738                 (const void **)&csd, &csd_size) != OK) {
4739         return ERROR_MALFORMED;
4740     }
4741 
4742     if (kUseHexDump) {
4743         printf("ESD of size %zu\n", csd_size);
4744         hexdump(csd, csd_size);
4745     }
4746 
4747     if (csd_size == 0) {
4748         // There's no further information, i.e. no codec specific data
4749         // Let's assume that the information provided in the mpeg4 headers
4750         // is accurate and hope for the best.
4751 
4752         return OK;
4753     }
4754 
4755     if (csd_size < 2) {
4756         return ERROR_MALFORMED;
4757     }
4758 
4759     if (objectTypeIndication == 0xdd) {
4760         // vorbis audio
4761         if (csd[0] != 0x02) {
4762             return ERROR_MALFORMED;
4763         }
4764 
4765         // codecInfo starts with two lengths, len1 and len2, that are
4766         // "Xiph-style-lacing encoded"..
4767 
4768         size_t offset = 1;
4769         size_t len1 = 0;
4770         while (offset < csd_size && csd[offset] == 0xff) {
4771             if (__builtin_add_overflow(len1, 0xff, &len1)) {
4772                 return ERROR_MALFORMED;
4773             }
4774             ++offset;
4775         }
4776         if (offset >= csd_size) {
4777             return ERROR_MALFORMED;
4778         }
4779         if (__builtin_add_overflow(len1, csd[offset], &len1)) {
4780             return ERROR_MALFORMED;
4781         }
4782         ++offset;
4783         if (len1 == 0) {
4784             return ERROR_MALFORMED;
4785         }
4786 
4787         size_t len2 = 0;
4788         while (offset < csd_size && csd[offset] == 0xff) {
4789             if (__builtin_add_overflow(len2, 0xff, &len2)) {
4790                 return ERROR_MALFORMED;
4791             }
4792             ++offset;
4793         }
4794         if (offset >= csd_size) {
4795             return ERROR_MALFORMED;
4796         }
4797         if (__builtin_add_overflow(len2, csd[offset], &len2)) {
4798             return ERROR_MALFORMED;
4799         }
4800         ++offset;
4801         if (len2 == 0) {
4802             return ERROR_MALFORMED;
4803         }
4804         if (offset + len1 > csd_size || csd[offset] != 0x01) {
4805             return ERROR_MALFORMED;
4806         }
4807 
4808         if (mLastTrack == NULL) {
4809             return ERROR_MALFORMED;
4810         }
4811         // formerly kKeyVorbisInfo
4812         AMediaFormat_setBuffer(mLastTrack->meta,
4813                 AMEDIAFORMAT_KEY_CSD_0, &csd[offset], len1);
4814 
4815         if (__builtin_add_overflow(offset, len1, &offset) ||
4816                 offset >= csd_size || csd[offset] != 0x03) {
4817             return ERROR_MALFORMED;
4818         }
4819 
4820         if (__builtin_add_overflow(offset, len2, &offset) ||
4821                 offset >= csd_size || csd[offset] != 0x05) {
4822             return ERROR_MALFORMED;
4823         }
4824 
4825         // formerly kKeyVorbisBooks
4826         AMediaFormat_setBuffer(mLastTrack->meta,
4827                 AMEDIAFORMAT_KEY_CSD_1, &csd[offset], csd_size - offset);
4828         AMediaFormat_setString(mLastTrack->meta,
4829                 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_VORBIS);
4830 
4831         return OK;
4832     }
4833 
4834     static uint32_t kSamplingRate[] = {
4835         96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
4836         16000, 12000, 11025, 8000, 7350
4837     };
4838 
4839     ABitReader br(csd, csd_size);
4840     uint32_t objectType = br.getBits(5);
4841 
4842     if (objectType == AOT_ESCAPE) {  // AAC-ELD => additional 6 bits
4843         objectType = 32 + br.getBits(6);
4844     }
4845 
4846     if (mLastTrack == NULL)
4847         return ERROR_MALFORMED;
4848 
4849     //keep AOT type
4850     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_AAC_PROFILE, objectType);
4851 
4852     uint32_t freqIndex = br.getBits(4);
4853 
4854     int32_t sampleRate = 0;
4855     int32_t numChannels = 0;
4856     if (freqIndex == 15) {
4857         if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
4858         sampleRate = br.getBits(24);
4859         numChannels = br.getBits(4);
4860     } else {
4861         if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4862         numChannels = br.getBits(4);
4863 
4864         if (freqIndex == 13 || freqIndex == 14) {
4865             return ERROR_MALFORMED;
4866         }
4867 
4868         sampleRate = kSamplingRate[freqIndex];
4869     }
4870 
4871     if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 tbl 1.13
4872         if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4873         uint32_t extFreqIndex = br.getBits(4);
4874         if (extFreqIndex == 15) {
4875             if (csd_size < 8) {
4876                 return ERROR_MALFORMED;
4877             }
4878             if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
4879             br.skipBits(24); // extSampleRate
4880         } else {
4881             if (extFreqIndex == 13 || extFreqIndex == 14) {
4882                 return ERROR_MALFORMED;
4883             }
4884             //extSampleRate = kSamplingRate[extFreqIndex];
4885         }
4886         //TODO: save the extension sampling rate value in meta data =>
4887         //      AMediaFormat_setInt32(mLastTrack->meta, kKeyExtSampleRate, extSampleRate);
4888     }
4889 
4890     switch (numChannels) {
4891         // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
4892         case 0:
4893         case 1:// FC
4894         case 2:// FL FR
4895         case 3:// FC, FL FR
4896         case 4:// FC, FL FR, RC
4897         case 5:// FC, FL FR, SL SR
4898         case 6:// FC, FL FR, SL SR, LFE
4899             //numChannels already contains the right value
4900             break;
4901         case 11:// FC, FL FR, SL SR, RC, LFE
4902             numChannels = 7;
4903             break;
4904         case 7: // FC, FCL FCR, FL FR, SL SR, LFE
4905         case 12:// FC, FL  FR,  SL SR, RL RR, LFE
4906         case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
4907             numChannels = 8;
4908             break;
4909         default:
4910             return ERROR_UNSUPPORTED;
4911     }
4912 
4913     {
4914         if (objectType == AOT_SBR || objectType == AOT_PS) {
4915             if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4916             objectType = br.getBits(5);
4917 
4918             if (objectType == AOT_ESCAPE) {
4919                 if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
4920                 objectType = 32 + br.getBits(6);
4921             }
4922         }
4923         if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
4924                 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
4925                 objectType == AOT_ER_BSAC) {
4926             if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
4927             br.skipBits(1); // frameLengthFlag
4928 
4929             const int32_t dependsOnCoreCoder = br.getBits(1);
4930 
4931             if (dependsOnCoreCoder ) {
4932                 if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
4933                 br.skipBits(14); // coreCoderDelay
4934             }
4935 
4936             int32_t extensionFlag = -1;
4937             if (br.numBitsLeft() > 0) {
4938                 extensionFlag = br.getBits(1);
4939             } else {
4940                 switch (objectType) {
4941                 // 14496-3 4.5.1.1 extensionFlag
4942                 case AOT_AAC_LC:
4943                     extensionFlag = 0;
4944                     break;
4945                 case AOT_ER_AAC_LC:
4946                 case AOT_ER_AAC_SCAL:
4947                 case AOT_ER_BSAC:
4948                 case AOT_ER_AAC_LD:
4949                     extensionFlag = 1;
4950                     break;
4951                 default:
4952                     return ERROR_MALFORMED;
4953                     break;
4954                 }
4955                 ALOGW("csd missing extension flag; assuming %d for object type %u.",
4956                         extensionFlag, objectType);
4957             }
4958 
4959             if (numChannels == 0) {
4960                 int32_t channelsEffectiveNum = 0;
4961                 int32_t channelsNum = 0;
4962                 if (br.numBitsLeft() < 32) {
4963                     return ERROR_MALFORMED;
4964                 }
4965                 br.skipBits(4); // ElementInstanceTag
4966                 br.skipBits(2); // Profile
4967                 br.skipBits(4); // SamplingFrequencyIndex
4968                 const int32_t NumFrontChannelElements = br.getBits(4);
4969                 const int32_t NumSideChannelElements = br.getBits(4);
4970                 const int32_t NumBackChannelElements = br.getBits(4);
4971                 const int32_t NumLfeChannelElements = br.getBits(2);
4972                 br.skipBits(3); // NumAssocDataElements
4973                 br.skipBits(4); // NumValidCcElements
4974 
4975                 const int32_t MonoMixdownPresent = br.getBits(1);
4976 
4977                 if (MonoMixdownPresent != 0) {
4978                     if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4979                     br.skipBits(4); // MonoMixdownElementNumber
4980                 }
4981 
4982                 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4983                 const int32_t StereoMixdownPresent = br.getBits(1);
4984                 if (StereoMixdownPresent != 0) {
4985                     if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4986                     br.skipBits(4); // StereoMixdownElementNumber
4987                 }
4988 
4989                 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4990                 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
4991                 if (MatrixMixdownIndexPresent != 0) {
4992                     if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
4993                     br.skipBits(2); // MatrixMixdownIndex
4994                     br.skipBits(1); // PseudoSurroundEnable
4995                 }
4996 
4997                 int i;
4998                 for (i=0; i < NumFrontChannelElements; i++) {
4999                     if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
5000                     const int32_t FrontElementIsCpe = br.getBits(1);
5001                     br.skipBits(4); // FrontElementTagSelect
5002                     channelsNum += FrontElementIsCpe ? 2 : 1;
5003                 }
5004 
5005                 for (i=0; i < NumSideChannelElements; i++) {
5006                     if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
5007                     const int32_t SideElementIsCpe = br.getBits(1);
5008                     br.skipBits(4); // SideElementTagSelect
5009                     channelsNum += SideElementIsCpe ? 2 : 1;
5010                 }
5011 
5012                 for (i=0; i < NumBackChannelElements; i++) {
5013                     if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
5014                     const int32_t BackElementIsCpe = br.getBits(1);
5015                     br.skipBits(4); // BackElementTagSelect
5016                     channelsNum += BackElementIsCpe ? 2 : 1;
5017                 }
5018                 channelsEffectiveNum = channelsNum;
5019 
5020                 for (i=0; i < NumLfeChannelElements; i++) {
5021                     if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
5022                     br.skipBits(4); // LfeElementTagSelect
5023                     channelsNum += 1;
5024                 }
5025                 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
5026                 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
5027                 numChannels = channelsNum;
5028             }
5029         }
5030     }
5031 
5032     if (numChannels == 0) {
5033         return ERROR_UNSUPPORTED;
5034     }
5035 
5036     if (mLastTrack == NULL)
5037         return ERROR_MALFORMED;
5038 
5039     int32_t prevSampleRate;
5040     CHECK(AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &prevSampleRate));
5041 
5042     if (prevSampleRate != sampleRate) {
5043         ALOGV("mpeg4 audio sample rate different from previous setting. "
5044              "was: %d, now: %d", prevSampleRate, sampleRate);
5045     }
5046 
5047     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
5048 
5049     int32_t prevChannelCount;
5050     CHECK(AMediaFormat_getInt32(mLastTrack->meta,
5051             AMEDIAFORMAT_KEY_CHANNEL_COUNT, &prevChannelCount));
5052 
5053     if (prevChannelCount != numChannels) {
5054         ALOGV("mpeg4 audio channel count different from previous setting. "
5055              "was: %d, now: %d", prevChannelCount, numChannels);
5056     }
5057 
5058     AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, numChannels);
5059 
5060     return OK;
5061 }
5062 
adjustRawDefaultFrameSize()5063 void MPEG4Extractor::adjustRawDefaultFrameSize() {
5064     int32_t chanCount = 0;
5065     int32_t bitWidth = 0;
5066     const char *mimeStr = NULL;
5067 
5068     if(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mimeStr) &&
5069         !strcasecmp(mimeStr, MEDIA_MIMETYPE_AUDIO_RAW) &&
5070         AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &chanCount) &&
5071         AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitWidth)) {
5072         // samplesize in stsz may not right , so updade default samplesize
5073         mLastTrack->sampleTable->setPredictSampleSize(chanCount * bitWidth / 8);
5074     }
5075 }
5076 
5077 ////////////////////////////////////////////////////////////////////////////////
5078 
MPEG4Source(AMediaFormat * format,DataSourceHelper * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable,uint64_t elstShiftStartTicks,uint64_t elstInitialEmptyEditTicks)5079 MPEG4Source::MPEG4Source(
5080         AMediaFormat *format,
5081         DataSourceHelper *dataSource,
5082         int32_t timeScale,
5083         const sp<SampleTable> &sampleTable,
5084         Vector<SidxEntry> &sidx,
5085         const Trex *trex,
5086         off64_t firstMoofOffset,
5087         const sp<ItemTable> &itemTable,
5088         uint64_t elstShiftStartTicks,
5089         uint64_t elstInitialEmptyEditTicks)
5090     : mFormat(format),
5091       mDataSource(dataSource),
5092       mTimescale(timeScale),
5093       mSampleTable(sampleTable),
5094       mCurrentSampleIndex(0),
5095       mCurrentFragmentIndex(0),
5096       mSegments(sidx),
5097       mTrex(trex),
5098       mFirstMoofOffset(firstMoofOffset),
5099       mCurrentMoofOffset(firstMoofOffset),
5100       mCurrentMoofSize(0),
5101       mNextMoofOffset(-1),
5102       mCurrentTime(0),
5103       mDefaultEncryptedByteBlock(0),
5104       mDefaultSkipByteBlock(0),
5105       mCurrentSampleInfoAllocSize(0),
5106       mCurrentSampleInfoSizes(NULL),
5107       mCurrentSampleInfoOffsetsAllocSize(0),
5108       mCurrentSampleInfoOffsets(NULL),
5109       mIsAVC(false),
5110       mIsHEVC(false),
5111       mIsDolbyVision(false),
5112       mIsAC4(false),
5113       mIsPcm(false),
5114       mNALLengthSize(0),
5115       mStarted(false),
5116       mBuffer(NULL),
5117       mSrcBufferSize(0),
5118       mSrcBuffer(NULL),
5119       mItemTable(itemTable),
5120       mElstShiftStartTicks(elstShiftStartTicks),
5121       mElstInitialEmptyEditTicks(elstInitialEmptyEditTicks) {
5122 
5123     memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
5124 
5125     AMediaFormat_getInt32(mFormat,
5126             AMEDIAFORMAT_KEY_CRYPTO_MODE, &mCryptoMode);
5127     mDefaultIVSize = 0;
5128     AMediaFormat_getInt32(mFormat,
5129             AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &mDefaultIVSize);
5130     void *key;
5131     size_t keysize;
5132     if (AMediaFormat_getBuffer(mFormat,
5133             AMEDIAFORMAT_KEY_CRYPTO_KEY, &key, &keysize)) {
5134         CHECK(keysize <= 16);
5135         memset(mCryptoKey, 0, 16);
5136         memcpy(mCryptoKey, key, keysize);
5137     }
5138 
5139     AMediaFormat_getInt32(mFormat,
5140             AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, &mDefaultEncryptedByteBlock);
5141     AMediaFormat_getInt32(mFormat,
5142             AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, &mDefaultSkipByteBlock);
5143 
5144     const char *mime;
5145     bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
5146     CHECK(success);
5147 
5148     mIsMpegH = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1) ||
5149                !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1);
5150     mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
5151     mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
5152               !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
5153     mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
5154     mIsDolbyVision = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
5155     mIsHeif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) && mItemTable != NULL;
5156     mIsAvif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF) && mItemTable != NULL;
5157 
5158     if (mIsAVC) {
5159         void *data;
5160         size_t size;
5161         CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
5162 
5163         mNALLengthSize = getNALLengthSizeFromAvcCsd((const uint8_t *)data, size);
5164     } else if (mIsHEVC) {
5165         void *data;
5166         size_t size;
5167         CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
5168 
5169         mNALLengthSize = getNALLengthSizeFromHevcCsd((const uint8_t *)data, size);
5170     } else if (mIsDolbyVision) {
5171         ALOGV("%s DolbyVision stream detected", __FUNCTION__);
5172         void *data;
5173         size_t size;
5174         CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_2, &data, &size));
5175 
5176         const uint8_t *ptr = (const uint8_t *)data;
5177 
5178         CHECK(size == 24);
5179 
5180         // dv_major.dv_minor Should be 1.0 or 2.1
5181         CHECK(!((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)));
5182 
5183         const uint8_t profile = ptr[2] >> 1;
5184         // profile == (4,5,6,7,8) --> HEVC; profile == (9) --> AVC; profile == (10) --> AV1
5185         if (profile > 3 && profile < 9) {
5186             CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
5187 
5188             mNALLengthSize = getNALLengthSizeFromHevcCsd((const uint8_t *)data, size);
5189         } else if (9 == profile) {
5190             CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
5191 
5192             mNALLengthSize = getNALLengthSizeFromAvcCsd((const uint8_t *)data, size);
5193         } else if (10 == profile) {
5194             /* AV1 profile nothing to do */
5195         } else {
5196             if (AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
5197                 mNALLengthSize = getNALLengthSizeFromHevcCsd((const uint8_t *)data, size);
5198             } else if (AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
5199                 mNALLengthSize = getNALLengthSizeFromAvcCsd((const uint8_t *)data, size);
5200             } else {
5201                 LOG_ALWAYS_FATAL("Invalid Dolby Vision profile = %d", profile);
5202             }
5203         }
5204     }
5205 
5206     mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
5207     mIsAudio = !strncasecmp(mime, "audio/", 6);
5208 
5209     int32_t aacObjectType = -1;
5210 
5211     if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_AAC_PROFILE, &aacObjectType)) {
5212         mIsUsac = (aacObjectType == AOT_USAC);
5213     }
5214 
5215     if (mIsPcm) {
5216         int32_t numChannels = 0;
5217         int32_t bitsPerSample = 0;
5218         CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitsPerSample));
5219         CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &numChannels));
5220 
5221         int32_t bytesPerSample = bitsPerSample >> 3;
5222         int32_t pcmSampleSize = bytesPerSample * numChannels;
5223 
5224         size_t maxSampleSize;
5225         status_t err = mSampleTable->getMaxSampleSize(&maxSampleSize);
5226         if (err != OK || maxSampleSize != static_cast<size_t>(pcmSampleSize)
5227                || bitsPerSample != 16) {
5228             // Not supported
5229             mIsPcm = false;
5230         } else {
5231             AMediaFormat_setInt32(mFormat,
5232                     AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, pcmSampleSize * kMaxPcmFrameSize);
5233         }
5234     }
5235 
5236     CHECK(AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_TRACK_ID, &mTrackId));
5237 }
5238 
init()5239 status_t MPEG4Source::init() {
5240     if (mFirstMoofOffset != 0) {
5241         off64_t offset = mFirstMoofOffset;
5242         return parseChunk(&offset);
5243     }
5244     return OK;
5245 }
5246 
~MPEG4Source()5247 MPEG4Source::~MPEG4Source() {
5248     if (mStarted) {
5249         stop();
5250     }
5251     free(mCurrentSampleInfoSizes);
5252     free(mCurrentSampleInfoOffsets);
5253 }
5254 
start()5255 media_status_t MPEG4Source::start() {
5256     Mutex::Autolock autoLock(mLock);
5257 
5258     CHECK(!mStarted);
5259 
5260     int32_t tmp;
5261     CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &tmp));
5262     size_t max_size = tmp;
5263 
5264     // A somewhat arbitrary limit that should be sufficient for 8k video frames
5265     // If you see the message below for a valid input stream: increase the limit
5266     const size_t kMaxBufferSize = 64 * 1024 * 1024;
5267     if (max_size > kMaxBufferSize) {
5268         ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
5269         return AMEDIA_ERROR_MALFORMED;
5270     }
5271     if (max_size == 0) {
5272         ALOGE("zero max input size");
5273         return AMEDIA_ERROR_MALFORMED;
5274     }
5275 
5276     // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
5277     const size_t kInitialBuffers = 2;
5278     const size_t kMaxBuffers = 8;
5279     const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
5280     mBufferGroup->init(kInitialBuffers, max_size, realMaxBuffers);
5281     mSrcBuffer = new (std::nothrow) uint8_t[max_size];
5282     if (mSrcBuffer == NULL) {
5283         // file probably specified a bad max size
5284         return AMEDIA_ERROR_MALFORMED;
5285     }
5286     mSrcBufferSize = max_size;
5287 
5288     mStarted = true;
5289 
5290     return AMEDIA_OK;
5291 }
5292 
stop()5293 media_status_t MPEG4Source::stop() {
5294     Mutex::Autolock autoLock(mLock);
5295 
5296     CHECK(mStarted);
5297 
5298     if (mBuffer != NULL) {
5299         mBuffer->release();
5300         mBuffer = NULL;
5301     }
5302 
5303     mSrcBufferSize = 0;
5304     delete[] mSrcBuffer;
5305     mSrcBuffer = NULL;
5306 
5307     mStarted = false;
5308     mCurrentSampleIndex = 0;
5309 
5310     return AMEDIA_OK;
5311 }
5312 
parseChunk(off64_t * offset)5313 status_t MPEG4Source::parseChunk(off64_t *offset) {
5314     uint32_t hdr[2];
5315     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5316         return ERROR_IO;
5317     }
5318     uint64_t chunk_size = ntohl(hdr[0]);
5319     uint32_t chunk_type = ntohl(hdr[1]);
5320     off64_t data_offset = *offset + 8;
5321 
5322     if (chunk_size == 1) {
5323         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5324             return ERROR_IO;
5325         }
5326         chunk_size = ntoh64(chunk_size);
5327         data_offset += 8;
5328 
5329         if (chunk_size < 16) {
5330             // The smallest valid chunk is 16 bytes long in this case.
5331             return ERROR_MALFORMED;
5332         }
5333     } else if (chunk_size < 8) {
5334         // The smallest valid chunk is 8 bytes long.
5335         return ERROR_MALFORMED;
5336     }
5337 
5338     char chunk[5];
5339     MakeFourCCString(chunk_type, chunk);
5340     ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
5341 
5342     off64_t chunk_data_size = *offset + chunk_size - data_offset;
5343 
5344     switch(chunk_type) {
5345 
5346         case FOURCC("traf"):
5347         case FOURCC("moof"): {
5348             off64_t stop_offset = *offset + chunk_size;
5349             *offset = data_offset;
5350             if (chunk_type == FOURCC("moof")) {
5351                 mCurrentMoofSize = chunk_data_size;
5352             }
5353             while (*offset < stop_offset) {
5354                 status_t err = parseChunk(offset);
5355                 if (err != OK) {
5356                     return err;
5357                 }
5358             }
5359             if (chunk_type == FOURCC("moof")) {
5360                 // *offset points to the box following this moof. Find the next moof from there.
5361 
5362                 while (true) {
5363                     if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5364                         // no more box to the end of file.
5365                         break;
5366                     }
5367                     chunk_size = ntohl(hdr[0]);
5368                     chunk_type = ntohl(hdr[1]);
5369                     if (chunk_size == 1) {
5370                         // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
5371                         // which is defined in 4.2 Object Structure.
5372                         // When chunk_size==1, 8 bytes follows as "largesize".
5373                         if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5374                             return ERROR_IO;
5375                         }
5376                         chunk_size = ntoh64(chunk_size);
5377                         if (chunk_size < 16) {
5378                             // The smallest valid chunk is 16 bytes long in this case.
5379                             return ERROR_MALFORMED;
5380                         }
5381                     } else if (chunk_size == 0) {
5382                         // next box extends to end of file.
5383                     } else if (chunk_size < 8) {
5384                         // The smallest valid chunk is 8 bytes long in this case.
5385                         return ERROR_MALFORMED;
5386                     }
5387 
5388                     if (chunk_type == FOURCC("moof")) {
5389                         mNextMoofOffset = *offset;
5390                         break;
5391                     } else if (chunk_type == FOURCC("mdat")) {
5392                         parseChunk(offset);
5393                         continue;
5394                     } else if (chunk_size == 0) {
5395                         break;
5396                     }
5397                     *offset += chunk_size;
5398                 }
5399             }
5400             break;
5401         }
5402 
5403         case FOURCC("tfhd"): {
5404                 status_t err;
5405                 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
5406                     return err;
5407                 }
5408                 *offset += chunk_size;
5409                 break;
5410         }
5411 
5412         case FOURCC("trun"): {
5413                 status_t err;
5414                 if (mLastParsedTrackId == mTrackId) {
5415                     if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
5416                         return err;
5417                     }
5418                 }
5419 
5420                 *offset += chunk_size;
5421                 break;
5422         }
5423 
5424         case FOURCC("saiz"): {
5425             status_t err;
5426             if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
5427                 return err;
5428             }
5429             *offset += chunk_size;
5430             break;
5431         }
5432         case FOURCC("saio"): {
5433             status_t err;
5434             if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size))
5435                     != OK) {
5436                 return err;
5437             }
5438             *offset += chunk_size;
5439             break;
5440         }
5441 
5442         case FOURCC("senc"): {
5443             status_t err;
5444             if ((err = parseSampleEncryption(data_offset, chunk_data_size)) != OK) {
5445                 return err;
5446             }
5447             *offset += chunk_size;
5448             break;
5449         }
5450 
5451         case FOURCC("mdat"): {
5452             // parse DRM info if present
5453             ALOGV("MPEG4Source::parseChunk mdat");
5454             // if saiz/saoi was previously observed, do something with the sampleinfos
5455             status_t err = OK;
5456             auto kv = mDrmOffsets.lower_bound(*offset);
5457             if (kv != mDrmOffsets.end()) {
5458                 auto drmoffset = kv->first;
5459                 auto flags = kv->second;
5460                 mDrmOffsets.erase(kv);
5461                 ALOGV("mdat chunk_size %" PRIu64 " drmoffset %" PRId64 " offset %" PRId64,
5462                         chunk_size, drmoffset, *offset);
5463                 if (chunk_size >= drmoffset - *offset) {
5464                     err = parseClearEncryptedSizes(drmoffset, false, flags,
5465                         chunk_size - (drmoffset - *offset));
5466                 }
5467             }
5468             if (err != OK) {
5469                 return err;
5470             }
5471             *offset += chunk_size;
5472             break;
5473         }
5474 
5475         default: {
5476             *offset += chunk_size;
5477             break;
5478         }
5479     }
5480     return OK;
5481 }
5482 
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t size)5483 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
5484         off64_t offset, off64_t size) {
5485     ALOGV("parseSampleAuxiliaryInformationSizes");
5486     if (size < 9) {
5487         return -EINVAL;
5488     }
5489     // 14496-12 8.7.12
5490     uint8_t version;
5491     if (mDataSource->readAt(
5492             offset, &version, sizeof(version))
5493             < (ssize_t)sizeof(version)) {
5494         return ERROR_IO;
5495     }
5496 
5497     if (version != 0) {
5498         return ERROR_UNSUPPORTED;
5499     }
5500     offset++;
5501     size--;
5502 
5503     uint32_t flags;
5504     if (!mDataSource->getUInt24(offset, &flags)) {
5505         return ERROR_IO;
5506     }
5507     offset += 3;
5508     size -= 3;
5509 
5510     if (flags & 1) {
5511         if (size < 13) {
5512             return -EINVAL;
5513         }
5514         uint32_t tmp;
5515         if (!mDataSource->getUInt32(offset, &tmp)) {
5516             return ERROR_MALFORMED;
5517         }
5518         mCurrentAuxInfoType = tmp;
5519         offset += 4;
5520         size -= 4;
5521         if (!mDataSource->getUInt32(offset, &tmp)) {
5522             return ERROR_MALFORMED;
5523         }
5524         mCurrentAuxInfoTypeParameter = tmp;
5525         offset += 4;
5526         size -= 4;
5527     }
5528 
5529     uint8_t defsize;
5530     if (mDataSource->readAt(offset, &defsize, 1) != 1) {
5531         return ERROR_MALFORMED;
5532     }
5533     mCurrentDefaultSampleInfoSize = defsize;
5534     offset++;
5535     size--;
5536 
5537     uint32_t smplcnt;
5538     if (!mDataSource->getUInt32(offset, &smplcnt)) {
5539         return ERROR_MALFORMED;
5540     }
5541     mCurrentSampleInfoCount = smplcnt;
5542     offset += 4;
5543     size -= 4;
5544     if (mCurrentDefaultSampleInfoSize != 0) {
5545         ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
5546         return OK;
5547     }
5548     if(smplcnt > size) {
5549         ALOGW("b/124525515 - smplcnt(%u) > size(%ld)", (unsigned int)smplcnt, (unsigned long)size);
5550         android_errorWriteLog(0x534e4554, "124525515");
5551         return -EINVAL;
5552     }
5553     if (smplcnt > mCurrentSampleInfoAllocSize) {
5554         uint8_t * newPtr =  (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
5555         if (newPtr == NULL) {
5556             ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
5557             return NO_MEMORY;
5558         }
5559         mCurrentSampleInfoSizes = newPtr;
5560         mCurrentSampleInfoAllocSize = smplcnt;
5561     }
5562 
5563     mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
5564     return OK;
5565 }
5566 
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t size)5567 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
5568         off64_t offset, off64_t size) {
5569     ALOGV("parseSampleAuxiliaryInformationOffsets");
5570     if (size < 8) {
5571         return -EINVAL;
5572     }
5573     // 14496-12 8.7.13
5574     uint8_t version;
5575     if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
5576         return ERROR_IO;
5577     }
5578     offset++;
5579     size--;
5580 
5581     uint32_t flags;
5582     if (!mDataSource->getUInt24(offset, &flags)) {
5583         return ERROR_IO;
5584     }
5585     offset += 3;
5586     size -= 3;
5587 
5588     uint32_t entrycount;
5589     if (!mDataSource->getUInt32(offset, &entrycount)) {
5590         return ERROR_IO;
5591     }
5592     offset += 4;
5593     size -= 4;
5594     if (entrycount == 0) {
5595         return OK;
5596     }
5597     if (entrycount > UINT32_MAX / 8) {
5598         return ERROR_MALFORMED;
5599     }
5600 
5601     if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
5602         uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
5603         if (newPtr == NULL) {
5604             ALOGE("failed to realloc %u -> %u",
5605                     mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
5606             return NO_MEMORY;
5607         }
5608         mCurrentSampleInfoOffsets = newPtr;
5609         mCurrentSampleInfoOffsetsAllocSize = entrycount;
5610     }
5611     mCurrentSampleInfoOffsetCount = entrycount;
5612 
5613     if (mCurrentSampleInfoOffsets == NULL) {
5614         return OK;
5615     }
5616 
5617     for (size_t i = 0; i < entrycount; i++) {
5618         if (version == 0) {
5619             if (size < 4) {
5620                 ALOGW("b/124526959");
5621                 android_errorWriteLog(0x534e4554, "124526959");
5622                 return -EINVAL;
5623             }
5624             uint32_t tmp;
5625             if (!mDataSource->getUInt32(offset, &tmp)) {
5626                 return ERROR_IO;
5627             }
5628             mCurrentSampleInfoOffsets[i] = tmp;
5629             offset += 4;
5630             size -= 4;
5631         } else {
5632             if (size < 8) {
5633                 ALOGW("b/124526959");
5634                 android_errorWriteLog(0x534e4554, "124526959");
5635                 return -EINVAL;
5636             }
5637             uint64_t tmp;
5638             if (!mDataSource->getUInt64(offset, &tmp)) {
5639                 return ERROR_IO;
5640             }
5641             mCurrentSampleInfoOffsets[i] = tmp;
5642             offset += 8;
5643             size -= 8;
5644         }
5645     }
5646 
5647     // parse clear/encrypted data
5648 
5649     off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
5650 
5651     drmoffset += mCurrentMoofOffset;
5652     mDrmOffsets[drmoffset] = flags;
5653     ALOGV("saio drmoffset %" PRId64 " flags %u", drmoffset, flags);
5654 
5655     return OK;
5656 }
5657 
parseClearEncryptedSizes(off64_t offset,bool isSampleEncryption,uint32_t flags,off64_t size)5658 status_t MPEG4Source::parseClearEncryptedSizes(
5659         off64_t offset, bool isSampleEncryption, uint32_t flags, off64_t size) {
5660 
5661     int32_t ivlength;
5662     if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &ivlength)) {
5663         return ERROR_MALFORMED;
5664     }
5665 
5666     // only 0, 8 and 16 byte initialization vectors are supported
5667     if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
5668         ALOGW("unsupported IV length: %d", ivlength);
5669         return ERROR_MALFORMED;
5670     }
5671 
5672     uint32_t sampleCount = mCurrentSampleInfoCount;
5673     if (isSampleEncryption) {
5674         if (size < 4) {
5675             return ERROR_MALFORMED;
5676         }
5677         if (!mDataSource->getUInt32(offset, &sampleCount)) {
5678             return ERROR_IO;
5679         }
5680         offset += 4;
5681         size -= 4;
5682     }
5683 
5684     // read CencSampleAuxiliaryDataFormats
5685     for (size_t i = 0; i < sampleCount; i++) {
5686         if (i >= mCurrentSamples.size()) {
5687             ALOGW("too few samples");
5688             break;
5689         }
5690         Sample *smpl = &mCurrentSamples.editItemAt(i);
5691         if (!smpl->clearsizes.isEmpty()) {
5692             continue;
5693         }
5694 
5695         memset(smpl->iv, 0, 16);
5696         if (size < ivlength) {
5697             return ERROR_MALFORMED;
5698         }
5699         if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
5700             return ERROR_IO;
5701         }
5702 
5703         offset += ivlength;
5704         size -= ivlength;
5705 
5706         bool readSubsamples;
5707         if (isSampleEncryption) {
5708             readSubsamples = flags & 2;
5709         } else {
5710             int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
5711             if (smplinfosize == 0) {
5712                 smplinfosize = mCurrentSampleInfoSizes[i];
5713             }
5714             readSubsamples = smplinfosize > ivlength;
5715         }
5716 
5717         if (readSubsamples) {
5718             uint16_t numsubsamples;
5719             if (size < 2) {
5720                 return ERROR_MALFORMED;
5721             }
5722             if (!mDataSource->getUInt16(offset, &numsubsamples)) {
5723                 return ERROR_IO;
5724             }
5725             offset += 2;
5726             size -= 2;
5727             for (size_t j = 0; j < numsubsamples; j++) {
5728                 uint16_t numclear;
5729                 uint32_t numencrypted;
5730                 if (size < 6) {
5731                     return ERROR_MALFORMED;
5732                 }
5733                 if (!mDataSource->getUInt16(offset, &numclear)) {
5734                     return ERROR_IO;
5735                 }
5736                 offset += 2;
5737                 if (!mDataSource->getUInt32(offset, &numencrypted)) {
5738                     return ERROR_IO;
5739                 }
5740                 offset += 4;
5741                 size -= 6;
5742                 smpl->clearsizes.add(numclear);
5743                 smpl->encryptedsizes.add(numencrypted);
5744             }
5745         } else {
5746             smpl->clearsizes.add(0);
5747             smpl->encryptedsizes.add(smpl->size);
5748         }
5749     }
5750 
5751     return OK;
5752 }
5753 
parseSampleEncryption(off64_t offset,off64_t chunk_data_size)5754 status_t MPEG4Source::parseSampleEncryption(off64_t offset, off64_t chunk_data_size) {
5755     uint32_t flags;
5756     if (chunk_data_size < 4) {
5757         return ERROR_MALFORMED;
5758     }
5759     if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5760         return ERROR_MALFORMED;
5761     }
5762     return parseClearEncryptedSizes(offset + 4, true, flags, chunk_data_size - 4);
5763 }
5764 
parseTrackFragmentHeader(off64_t offset,off64_t size)5765 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
5766 
5767     if (size < 8) {
5768         return -EINVAL;
5769     }
5770 
5771     uint32_t flags;
5772     if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5773         return ERROR_MALFORMED;
5774     }
5775 
5776     if (flags & 0xff000000) {
5777         return -EINVAL;
5778     }
5779 
5780     if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
5781         return ERROR_MALFORMED;
5782     }
5783 
5784     if (mLastParsedTrackId != mTrackId) {
5785         // this is not the right track, skip it
5786         return OK;
5787     }
5788 
5789     mTrackFragmentHeaderInfo.mFlags = flags;
5790     mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
5791     offset += 8;
5792     size -= 8;
5793 
5794     ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
5795 
5796     if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
5797         if (size < 8) {
5798             return -EINVAL;
5799         }
5800 
5801         if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
5802             return ERROR_MALFORMED;
5803         }
5804         offset += 8;
5805         size -= 8;
5806     }
5807 
5808     if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
5809         if (size < 4) {
5810             return -EINVAL;
5811         }
5812 
5813         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
5814             return ERROR_MALFORMED;
5815         }
5816         offset += 4;
5817         size -= 4;
5818     }
5819 
5820     if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5821         if (size < 4) {
5822             return -EINVAL;
5823         }
5824 
5825         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
5826             return ERROR_MALFORMED;
5827         }
5828         offset += 4;
5829         size -= 4;
5830     }
5831 
5832     if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5833         if (size < 4) {
5834             return -EINVAL;
5835         }
5836 
5837         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
5838             return ERROR_MALFORMED;
5839         }
5840         offset += 4;
5841         size -= 4;
5842     }
5843 
5844     if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5845         if (size < 4) {
5846             return -EINVAL;
5847         }
5848 
5849         if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
5850             return ERROR_MALFORMED;
5851         }
5852         offset += 4;
5853         size -= 4;
5854     }
5855 
5856     if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
5857         mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
5858     }
5859 
5860     mTrackFragmentHeaderInfo.mDataOffset = 0;
5861     return OK;
5862 }
5863 
parseTrackFragmentRun(off64_t offset,off64_t size)5864 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
5865 
5866     ALOGV("MPEG4Source::parseTrackFragmentRun");
5867     if (size < 8) {
5868         return -EINVAL;
5869     }
5870 
5871     enum {
5872         kDataOffsetPresent                  = 0x01,
5873         kFirstSampleFlagsPresent            = 0x04,
5874         kSampleDurationPresent              = 0x100,
5875         kSampleSizePresent                  = 0x200,
5876         kSampleFlagsPresent                 = 0x400,
5877         kSampleCompositionTimeOffsetPresent = 0x800,
5878     };
5879 
5880     uint32_t flags;
5881     if (!mDataSource->getUInt32(offset, &flags)) {
5882         return ERROR_MALFORMED;
5883     }
5884     // |version| only affects SampleCompositionTimeOffset field.
5885     // If version == 0, SampleCompositionTimeOffset is uint32_t;
5886     // Otherwise, SampleCompositionTimeOffset is int32_t.
5887     // Sample.compositionOffset is defined as int32_t.
5888     uint8_t version = flags >> 24;
5889     flags &= 0xffffff;
5890     ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
5891 
5892     if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
5893         // These two shall not be used together.
5894         return -EINVAL;
5895     }
5896 
5897     uint32_t sampleCount;
5898     if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
5899         return ERROR_MALFORMED;
5900     }
5901     offset += 8;
5902     size -= 8;
5903 
5904     uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
5905 
5906     uint32_t firstSampleFlags = 0;
5907 
5908     if (flags & kDataOffsetPresent) {
5909         if (size < 4) {
5910             return -EINVAL;
5911         }
5912 
5913         uint32_t dataOffsetDelta;
5914         if (!mDataSource->getUInt32(offset, &dataOffsetDelta)) {
5915             return ERROR_MALFORMED;
5916         }
5917 
5918         if (__builtin_add_overflow(
5919                 mTrackFragmentHeaderInfo.mBaseDataOffset, dataOffsetDelta, &dataOffset)) {
5920             ALOGW("b/232242894 mBaseDataOffset(%" PRIu64 ") + dataOffsetDelta(%u) overflows uint64",
5921                     mTrackFragmentHeaderInfo.mBaseDataOffset, dataOffsetDelta);
5922             android_errorWriteLog(0x534e4554, "232242894");
5923             return ERROR_MALFORMED;
5924         }
5925 
5926         offset += 4;
5927         size -= 4;
5928     }
5929 
5930     if (flags & kFirstSampleFlagsPresent) {
5931         if (size < 4) {
5932             return -EINVAL;
5933         }
5934 
5935         if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
5936             return ERROR_MALFORMED;
5937         }
5938         offset += 4;
5939         size -= 4;
5940     }
5941 
5942     uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
5943              sampleCtsOffset = 0;
5944 
5945     size_t bytesPerSample = 0;
5946     if (flags & kSampleDurationPresent) {
5947         bytesPerSample += 4;
5948     } else if (mTrackFragmentHeaderInfo.mFlags
5949             & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5950         sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
5951     } else if (mTrex) {
5952         sampleDuration = mTrex->default_sample_duration;
5953     }
5954 
5955     if (flags & kSampleSizePresent) {
5956         bytesPerSample += 4;
5957     } else {
5958         sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5959 #ifdef VERY_VERY_VERBOSE_LOGGING
5960         // We don't expect this, but also want to avoid spamming the log if
5961         // we hit this case.
5962         if (!(mTrackFragmentHeaderInfo.mFlags
5963               & TrackFragmentHeaderInfo::kDefaultSampleSizePresent)) {
5964             ALOGW("No sample size specified");
5965         }
5966 #endif
5967     }
5968 
5969     if (flags & kSampleFlagsPresent) {
5970         bytesPerSample += 4;
5971     } else {
5972         sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5973 #ifdef VERY_VERY_VERBOSE_LOGGING
5974         // We don't expect this, but also want to avoid spamming the log if
5975         // we hit this case.
5976         if (!(mTrackFragmentHeaderInfo.mFlags
5977               & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent)) {
5978             ALOGW("No sample flags specified");
5979         }
5980 #endif
5981     }
5982 
5983     if (flags & kSampleCompositionTimeOffsetPresent) {
5984         bytesPerSample += 4;
5985     } else {
5986         sampleCtsOffset = 0;
5987     }
5988 
5989     if (bytesPerSample != 0) {
5990         if (size < (off64_t)sampleCount * bytesPerSample) {
5991             return -EINVAL;
5992         }
5993     } else {
5994         if (sampleDuration == 0) {
5995             ALOGW("b/123389881 sampleDuration == 0");
5996             android_errorWriteLog(0x534e4554, "124389881 zero");
5997             return -EINVAL;
5998         }
5999 
6000         // apply some quick (vs strict legality) checks
6001         //
6002         static constexpr uint32_t kMaxTrunSampleCount = 10000;
6003         if (sampleCount > kMaxTrunSampleCount) {
6004             ALOGW("b/123389881 sampleCount(%u) > kMaxTrunSampleCount(%u)",
6005                   sampleCount, kMaxTrunSampleCount);
6006             android_errorWriteLog(0x534e4554, "124389881 count");
6007             return -EINVAL;
6008         }
6009     }
6010 
6011     Sample tmp;
6012     for (uint32_t i = 0; i < sampleCount; ++i) {
6013         if (flags & kSampleDurationPresent) {
6014             if (!mDataSource->getUInt32(offset, &sampleDuration)) {
6015                 return ERROR_MALFORMED;
6016             }
6017             offset += 4;
6018         }
6019 
6020         if (flags & kSampleSizePresent) {
6021             if (!mDataSource->getUInt32(offset, &sampleSize)) {
6022                 return ERROR_MALFORMED;
6023             }
6024             offset += 4;
6025         }
6026 
6027         if (flags & kSampleFlagsPresent) {
6028             if (!mDataSource->getUInt32(offset, &sampleFlags)) {
6029                 return ERROR_MALFORMED;
6030             }
6031             offset += 4;
6032         }
6033 
6034         if (flags & kSampleCompositionTimeOffsetPresent) {
6035             if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
6036                 return ERROR_MALFORMED;
6037             }
6038             offset += 4;
6039         }
6040 
6041         ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
6042               " flags 0x%08x ctsOffset %" PRIu32, i + 1,
6043                 dataOffset, sampleSize, sampleDuration,
6044                 (flags & kFirstSampleFlagsPresent) && i == 0
6045                     ? firstSampleFlags : sampleFlags, sampleCtsOffset);
6046         tmp.offset = dataOffset;
6047         tmp.size = sampleSize;
6048         tmp.duration = sampleDuration;
6049         tmp.compositionOffset = sampleCtsOffset;
6050         memset(tmp.iv, 0, sizeof(tmp.iv));
6051         if (mCurrentSamples.add(tmp) < 0) {
6052             ALOGW("b/123389881 failed saving sample(n=%zu)", mCurrentSamples.size());
6053             android_errorWriteLog(0x534e4554, "124389881 allocation");
6054             mCurrentSamples.clear();
6055             return NO_MEMORY;
6056         }
6057 
6058         if (__builtin_add_overflow(dataOffset, sampleSize, &dataOffset)) {
6059             ALOGW("b/232242894 dataOffset(%" PRIu64 ") + sampleSize(%u) overflows uint64",
6060                     dataOffset, sampleSize);
6061             android_errorWriteLog(0x534e4554, "232242894");
6062             return ERROR_MALFORMED;
6063         }
6064     }
6065 
6066     mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
6067 
6068     return OK;
6069 }
6070 
getFormat(AMediaFormat * meta)6071 media_status_t MPEG4Source::getFormat(AMediaFormat *meta) {
6072     Mutex::Autolock autoLock(mLock);
6073     AMediaFormat_copy(meta, mFormat);
6074     return AMEDIA_OK;
6075 }
6076 
parseNALSize(const uint8_t * data) const6077 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
6078     switch (mNALLengthSize) {
6079         case 1:
6080             return *data;
6081         case 2:
6082             return U16_AT(data);
6083         case 3:
6084             return ((size_t)data[0] << 16) | U16_AT(&data[1]);
6085         case 4:
6086             return U32_AT(data);
6087     }
6088 
6089     // This cannot happen, mNALLengthSize springs to life by adding 1 to
6090     // a 2-bit integer.
6091     CHECK(!"Should not be here.");
6092 
6093     return 0;
6094 }
6095 
parseHEVCLayerId(const uint8_t * data,size_t size)6096 int32_t MPEG4Source::parseHEVCLayerId(const uint8_t *data, size_t size) {
6097     if (data == nullptr || size < mNALLengthSize + 2) {
6098         return -1;
6099     }
6100 
6101     // HEVC NAL-header (16-bit)
6102     //  1   6      6     3
6103     // |-|uuuuuu|------|iii|
6104     //      ^            ^
6105     //  NAL_type        layer_id + 1
6106     //
6107     // Layer-id is non-zero only for Temporal Sub-layer Access pictures (TSA)
6108     enum {
6109         TSA_N = 2,
6110         TSA_R = 3,
6111         STSA_N = 4,
6112         STSA_R = 5,
6113     };
6114 
6115     data += mNALLengthSize;
6116     uint16_t nalHeader = data[0] << 8 | data[1];
6117 
6118     uint16_t nalType = (nalHeader >> 9) & 0x3Fu;
6119     if (nalType == TSA_N || nalType == TSA_R || nalType == STSA_N || nalType == STSA_R) {
6120         int32_t layerIdPlusOne = nalHeader & 0x7u;
6121         ALOGD_IF(layerIdPlusOne == 0, "got layerId 0 for TSA picture");
6122         return layerIdPlusOne - 1;
6123     }
6124     return 0;
6125 }
6126 
getNALLengthSizeFromAvcCsd(const uint8_t * data,const size_t size) const6127 size_t MPEG4Source::getNALLengthSizeFromAvcCsd(const uint8_t *data, const size_t size) const {
6128     CHECK(data != nullptr);
6129     CHECK(size >= 7);
6130     CHECK_EQ((unsigned)data[0], 1u);  // configurationVersion == 1
6131 
6132     // The number of bytes used to encode the length of a NAL unit.
6133     return 1 + (data[4] & 3);
6134 }
6135 
getNALLengthSizeFromHevcCsd(const uint8_t * data,const size_t size) const6136 size_t MPEG4Source::getNALLengthSizeFromHevcCsd(const uint8_t *data, const size_t size) const {
6137     CHECK(data != nullptr);
6138     CHECK(size >= 22);
6139     CHECK_EQ((unsigned)data[0], 1u);  // configurationVersion == 1
6140 
6141     // The number of bytes used to encode the length of a NAL unit.
6142     return 1 + (data[14 + 7] & 3);
6143 }
6144 
read(MediaBufferHelper ** out,const ReadOptions * options)6145 media_status_t MPEG4Source::read(
6146         MediaBufferHelper **out, const ReadOptions *options) {
6147     Mutex::Autolock autoLock(mLock);
6148 
6149     CHECK(mStarted);
6150 
6151     if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) {
6152         *out = nullptr;
6153         return AMEDIA_ERROR_WOULD_BLOCK;
6154     }
6155 
6156     if (mFirstMoofOffset > 0) {
6157         return fragmentedRead(out, options);
6158     }
6159 
6160     *out = NULL;
6161 
6162     int64_t targetSampleTimeUs = -1;
6163 
6164     int64_t seekTimeUs;
6165     ReadOptions::SeekMode mode;
6166 
6167     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6168         ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6169         if (mIsHeif || mIsAvif) {
6170             CHECK(mSampleTable == NULL);
6171             CHECK(mItemTable != NULL);
6172             int32_t imageIndex;
6173             if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_TRACK_ID, &imageIndex)) {
6174                 return AMEDIA_ERROR_MALFORMED;
6175             }
6176 
6177             status_t err;
6178             if (seekTimeUs >= 0) {
6179                 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
6180             } else {
6181                 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
6182             }
6183             if (err != OK) {
6184                 return AMEDIA_ERROR_UNKNOWN;
6185             }
6186         } else {
6187             uint32_t findFlags = 0;
6188             switch (mode) {
6189                 case ReadOptions::SEEK_PREVIOUS_SYNC:
6190                     findFlags = SampleTable::kFlagBefore;
6191                     break;
6192                 case ReadOptions::SEEK_NEXT_SYNC:
6193                     findFlags = SampleTable::kFlagAfter;
6194                     break;
6195                 case ReadOptions::SEEK_CLOSEST_SYNC:
6196                 case ReadOptions::SEEK_CLOSEST:
6197                     findFlags = SampleTable::kFlagClosest;
6198                     break;
6199                 case ReadOptions::SEEK_FRAME_INDEX:
6200                     findFlags = SampleTable::kFlagFrameIndex;
6201                     break;
6202                 default:
6203                     CHECK(!"Should not be here.");
6204                     break;
6205             }
6206             if( mode != ReadOptions::SEEK_FRAME_INDEX) {
6207                 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6208                 if (mElstInitialEmptyEditTicks > 0) {
6209                     elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6210                                              mTimescale;
6211                     /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6212                      * Hence, lower bound on seekTimeUs is 0.
6213                      */
6214                     seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6215                 }
6216                 if (mElstShiftStartTicks > 0) {
6217                     elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6218                     seekTimeUs += elstShiftStartUs;
6219                 }
6220                 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6221                       ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6222                       elstShiftStartUs);
6223             }
6224 
6225             uint32_t sampleIndex;
6226             status_t err = mSampleTable->findSampleAtTime(
6227                     seekTimeUs, 1000000, mTimescale,
6228                     &sampleIndex, findFlags);
6229 
6230             if (mode == ReadOptions::SEEK_CLOSEST
6231                     || mode == ReadOptions::SEEK_FRAME_INDEX) {
6232                 // We found the closest sample already, now we want the sync
6233                 // sample preceding it (or the sample itself of course), even
6234                 // if the subsequent sync sample is closer.
6235                 findFlags = SampleTable::kFlagBefore;
6236             }
6237 
6238             uint32_t syncSampleIndex = sampleIndex;
6239             // assume every non-USAC/non-MPEGH audio sample is a sync sample.
6240             // This works around
6241             // seek issues with files that were incorrectly written with an
6242             // empty or single-sample stss block for the audio track
6243             if (err == OK && (!mIsAudio || mIsUsac || mIsMpegH)) {
6244                 err = mSampleTable->findSyncSampleNear(
6245                         sampleIndex, &syncSampleIndex, findFlags);
6246             }
6247 
6248             uint64_t sampleTime;
6249             if (err == OK) {
6250                 err = mSampleTable->getMetaDataForSample(
6251                         sampleIndex, NULL, NULL, &sampleTime);
6252             }
6253 
6254             if (err != OK) {
6255                 if (err == ERROR_OUT_OF_RANGE) {
6256                     // An attempt to seek past the end of the stream would
6257                     // normally cause this ERROR_OUT_OF_RANGE error. Propagating
6258                     // this all the way to the MediaPlayer would cause abnormal
6259                     // termination. Legacy behaviour appears to be to behave as if
6260                     // we had seeked to the end of stream, ending normally.
6261                     return AMEDIA_ERROR_END_OF_STREAM;
6262                 }
6263                 ALOGV("end of stream");
6264                 return AMEDIA_ERROR_UNKNOWN;
6265             }
6266 
6267             if (mode == ReadOptions::SEEK_CLOSEST
6268                 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6269                 if (mElstInitialEmptyEditTicks > 0) {
6270                     sampleTime += mElstInitialEmptyEditTicks;
6271                 }
6272                 if (mElstShiftStartTicks > 0){
6273                     if (sampleTime > mElstShiftStartTicks) {
6274                         sampleTime -= mElstShiftStartTicks;
6275                     } else {
6276                         sampleTime = 0;
6277                     }
6278                 }
6279                 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
6280             }
6281 
6282 #if 0
6283             uint32_t syncSampleTime;
6284             CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
6285                         syncSampleIndex, NULL, NULL, &syncSampleTime));
6286 
6287             ALOGI("seek to time %lld us => sample at time %lld us, "
6288                  "sync sample at time %lld us",
6289                  seekTimeUs,
6290                  sampleTime * 1000000ll / mTimescale,
6291                  syncSampleTime * 1000000ll / mTimescale);
6292 #endif
6293 
6294             mCurrentSampleIndex = syncSampleIndex;
6295         }
6296 
6297         if (mBuffer != NULL) {
6298             mBuffer->release();
6299             mBuffer = NULL;
6300         }
6301 
6302         // fall through
6303     }
6304 
6305     off64_t offset = 0;
6306     size_t size = 0;
6307     int64_t cts;
6308     uint64_t stts;
6309     bool isSyncSample;
6310     bool newBuffer = false;
6311     if (mBuffer == NULL) {
6312         newBuffer = true;
6313 
6314         status_t err;
6315         if (!mIsHeif && !mIsAvif) {
6316             err = mSampleTable->getMetaDataForSample(mCurrentSampleIndex, &offset, &size,
6317                                                     (uint64_t*)&cts, &isSyncSample, &stts);
6318             if(err == OK) {
6319                 if (mElstInitialEmptyEditTicks > 0) {
6320                     cts += mElstInitialEmptyEditTicks;
6321                 }
6322                 if (mElstShiftStartTicks > 0) {
6323                     // cts can be negative. for example, initial audio samples for gapless playback.
6324                     cts -= (int64_t)mElstShiftStartTicks;
6325                 }
6326             }
6327         } else {
6328             err = mItemTable->getImageOffsetAndSize(
6329                     options && options->getSeekTo(&seekTimeUs, &mode) ?
6330                             &mCurrentSampleIndex : NULL, &offset, &size);
6331 
6332             cts = stts = 0;
6333             isSyncSample = 0;
6334             ALOGV("image offset %lld, size %zu", (long long)offset, size);
6335         }
6336 
6337         if (err != OK) {
6338             if (err == ERROR_END_OF_STREAM) {
6339                 return AMEDIA_ERROR_END_OF_STREAM;
6340             }
6341             return AMEDIA_ERROR_UNKNOWN;
6342         }
6343 
6344         err = mBufferGroup->acquire_buffer(&mBuffer);
6345 
6346         if (err != OK || mBuffer == nullptr) {
6347             CHECK(mBuffer == NULL);
6348             return AMEDIA_ERROR_UNKNOWN;
6349         }
6350         if (size > mBuffer->size()) {
6351             ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6352             mBuffer->release();
6353             mBuffer = NULL;
6354             return AMEDIA_ERROR_UNKNOWN; // ERROR_BUFFER_TOO_SMALL
6355         }
6356     }
6357 
6358     if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize) && !mIsAC4) {
6359         if (newBuffer) {
6360             if (mIsPcm) {
6361                 // The twos' PCM block reader assumes that all samples has the same size.
6362                 uint32_t lastSampleIndexInChunk = mSampleTable->getLastSampleIndexInChunk();
6363                 if (lastSampleIndexInChunk < mCurrentSampleIndex) {
6364                     mBuffer->release();
6365                     mBuffer = nullptr;
6366                     return AMEDIA_ERROR_UNKNOWN;
6367                 }
6368                 uint32_t samplesToRead = lastSampleIndexInChunk - mCurrentSampleIndex + 1;
6369                 if (samplesToRead > kMaxPcmFrameSize) {
6370                     samplesToRead = kMaxPcmFrameSize;
6371                 }
6372 
6373                 ALOGV("Reading %d PCM frames of size %zu at index %d to stop of chunk at %d",
6374                       samplesToRead, size, mCurrentSampleIndex,
6375                       mSampleTable->getLastSampleIndexInChunk());
6376 
6377                 size_t totalSize = samplesToRead * size;
6378                 if (mBuffer->size() < totalSize) {
6379                     mBuffer->release();
6380                     mBuffer = nullptr;
6381                     return AMEDIA_ERROR_UNKNOWN;
6382                 }
6383                 uint8_t* buf = (uint8_t *)mBuffer->data();
6384                 ssize_t bytesRead = mDataSource->readAt(offset, buf, totalSize);
6385                 if (bytesRead < (ssize_t)totalSize) {
6386                     mBuffer->release();
6387                     mBuffer = NULL;
6388                     return AMEDIA_ERROR_IO;
6389                 }
6390 
6391                 AMediaFormat *meta = mBuffer->meta_data();
6392                 AMediaFormat_clear(meta);
6393                 AMediaFormat_setInt64(
6394                       meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6395                 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6396 
6397                 int32_t byteOrder = 0;
6398                 bool isGetBigEndian = AMediaFormat_getInt32(mFormat,
6399                         AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, &byteOrder);
6400 
6401                 if (isGetBigEndian && byteOrder == 1) {
6402                     // Big-endian -> little-endian
6403                     uint16_t *dstData = (uint16_t *)buf;
6404                     uint16_t *srcData = (uint16_t *)buf;
6405 
6406                     for (size_t j = 0; j < bytesRead / sizeof(uint16_t); j++) {
6407                          dstData[j] = ntohs(srcData[j]);
6408                     }
6409                 }
6410 
6411                 mCurrentSampleIndex += samplesToRead;
6412                 mBuffer->set_range(0, totalSize);
6413             } else {
6414                 ssize_t num_bytes_read =
6415                     mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6416 
6417                 if (num_bytes_read < (ssize_t)size) {
6418                     mBuffer->release();
6419                     mBuffer = NULL;
6420 
6421                     return AMEDIA_ERROR_IO;
6422                 }
6423 
6424                 CHECK(mBuffer != NULL);
6425                 mBuffer->set_range(0, size);
6426                 AMediaFormat *meta = mBuffer->meta_data();
6427                 AMediaFormat_clear(meta);
6428                 AMediaFormat_setInt64(
6429                         meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6430                 AMediaFormat_setInt64(
6431                         meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6432 
6433                 if (targetSampleTimeUs >= 0) {
6434                     AMediaFormat_setInt64(
6435                             meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6436                 }
6437 
6438                 if (isSyncSample) {
6439                     AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6440                 }
6441 
6442                 AMediaFormat_setInt64(
6443                         meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/,
6444                         offset);
6445 
6446                 if (mSampleTable != nullptr &&
6447                         mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6448                     AMediaFormat_setInt64(
6449                     meta,
6450                     "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6451                     mSampleTable->getLastSampleIndexInChunk());
6452                 }
6453 
6454                 ++mCurrentSampleIndex;
6455             }
6456         }
6457 
6458         *out = mBuffer;
6459         mBuffer = NULL;
6460 
6461         return AMEDIA_OK;
6462 
6463     } else if (mIsAC4) {
6464         CHECK(mBuffer != NULL);
6465         // Make sure there is enough space to write the sync header and the raw frame
6466         if (mBuffer->range_length() < (7 + size)) {
6467             mBuffer->release();
6468             mBuffer = NULL;
6469 
6470             return AMEDIA_ERROR_IO;
6471         }
6472 
6473         uint8_t *dstData = (uint8_t *)mBuffer->data();
6474         size_t dstOffset = 0;
6475         // Add AC-4 sync header to MPEG4 encapsulated AC-4 raw frame
6476         // AC40 sync word, meaning no CRC at the end of the frame
6477         dstData[dstOffset++] = 0xAC;
6478         dstData[dstOffset++] = 0x40;
6479         dstData[dstOffset++] = 0xFF;
6480         dstData[dstOffset++] = 0xFF;
6481         dstData[dstOffset++] = (uint8_t)((size >> 16) & 0xFF);
6482         dstData[dstOffset++] = (uint8_t)((size >> 8) & 0xFF);
6483         dstData[dstOffset++] = (uint8_t)((size >> 0) & 0xFF);
6484 
6485         ssize_t numBytesRead = mDataSource->readAt(offset, dstData + dstOffset, size);
6486         if (numBytesRead != (ssize_t)size) {
6487             mBuffer->release();
6488             mBuffer = NULL;
6489 
6490             return AMEDIA_ERROR_IO;
6491         }
6492 
6493         mBuffer->set_range(0, dstOffset + size);
6494         AMediaFormat *meta = mBuffer->meta_data();
6495         AMediaFormat_clear(meta);
6496         AMediaFormat_setInt64(
6497                 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6498         AMediaFormat_setInt64(
6499                 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6500 
6501         if (targetSampleTimeUs >= 0) {
6502             AMediaFormat_setInt64(
6503                     meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6504         }
6505 
6506         if (isSyncSample) {
6507             AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6508         }
6509 
6510         void *presentationsData;
6511         size_t presentationsSize;
6512         if (AMediaFormat_getBuffer(
6513                     mFormat, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
6514                     &presentationsData, &presentationsSize)) {
6515             AMediaFormat_setBuffer(
6516                     meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
6517                     presentationsData, presentationsSize);
6518         }
6519 
6520         ++mCurrentSampleIndex;
6521 
6522         *out = mBuffer;
6523         mBuffer = NULL;
6524 
6525         return AMEDIA_OK;
6526     } else {
6527         // Whole NAL units are returned but each fragment is prefixed by
6528         // the start code (0x00 00 00 01).
6529         ssize_t num_bytes_read = 0;
6530         bool mSrcBufferFitsDataToRead = size <= mSrcBufferSize;
6531         if (mSrcBufferFitsDataToRead) {
6532           num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
6533         } else {
6534           // We are trying to read a sample larger than the expected max sample size.
6535           // Fall through and let the failure be handled by the following if.
6536           android_errorWriteLog(0x534e4554, "188893559");
6537         }
6538 
6539         if (num_bytes_read < (ssize_t)size) {
6540             mBuffer->release();
6541             mBuffer = NULL;
6542             return mSrcBufferFitsDataToRead ? AMEDIA_ERROR_IO : AMEDIA_ERROR_MALFORMED;
6543         }
6544 
6545         uint8_t *dstData = (uint8_t *)mBuffer->data();
6546         size_t srcOffset = 0;
6547         size_t dstOffset = 0;
6548 
6549         while (srcOffset < size) {
6550             bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6551             size_t nalLength = 0;
6552             if (!isMalFormed) {
6553                 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6554                 srcOffset += mNALLengthSize;
6555                 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
6556             }
6557 
6558             if (isMalFormed) {
6559                 //if nallength abnormal,ignore it.
6560                 ALOGW("abnormal nallength, ignore this NAL");
6561                 srcOffset = size;
6562                 break;
6563             }
6564 
6565             if (nalLength == 0) {
6566                 continue;
6567             }
6568 
6569             if (dstOffset > SIZE_MAX - 4 ||
6570                     dstOffset + 4 > SIZE_MAX - nalLength ||
6571                     dstOffset + 4 + nalLength > mBuffer->size()) {
6572                 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
6573                 android_errorWriteLog(0x534e4554, "27208621");
6574                 mBuffer->release();
6575                 mBuffer = NULL;
6576                 return AMEDIA_ERROR_MALFORMED;
6577             }
6578 
6579             dstData[dstOffset++] = 0;
6580             dstData[dstOffset++] = 0;
6581             dstData[dstOffset++] = 0;
6582             dstData[dstOffset++] = 1;
6583             memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6584             srcOffset += nalLength;
6585             dstOffset += nalLength;
6586         }
6587         CHECK_EQ(srcOffset, size);
6588         CHECK(mBuffer != NULL);
6589         mBuffer->set_range(0, dstOffset);
6590 
6591         AMediaFormat *meta = mBuffer->meta_data();
6592         AMediaFormat_clear(meta);
6593         AMediaFormat_setInt64(
6594                 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6595         AMediaFormat_setInt64(
6596                 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6597 
6598         if (targetSampleTimeUs >= 0) {
6599             AMediaFormat_setInt64(
6600                     meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6601         }
6602 
6603         if (mIsAVC) {
6604             uint32_t layerId = FindAVCLayerId(
6605                     (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6606             AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6607         } else if (mIsHEVC) {
6608             int32_t layerId = parseHEVCLayerId(
6609                     (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6610             if (layerId >= 0) {
6611                 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6612             }
6613         }
6614 
6615         if (isSyncSample) {
6616             AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6617         }
6618 
6619         AMediaFormat_setInt64(
6620                 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/, offset);
6621 
6622         if (mSampleTable != nullptr &&
6623                 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6624             AMediaFormat_setInt64(
6625                     meta,
6626                     "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6627                     mSampleTable->getLastSampleIndexInChunk());
6628         }
6629 
6630         ++mCurrentSampleIndex;
6631 
6632         *out = mBuffer;
6633         mBuffer = NULL;
6634 
6635         return AMEDIA_OK;
6636     }
6637 }
6638 
fragmentedRead(MediaBufferHelper ** out,const ReadOptions * options)6639 media_status_t MPEG4Source::fragmentedRead(
6640         MediaBufferHelper **out, const ReadOptions *options) {
6641 
6642     ALOGV("MPEG4Source::fragmentedRead");
6643 
6644     CHECK(mStarted);
6645 
6646     *out = NULL;
6647 
6648     int64_t targetSampleTimeUs = -1;
6649 
6650     int64_t seekTimeUs;
6651     ReadOptions::SeekMode mode;
6652     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6653         ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6654         int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6655         if (mElstInitialEmptyEditTicks > 0) {
6656             elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6657                                      mTimescale;
6658             /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6659              * Hence, lower bound on seekTimeUs is 0.
6660              */
6661             seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6662         }
6663         if (mElstShiftStartTicks > 0){
6664             elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6665             seekTimeUs += elstShiftStartUs;
6666         }
6667         ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6668               ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6669               elstShiftStartUs);
6670 
6671         int numSidxEntries = mSegments.size();
6672         if (numSidxEntries != 0) {
6673             int64_t totalTime = 0;
6674             off64_t totalOffset = mFirstMoofOffset;
6675             for (int i = 0; i < numSidxEntries; i++) {
6676                 const SidxEntry *se = &mSegments[i];
6677                 if (totalTime + se->mDurationUs > seekTimeUs) {
6678                     // The requested time is somewhere in this segment
6679                     if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
6680                         (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
6681                         (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
6682                         // requested next sync, or closest sync and it was closer to the end of
6683                         // this segment
6684                         totalTime += se->mDurationUs;
6685                         totalOffset += se->mSize;
6686                     }
6687                     break;
6688                 }
6689                 totalTime += se->mDurationUs;
6690                 totalOffset += se->mSize;
6691             }
6692             mCurrentMoofOffset = totalOffset;
6693             mNextMoofOffset = -1;
6694             mCurrentSamples.clear();
6695             mCurrentSampleIndex = 0;
6696             status_t err = parseChunk(&totalOffset);
6697             if (err != OK) {
6698                 return AMEDIA_ERROR_UNKNOWN;
6699             }
6700             mCurrentTime = totalTime * mTimescale / 1000000ll;
6701         } else {
6702             // without sidx boxes, we can only seek to 0
6703             mCurrentMoofOffset = mFirstMoofOffset;
6704             mNextMoofOffset = -1;
6705             mCurrentSamples.clear();
6706             mCurrentSampleIndex = 0;
6707             off64_t tmp = mCurrentMoofOffset;
6708             status_t err = parseChunk(&tmp);
6709             if (err != OK) {
6710                 return AMEDIA_ERROR_UNKNOWN;
6711             }
6712             mCurrentTime = 0;
6713         }
6714 
6715         if (mBuffer != NULL) {
6716             mBuffer->release();
6717             mBuffer = NULL;
6718         }
6719 
6720         // fall through
6721     }
6722 
6723     off64_t offset = 0;
6724     size_t size = 0;
6725     int64_t cts = 0;
6726     bool isSyncSample = false;
6727     bool newBuffer = false;
6728     if (mBuffer == NULL || mCurrentSampleIndex >= mCurrentSamples.size()) {
6729         newBuffer = true;
6730 
6731         if (mBuffer != NULL) {
6732             mBuffer->release();
6733             mBuffer = NULL;
6734         }
6735         if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6736             // move to next fragment if there is one
6737             if (mNextMoofOffset <= mCurrentMoofOffset) {
6738                 return AMEDIA_ERROR_END_OF_STREAM;
6739             }
6740             off64_t nextMoof = mNextMoofOffset;
6741             mCurrentMoofOffset = nextMoof;
6742             mCurrentSamples.clear();
6743             mCurrentSampleIndex = 0;
6744             status_t err = parseChunk(&nextMoof);
6745             if (err != OK) {
6746                 return AMEDIA_ERROR_UNKNOWN;
6747             }
6748             if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6749                 return AMEDIA_ERROR_END_OF_STREAM;
6750             }
6751         }
6752 
6753         const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6754         offset = smpl->offset;
6755         size = smpl->size;
6756         cts = (int64_t)mCurrentTime + (int64_t)smpl->compositionOffset;
6757 
6758         if (mElstInitialEmptyEditTicks > 0) {
6759             cts += mElstInitialEmptyEditTicks;
6760         }
6761         if (mElstShiftStartTicks > 0) {
6762             // cts can be negative. for example, initial audio samples for gapless playback.
6763             cts -= (int64_t)mElstShiftStartTicks;
6764         }
6765 
6766         mCurrentTime += smpl->duration;
6767         isSyncSample = (mCurrentSampleIndex == 0);
6768 
6769         status_t err = mBufferGroup->acquire_buffer(&mBuffer);
6770 
6771         if (err != OK) {
6772             CHECK(mBuffer == NULL);
6773             ALOGV("acquire_buffer returned %d", err);
6774             return AMEDIA_ERROR_UNKNOWN;
6775         }
6776         if (size > mBuffer->size()) {
6777             ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6778             mBuffer->release();
6779             mBuffer = NULL;
6780             return AMEDIA_ERROR_UNKNOWN;
6781         }
6782     }
6783 
6784     const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6785     AMediaFormat *bufmeta = mBuffer->meta_data();
6786     AMediaFormat_clear(bufmeta);
6787     if (smpl->encryptedsizes.size()) {
6788         // store clear/encrypted lengths in metadata
6789         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_PLAIN_SIZES,
6790                 smpl->clearsizes.array(), smpl->clearsizes.size() * sizeof(uint32_t));
6791         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_SIZES,
6792                 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * sizeof(uint32_t));
6793         AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, mDefaultIVSize);
6794         AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mCryptoMode);
6795         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_KEY, mCryptoKey, 16);
6796         AMediaFormat_setInt32(bufmeta,
6797                 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, mDefaultEncryptedByteBlock);
6798         AMediaFormat_setInt32(bufmeta,
6799                 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, mDefaultSkipByteBlock);
6800 
6801         void *iv = NULL;
6802         size_t ivlength = 0;
6803         if (!AMediaFormat_getBuffer(mFormat,
6804                 "crypto-iv", &iv, &ivlength)) {
6805             iv = (void *) smpl->iv;
6806             ivlength = 16; // use 16 or the actual size?
6807         }
6808         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
6809     }
6810 
6811     if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize)) {
6812         if (newBuffer) {
6813             if (!isInRange((size_t)0u, mBuffer->size(), size)) {
6814                 mBuffer->release();
6815                 mBuffer = NULL;
6816 
6817                 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
6818                 return AMEDIA_ERROR_MALFORMED;
6819             }
6820 
6821             ssize_t num_bytes_read =
6822                 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6823 
6824             if (num_bytes_read < (ssize_t)size) {
6825                 mBuffer->release();
6826                 mBuffer = NULL;
6827 
6828                 ALOGE("i/o error");
6829                 return AMEDIA_ERROR_IO;
6830             }
6831 
6832             CHECK(mBuffer != NULL);
6833             mBuffer->set_range(0, size);
6834             AMediaFormat_setInt64(bufmeta,
6835                     AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6836             AMediaFormat_setInt64(bufmeta,
6837                     AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6838 
6839             if (targetSampleTimeUs >= 0) {
6840                 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6841             }
6842 
6843             if (mIsAVC) {
6844                 uint32_t layerId = FindAVCLayerId(
6845                         (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6846                 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6847             } else if (mIsHEVC) {
6848                 int32_t layerId = parseHEVCLayerId(
6849                         (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6850                 if (layerId >= 0) {
6851                     AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6852                 }
6853             }
6854 
6855             if (isSyncSample) {
6856                 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6857             }
6858 
6859             ++mCurrentSampleIndex;
6860         }
6861 
6862         *out = mBuffer;
6863         mBuffer = NULL;
6864 
6865         return AMEDIA_OK;
6866 
6867     } else {
6868         ALOGV("whole NAL");
6869         // Whole NAL units are returned but each fragment is prefixed by
6870         // the start code (0x00 00 00 01).
6871         ssize_t num_bytes_read = 0;
6872         void *data = NULL;
6873         bool isMalFormed = false;
6874         int32_t max_size;
6875         if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &max_size)
6876                 || !isInRange((size_t)0u, (size_t)max_size, size)) {
6877             isMalFormed = true;
6878         } else {
6879             data = mSrcBuffer;
6880         }
6881 
6882         if (isMalFormed || data == NULL) {
6883             ALOGE("isMalFormed size %zu", size);
6884             if (mBuffer != NULL) {
6885                 mBuffer->release();
6886                 mBuffer = NULL;
6887             }
6888             return AMEDIA_ERROR_MALFORMED;
6889         }
6890         num_bytes_read = mDataSource->readAt(offset, data, size);
6891 
6892         if (num_bytes_read < (ssize_t)size) {
6893             mBuffer->release();
6894             mBuffer = NULL;
6895 
6896             ALOGE("i/o error");
6897             return AMEDIA_ERROR_IO;
6898         }
6899 
6900         uint8_t *dstData = (uint8_t *)mBuffer->data();
6901         size_t srcOffset = 0;
6902         size_t dstOffset = 0;
6903 
6904         while (srcOffset < size) {
6905             isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6906             size_t nalLength = 0;
6907             if (!isMalFormed) {
6908                 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6909                 srcOffset += mNALLengthSize;
6910                 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
6911                         || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
6912                         || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
6913             }
6914 
6915             if (isMalFormed) {
6916                 ALOGE("Video is malformed; nalLength %zu", nalLength);
6917                 mBuffer->release();
6918                 mBuffer = NULL;
6919                 return AMEDIA_ERROR_MALFORMED;
6920             }
6921 
6922             if (nalLength == 0) {
6923                 continue;
6924             }
6925 
6926             if (dstOffset > SIZE_MAX - 4 ||
6927                     dstOffset + 4 > SIZE_MAX - nalLength ||
6928                     dstOffset + 4 + nalLength > mBuffer->size()) {
6929                 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
6930                 android_errorWriteLog(0x534e4554, "26365349");
6931                 mBuffer->release();
6932                 mBuffer = NULL;
6933                 return AMEDIA_ERROR_MALFORMED;
6934             }
6935 
6936             dstData[dstOffset++] = 0;
6937             dstData[dstOffset++] = 0;
6938             dstData[dstOffset++] = 0;
6939             dstData[dstOffset++] = 1;
6940             memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6941             srcOffset += nalLength;
6942             dstOffset += nalLength;
6943         }
6944         CHECK_EQ(srcOffset, size);
6945         CHECK(mBuffer != NULL);
6946         mBuffer->set_range(0, dstOffset);
6947 
6948         AMediaFormat *bufmeta = mBuffer->meta_data();
6949         AMediaFormat_setInt64(bufmeta,
6950                 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6951         AMediaFormat_setInt64(bufmeta,
6952                 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6953 
6954         if (targetSampleTimeUs >= 0) {
6955             AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6956         }
6957 
6958         if (isSyncSample) {
6959             AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6960         }
6961 
6962         ++mCurrentSampleIndex;
6963 
6964         *out = mBuffer;
6965         mBuffer = NULL;
6966 
6967         return AMEDIA_OK;
6968     }
6969 
6970     return AMEDIA_OK;
6971 }
6972 
findTrackByMimePrefix(const char * mimePrefix)6973 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
6974         const char *mimePrefix) {
6975     for (Track *track = mFirstTrack; track != NULL; track = track->next) {
6976         const char *mime;
6977         if (AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)
6978                 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
6979             return track;
6980         }
6981     }
6982 
6983     return NULL;
6984 }
6985 
LegacySniffMPEG4(DataSourceHelper * source,float * confidence)6986 static bool LegacySniffMPEG4(DataSourceHelper *source, float *confidence) {
6987     uint8_t header[8];
6988 
6989     ssize_t n = source->readAt(4, header, sizeof(header));
6990     if (n < (ssize_t)sizeof(header)) {
6991         return false;
6992     }
6993 
6994     if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
6995         || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
6996         || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
6997         || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
6998         || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
6999         || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
7000         || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
7001         || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)
7002         || !memcmp(header, "ftypavif", 8) || !memcmp(header, "ftypavis", 8)) {
7003         *confidence = 0.4;
7004 
7005         return true;
7006     }
7007 
7008     return false;
7009 }
7010 
isCompatibleBrand(uint32_t fourcc)7011 static bool isCompatibleBrand(uint32_t fourcc) {
7012     static const uint32_t kCompatibleBrands[] = {
7013         FOURCC("isom"),
7014         FOURCC("iso2"),
7015         FOURCC("avc1"),
7016         FOURCC("hvc1"),
7017         FOURCC("hev1"),
7018         FOURCC("av01"),
7019         FOURCC("vp09"),
7020         FOURCC("3gp4"),
7021         FOURCC("mp41"),
7022         FOURCC("mp42"),
7023         FOURCC("dash"),
7024         FOURCC("nvr1"),
7025 
7026         // Won't promise that the following file types can be played.
7027         // Just give these file types a chance.
7028         FOURCC("qt  "),  // Apple's QuickTime
7029         FOURCC("MSNV"),  // Sony's PSP
7030         FOURCC("wmf "),
7031 
7032         FOURCC("3g2a"),  // 3GPP2
7033         FOURCC("3g2b"),
7034         FOURCC("mif1"),  // HEIF image
7035         FOURCC("heic"),  // HEIF image
7036         FOURCC("msf1"),  // HEIF image sequence
7037         FOURCC("hevc"),  // HEIF image sequence
7038         FOURCC("avif"),  // AVIF image
7039         FOURCC("avis"),  // AVIF image sequence
7040     };
7041 
7042     for (size_t i = 0;
7043          i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
7044          ++i) {
7045         if (kCompatibleBrands[i] == fourcc) {
7046             return true;
7047         }
7048     }
7049 
7050     return false;
7051 }
7052 
7053 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
7054 // compatible brand is present.
7055 // Also try to identify where this file's metadata ends
7056 // (end of the 'moov' atom) and report it to the caller as part of
7057 // the metadata.
BetterSniffMPEG4(DataSourceHelper * source,float * confidence)7058 static bool BetterSniffMPEG4(DataSourceHelper *source, float *confidence) {
7059     // We scan up to 128 bytes to identify this file as an MP4.
7060     static const off64_t kMaxScanOffset = 128ll;
7061 
7062     off64_t offset = 0ll;
7063     bool foundGoodFileType = false;
7064     off64_t moovAtomEndOffset = -1ll;
7065     bool done = false;
7066 
7067     while (!done && offset < kMaxScanOffset) {
7068         uint32_t hdr[2];
7069         if (source->readAt(offset, hdr, 8) < 8) {
7070             return false;
7071         }
7072 
7073         uint64_t chunkSize = ntohl(hdr[0]);
7074         uint32_t chunkType = ntohl(hdr[1]);
7075         off64_t chunkDataOffset = offset + 8;
7076 
7077         if (chunkSize == 1) {
7078             if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
7079                 return false;
7080             }
7081 
7082             chunkSize = ntoh64(chunkSize);
7083             chunkDataOffset += 8;
7084 
7085             if (chunkSize < 16) {
7086                 // The smallest valid chunk is 16 bytes long in this case.
7087                 return false;
7088             }
7089             if (chunkSize > INT64_MAX) {
7090                 // reject overly large chunk sizes that could
7091                 // be interpreted as negative
7092                 ALOGE("chunk size too large");
7093                 return false;
7094             }
7095 
7096         } else if (chunkSize < 8) {
7097             // The smallest valid chunk is 8 bytes long.
7098             return false;
7099         }
7100 
7101         // (data_offset - offset) is either 8 or 16
7102         off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
7103         if (chunkDataSize < 0) {
7104             ALOGE("b/23540914");
7105             return false;
7106         }
7107 
7108         char chunkstring[5];
7109         MakeFourCCString(chunkType, chunkstring);
7110         ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
7111                 chunkstring, chunkSize, (long long)offset);
7112         switch (chunkType) {
7113             case FOURCC("ftyp"):
7114             {
7115                 if (chunkDataSize < 8) {
7116                     return false;
7117                 }
7118 
7119                 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
7120                 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
7121                     if (i == 1) {
7122                         // Skip this index, it refers to the minorVersion,
7123                         // not a brand.
7124                         continue;
7125                     }
7126 
7127                     uint32_t brand;
7128                     if (source->readAt(
7129                                 chunkDataOffset + 4 * i, &brand, 4) < 4) {
7130                         return false;
7131                     }
7132 
7133                     brand = ntohl(brand);
7134 
7135                     if (isCompatibleBrand(brand)) {
7136                         foundGoodFileType = true;
7137                         break;
7138                     }
7139                 }
7140 
7141                 if (!foundGoodFileType) {
7142                     return false;
7143                 }
7144 
7145                 break;
7146             }
7147 
7148             case FOURCC("moov"):
7149             {
7150                 if (__builtin_add_overflow(offset, chunkSize, &moovAtomEndOffset)) {
7151                     ALOGE("chunk size + offset would overflow");
7152                     return false;
7153                 }
7154 
7155                 done = true;
7156                 break;
7157             }
7158 
7159             default:
7160                 break;
7161         }
7162 
7163         if (__builtin_add_overflow(offset, chunkSize, &offset)) {
7164             ALOGE("chunk size + offset would overflow");
7165             return false;
7166         }
7167     }
7168 
7169     if (!foundGoodFileType) {
7170         return false;
7171     }
7172 
7173     *confidence = 0.4f;
7174 
7175     return true;
7176 }
7177 
CreateExtractor(CDataSource * source,void *)7178 static CMediaExtractor* CreateExtractor(CDataSource *source, void *) {
7179     return wrap(new MPEG4Extractor(new DataSourceHelper(source)));
7180 }
7181 
Sniff(CDataSource * source,float * confidence,void **,FreeMetaFunc *)7182 static CreatorFunc Sniff(
7183         CDataSource *source, float *confidence, void **,
7184         FreeMetaFunc *) {
7185     DataSourceHelper helper(source);
7186     if (BetterSniffMPEG4(&helper, confidence)) {
7187         return CreateExtractor;
7188     }
7189 
7190     if (LegacySniffMPEG4(&helper, confidence)) {
7191         ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
7192         return CreateExtractor;
7193     }
7194 
7195     return NULL;
7196 }
7197 
// NULL-terminated list of file name extensions associated with this
// extractor. It is handed out, together with Sniff, through the v3 entry of
// the ExtractorDef built in GETEXTRACTORDEF().
static const char *extensions[] = {
    "3g2",
    "3ga",
    "3gp",
    "3gpp",
    "3gpp2",
    "m4a",
    "m4r",
    "m4v",
    "mov",
    "mp4",
    "qt",
    NULL  // end-of-list sentinel
};
7212 
7213 extern "C" {
7214 // This is the only symbol that needs to be exported
7215 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()7216 ExtractorDef GETEXTRACTORDEF() {
7217     return {
7218         EXTRACTORDEF_VERSION,
7219         UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
7220         2, // version
7221         "MP4 Extractor",
7222         { .v3 = {Sniff, extensions} },
7223     };
7224 }
7225 
7226 } // extern "C"
7227 
7228 }  // namespace android
7229