1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <algorithm>
23 #include <map>
24 #include <memory>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include <utils/Log.h>
30
31 #include "AC4Parser.h"
32 #include "MPEG4Extractor.h"
33 #include "SampleTable.h"
34 #include "ItemTable.h"
35
36 #include <media/esds/ESDS.h>
37 #include <ID3.h>
38 #include <media/stagefright/DataSourceBase.h>
39 #include <media/ExtractorUtils.h>
40 #include <media/stagefright/foundation/ABitReader.h>
41 #include <media/stagefright/foundation/ABuffer.h>
42 #include <media/stagefright/foundation/ADebug.h>
43 #include <media/stagefright/foundation/AMessage.h>
44 #include <media/stagefright/foundation/AudioPresentationInfo.h>
45 #include <media/stagefright/foundation/AUtils.h>
46 #include <media/stagefright/foundation/ByteUtils.h>
47 #include <media/stagefright/foundation/ColorUtils.h>
48 #include <media/stagefright/foundation/avc_utils.h>
49 #include <media/stagefright/foundation/hexdump.h>
50 #include <media/stagefright/foundation/OpusHeader.h>
51 #include <media/stagefright/MediaBufferGroup.h>
52 #include <media/stagefright/MediaDefs.h>
53 #include <media/stagefright/MetaDataBase.h>
54 #include <utils/String8.h>
55
56 #include <byteswap.h>
57
58 #ifndef UINT32_MAX
59 #define UINT32_MAX (4294967295U)
60 #endif
61
62 #define ALAC_SPECIFIC_INFO_SIZE (36)
63
64 // TODO : Remove the defines once mainline media is built against NDK >= 31.
65 // The mp4 extractor is part of mainline and builds against NDK 29 as of
66 // writing. These keys are available only from NDK 31:
67 #define AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION \
68 "mpegh-profile-level-indication"
69 #define AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT \
70 "mpegh-reference-channel-layout"
71 #define AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS \
72 "mpegh-compatible-sets"
73
74 namespace android {
75
// Size limits shared by the parsing code in this file.
enum {
    // max track header chunk to return
    kMaxTrackHeaderSize = 32,

    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    kMaxAtomSize = 64 * 1024 * 1024,
};
84
// Track reader declared for the MPEG-4 extractor. It derives from
// MediaTrackHelper and exposes both a regular read() and a fragmentedRead()
// entry point. Only the declaration is visible here; the method bodies are
// defined elsewhere in the file.
class MPEG4Source : public MediaTrackHelper {
    static const size_t kMaxPcmFrameSize = 8192;
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(AMediaFormat *format,
                DataSourceHelper *dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset,
                const sp<ItemTable> &itemTable,
                uint64_t elstShiftStartTicks,
                uint64_t elstInitialEmptyEditTicks);
    virtual status_t init();

    virtual media_status_t start();
    virtual media_status_t stop();

    virtual media_status_t getFormat(AMediaFormat *);

    virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
    // This source always reports support for non-blocking reads.
    bool supportsNonBlockingRead() override { return true; }
    virtual media_status_t fragmentedRead(
            MediaBufferHelper **buffer, const ReadOptions *options = NULL);

    virtual ~MPEG4Source();

private:
    Mutex mLock;

    AMediaFormat *mFormat;
    DataSourceHelper *mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Held by reference: the referenced vector must outlive this object.
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mCurrentMoofSize;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime; // in media timescale ticks
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    int32_t mDefaultEncryptedByteBlock;
    int32_t mDefaultSkipByteBlock;
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    // Codec/format flags. Only mIsMpegH (and mIsUsac below) carry an in-class
    // default; the others have no initializer in this declaration.
    bool mIsAVC;
    bool mIsHEVC;
    bool mIsDolbyVision;
    bool mIsAC4;
    bool mIsMpegH = false;
    bool mIsPcm;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferHelper *mBuffer;

    size_t mSrcBufferSize;
    uint8_t *mSrcBuffer;

    bool mIsHeif;
    bool mIsAvif;
    bool mIsAudio;
    bool mIsUsac = false;
    sp<ItemTable> mItemTable;

    /* Shift start offset (move to earlier time) when media_time > 0,
     * in media time scale.
     */
    uint64_t mElstShiftStartTicks;
    /* Initial start offset (move to later time), empty edit list entry
     * in media time scale.
     */
    uint64_t mElstInitialEmptyEditTicks;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
    status_t parseClearEncryptedSizes(off64_t offset, bool isSampleEncryption,
            uint32_t flags, off64_t size);
    status_t parseSampleEncryption(off64_t offset, off64_t size);
    // returns -1 for invalid layer ID
    int32_t parseHEVCLayerId(const uint8_t *data, size_t size);
    size_t getNALLengthSizeFromAvcCsd(const uint8_t *data, const size_t size) const;
    size_t getNALLengthSizeFromHevcCsd(const uint8_t *data, const size_t size) const;

    // Values read from a track fragment header. The k*Present flags are bit
    // masks; presumably they are tested against mFlags to tell which optional
    // fields are valid -- confirm against parseTrackFragmentHeader().
    struct TrackFragmentHeaderInfo {
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record: location, size, timing, and encryption details
    // (IV plus clear/encrypted byte counts).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<uint32_t> clearsizes;
        Vector<uint32_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;
    std::map<off64_t, uint32_t> mDrmOffsets;

    // Copy operations are declared private and not defined in this
    // declaration: the class is not meant to be copied.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
228
// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache, while forwarding
// all remaining requests to the wrapped data source.
// It is used to cache the full sample table metadata for a single track.
// Wrapping may be applied repeatedly to cover all tracks, i.e. each
// CachedRangedDataSource caches the sample table metadata for one track.
235
// DataSourceHelper wrapper that keeps one contiguous byte range of the wrapped
// source in memory. Reads that fall entirely inside that range are served from
// the in-memory copy; everything else is forwarded to the wrapped source.
class CachedRangedDataSource : public DataSourceHelper {
public:
    explicit CachedRangedDataSource(DataSourceHelper *source);
    virtual ~CachedRangedDataSource();

    ssize_t readAt(off64_t offset, void *data, size_t size) override;
    status_t getSize(off64_t *size) override;
    uint32_t flags() override;

    // Replaces any previously cached range with [offset, offset + size).
    // If "assumeSourceOwnershipOnSuccess" is true and the call succeeds, the
    // wrapped source is deleted by this object's destructor.
    status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);


private:
    Mutex mLock;

    DataSourceHelper *mSource;  // wrapped source; owned only if mOwnsDataSource
    bool mOwnsDataSource;
    off64_t mCachedOffset;      // start of the cached range in the source
    size_t mCachedSize;         // length of the cached range in bytes
    uint8_t *mCache;            // malloc'ed copy of the range, or NULL

    void clearCache();

    // Copy operations are declared private and not defined in this
    // declaration: the class is not meant to be copied.
    CachedRangedDataSource(const CachedRangedDataSource &);
    CachedRangedDataSource &operator=(const CachedRangedDataSource &);
};
262
// Wraps "source" without taking ownership and starts with an empty cache.
// Ownership can be acquired later through a successful setCachedRange().
CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
    : DataSourceHelper(source),
      mSource(source),
      mOwnsDataSource(false),
      mCachedOffset(0),
      mCachedSize(0),
      mCache(NULL) {
}
271
~CachedRangedDataSource()272 CachedRangedDataSource::~CachedRangedDataSource() {
273 clearCache();
274 if (mOwnsDataSource) {
275 delete mSource;
276 }
277 }
278
clearCache()279 void CachedRangedDataSource::clearCache() {
280 if (mCache) {
281 free(mCache);
282 mCache = NULL;
283 }
284
285 mCachedOffset = 0;
286 mCachedSize = 0;
287 }
288
readAt(off64_t offset,void * data,size_t size)289 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
290 Mutex::Autolock autoLock(mLock);
291
292 if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
293 memcpy(data, &mCache[offset - mCachedOffset], size);
294 return size;
295 }
296
297 return mSource->readAt(offset, data, size);
298 }
299
// Forwards to the wrapped source; the cache does not affect the reported size.
status_t CachedRangedDataSource::getSize(off64_t *size) {
    return mSource->getSize(size);
}
303
// Forwards to the wrapped source and returns its flags unchanged.
uint32_t CachedRangedDataSource::flags() {
    return mSource->flags();
}
307
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)308 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
309 size_t size,
310 bool assumeSourceOwnershipOnSuccess) {
311 Mutex::Autolock autoLock(mLock);
312
313 clearCache();
314
315 mCache = (uint8_t *)malloc(size);
316
317 if (mCache == NULL) {
318 return -ENOMEM;
319 }
320
321 mCachedOffset = offset;
322 mCachedSize = size;
323
324 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
325
326 if (err < (ssize_t)size) {
327 clearCache();
328
329 return ERROR_IO;
330 }
331 mOwnsDataSource = assumeSourceOwnershipOnSuccess;
332 return OK;
333 }
334
335 ////////////////////////////////////////////////////////////////////////////////
336
// Compile-time switch for the hex-dump debugging path in
// MPEG4Extractor::parseChunk(); disabled by default.
static const bool kUseHexDump = false;
338
FourCC2MIME(uint32_t fourcc)339 static const char *FourCC2MIME(uint32_t fourcc) {
340 switch (fourcc) {
341 case FOURCC("mp4a"):
342 return MEDIA_MIMETYPE_AUDIO_AAC;
343
344 case FOURCC("samr"):
345 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
346
347 case FOURCC("sawb"):
348 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
349
350 case FOURCC("ec-3"):
351 return MEDIA_MIMETYPE_AUDIO_EAC3;
352
353 case FOURCC("mp4v"):
354 return MEDIA_MIMETYPE_VIDEO_MPEG4;
355
356 case FOURCC("s263"):
357 case FOURCC("h263"):
358 case FOURCC("H263"):
359 return MEDIA_MIMETYPE_VIDEO_H263;
360
361 case FOURCC("avc1"):
362 return MEDIA_MIMETYPE_VIDEO_AVC;
363
364 case FOURCC("hvc1"):
365 case FOURCC("hev1"):
366 return MEDIA_MIMETYPE_VIDEO_HEVC;
367
368 case FOURCC("dvav"):
369 case FOURCC("dva1"):
370 case FOURCC("dvhe"):
371 case FOURCC("dvh1"):
372 case FOURCC("dav1"):
373 return MEDIA_MIMETYPE_VIDEO_DOLBY_VISION;
374
375 case FOURCC("ac-4"):
376 return MEDIA_MIMETYPE_AUDIO_AC4;
377 case FOURCC("Opus"):
378 return MEDIA_MIMETYPE_AUDIO_OPUS;
379
380 case FOURCC("twos"):
381 case FOURCC("sowt"):
382 return MEDIA_MIMETYPE_AUDIO_RAW;
383 case FOURCC("alac"):
384 return MEDIA_MIMETYPE_AUDIO_ALAC;
385 case FOURCC("fLaC"):
386 return MEDIA_MIMETYPE_AUDIO_FLAC;
387 case FOURCC("av01"):
388 return MEDIA_MIMETYPE_VIDEO_AV1;
389 case FOURCC("vp09"):
390 return MEDIA_MIMETYPE_VIDEO_VP9;
391 case FOURCC(".mp3"):
392 case 0x6D730055: // "ms U" mp3 audio
393 return MEDIA_MIMETYPE_AUDIO_MPEG;
394 case FOURCC("mha1"):
395 return MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1;
396 case FOURCC("mhm1"):
397 return MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1;
398 case FOURCC("dtsc"):
399 return MEDIA_MIMETYPE_AUDIO_DTS;
400 case FOURCC("dtse"):
401 case FOURCC("dtsh"):
402 return MEDIA_MIMETYPE_AUDIO_DTS_HD;
403 case FOURCC("dtsl"):
404 return MEDIA_MIMETYPE_AUDIO_DTS_HD_MA;
405 case FOURCC("dtsx"):
406 return MEDIA_MIMETYPE_AUDIO_DTS_UHD_P2;
407 default:
408 ALOGW("Unknown fourcc: %c%c%c%c",
409 (fourcc >> 24) & 0xff,
410 (fourcc >> 16) & 0xff,
411 (fourcc >> 8) & 0xff,
412 fourcc & 0xff
413 );
414 return "application/octet-stream";
415 }
416 }
417
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)418 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
419 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
420 // AMR NB audio is always mono, 8kHz
421 *channels = 1;
422 *rate = 8000;
423 return true;
424 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
425 // AMR WB audio is always mono, 16kHz
426 *channels = 1;
427 *rate = 16000;
428 return true;
429 }
430 return false;
431 }
432
// Sets up an extractor over "source" in the not-yet-parsed state
// (mInitCheck == NO_INIT). mPreferHeif is true only when a non-null "mime"
// matches MEDIA_MIMETYPE_CONTAINER_HEIF case-insensitively. A fresh, empty
// file-level AMediaFormat is allocated; the destructor in this file deletes
// both that format and "source".
MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
    : mMoofOffset(0),
      mMoofFound(false),
      mMdatFound(false),
      mDataSource(source),
      mInitCheck(NO_INIT),
      mHeaderTimescale(0),
      mIsQT(false),
      mIsHeif(false),
      mHasMoovBox(false),
      mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
      mIsAvif(false),
      mFirstTrack(NULL),
      mLastTrack(NULL) {
    // NOTE(review): "mime" may be NULL here (see the check above) and is
    // passed to a %s conversion -- confirm the logging backend tolerates that.
    ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
    mFileMetaData = AMediaFormat_new();
}
450
~MPEG4Extractor()451 MPEG4Extractor::~MPEG4Extractor() {
452 Track *track = mFirstTrack;
453 while (track) {
454 Track *next = track->next;
455
456 delete track;
457 track = next;
458 }
459 mFirstTrack = mLastTrack = NULL;
460
461 for (size_t i = 0; i < mPssh.size(); i++) {
462 delete [] mPssh[i].data;
463 }
464 mPssh.clear();
465
466 delete mDataSource;
467 AMediaFormat_delete(mFileMetaData);
468 }
469
flags() const470 uint32_t MPEG4Extractor::flags() const {
471 return CAN_PAUSE |
472 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
473 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
474 }
475
getMetaData(AMediaFormat * meta)476 media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
477 status_t err;
478 if ((err = readMetaData()) != OK) {
479 return AMEDIA_ERROR_UNKNOWN;
480 }
481 AMediaFormat_copy(meta, mFileMetaData);
482 return AMEDIA_OK;
483 }
484
countTracks()485 size_t MPEG4Extractor::countTracks() {
486 status_t err;
487 if ((err = readMetaData()) != OK) {
488 ALOGV("MPEG4Extractor::countTracks: no tracks");
489 return 0;
490 }
491
492 size_t n = 0;
493 Track *track = mFirstTrack;
494 while (track) {
495 ++n;
496 track = track->next;
497 }
498
499 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
500 return n;
501 }
502
getTrackMetaData(AMediaFormat * meta,size_t index,uint32_t flags)503 media_status_t MPEG4Extractor::getTrackMetaData(
504 AMediaFormat *meta,
505 size_t index, uint32_t flags) {
506 status_t err;
507 if ((err = readMetaData()) != OK) {
508 return AMEDIA_ERROR_UNKNOWN;
509 }
510
511 Track *track = mFirstTrack;
512 while (index > 0) {
513 if (track == NULL) {
514 return AMEDIA_ERROR_UNKNOWN;
515 }
516
517 track = track->next;
518 --index;
519 }
520
521 if (track == NULL) {
522 return AMEDIA_ERROR_UNKNOWN;
523 }
524
525 [this, &track] {
526 int64_t duration;
527 int32_t samplerate;
528 // Only for audio track.
529 if (track->elst_needs_processing && mHeaderTimescale != 0 &&
530 AMediaFormat_getInt64(track->meta, AMEDIAFORMAT_KEY_DURATION, &duration) &&
531 AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
532 // Elst has to be processed only the first time this function is called.
533 track->elst_needs_processing = false;
534
535 if (track->elst_segment_duration > INT64_MAX) {
536 return;
537 }
538 int64_t segment_duration = track->elst_segment_duration;
539 int64_t media_time = track->elst_media_time;
540 int64_t halfscale = track->timescale / 2;
541
542 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
543 ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
544 segment_duration, media_time,
545 halfscale, mHeaderTimescale, track->timescale);
546
547 if ((uint32_t)samplerate != track->timescale){
548 ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
549 samplerate);
550 }
551 // Both delay and paddingsamples have to be set inorder for either to be
552 // effective in the lower layers.
553 int64_t delay = 0;
554 if (media_time > 0) { // Gapless playback
555 // delay = ((media_time * samplerate) + halfscale) / track->timescale;
556 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
557 __builtin_add_overflow(delay, halfscale, &delay) ||
558 (delay /= track->timescale, false) ||
559 delay > INT32_MAX ||
560 delay < INT32_MIN) {
561 ALOGW("ignoring edit list with bogus values");
562 return;
563 }
564 }
565 ALOGV("delay = %" PRId64, delay);
566 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
567
568 int64_t paddingsamples = 0;
569 if (segment_duration > 0) {
570 int64_t scaled_duration;
571 // scaled_duration = duration * mHeaderTimescale;
572 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
573 return;
574 }
575 ALOGV("scaled_duration = %" PRId64, scaled_duration);
576
577 int64_t segment_end;
578 int64_t padding;
579 int64_t segment_duration_e6;
580 int64_t media_time_scaled_e6;
581 int64_t media_time_scaled;
582 // padding = scaled_duration - ((segment_duration * 1000000) +
583 // ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
584 // segment_duration is based on timescale in movie header box(mdhd)
585 // media_time is based on timescale track header/media timescale
586 if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
587 __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
588 __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
589 return;
590 }
591 media_time_scaled_e6 /= track->timescale;
592 if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
593 || __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
594 return;
595 }
596 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
597 // track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
598 // might be slightly shorter than the segment duration, which would make the
599 // padding negative. Clamp to zero.
600 if (padding > 0) {
601 int64_t halfscale_mht = mHeaderTimescale / 2;
602 int64_t halfscale_e6;
603 int64_t timescale_e6;
604 // paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
605 // / (mHeaderTimescale * 1000000);
606 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
607 __builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
608 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) ||
609 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
610 (paddingsamples /= timescale_e6, false) ||
611 paddingsamples > INT32_MAX) {
612 return;
613 }
614 }
615 }
616 ALOGV("paddingsamples = %" PRId64, paddingsamples);
617 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
618 }
619 }();
620
621 if ((flags & kIncludeExtensiveMetaData)
622 && !track->includes_expensive_metadata) {
623 track->includes_expensive_metadata = true;
624
625 const char *mime;
626 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
627 if (!strncasecmp("video/", mime, 6)) {
628 // MPEG2 tracks do not provide CSD, so read the stream header
629 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
630 off64_t offset;
631 size_t size;
632 if (track->sampleTable->getMetaDataForSample(
633 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
634 if (size > kMaxTrackHeaderSize) {
635 size = kMaxTrackHeaderSize;
636 }
637 uint8_t header[kMaxTrackHeaderSize];
638 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
639 AMediaFormat_setBuffer(track->meta,
640 AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
641 }
642 }
643 }
644
645 if (mMoofOffset > 0) {
646 int64_t duration;
647 if (AMediaFormat_getInt64(track->meta,
648 AMEDIAFORMAT_KEY_DURATION, &duration)) {
649 // nothing fancy, just pick a frame near 1/4th of the duration
650 AMediaFormat_setInt64(track->meta,
651 AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
652 }
653 } else {
654 uint32_t sampleIndex;
655 uint64_t sampleTime;
656 if (track->timescale != 0 &&
657 track->sampleTable->findThumbnailSample(&sampleIndex) == OK
658 && track->sampleTable->getMetaDataForSample(
659 sampleIndex, NULL /* offset */, NULL /* size */,
660 &sampleTime) == OK) {
661 AMediaFormat_setInt64(track->meta,
662 AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
663 ((int64_t)sampleTime * 1000000) / track->timescale);
664 }
665 }
666 }
667 }
668
669 return AMediaFormat_copy(meta, track->meta);
670 }
671
// Parses the container's top-level chunks once and caches the outcome in
// mInitCheck; later calls return the cached status immediately.
//
// After parsing, image items from the item table (HEIF/AVIF) are appended as
// tracks, the file-level MIME type is chosen from the tracks found, and all
// collected PSSH entries are packed into the file metadata.
//
// Returns mInitCheck, or NO_MEMORY when the PSSH staging buffer cannot be
// allocated.
status_t MPEG4Extractor::readMetaData() {
    if (mInitCheck != NO_INIT) {
        return mInitCheck;
    }

    off64_t offset = 0;
    status_t err;
    bool sawMoovOrSidx = false;

    // Keep parsing until either
    //  - a moov box was seen, sawMoovOrSidx is set and an mdat or moof was
    //    found, or
    //  - the file is HEIF with a valid item table and HEIF is preferred or
    //    there is no moov box.
    while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
             (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
                     (mItemTable != NULL) && mItemTable->isValid()))) {
        off64_t orig_offset = offset;
        err = parseChunk(&offset, 0);

        if (err != OK && err != UNKNOWN_ERROR) {
            break;
        } else if (offset <= orig_offset) {
            // only continue parsing if the offset was advanced,
            // otherwise we might end up in an infinite loop
            ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
            err = ERROR_MALFORMED;
            break;
        } else if (err == UNKNOWN_ERROR) {
            // NOTE(review): UNKNOWN_ERROR is treated as a marker rather than
            // a failure here; presumably parseChunk() returns it after a
            // moov or sidx box -- confirm against parseChunk().
            sawMoovOrSidx = true;
        }
    }

    if ((mIsAvif || mIsHeif) && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
        off64_t exifOffset;
        size_t exifSize;
        if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
            AMediaFormat_setInt64(mFileMetaData,
                    AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
            AMediaFormat_setInt64(mFileMetaData,
                    AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
        }
        off64_t xmpOffset;
        size_t xmpSize;
        if (mItemTable->getXmpOffsetAndSize(&xmpOffset, &xmpSize) == OK) {
            // TODO(chz): b/175717339
            // Use a hard-coded string here instead of named keys. The keys are available
            // only on API 31+. The mp4 extractor is part of mainline and has min_sdk_version
            // of 29. This hard-coded string can be replaced with the named constant once
            // the mp4 extractor is built against API 31+.
            AMediaFormat_setInt64(mFileMetaData,
                    "xmp-offset" /*AMEDIAFORMAT_KEY_XMP_OFFSET*/, (int64_t)xmpOffset);
            AMediaFormat_setInt64(mFileMetaData,
                    "xmp-size" /*AMEDIAFORMAT_KEY_XMP_SIZE*/, (int64_t)xmpSize);
        }
        // Append one track per image item that has metadata.
        for (uint32_t imageIndex = 0;
                imageIndex < mItemTable->countImages(); imageIndex++) {
            AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
            if (meta == NULL) {
                ALOGE("heif image %u has no meta!", imageIndex);
                continue;
            }
            // Some heif files advertise image sequence brands (eg. 'hevc') in
            // ftyp box, but don't have any valid tracks in them. Instead of
            // reporting the entire file as malformed, we override the error
            // to allow still images to be extracted.
            if (err != OK) {
                ALOGW("Extracting still images only");
                err = OK;
            }
            mInitCheck = OK;

            ALOGV("adding %s image track %u", mIsHeif ? "HEIF" : "AVIF", imageIndex);
            Track *track = new Track;
            if (mLastTrack != NULL) {
                mLastTrack->next = track;
            } else {
                mFirstTrack = track;
            }
            mLastTrack = track;

            track->meta = meta;
            AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
            track->timescale = 1000000;
        }
    }

    if (mInitCheck == OK) {
        // Pick the container MIME type from the first matching kind of track,
        // in priority order: video, audio, HEIC image, AVIF image.
        if (findTrackByMimePrefix("video/") != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
        } else if (findTrackByMimePrefix("audio/") != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, "audio/mp4");
        } else if (findTrackByMimePrefix(
                MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
        } else if (findTrackByMimePrefix(
                MEDIA_MIMETYPE_IMAGE_AVIF) != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_IMAGE_AVIF);
        } else {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
        }
    } else {
        // mInitCheck was not set to OK above, so record the parse result.
        mInitCheck = err;
    }

    CHECK_NE(err, (status_t)NO_INIT);

    // copy pssh data into file metadata
    // Each entry contributes a 20-byte header plus its payload.
    uint64_t psshsize = 0;
    for (size_t i = 0; i < mPssh.size(); i++) {
        psshsize += 20 + mPssh[i].datalen;
    }
    if (psshsize > 0 && psshsize <= UINT32_MAX) {
        char *buf = (char*)malloc(psshsize);
        if (!buf) {
            ALOGE("b/28471206");
            return NO_MEMORY;
        }
        char *ptr = buf;
        for (size_t i = 0; i < mPssh.size(); i++) {
            // NOTE(review): copies 20 bytes starting at "uuid"; this relies
            // on the length field directly following the uuid in the entry
            // struct -- confirm against the struct definition.
            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
            ptr += (20 + mPssh[i].datalen);
        }
        AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
        free(buf);
    }

    return mInitCheck;
}
802
// Scope guard for the chunk path: pushes "chunkType" onto the given vector on
// construction and pops the last entry again on destruction.
struct PathAdder {
    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
        : mPath(path) {
        mPath->push(chunkType);
    }

    ~PathAdder() {
        mPath->pop();
    }

private:
    // Not deleted by this object; must stay valid for this object's lifetime.
    Vector<uint32_t> *mPath;

    // Copy operations are declared private and not defined here: a guard
    // is not meant to be copied.
    PathAdder(const PathAdder &);
    PathAdder &operator=(const PathAdder &);
};
819
underMetaDataPath(const Vector<uint32_t> & path)820 static bool underMetaDataPath(const Vector<uint32_t> &path) {
821 return path.size() >= 5
822 && path[0] == FOURCC("moov")
823 && path[1] == FOURCC("udta")
824 && path[2] == FOURCC("meta")
825 && path[3] == FOURCC("ilst");
826 }
827
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)828 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
829 return path.size() >= 2
830 && path[0] == FOURCC("moov")
831 && path[1] == FOURCC("meta")
832 && (depth == 2
833 || (depth == 3
834 && (path[2] == FOURCC("hdlr")
835 || path[2] == FOURCC("ilst")
836 || path[2] == FOURCC("keys"))));
837 }
838
839 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)840 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
841 // delta between mpeg4 time and unix epoch time
842 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
843 if (time_1904 < INT64_MIN + delta) {
844 return false;
845 }
846 time_t time_1970 = time_1904 - delta;
847
848 char tmp[32];
849 struct tm* tm = gmtime(&time_1970);
850 if (tm != NULL &&
851 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
852 *s = tmp;
853 return true;
854 }
855 return false;
856 }
857
parseChunk(off64_t * offset,int depth)858 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
859 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
860
861 if (*offset < 0) {
862 ALOGE("b/23540914");
863 return ERROR_MALFORMED;
864 }
865 if (depth > 100) {
866 ALOGE("b/27456299");
867 return ERROR_MALFORMED;
868 }
869 uint32_t hdr[2];
870 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
871 return ERROR_IO;
872 }
873 uint64_t chunk_size = ntohl(hdr[0]);
874 int32_t chunk_type = ntohl(hdr[1]);
875 off64_t data_offset = *offset + 8;
876
877 if (chunk_size == 1) {
878 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
879 return ERROR_IO;
880 }
881 chunk_size = ntoh64(chunk_size);
882 data_offset += 8;
883
884 if (chunk_size < 16) {
885 // The smallest valid chunk is 16 bytes long in this case.
886 return ERROR_MALFORMED;
887 }
888 } else if (chunk_size == 0) {
889 if (depth == 0) {
890 // atom extends to end of file
891 off64_t sourceSize;
892 if (mDataSource->getSize(&sourceSize) == OK) {
893 chunk_size = (sourceSize - *offset);
894 } else {
895 // XXX could we just pick a "sufficiently large" value here?
896 ALOGE("atom size is 0, and data source has no size");
897 return ERROR_MALFORMED;
898 }
899 } else {
900 // not allowed for non-toplevel atoms, skip it
901 *offset += 4;
902 return OK;
903 }
904 } else if (chunk_size < 8) {
905 // The smallest valid chunk is 8 bytes long.
906 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
907 return ERROR_MALFORMED;
908 }
909
910 char chunk[5];
911 MakeFourCCString(chunk_type, chunk);
912 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
913
914 if (kUseHexDump) {
915 static const char kWhitespace[] = " ";
916 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
917 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
918
919 char buffer[256];
920 size_t n = chunk_size;
921 if (n > sizeof(buffer)) {
922 n = sizeof(buffer);
923 }
924 if (mDataSource->readAt(*offset, buffer, n)
925 < (ssize_t)n) {
926 return ERROR_IO;
927 }
928
929 hexdump(buffer, n);
930 }
931
932 PathAdder autoAdder(&mPath, chunk_type);
933
934 // (data_offset - *offset) is either 8 or 16
935 off64_t chunk_data_size = chunk_size - (data_offset - *offset);
936 if (chunk_data_size < 0) {
937 ALOGE("b/23540914");
938 return ERROR_MALFORMED;
939 }
940 if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
941 char errMsg[100];
942 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
943 ALOGE("%s (b/28615448)", errMsg);
944 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
945 return ERROR_MALFORMED;
946 }
947
948 if (chunk_type != FOURCC("cprt")
949 && chunk_type != FOURCC("covr")
950 && mPath.size() == 5 && underMetaDataPath(mPath)) {
951 off64_t stop_offset = *offset + chunk_size;
952 *offset = data_offset;
953 while (*offset < stop_offset) {
954 status_t err = parseChunk(offset, depth + 1);
955 if (err != OK) {
956 return err;
957 }
958 }
959
960 if (*offset != stop_offset) {
961 return ERROR_MALFORMED;
962 }
963
964 return OK;
965 }
966
967 switch(chunk_type) {
968 case FOURCC("moov"):
969 case FOURCC("trak"):
970 case FOURCC("mdia"):
971 case FOURCC("minf"):
972 case FOURCC("dinf"):
973 case FOURCC("stbl"):
974 case FOURCC("mvex"):
975 case FOURCC("moof"):
976 case FOURCC("traf"):
977 case FOURCC("mfra"):
978 case FOURCC("udta"):
979 case FOURCC("ilst"):
980 case FOURCC("sinf"):
981 case FOURCC("schi"):
982 case FOURCC("edts"):
983 case FOURCC("wave"):
984 {
985 if (chunk_type == FOURCC("moov") && depth != 0) {
986 ALOGE("moov: depth %d", depth);
987 return ERROR_MALFORMED;
988 }
989
990 if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
991 ALOGE("duplicate moov");
992 return ERROR_MALFORMED;
993 }
994
995 if (chunk_type == FOURCC("moof") && !mMoofFound) {
996 // store the offset of the first segment
997 mMoofFound = true;
998 mMoofOffset = *offset;
999 }
1000
1001 if (chunk_type == FOURCC("stbl")) {
1002 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
1003
1004 if (mDataSource->flags()
1005 & (DataSourceBase::kWantsPrefetching
1006 | DataSourceBase::kIsCachingDataSource)) {
1007 CachedRangedDataSource *cachedSource =
1008 new CachedRangedDataSource(mDataSource);
1009
1010 if (cachedSource->setCachedRange(
1011 *offset, chunk_size,
1012 true /* assume ownership on success */) == OK) {
1013 mDataSource = cachedSource;
1014 } else {
1015 delete cachedSource;
1016 }
1017 }
1018
1019 if (mLastTrack == NULL) {
1020 return ERROR_MALFORMED;
1021 }
1022
1023 mLastTrack->sampleTable = new SampleTable(mDataSource);
1024 }
1025
1026 bool isTrack = false;
1027 if (chunk_type == FOURCC("trak")) {
1028 if (depth != 1) {
1029 ALOGE("trak: depth %d", depth);
1030 return ERROR_MALFORMED;
1031 }
1032 isTrack = true;
1033
1034 ALOGV("adding new track");
1035 Track *track = new Track;
1036 if (mLastTrack) {
1037 mLastTrack->next = track;
1038 } else {
1039 mFirstTrack = track;
1040 }
1041 mLastTrack = track;
1042
1043 track->meta = AMediaFormat_new();
1044 AMediaFormat_setString(track->meta,
1045 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
1046 }
1047
1048 off64_t stop_offset = *offset + chunk_size;
1049 *offset = data_offset;
1050 while (*offset < stop_offset) {
1051
1052 // pass udata terminate
1053 if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
1054 // handle the case that udta terminates with terminate code x00000000
1055 // note that 0 terminator is optional and we just handle this case.
1056 uint32_t terminate_code = 1;
1057 mDataSource->readAt(*offset, &terminate_code, 4);
1058 if (0 == terminate_code) {
1059 *offset += 4;
1060 ALOGD("Terminal code for udta");
1061 continue;
1062 } else {
1063 ALOGW("invalid udta Terminal code");
1064 }
1065 }
1066
1067 status_t err = parseChunk(offset, depth + 1);
1068 if (err != OK) {
1069 if (isTrack) {
1070 mLastTrack->skipTrack = true;
1071 break;
1072 }
1073 return err;
1074 }
1075 }
1076
1077 if (*offset != stop_offset) {
1078 return ERROR_MALFORMED;
1079 }
1080
1081 if (isTrack) {
1082 int32_t trackId;
1083 // There must be exactly one track header per track.
1084
1085 if (!AMediaFormat_getInt32(mLastTrack->meta,
1086 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
1087 mLastTrack->skipTrack = true;
1088 }
1089
1090 status_t err = verifyTrack(mLastTrack);
1091 if (err != OK) {
1092 mLastTrack->skipTrack = true;
1093 }
1094
1095
1096 if (mLastTrack->skipTrack) {
1097 ALOGV("skipping this track...");
1098 Track *cur = mFirstTrack;
1099
1100 if (cur == mLastTrack) {
1101 delete cur;
1102 mFirstTrack = mLastTrack = NULL;
1103 } else {
1104 while (cur && cur->next != mLastTrack) {
1105 cur = cur->next;
1106 }
1107 if (cur) {
1108 cur->next = NULL;
1109 }
1110 delete mLastTrack;
1111 mLastTrack = cur;
1112 }
1113
1114 return OK;
1115 }
1116
1117 // place things we built elsewhere into their final locations
1118
1119 // put aggregated tx3g data into the metadata
1120 if (mLastTrack->mTx3gFilled > 0) {
1121 ALOGV("Putting %zu bytes of tx3g data into meta data",
1122 mLastTrack->mTx3gFilled);
1123 AMediaFormat_setBuffer(mLastTrack->meta,
1124 AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
1125 mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
1126 // drop it now to reduce our footprint
1127 free(mLastTrack->mTx3gBuffer);
1128 mLastTrack->mTx3gBuffer = NULL;
1129 mLastTrack->mTx3gFilled = 0;
1130 mLastTrack->mTx3gSize = 0;
1131 }
1132
1133 const char *mime;
1134 AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime);
1135
1136 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
1137 void *data;
1138 size_t size;
1139
1140 if (AMediaFormat_getBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
1141 &data, &size)
1142 && size >= 5) {
1143 const uint8_t *ptr = (const uint8_t *)data;
1144 const uint8_t profile = ptr[2] >> 1;
1145 const uint8_t blCompatibilityId = (ptr[4]) >> 4;
1146 bool create_two_tracks = false;
1147
1148 if (blCompatibilityId && blCompatibilityId != 15) {
1149 create_two_tracks = true;
1150 }
1151
1152 if (4 == profile || 7 == profile ||
1153 (profile >= 8 && profile < 11 && create_two_tracks)) {
1154 // we need a backward compatible track
1155 ALOGV("Adding new backward compatible track");
1156 Track *track_b = new Track;
1157
1158 track_b->timescale = mLastTrack->timescale;
1159 track_b->sampleTable = mLastTrack->sampleTable;
1160 track_b->includes_expensive_metadata =
1161 mLastTrack->includes_expensive_metadata;
1162 track_b->skipTrack = mLastTrack->skipTrack;
1163 track_b->elst_needs_processing = mLastTrack->elst_needs_processing;
1164 track_b->elst_media_time = mLastTrack->elst_media_time;
1165 track_b->elst_segment_duration = mLastTrack->elst_segment_duration;
1166 track_b->elst_shift_start_ticks = mLastTrack->elst_shift_start_ticks;
1167 track_b->elst_initial_empty_edit_ticks =
1168 mLastTrack->elst_initial_empty_edit_ticks;
1169 track_b->subsample_encryption = mLastTrack->subsample_encryption;
1170
1171 track_b->mTx3gBuffer = mLastTrack->mTx3gBuffer;
1172 track_b->mTx3gSize = mLastTrack->mTx3gSize;
1173 track_b->mTx3gFilled = mLastTrack->mTx3gFilled;
1174
1175 track_b->meta = AMediaFormat_new();
1176 AMediaFormat_copy(track_b->meta, mLastTrack->meta);
1177
1178 mLastTrack->next = track_b;
1179 track_b->next = NULL;
1180
1181 // we want to remove the csd-2 key from the metadata, but
1182 // don't have an AMediaFormat_* function to do so. Settle
1183 // for replacing this csd-2 with an empty csd-2.
1184 uint8_t emptybuffer[8] = {};
1185 AMediaFormat_setBuffer(track_b->meta, AMEDIAFORMAT_KEY_CSD_2,
1186 emptybuffer, 0);
1187
1188 if (4 == profile || 7 == profile || 8 == profile ) {
1189 AMediaFormat_setString(track_b->meta,
1190 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_HEVC);
1191 } else if (9 == profile) {
1192 AMediaFormat_setString(track_b->meta,
1193 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AVC);
1194 } else if (10 == profile) {
1195 AMediaFormat_setString(track_b->meta,
1196 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AV1);
1197 } // Should never get to else part
1198
1199 mLastTrack = track_b;
1200 }
1201 }
1202 }
1203 } else if (chunk_type == FOURCC("moov")) {
1204 mInitCheck = OK;
1205
1206 return UNKNOWN_ERROR; // Return a generic error.
1207 }
1208 break;
1209 }
1210
1211 case FOURCC("schm"):
1212 {
1213
1214 *offset += chunk_size;
1215 if (!mLastTrack) {
1216 return ERROR_MALFORMED;
1217 }
1218
1219 uint32_t scheme_type;
1220 if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
1221 return ERROR_IO;
1222 }
1223 scheme_type = ntohl(scheme_type);
1224 int32_t mode = kCryptoModeUnencrypted;
1225 switch(scheme_type) {
1226 case FOURCC("cbc1"):
1227 {
1228 mode = kCryptoModeAesCbc;
1229 break;
1230 }
1231 case FOURCC("cbcs"):
1232 {
1233 mode = kCryptoModeAesCbc;
1234 mLastTrack->subsample_encryption = true;
1235 break;
1236 }
1237 case FOURCC("cenc"):
1238 {
1239 mode = kCryptoModeAesCtr;
1240 break;
1241 }
1242 case FOURCC("cens"):
1243 {
1244 mode = kCryptoModeAesCtr;
1245 mLastTrack->subsample_encryption = true;
1246 break;
1247 }
1248 }
1249 if (mode != kCryptoModeUnencrypted) {
1250 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
1251 }
1252 break;
1253 }
1254
1255
1256 case FOURCC("elst"):
1257 {
1258 *offset += chunk_size;
1259
1260 if (!mLastTrack) {
1261 return ERROR_MALFORMED;
1262 }
1263
1264 // See 14496-12 8.6.6
1265 uint8_t version;
1266 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1267 return ERROR_IO;
1268 }
1269
1270 uint32_t entry_count;
1271 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1272 return ERROR_IO;
1273 }
1274
1275 if (entry_count > 2) {
1276 /* We support a single entry for gapless playback or negating offset for
1277 * reordering B frames, two entries (empty edit) for start offset at the moment.
1278 */
1279 ALOGW("ignoring edit list with %d entries", entry_count);
1280 } else {
1281 off64_t entriesoffset = data_offset + 8;
1282 uint64_t segment_duration;
1283 int64_t media_time;
1284 bool empty_edit_present = false;
1285 for (int i = 0; i < entry_count; ++i) {
1286 switch (version) {
1287 case 0: {
1288 uint32_t sd;
1289 int32_t mt;
1290 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1291 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1292 return ERROR_IO;
1293 }
1294 segment_duration = sd;
1295 media_time = mt;
1296 // 4(segment duration) + 4(media time) + 4(media rate)
1297 entriesoffset += 12;
1298 break;
1299 }
1300 case 1: {
1301 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1302 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1303 return ERROR_IO;
1304 }
1305 // 8(segment duration) + 8(media time) + 4(media rate)
1306 entriesoffset += 20;
1307 break;
1308 }
1309 default:
1310 return ERROR_IO;
1311 break;
1312 }
1313 // Empty edit entry would have to be first entry.
1314 if (media_time == -1 && i == 0) {
1315 empty_edit_present = true;
1316 ALOGV("initial empty edit ticks: %" PRIu64, segment_duration);
1317 /* In movie header timescale, and needs to be converted to media timescale
1318 * after we get that from a track's 'mdhd' atom,
1319 * which at times come after 'elst'.
1320 */
1321 mLastTrack->elst_initial_empty_edit_ticks = segment_duration;
1322 } else if (media_time >= 0 && i == 0) {
1323 ALOGV("first edit list entry - from gapless playback files");
1324 mLastTrack->elst_media_time = media_time;
1325 mLastTrack->elst_segment_duration = segment_duration;
1326 ALOGV("segment_duration: %" PRIu64 " media_time: %" PRId64,
1327 segment_duration, media_time);
1328 // media_time is in media timescale as are STTS/CTTS entries.
1329 mLastTrack->elst_shift_start_ticks = media_time;
1330 } else if (empty_edit_present && i == 1) {
1331 // Process second entry only when the first entry was an empty edit entry.
1332 ALOGV("second edit list entry");
1333 mLastTrack->elst_shift_start_ticks = media_time;
1334 } else {
1335 ALOGW("for now, unsupported entry in edit list %" PRIu32, entry_count);
1336 }
1337 }
1338 // save these for later, because the elst atom might precede
1339 // the atoms that actually gives us the duration and sample rate
1340 // needed to calculate the padding and delay values
1341 mLastTrack->elst_needs_processing = true;
1342 }
1343 break;
1344 }
1345
1346 case FOURCC("frma"):
1347 {
1348 *offset += chunk_size;
1349
1350 uint32_t original_fourcc;
1351 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1352 return ERROR_IO;
1353 }
1354 original_fourcc = ntohl(original_fourcc);
1355 ALOGV("read original format: %d", original_fourcc);
1356
1357 if (mLastTrack == NULL) {
1358 return ERROR_MALFORMED;
1359 }
1360
1361 AMediaFormat_setString(mLastTrack->meta,
1362 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
1363 uint32_t num_channels = 0;
1364 uint32_t sample_rate = 0;
1365 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1366 AMediaFormat_setInt32(mLastTrack->meta,
1367 AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1368 AMediaFormat_setInt32(mLastTrack->meta,
1369 AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1370 }
1371
1372 if (!mIsQT && original_fourcc == FOURCC("alac")) {
1373 off64_t tmpOffset = *offset;
1374 status_t err = parseALACSampleEntry(&tmpOffset);
1375 if (err != OK) {
1376 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1377 return err;
1378 }
1379 *offset = tmpOffset + 8;
1380 }
1381
1382 break;
1383 }
1384
1385 case FOURCC("tenc"):
1386 {
1387 *offset += chunk_size;
1388
1389 if (chunk_size < 32) {
1390 return ERROR_MALFORMED;
1391 }
1392
1393 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1394 // default IV size, 16 bytes default KeyID
1395 // (ISO 23001-7)
1396
1397 uint8_t version;
1398 if (mDataSource->readAt(data_offset, &version, sizeof(version))
1399 < (ssize_t)sizeof(version)) {
1400 return ERROR_IO;
1401 }
1402
1403 uint8_t buf[4];
1404 memset(buf, 0, 4);
1405 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1406 return ERROR_IO;
1407 }
1408
1409 if (mLastTrack == NULL) {
1410 return ERROR_MALFORMED;
1411 }
1412
1413 uint8_t defaultEncryptedByteBlock = 0;
1414 uint8_t defaultSkipByteBlock = 0;
1415 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1416 if (version == 1) {
1417 uint32_t pattern = buf[2];
1418 defaultEncryptedByteBlock = pattern >> 4;
1419 defaultSkipByteBlock = pattern & 0xf;
1420 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1421 // use (1,0) to mean "encrypt everything"
1422 defaultEncryptedByteBlock = 1;
1423 }
1424 } else if (mLastTrack->subsample_encryption) {
1425 ALOGW("subsample_encryption should be version 1");
1426 } else if (defaultAlgorithmId > 1) {
1427 // only 0 (clear) and 1 (AES-128) are valid
1428 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1429 defaultAlgorithmId = 1;
1430 }
1431
1432 memset(buf, 0, 4);
1433 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1434 return ERROR_IO;
1435 }
1436 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1437
1438 if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1439 // only unencrypted data must have 0 IV size
1440 return ERROR_MALFORMED;
1441 } else if (defaultIVSize != 0 &&
1442 defaultIVSize != 8 &&
1443 defaultIVSize != 16) {
1444 return ERROR_MALFORMED;
1445 }
1446
1447 uint8_t defaultKeyId[16];
1448
1449 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1450 return ERROR_IO;
1451 }
1452
1453 sp<ABuffer> defaultConstantIv;
1454 if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1455
1456 uint8_t ivlength;
1457 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1458 < (ssize_t)sizeof(ivlength)) {
1459 return ERROR_IO;
1460 }
1461
1462 if (ivlength != 8 && ivlength != 16) {
1463 ALOGW("unsupported IV length: %u", ivlength);
1464 return ERROR_MALFORMED;
1465 }
1466
1467 defaultConstantIv = new ABuffer(ivlength);
1468 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1469 < (ssize_t)ivlength) {
1470 return ERROR_IO;
1471 }
1472
1473 defaultConstantIv->setRange(0, ivlength);
1474 }
1475
1476 int32_t tmpAlgorithmId;
1477 if (!AMediaFormat_getInt32(mLastTrack->meta,
1478 AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
1479 AMediaFormat_setInt32(mLastTrack->meta,
1480 AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
1481 }
1482
1483 AMediaFormat_setInt32(mLastTrack->meta,
1484 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
1485 AMediaFormat_setBuffer(mLastTrack->meta,
1486 AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
1487 AMediaFormat_setInt32(mLastTrack->meta,
1488 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
1489 AMediaFormat_setInt32(mLastTrack->meta,
1490 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
1491 if (defaultConstantIv != NULL) {
1492 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
1493 defaultConstantIv->data(), defaultConstantIv->size());
1494 }
1495 break;
1496 }
1497
1498 case FOURCC("tkhd"):
1499 {
1500 *offset += chunk_size;
1501
1502 status_t err;
1503 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1504 return err;
1505 }
1506
1507 break;
1508 }
1509
1510 case FOURCC("tref"):
1511 {
1512 off64_t stop_offset = *offset + chunk_size;
1513 *offset = data_offset;
1514 while (*offset < stop_offset) {
1515 status_t err = parseChunk(offset, depth + 1);
1516 if (err != OK) {
1517 return err;
1518 }
1519 }
1520 if (*offset != stop_offset) {
1521 return ERROR_MALFORMED;
1522 }
1523 break;
1524 }
1525
1526 case FOURCC("thmb"):
1527 {
1528 *offset += chunk_size;
1529
1530 if (mLastTrack != NULL) {
1531 // Skip thumbnail track for now since we don't have an
1532 // API to retrieve it yet.
1533 // The thumbnail track can't be accessed by negative index or time,
1534 // because each timed sample has its own corresponding thumbnail
1535 // in the thumbnail track. We'll need a dedicated API to retrieve
1536 // thumbnail at time instead.
1537 mLastTrack->skipTrack = true;
1538 }
1539
1540 break;
1541 }
1542
1543 case FOURCC("pssh"):
1544 {
1545 *offset += chunk_size;
1546
1547 PsshInfo pssh;
1548
1549 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1550 return ERROR_IO;
1551 }
1552
1553 uint32_t psshdatalen = 0;
1554 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1555 return ERROR_IO;
1556 }
1557 pssh.datalen = ntohl(psshdatalen);
1558 ALOGV("pssh data size: %d", pssh.datalen);
1559 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1560 // pssh data length exceeds size of containing box
1561 return ERROR_MALFORMED;
1562 }
1563
1564 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1565 if (pssh.data == NULL) {
1566 return ERROR_MALFORMED;
1567 }
1568 ALOGV("allocated pssh @ %p", pssh.data);
1569 ssize_t requested = (ssize_t) pssh.datalen;
1570 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1571 delete[] pssh.data;
1572 return ERROR_IO;
1573 }
1574 mPssh.push_back(pssh);
1575
1576 break;
1577 }
1578
1579 case FOURCC("mdhd"):
1580 {
1581 *offset += chunk_size;
1582
1583 if (chunk_data_size < 4 || mLastTrack == NULL) {
1584 return ERROR_MALFORMED;
1585 }
1586
1587 uint8_t version;
1588 if (mDataSource->readAt(
1589 data_offset, &version, sizeof(version))
1590 < (ssize_t)sizeof(version)) {
1591 return ERROR_IO;
1592 }
1593
1594 off64_t timescale_offset;
1595
1596 if (version == 1) {
1597 timescale_offset = data_offset + 4 + 16;
1598 } else if (version == 0) {
1599 timescale_offset = data_offset + 4 + 8;
1600 } else {
1601 return ERROR_IO;
1602 }
1603
1604 uint32_t timescale;
1605 if (mDataSource->readAt(
1606 timescale_offset, ×cale, sizeof(timescale))
1607 < (ssize_t)sizeof(timescale)) {
1608 return ERROR_IO;
1609 }
1610
1611 if (!timescale) {
1612 ALOGE("timescale should not be ZERO.");
1613 return ERROR_MALFORMED;
1614 }
1615
1616 mLastTrack->timescale = ntohl(timescale);
1617
1618 // 14496-12 says all ones means indeterminate, but some files seem to use
1619 // 0 instead. We treat both the same.
1620 int64_t duration = 0;
1621 if (version == 1) {
1622 if (mDataSource->readAt(
1623 timescale_offset + 4, &duration, sizeof(duration))
1624 < (ssize_t)sizeof(duration)) {
1625 return ERROR_IO;
1626 }
1627 if (duration != -1) {
1628 duration = ntoh64(duration);
1629 }
1630 } else {
1631 uint32_t duration32;
1632 if (mDataSource->readAt(
1633 timescale_offset + 4, &duration32, sizeof(duration32))
1634 < (ssize_t)sizeof(duration32)) {
1635 return ERROR_IO;
1636 }
1637 if (duration32 != 0xffffffff) {
1638 duration = ntohl(duration32);
1639 }
1640 }
1641 if (duration != 0 && mLastTrack->timescale != 0) {
1642 long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
1643 if (durationUs < 0 || durationUs > INT64_MAX) {
1644 ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
1645 (long long) duration, (long long) mLastTrack->timescale);
1646 return ERROR_MALFORMED;
1647 }
1648 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
1649 }
1650
1651 uint8_t lang[2];
1652 off64_t lang_offset;
1653 if (version == 1) {
1654 lang_offset = timescale_offset + 4 + 8;
1655 } else if (version == 0) {
1656 lang_offset = timescale_offset + 4 + 4;
1657 } else {
1658 return ERROR_IO;
1659 }
1660
1661 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1662 < (ssize_t)sizeof(lang)) {
1663 return ERROR_IO;
1664 }
1665
1666 // To get the ISO-639-2/T three character language code
1667 // 1 bit pad followed by 3 5-bits characters. Each character
1668 // is packed as the difference between its ASCII value and 0x60.
1669 char lang_code[4];
1670 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1671 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1672 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1673 lang_code[3] = '\0';
1674
1675 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
1676
1677 break;
1678 }
1679
1680 case FOURCC("stsd"):
1681 {
1682 uint8_t buffer[8];
1683 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1684 return ERROR_MALFORMED;
1685 }
1686
1687 if (mDataSource->readAt(
1688 data_offset, buffer, 8) < 8) {
1689 return ERROR_IO;
1690 }
1691
1692 if (U32_AT(buffer) != 0) {
1693 // Should be version 0, flags 0.
1694 return ERROR_MALFORMED;
1695 }
1696
1697 uint32_t entry_count = U32_AT(&buffer[4]);
1698
1699 if (entry_count > 1) {
1700 // For 3GPP timed text, there could be multiple tx3g boxes contain
1701 // multiple text display formats. These formats will be used to
1702 // display the timed text.
1703 // For encrypted files, there may also be more than one entry.
1704 const char *mime;
1705
1706 if (mLastTrack == NULL)
1707 return ERROR_MALFORMED;
1708
1709 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1710 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1711 strcasecmp(mime, "application/octet-stream")) {
1712 // For now we only support a single type of media per track.
1713 mLastTrack->skipTrack = true;
1714 *offset += chunk_size;
1715 break;
1716 }
1717 }
1718 off64_t stop_offset = *offset + chunk_size;
1719 *offset = data_offset + 8;
1720 for (uint32_t i = 0; i < entry_count; ++i) {
1721 status_t err = parseChunk(offset, depth + 1);
1722 if (err != OK) {
1723 return err;
1724 }
1725 }
1726
1727 if (*offset != stop_offset) {
1728 return ERROR_MALFORMED;
1729 }
1730 break;
1731 }
1732 case FOURCC("mett"):
1733 {
1734 *offset += chunk_size;
1735
1736 // the absolute minimum size of a compliant mett box is 11 bytes:
1737 // 6 byte reserved, 2 byte index, null byte, one char mime_format, null byte
1738 // The resulting mime_format would be invalid at that size though.
1739 if (mLastTrack == NULL || chunk_data_size < 11) {
1740 return ERROR_MALFORMED;
1741 }
1742
1743 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1744 if (buffer.get() == NULL) {
1745 return NO_MEMORY;
1746 }
1747
1748 if (mDataSource->readAt(
1749 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1750 return ERROR_IO;
1751 }
1752
1753 // ISO-14496-12:
1754 // int8 reserved[6]; // should be all zeroes
1755 // int16_t data_reference_index;
1756 // char content_encoding[]; // null terminated, optional (= just the null byte)
1757 // char mime_format[]; // null terminated, mandatory
1758 // optional other boxes
1759 //
1760 // API < 29:
1761 // char mime_format[]; // null terminated
1762 //
1763 // API >= 29
1764 // char mime_format[]; // null terminated
1765 // char mime_format[]; // null terminated
1766
1767 // Prior to API 29, the metadata track was not compliant with ISO/IEC
1768 // 14496-12-2015. This led to some ISO-compliant parsers failing to read the
1769 // metatrack. As of API 29 and onwards, a change was made to metadata track to
1770 // make it somewhat compatible with the standard. The workaround is to write the
1771 // null-terminated mime_format string twice. This allows compliant parsers to
1772 // read the missing reserved, data_reference_index, and content_encoding fields
1773 // from the first mime_type string. The actual mime_format field would then be
1774 // read correctly from the second string. The non-compliant Android frameworks
1775 // from API 28 and earlier would still be able to read the mime_format correctly
1776 // as it would only read the first null-terminated mime_format string. To enable
1777 // reading metadata tracks generated from both the non-compliant and compliant
1778 // formats, a check needs to be done to see which format is used.
1779 const char *str = (const char*) buffer.get();
1780 size_t string_length = strnlen(str, chunk_data_size);
1781
1782 if (string_length == chunk_data_size - 1) {
1783 // This is likely a pre API 29 file, since it's a single null terminated
1784 // string filling the entire box.
1785 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, str);
1786 } else {
1787 // This might be a fully compliant metadata track, a "double mime" compatibility
1788 // track, or anything else, including a single non-terminated string, so we need
1789 // to determine the length of each string we want to parse out of the box.
1790 size_t encoding_length = strnlen(str + 8, chunk_data_size - 8);
1791 if (encoding_length + 8 >= chunk_data_size - 2) {
1792 // the encoding extends to the end of the box, so there's no mime_format
1793 return ERROR_MALFORMED;
1794 }
1795 String8 contentEncoding(str + 8, encoding_length);
1796 String8 mimeFormat(str + 8 + encoding_length + 1,
1797 chunk_data_size - 8 - encoding_length - 1);
1798 AMediaFormat_setString(mLastTrack->meta,
1799 AMEDIAFORMAT_KEY_MIME, mimeFormat.c_str());
1800 }
1801 break;
1802 }
1803
1804 case FOURCC("mp4a"):
1805 case FOURCC("enca"):
1806 case FOURCC("samr"):
1807 case FOURCC("sawb"):
1808 case FOURCC("Opus"):
1809 case FOURCC("twos"):
1810 case FOURCC("sowt"):
1811 case FOURCC("alac"):
1812 case FOURCC("fLaC"):
1813 case FOURCC(".mp3"):
1814 case 0x6D730055: // "ms U" mp3 audio
1815 case FOURCC("mha1"):
1816 case FOURCC("mhm1"):
1817 case FOURCC("dtsc"):
1818 case FOURCC("dtse"):
1819 case FOURCC("dtsh"):
1820 case FOURCC("dtsl"):
1821 case FOURCC("dtsx"):
1822 {
1823 if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
1824
1825 if (chunk_type == FOURCC("alac")) {
1826 off64_t offsetTmp = *offset;
1827 status_t err = parseALACSampleEntry(&offsetTmp);
1828 if (err != OK) {
1829 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1830 return err;
1831 }
1832 }
1833
1834 // Ignore all atoms embedded in QT wave atom
1835 ALOGV("Ignore all atoms embedded in QT wave atom");
1836 *offset += chunk_size;
1837 break;
1838 }
1839
1840 uint8_t buffer[8 + 20];
1841 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1842 // Basic AudioSampleEntry size.
1843 return ERROR_MALFORMED;
1844 }
1845
1846 if (mDataSource->readAt(
1847 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1848 return ERROR_IO;
1849 }
1850
1851 // we can get data_ref_index value from U16_AT(&buffer[6])
1852 uint16_t version = U16_AT(&buffer[8]);
1853 uint32_t num_channels = U16_AT(&buffer[16]);
1854
1855 uint16_t sample_size = U16_AT(&buffer[18]);
1856 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1857
1858 if (mLastTrack == NULL)
1859 return ERROR_MALFORMED;
1860
1861 off64_t stop_offset = *offset + chunk_size;
1862 *offset = data_offset + sizeof(buffer);
1863
1864 if (mIsQT) {
1865 if (version == 1) {
1866 if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1867 return ERROR_IO;
1868 }
1869
1870 #if 0
1871 U32_AT(buffer); // samples per packet
1872 U32_AT(&buffer[4]); // bytes per packet
1873 U32_AT(&buffer[8]); // bytes per frame
1874 U32_AT(&buffer[12]); // bytes per sample
1875 #endif
1876 *offset += 16;
1877 } else if (version == 2) {
1878 uint8_t v2buffer[36];
1879 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1880 return ERROR_IO;
1881 }
1882
1883 #if 0
1884 U32_AT(v2buffer); // size of struct only
1885 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate
1886 num_channels = U32_AT(&v2buffer[12]); // num audio channels
1887 U32_AT(&v2buffer[16]); // always 0x7f000000
1888 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel
1889 U32_AT(&v2buffer[24]); // format specifc flags
1890 U32_AT(&v2buffer[28]); // const bytes per audio packet
1891 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet
1892 #endif
1893 *offset += 36;
1894 }
1895 }
1896
1897 if (chunk_type != FOURCC("enca")) {
1898 // if the chunk type is enca, we'll get the type from the frma box later
1899 AMediaFormat_setString(mLastTrack->meta,
1900 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1901 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1902
1903 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
1904 AMediaFormat_setInt32(mLastTrack->meta,
1905 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
1906 if (chunk_type == FOURCC("twos")) {
1907 AMediaFormat_setInt32(mLastTrack->meta,
1908 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
1909 }
1910 }
1911 }
1912 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1913 chunk, num_channels, sample_size, sample_rate);
1914 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1915 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1916
1917 if (chunk_type == FOURCC("Opus")) {
1918 uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
1919 data_offset += sizeof(buffer);
1920 size_t opusInfoSize = chunk_data_size - sizeof(buffer);
1921
1922 if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
1923 opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
1924 return ERROR_MALFORMED;
1925 }
1926 // Read Opus Header
1927 if (mDataSource->readAt(
1928 data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
1929 return ERROR_IO;
1930 }
1931
1932 // OpusHeader must start with this magic sequence, overwrite first 8 bytes
1933 // http://wiki.xiph.org/OggOpus#ID_Header
1934 strncpy((char *)opusInfo, "OpusHead", 8);
1935
1936 // Version shall be 0 as per mp4 Opus Specific Box
1937 // (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
1938 if (opusInfo[8]) {
1939 return ERROR_MALFORMED;
1940 }
1941 // Force version to 1 as per OpusHead definition
1942 // (http://wiki.xiph.org/OggOpus#ID_Header)
1943 opusInfo[8] = 1;
1944
1945 // Read Opus Specific Box values
1946 size_t opusOffset = 10;
1947 uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
1948 uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
1949 uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
1950
1951 // Convert Opus Specific Box values. ParseOpusHeader expects
1952 // the values in LE, however MP4 stores these values as BE
1953 // https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
1954 memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
1955 memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
1956 memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
1957
1958 static const int64_t kSeekPreRollNs = 80000000; // Fixed 80 msec
1959 static const int32_t kOpusSampleRate = 48000;
1960 int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
1961
1962 AMediaFormat_setBuffer(mLastTrack->meta,
1963 AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
1964 AMediaFormat_setBuffer(mLastTrack->meta,
1965 AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
1966 AMediaFormat_setBuffer(mLastTrack->meta,
1967 AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
1968
1969 data_offset += opusInfoSize;
1970 *offset = data_offset;
1971 CHECK_EQ(*offset, stop_offset);
1972 }
1973
1974 if (!mIsQT && chunk_type == FOURCC("alac")) {
1975 data_offset += sizeof(buffer);
1976
1977 status_t err = parseALACSampleEntry(&data_offset);
1978 if (err != OK) {
1979 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1980 return err;
1981 }
1982 *offset = data_offset;
1983 CHECK_EQ(*offset, stop_offset);
1984 }
1985
1986 if (chunk_type == FOURCC("fLaC")) {
1987 data_offset += sizeof(buffer);
1988 *offset = data_offset;
1989 }
1990
1991 while (*offset < stop_offset) {
1992 status_t err = parseChunk(offset, depth + 1);
1993 if (err != OK) {
1994 return err;
1995 }
1996 }
1997
1998 if (*offset != stop_offset) {
1999 return ERROR_MALFORMED;
2000 }
2001 break;
2002 }
2003 case FOURCC("mhaC"):
2004 {
2005 // See ISO_IEC_23008-3;2019 MHADecoderConfigurationRecord
2006 constexpr uint32_t mhac_header_size = 4 /* size */ + 4 /* boxtype 'mhaC' */
2007 + 1 /* configurationVersion */ + 1 /* mpegh3daProfileLevelIndication */
2008 + 1 /* referenceChannelLayout */ + 2 /* mpegh3daConfigLength */;
2009 uint8_t mhac_header[mhac_header_size];
2010 off64_t data_offset = *offset;
2011
2012 if (mLastTrack == NULL || chunk_size < sizeof(mhac_header)) {
2013 return ERROR_MALFORMED;
2014 }
2015
2016 if (mDataSource->readAt(data_offset, mhac_header, sizeof(mhac_header))
2017 < (ssize_t)sizeof(mhac_header)) {
2018 return ERROR_IO;
2019 }
2020
2021 //get mpegh3daProfileLevelIndication
2022 const uint32_t mpegh3daProfileLevelIndication = mhac_header[9];
2023 AMediaFormat_setInt32(mLastTrack->meta,
2024 AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
2025 mpegh3daProfileLevelIndication);
2026
2027 //get referenceChannelLayout
2028 const uint32_t referenceChannelLayout = mhac_header[10];
2029 AMediaFormat_setInt32(mLastTrack->meta,
2030 AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
2031 referenceChannelLayout);
2032
2033 // get mpegh3daConfigLength
2034 const uint32_t mhac_config_size = U16_AT(&mhac_header[11]);
2035 if (chunk_size != sizeof(mhac_header) + mhac_config_size) {
2036 return ERROR_MALFORMED;
2037 }
2038
2039 data_offset += sizeof(mhac_header);
2040 uint8_t mhac_config[mhac_config_size];
2041 if (mDataSource->readAt(data_offset, mhac_config, sizeof(mhac_config))
2042 < (ssize_t)sizeof(mhac_config)) {
2043 return ERROR_IO;
2044 }
2045
2046 AMediaFormat_setBuffer(mLastTrack->meta,
2047 AMEDIAFORMAT_KEY_CSD_0, mhac_config, sizeof(mhac_config));
2048 data_offset += sizeof(mhac_config);
2049 *offset = data_offset;
2050 break;
2051 }
2052 case FOURCC("mhaP"):
2053 {
2054 // FDAmd_2 of ISO_IEC_23008-3;2019 MHAProfileAndLevelCompatibilitySetBox
2055 constexpr uint32_t mhap_header_size = 4 /* size */ + 4 /* boxtype 'mhaP' */
2056 + 1 /* numCompatibleSets */;
2057
2058 uint8_t mhap_header[mhap_header_size];
2059 off64_t data_offset = *offset;
2060
2061 if (chunk_size < (ssize_t)mhap_header_size) {
2062 return ERROR_MALFORMED;
2063 }
2064
2065 if (mDataSource->readAt(data_offset, mhap_header, sizeof(mhap_header))
2066 < (ssize_t)sizeof(mhap_header)) {
2067 return ERROR_IO;
2068 }
2069
2070 // mhap_compatible_sets_size = numCompatibleSets * sizeof(uint8_t)
2071 const uint32_t mhap_compatible_sets_size = mhap_header[8];
2072 if (chunk_size != sizeof(mhap_header) + mhap_compatible_sets_size) {
2073 return ERROR_MALFORMED;
2074 }
2075
2076 data_offset += sizeof(mhap_header);
2077 uint8_t mhap_compatible_sets[mhap_compatible_sets_size];
2078 if (mDataSource->readAt(
2079 data_offset, mhap_compatible_sets, sizeof(mhap_compatible_sets))
2080 < (ssize_t)sizeof(mhap_compatible_sets)) {
2081 return ERROR_IO;
2082 }
2083
2084 AMediaFormat_setBuffer(mLastTrack->meta,
2085 AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS,
2086 mhap_compatible_sets, sizeof(mhap_compatible_sets));
2087 data_offset += sizeof(mhap_compatible_sets);
2088 *offset = data_offset;
2089 break;
2090 }
2091 case FOURCC("mp4v"):
2092 case FOURCC("encv"):
2093 case FOURCC("s263"):
2094 case FOURCC("H263"):
2095 case FOURCC("h263"):
2096 case FOURCC("avc1"):
2097 case FOURCC("hvc1"):
2098 case FOURCC("hev1"):
2099 case FOURCC("dvav"):
2100 case FOURCC("dva1"):
2101 case FOURCC("dvhe"):
2102 case FOURCC("dvh1"):
2103 case FOURCC("dav1"):
2104 case FOURCC("av01"):
2105 case FOURCC("vp09"):
2106 {
2107 uint8_t buffer[78];
2108 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
2109 // Basic VideoSampleEntry size.
2110 return ERROR_MALFORMED;
2111 }
2112
2113 if (mDataSource->readAt(
2114 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
2115 return ERROR_IO;
2116 }
2117
2118 // we can get data_ref_index value from U16_AT(&buffer[6])
2119 uint16_t width = U16_AT(&buffer[6 + 18]);
2120 uint16_t height = U16_AT(&buffer[6 + 20]);
2121
2122 // The video sample is not standard-compliant if it has invalid dimension.
2123 // Use some default width and height value, and
2124 // let the decoder figure out the actual width and height (and thus
2125 // be prepared for INFO_FOMRAT_CHANGED event).
2126 if (width == 0) width = 352;
2127 if (height == 0) height = 288;
2128
2129 // printf("*** coding='%s' width=%d height=%d\n",
2130 // chunk, width, height);
2131
2132 if (mLastTrack == NULL)
2133 return ERROR_MALFORMED;
2134
2135 if (chunk_type != FOURCC("encv")) {
2136 // if the chunk type is encv, we'll get the type from the frma box later
2137 AMediaFormat_setString(mLastTrack->meta,
2138 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
2139 }
2140 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
2141 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
2142
2143 off64_t stop_offset = *offset + chunk_size;
2144 *offset = data_offset + sizeof(buffer);
2145 while (*offset < stop_offset) {
2146 status_t err = parseChunk(offset, depth + 1);
2147 if (err != OK) {
2148 return err;
2149 }
2150 }
2151
2152 if (*offset != stop_offset) {
2153 return ERROR_MALFORMED;
2154 }
2155 break;
2156 }
2157
2158 case FOURCC("stco"):
2159 case FOURCC("co64"):
2160 {
2161 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2162 return ERROR_MALFORMED;
2163 }
2164
2165 status_t err =
2166 mLastTrack->sampleTable->setChunkOffsetParams(
2167 chunk_type, data_offset, chunk_data_size);
2168
2169 *offset += chunk_size;
2170
2171 if (err != OK) {
2172 return err;
2173 }
2174
2175 break;
2176 }
2177
2178 case FOURCC("stsc"):
2179 {
2180 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2181 return ERROR_MALFORMED;
2182
2183 status_t err =
2184 mLastTrack->sampleTable->setSampleToChunkParams(
2185 data_offset, chunk_data_size);
2186
2187 *offset += chunk_size;
2188
2189 if (err != OK) {
2190 return err;
2191 }
2192
2193 break;
2194 }
2195
2196 case FOURCC("stsz"):
2197 case FOURCC("stz2"):
2198 {
2199 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2200 return ERROR_MALFORMED;
2201 }
2202
2203 status_t err =
2204 mLastTrack->sampleTable->setSampleSizeParams(
2205 chunk_type, data_offset, chunk_data_size);
2206
2207 *offset += chunk_size;
2208
2209 if (err != OK) {
2210 return err;
2211 }
2212
2213 adjustRawDefaultFrameSize();
2214
2215 size_t max_size;
2216 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
2217
2218 if (err != OK) {
2219 return err;
2220 }
2221
2222 if (max_size != 0) {
2223 // Assume that a given buffer only contains at most 10 chunks,
2224 // each chunk originally prefixed with a 2 byte length will
2225 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
2226 // and thus will grow by 2 bytes per chunk.
2227 if (max_size > SIZE_MAX - 10 * 2) {
2228 ALOGE("max sample size too big: %zu", max_size);
2229 return ERROR_MALFORMED;
2230 }
2231 AMediaFormat_setInt32(mLastTrack->meta,
2232 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
2233 } else {
2234 // No size was specified. Pick a conservatively large size.
2235 uint32_t width, height;
2236 if (!AMediaFormat_getInt32(mLastTrack->meta,
2237 AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
2238 !AMediaFormat_getInt32(mLastTrack->meta,
2239 AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
2240 ALOGE("No width or height, assuming worst case 1080p");
2241 width = 1920;
2242 height = 1080;
2243 } else {
2244 // A resolution was specified, check that it's not too big. The values below
2245 // were chosen so that the calculations below don't cause overflows, they're
2246 // not indicating that resolutions up to 32kx32k are actually supported.
2247 if (width > 32768 || height > 32768) {
2248 ALOGE("can't support %u x %u video", width, height);
2249 return ERROR_MALFORMED;
2250 }
2251 }
2252
2253 const char *mime;
2254 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2255 if (!strncmp(mime, "audio/", 6)) {
2256 // for audio, use 128KB
2257 max_size = 1024 * 128;
2258 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
2259 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
2260 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
2261 // AVC & HEVC requires compression ratio of at least 2, and uses
2262 // macroblocks
2263 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
2264 } else {
2265 // For all other formats there is no minimum compression
2266 // ratio. Use compression ratio of 1.
2267 max_size = width * height * 3 / 2;
2268 }
2269 // HACK: allow 10% overhead
2270 // TODO: read sample size from traf atom for fragmented MPEG4.
2271 max_size += max_size / 10;
2272 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
2273 }
2274
2275 // NOTE: setting another piece of metadata invalidates any pointers (such as the
2276 // mimetype) previously obtained, so don't cache them.
2277 const char *mime;
2278 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2279 // Calculate average frame rate.
2280 if (!strncasecmp("video/", mime, 6)) {
2281 size_t nSamples = mLastTrack->sampleTable->countSamples();
2282 if (nSamples == 0) {
2283 int32_t trackId;
2284 if (AMediaFormat_getInt32(mLastTrack->meta,
2285 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
2286 for (size_t i = 0; i < mTrex.size(); i++) {
2287 Trex *t = &mTrex.editItemAt(i);
2288 if (t->track_ID == (uint32_t) trackId) {
2289 if (t->default_sample_duration > 0) {
2290 int32_t frameRate =
2291 mLastTrack->timescale / t->default_sample_duration;
2292 AMediaFormat_setInt32(mLastTrack->meta,
2293 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2294 }
2295 break;
2296 }
2297 }
2298 }
2299 } else {
2300 int64_t durationUs;
2301 if (AMediaFormat_getInt64(mLastTrack->meta,
2302 AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
2303 if (durationUs > 0) {
2304 int32_t frameRate = (nSamples * 1000000LL +
2305 (durationUs >> 1)) / durationUs;
2306 AMediaFormat_setInt32(mLastTrack->meta,
2307 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2308 }
2309 }
2310 ALOGV("setting frame count %zu", nSamples);
2311 AMediaFormat_setInt32(mLastTrack->meta,
2312 AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
2313 }
2314 }
2315
2316 break;
2317 }
2318
2319 case FOURCC("stts"):
2320 {
2321 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2322 return ERROR_MALFORMED;
2323
2324 *offset += chunk_size;
2325
2326 if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
2327 char chunk[5];
2328 MakeFourCCString(mPath[depth - 1], chunk);
2329 ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
2330 break;
2331 }
2332
2333 status_t err =
2334 mLastTrack->sampleTable->setTimeToSampleParams(
2335 data_offset, chunk_data_size);
2336
2337 if (err != OK) {
2338 return err;
2339 }
2340
2341 break;
2342 }
2343
2344 case FOURCC("ctts"):
2345 {
2346 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2347 return ERROR_MALFORMED;
2348
2349 *offset += chunk_size;
2350
2351 status_t err =
2352 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
2353 data_offset, chunk_data_size);
2354
2355 if (err != OK) {
2356 return err;
2357 }
2358
2359 break;
2360 }
2361
2362 case FOURCC("stss"):
2363 {
2364 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2365 return ERROR_MALFORMED;
2366
2367 *offset += chunk_size;
2368
2369 status_t err =
2370 mLastTrack->sampleTable->setSyncSampleParams(
2371 data_offset, chunk_data_size);
2372
2373 if (err != OK) {
2374 return err;
2375 }
2376
2377 break;
2378 }
2379
2380 // \xA9xyz
2381 case FOURCC("\251xyz"):
2382 {
2383 *offset += chunk_size;
2384
2385 // Best case the total data length inside "\xA9xyz" box would
2386 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
2387 // where "\x00\x05" is the text string length with value = 5,
2388 // "\0x15\xc7" is the language code = en, and "+0+0/" is a
2389 // location (string) value with longitude = 0 and latitude = 0.
2390 // Since some devices encountered in the wild omit the trailing
2391 // slash, we'll allow that.
2392 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
2393 return ERROR_MALFORMED;
2394 }
2395
2396 uint16_t len;
2397 if (!mDataSource->getUInt16(data_offset, &len)) {
2398 return ERROR_IO;
2399 }
2400
2401 // allow "+0+0" without trailing slash
2402 if (len < 4 || len > chunk_data_size - 4) {
2403 return ERROR_MALFORMED;
2404 }
2405 // The location string following the language code is formatted
2406 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
2407 // Allocate 2 extra bytes, in case we need to add a trailing slash,
2408 // and to add a terminating 0.
2409 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
2410 if (!buffer) {
2411 return NO_MEMORY;
2412 }
2413
2414 if (mDataSource->readAt(
2415 data_offset + 4, &buffer[0], len) < len) {
2416 return ERROR_IO;
2417 }
2418
2419 len = strlen(&buffer[0]);
2420 if (len < 4) {
2421 return ERROR_MALFORMED;
2422 }
2423 // Add a trailing slash if there wasn't one.
2424 if (buffer[len - 1] != '/') {
2425 buffer[len] = '/';
2426 }
2427 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
2428 break;
2429 }
2430
2431 case FOURCC("esds"):
2432 {
2433 *offset += chunk_size;
2434
2435 if (chunk_data_size < 4) {
2436 return ERROR_MALFORMED;
2437 }
2438
2439 auto tmp = heapbuffer<uint8_t>(chunk_data_size);
2440 uint8_t *buffer = tmp.get();
2441 if (buffer == NULL) {
2442 return -ENOMEM;
2443 }
2444
2445 if (mDataSource->readAt(
2446 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2447 return ERROR_IO;
2448 }
2449
2450 if (U32_AT(buffer) != 0) {
2451 // Should be version 0, flags 0.
2452 return ERROR_MALFORMED;
2453 }
2454
2455 if (mLastTrack == NULL)
2456 return ERROR_MALFORMED;
2457
2458 AMediaFormat_setBuffer(mLastTrack->meta,
2459 AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
2460
2461 if (mPath.size() >= 2
2462 && mPath[mPath.size() - 2] == FOURCC("mp4a")) {
2463 // Information from the ESDS must be relied on for proper
2464 // setup of sample rate and channel count for MPEG4 Audio.
2465 // The generic header appears to only contain generic
2466 // information...
2467
2468 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
2469 &buffer[4], chunk_data_size - 4);
2470
2471 if (err != OK) {
2472 return err;
2473 }
2474 }
2475 if (mPath.size() >= 2
2476 && mPath[mPath.size() - 2] == FOURCC("mp4v")) {
2477 // Check if the video is MPEG2
2478 ESDS esds(&buffer[4], chunk_data_size - 4);
2479
2480 uint8_t objectTypeIndication;
2481 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
2482 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
2483 AMediaFormat_setString(mLastTrack->meta,
2484 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
2485 }
2486 }
2487 }
2488 break;
2489 }
2490
2491 case FOURCC("btrt"):
2492 {
2493 *offset += chunk_size;
2494 if (mLastTrack == NULL) {
2495 return ERROR_MALFORMED;
2496 }
2497
2498 uint8_t buffer[12];
2499 if (chunk_data_size != sizeof(buffer)) {
2500 return ERROR_MALFORMED;
2501 }
2502
2503 if (mDataSource->readAt(
2504 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2505 return ERROR_IO;
2506 }
2507
2508 uint32_t maxBitrate = U32_AT(&buffer[4]);
2509 uint32_t avgBitrate = U32_AT(&buffer[8]);
2510 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
2511 AMediaFormat_setInt32(mLastTrack->meta,
2512 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
2513 }
2514 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
2515 AMediaFormat_setInt32(mLastTrack->meta,
2516 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
2517 }
2518 break;
2519 }
2520
2521 case FOURCC("dfLa"):
2522 {
2523 *offset += chunk_size;
2524
2525 // From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
2526 // 4 for mediaType, 4 for blockType and BlockLen, 34 for metadata
2527 uint8_t flacInfo[4 + 4 + 34];
2528
2529 if (chunk_data_size != sizeof(flacInfo)) {
2530 return ERROR_MALFORMED;
2531 }
2532
2533 data_offset += 4;
2534 size_t flacOffset = 4;
2535 // Add flaC header mediaType to CSD
2536 strncpy((char *)flacInfo, "fLaC", 4);
2537
2538 ssize_t bytesToRead = sizeof(flacInfo) - flacOffset;
2539 if (mDataSource->readAt(
2540 data_offset, flacInfo + flacOffset, bytesToRead) < bytesToRead) {
2541 return ERROR_IO;
2542 }
2543
2544 data_offset += bytesToRead;
2545 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
2546 sizeof(flacInfo));
2547 break;
2548 }
2549
2550 case FOURCC("avcC"):
2551 {
2552 *offset += chunk_size;
2553
2554 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2555
2556 if (buffer.get() == NULL) {
2557 ALOGE("b/28471206");
2558 return NO_MEMORY;
2559 }
2560
2561 if (mDataSource->readAt(
2562 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2563 return ERROR_IO;
2564 }
2565
2566 if (mLastTrack == NULL)
2567 return ERROR_MALFORMED;
2568
2569 AMediaFormat_setBuffer(mLastTrack->meta,
2570 AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
2571
2572 break;
2573 }
2574 case FOURCC("hvcC"):
2575 {
2576 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2577
2578 if (buffer.get() == NULL) {
2579 ALOGE("b/28471206");
2580 return NO_MEMORY;
2581 }
2582
2583 if (mDataSource->readAt(
2584 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2585 return ERROR_IO;
2586 }
2587
2588 if (mLastTrack == NULL)
2589 return ERROR_MALFORMED;
2590
2591 AMediaFormat_setBuffer(mLastTrack->meta,
2592 AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
2593
2594 *offset += chunk_size;
2595 break;
2596 }
2597
2598 case FOURCC("vpcC"):
2599 case FOURCC("av1C"):
2600 {
2601 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2602
2603 if (buffer.get() == NULL) {
2604 ALOGE("b/28471206");
2605 return NO_MEMORY;
2606 }
2607
2608 if (mDataSource->readAt(
2609 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2610 return ERROR_IO;
2611 }
2612
2613 if (mLastTrack == NULL)
2614 return ERROR_MALFORMED;
2615
2616 AMediaFormat_setBuffer(mLastTrack->meta,
2617 AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
2618
2619 *offset += chunk_size;
2620 break;
2621 }
2622
2623 case FOURCC("dvcC"):
2624 case FOURCC("dvvC"):
2625 case FOURCC("dvwC"):
2626 {
2627 if (chunk_data_size != 24) {
2628 return ERROR_MALFORMED;
2629 }
2630
2631 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2632
2633 if (buffer.get() == NULL) {
2634 ALOGE("b/28471206");
2635 return NO_MEMORY;
2636 }
2637
2638 if (mDataSource->readAt(data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2639 return ERROR_IO;
2640 }
2641
2642 if (mLastTrack == NULL)
2643 return ERROR_MALFORMED;
2644
2645 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
2646 buffer.get(), chunk_data_size);
2647 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME,
2648 MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
2649
2650 *offset += chunk_size;
2651 break;
2652 }
2653
2654 case FOURCC("d263"):
2655 {
2656 *offset += chunk_size;
2657 /*
2658 * d263 contains a fixed 7 bytes part:
2659 * vendor - 4 bytes
2660 * version - 1 byte
2661 * level - 1 byte
2662 * profile - 1 byte
2663 * optionally, "d263" box itself may contain a 16-byte
2664 * bit rate box (bitr)
2665 * average bit rate - 4 bytes
2666 * max bit rate - 4 bytes
2667 */
2668 char buffer[23];
2669 if (chunk_data_size != 7 &&
2670 chunk_data_size != 23) {
2671 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2672 return ERROR_MALFORMED;
2673 }
2674
2675 if (mDataSource->readAt(
2676 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2677 return ERROR_IO;
2678 }
2679
2680 if (mLastTrack == NULL)
2681 return ERROR_MALFORMED;
2682
2683 AMediaFormat_setBuffer(mLastTrack->meta,
2684 AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
2685
2686 break;
2687 }
2688
2689 case FOURCC("meta"):
2690 {
2691 off64_t stop_offset = *offset + chunk_size;
2692 *offset = data_offset;
2693 bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2694 if (!isParsingMetaKeys) {
2695 uint8_t buffer[4];
2696 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2697 *offset = stop_offset;
2698 return ERROR_MALFORMED;
2699 }
2700
2701 if (mDataSource->readAt(
2702 data_offset, buffer, 4) < 4) {
2703 *offset = stop_offset;
2704 return ERROR_IO;
2705 }
2706
2707 if (U32_AT(buffer) != 0) {
2708 // Should be version 0, flags 0.
2709
2710 // If it's not, let's assume this is one of those
2711 // apparently malformed chunks that don't have flags
2712 // and completely different semantics than what's
2713 // in the MPEG4 specs and skip it.
2714 *offset = stop_offset;
2715 return OK;
2716 }
2717 *offset += sizeof(buffer);
2718 }
2719
2720 while (*offset < stop_offset) {
2721 status_t err = parseChunk(offset, depth + 1);
2722 if (err != OK) {
2723 return err;
2724 }
2725 }
2726
2727 if (*offset != stop_offset) {
2728 return ERROR_MALFORMED;
2729 }
2730 break;
2731 }
2732
2733 case FOURCC("iloc"):
2734 case FOURCC("iinf"):
2735 case FOURCC("iprp"):
2736 case FOURCC("pitm"):
2737 case FOURCC("idat"):
2738 case FOURCC("iref"):
2739 case FOURCC("ipro"):
2740 {
2741 if (mIsHeif || mIsAvif) {
2742 if (mItemTable == NULL) {
2743 mItemTable = new ItemTable(mDataSource, mIsHeif);
2744 }
2745 status_t err = mItemTable->parse(
2746 chunk_type, data_offset, chunk_data_size);
2747 if (err != OK) {
2748 return err;
2749 }
2750 }
2751 *offset += chunk_size;
2752 break;
2753 }
2754
2755 case FOURCC("mean"):
2756 case FOURCC("name"):
2757 case FOURCC("data"):
2758 {
2759 *offset += chunk_size;
2760
2761 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2762 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2763
2764 if (err != OK) {
2765 return err;
2766 }
2767 }
2768
2769 break;
2770 }
2771
2772 case FOURCC("mvhd"):
2773 {
2774 *offset += chunk_size;
2775
2776 if (depth != 1) {
2777 ALOGE("mvhd: depth %d", depth);
2778 return ERROR_MALFORMED;
2779 }
2780 if (chunk_data_size < 32) {
2781 return ERROR_MALFORMED;
2782 }
2783
2784 uint8_t header[32];
2785 if (mDataSource->readAt(
2786 data_offset, header, sizeof(header))
2787 < (ssize_t)sizeof(header)) {
2788 return ERROR_IO;
2789 }
2790
2791 uint64_t creationTime;
2792 uint64_t duration = 0;
2793 if (header[0] == 1) {
2794 creationTime = U64_AT(&header[4]);
2795 mHeaderTimescale = U32_AT(&header[20]);
2796 duration = U64_AT(&header[24]);
2797 if (duration == 0xffffffffffffffff) {
2798 duration = 0;
2799 }
2800 } else if (header[0] != 0) {
2801 return ERROR_MALFORMED;
2802 } else {
2803 creationTime = U32_AT(&header[4]);
2804 mHeaderTimescale = U32_AT(&header[12]);
2805 uint32_t d32 = U32_AT(&header[16]);
2806 if (d32 == 0xffffffff) {
2807 d32 = 0;
2808 }
2809 duration = d32;
2810 }
2811 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2812 AMediaFormat_setInt64(mFileMetaData,
2813 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2814 }
2815
2816 String8 s;
2817 if (convertTimeToDate(creationTime, &s)) {
2818 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.c_str());
2819 }
2820
2821 break;
2822 }
2823
2824 case FOURCC("mehd"):
2825 {
2826 *offset += chunk_size;
2827
2828 if (chunk_data_size < 8) {
2829 return ERROR_MALFORMED;
2830 }
2831
2832 uint8_t flags[4];
2833 if (mDataSource->readAt(
2834 data_offset, flags, sizeof(flags))
2835 < (ssize_t)sizeof(flags)) {
2836 return ERROR_IO;
2837 }
2838
2839 uint64_t duration = 0;
2840 if (flags[0] == 1) {
2841 // 64 bit
2842 if (chunk_data_size < 12) {
2843 return ERROR_MALFORMED;
2844 }
2845 mDataSource->getUInt64(data_offset + 4, &duration);
2846 if (duration == 0xffffffffffffffff) {
2847 duration = 0;
2848 }
2849 } else if (flags[0] == 0) {
2850 // 32 bit
2851 uint32_t d32;
2852 mDataSource->getUInt32(data_offset + 4, &d32);
2853 if (d32 == 0xffffffff) {
2854 d32 = 0;
2855 }
2856 duration = d32;
2857 } else {
2858 return ERROR_MALFORMED;
2859 }
2860
2861 if (duration != 0 && mHeaderTimescale != 0) {
2862 AMediaFormat_setInt64(mFileMetaData,
2863 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2864 }
2865
2866 break;
2867 }
2868
2869 case FOURCC("mdat"):
2870 {
2871 mMdatFound = true;
2872
2873 *offset += chunk_size;
2874 break;
2875 }
2876
2877 case FOURCC("hdlr"):
2878 {
2879 *offset += chunk_size;
2880
2881 if (underQTMetaPath(mPath, 3)) {
2882 break;
2883 }
2884
2885 uint32_t buffer;
2886 if (mDataSource->readAt(
2887 data_offset + 8, &buffer, 4) < 4) {
2888 return ERROR_IO;
2889 }
2890
2891 uint32_t type = ntohl(buffer);
2892 // For the 3GPP file format, the handler-type within the 'hdlr' box
2893 // shall be 'text'. We also want to support 'sbtl' handler type
2894 // for a practical reason as various MPEG4 containers use it.
2895 if (type == FOURCC("text") || type == FOURCC("sbtl")) {
2896 if (mLastTrack != NULL) {
2897 AMediaFormat_setString(mLastTrack->meta,
2898 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
2899 }
2900 }
2901
2902 break;
2903 }
2904
2905 case FOURCC("keys"):
2906 {
2907 *offset += chunk_size;
2908
2909 if (underQTMetaPath(mPath, 3)) {
2910 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2911 if (err != OK) {
2912 return err;
2913 }
2914 }
2915 break;
2916 }
2917
2918 case FOURCC("trex"):
2919 {
2920 *offset += chunk_size;
2921
2922 if (chunk_data_size < 24) {
2923 return ERROR_IO;
2924 }
2925 Trex trex;
2926 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2927 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2928 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2929 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2930 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2931 return ERROR_IO;
2932 }
2933 mTrex.add(trex);
2934 break;
2935 }
2936
2937 case FOURCC("tx3g"):
2938 {
2939 if (mLastTrack == NULL)
2940 return ERROR_MALFORMED;
2941
2942 // complain about ridiculous chunks
2943 if (chunk_size > kMaxAtomSize) {
2944 return ERROR_MALFORMED;
2945 }
2946
2947 // complain about empty atoms
2948 if (chunk_data_size <= 0) {
2949 ALOGE("b/124330204");
2950 android_errorWriteLog(0x534e4554, "124330204");
2951 return ERROR_MALFORMED;
2952 }
2953
2954 // should fill buffer based on "data_offset" and "chunk_data_size"
2955 // instead of *offset and chunk_size;
2956 // but we've been feeding the extra data to consumers for multiple releases and
2957 // if those apps are compensating for it, we'd break them with such a change
2958 //
2959
2960 if (mLastTrack->mTx3gBuffer == NULL) {
2961 mLastTrack->mTx3gSize = 0;
2962 mLastTrack->mTx3gFilled = 0;
2963 }
2964 if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
2965 size_t growth = kTx3gGrowth;
2966 if (growth < chunk_size) {
2967 growth = chunk_size;
2968 }
2969 // although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
2970 if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
2971 ALOGE("b/124330204 - too much space");
2972 android_errorWriteLog(0x534e4554, "124330204");
2973 return ERROR_MALFORMED;
2974 }
2975 uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
2976 mLastTrack->mTx3gSize + growth);
2977 if (updated == NULL) {
2978 return ERROR_MALFORMED;
2979 }
2980 mLastTrack->mTx3gBuffer = updated;
2981 mLastTrack->mTx3gSize += growth;
2982 }
2983
2984 if ((size_t)(mDataSource->readAt(*offset,
2985 mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
2986 chunk_size))
2987 < chunk_size) {
2988
2989 // advance read pointer so we don't end up reading this again
2990 *offset += chunk_size;
2991 return ERROR_IO;
2992 }
2993
2994 mLastTrack->mTx3gFilled += chunk_size;
2995 *offset += chunk_size;
2996 break;
2997 }
2998
2999 case FOURCC("covr"):
3000 {
3001 *offset += chunk_size;
3002
3003 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
3004 chunk_data_size, data_offset);
3005
3006 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
3007 return ERROR_MALFORMED;
3008 }
3009 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
3010 if (buffer.get() == NULL) {
3011 ALOGE("b/28471206");
3012 return NO_MEMORY;
3013 }
3014 if (mDataSource->readAt(
3015 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
3016 return ERROR_IO;
3017 }
3018 const int kSkipBytesOfDataBox = 16;
3019 if (chunk_data_size <= kSkipBytesOfDataBox) {
3020 return ERROR_MALFORMED;
3021 }
3022
3023 AMediaFormat_setBuffer(mFileMetaData,
3024 AMEDIAFORMAT_KEY_ALBUMART,
3025 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
3026
3027 break;
3028 }
3029
3030 case FOURCC("colr"):
3031 {
3032 *offset += chunk_size;
3033 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3034 // ignore otherwise
3035 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3036 status_t err = parseColorInfo(data_offset, chunk_data_size);
3037 if (err != OK) {
3038 return err;
3039 }
3040 }
3041
3042 break;
3043 }
3044
3045 case FOURCC("pasp"):
3046 {
3047 *offset += chunk_size;
3048 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3049 // ignore otherwise
3050 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3051 status_t err = parsePaspBox(data_offset, chunk_data_size);
3052 if (err != OK) {
3053 return err;
3054 }
3055 }
3056
3057 break;
3058 }
3059
3060 case FOURCC("titl"):
3061 case FOURCC("perf"):
3062 case FOURCC("auth"):
3063 case FOURCC("gnre"):
3064 case FOURCC("albm"):
3065 case FOURCC("yrrc"):
3066 {
3067 *offset += chunk_size;
3068
3069 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
3070
3071 if (err != OK) {
3072 return err;
3073 }
3074
3075 break;
3076 }
3077
3078 case FOURCC("ID32"):
3079 {
3080 *offset += chunk_size;
3081
3082 if (chunk_data_size < 6) {
3083 return ERROR_MALFORMED;
3084 }
3085
3086 parseID3v2MetaData(data_offset + 6, chunk_data_size - 6);
3087
3088 break;
3089 }
3090
3091 case FOURCC("----"):
3092 {
3093 mLastCommentMean.clear();
3094 mLastCommentName.clear();
3095 mLastCommentData.clear();
3096 *offset += chunk_size;
3097 break;
3098 }
3099
3100 case FOURCC("sidx"):
3101 {
3102 status_t err = parseSegmentIndex(data_offset, chunk_data_size);
3103 if (err != OK) {
3104 return err;
3105 }
3106 *offset += chunk_size;
3107 return UNKNOWN_ERROR; // stop parsing after sidx
3108 }
3109
3110 case FOURCC("ac-3"):
3111 {
3112 *offset += chunk_size;
3113 // bypass ac-3 if parse fail
3114 if (parseAC3SpecificBox(data_offset) != OK) {
3115 if (mLastTrack != NULL) {
3116 ALOGW("Fail to parse ac-3");
3117 mLastTrack->skipTrack = true;
3118 }
3119 }
3120 return OK;
3121 }
3122
3123 case FOURCC("ec-3"):
3124 {
3125 *offset += chunk_size;
3126 // bypass ec-3 if parse fail
3127 if (parseEAC3SpecificBox(data_offset) != OK) {
3128 if (mLastTrack != NULL) {
3129 ALOGW("Fail to parse ec-3");
3130 mLastTrack->skipTrack = true;
3131 }
3132 }
3133 return OK;
3134 }
3135
3136 case FOURCC("ac-4"):
3137 {
3138 *offset += chunk_size;
3139 // bypass ac-4 if parse fail
3140 if (parseAC4SpecificBox(data_offset) != OK) {
3141 if (mLastTrack != NULL) {
3142 ALOGW("Fail to parse ac-4");
3143 mLastTrack->skipTrack = true;
3144 }
3145 }
3146 return OK;
3147 }
3148
3149 case FOURCC("ftyp"):
3150 {
3151 if (chunk_data_size < 8 || depth != 0) {
3152 return ERROR_MALFORMED;
3153 }
3154
3155 off64_t stop_offset = *offset + chunk_size;
3156 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
3157 std::set<uint32_t> brandSet;
3158 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
3159 if (i == 1) {
3160 // Skip this index, it refers to the minorVersion,
3161 // not a brand.
3162 continue;
3163 }
3164
3165 uint32_t brand;
3166 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
3167 return ERROR_MALFORMED;
3168 }
3169
3170 brand = ntohl(brand);
3171 brandSet.insert(brand);
3172 }
3173
3174 if (brandSet.count(FOURCC("qt ")) > 0) {
3175 mIsQT = true;
3176 } else {
3177 if (brandSet.count(FOURCC("mif1")) > 0
3178 && brandSet.count(FOURCC("heic")) > 0) {
3179 ALOGV("identified HEIF image");
3180
3181 mIsHeif = true;
3182 brandSet.erase(FOURCC("mif1"));
3183 brandSet.erase(FOURCC("heic"));
3184 } else if (brandSet.count(FOURCC("avif")) > 0 ||
3185 brandSet.count(FOURCC("avis")) > 0) {
3186 ALOGV("identified AVIF image");
3187 mIsAvif = true;
3188 brandSet.erase(FOURCC("avif"));
3189 brandSet.erase(FOURCC("avis"));
3190 }
3191
3192 if (!brandSet.empty()) {
3193 // This means that the file should have moov box.
3194 // It could be any iso files (mp4, heifs, etc.)
3195 mHasMoovBox = true;
3196 if (mIsHeif || mIsAvif) {
3197 ALOGV("identified %s image with other tracks", mIsHeif ? "HEIF" : "AVIF");
3198 }
3199 }
3200 }
3201
3202 *offset = stop_offset;
3203
3204 break;
3205 }
3206
3207 default:
3208 {
3209 // check if we're parsing 'ilst' for meta keys
3210 // if so, treat type as a number (key-id).
3211 if (underQTMetaPath(mPath, 3)) {
3212 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
3213 if (err != OK) {
3214 return err;
3215 }
3216 }
3217
3218 *offset += chunk_size;
3219 break;
3220 }
3221 }
3222
3223 return OK;
3224 }
3225
parseChannelCountSampleRate(off64_t * offset,uint16_t * channelCount,uint16_t * sampleRate)3226 status_t MPEG4Extractor::parseChannelCountSampleRate(
3227 off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
3228 // skip 16 bytes:
3229 // + 6-byte reserved,
3230 // + 2-byte data reference index,
3231 // + 8-byte reserved
3232 *offset += 16;
3233 if (!mDataSource->getUInt16(*offset, channelCount)) {
3234 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
3235 return ERROR_MALFORMED;
3236 }
3237 // skip 8 bytes:
3238 // + 2-byte channelCount,
3239 // + 2-byte sample size,
3240 // + 4-byte reserved
3241 *offset += 8;
3242 if (!mDataSource->getUInt16(*offset, sampleRate)) {
3243 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
3244 return ERROR_MALFORMED;
3245 }
3246 // skip 4 bytes:
3247 // + 2-byte sampleRate,
3248 // + 2-byte reserved
3249 *offset += 4;
3250 return OK;
3251 }
3252
parseAC4SpecificBox(off64_t offset)3253 status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
3254 if (mLastTrack == NULL) {
3255 return ERROR_MALFORMED;
3256 }
3257
3258 uint16_t sampleRate, channelCount;
3259 status_t status;
3260 if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
3261 return status;
3262 }
3263 uint32_t size;
3264 // + 4-byte size
3265 // + 4-byte type
3266 // + 3-byte payload
3267 const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
3268 if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
3269 ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");
3270 return ERROR_MALFORMED;
3271 }
3272
3273 // + 4-byte size
3274 offset += 4;
3275 uint32_t type;
3276 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac4")) {
3277 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: header not dac4");
3278 return ERROR_MALFORMED;
3279 }
3280
3281 // + 4-byte type
3282 offset += 4;
3283 const uint32_t kAC4SpecificBoxPayloadSize = 1176;
3284 uint8_t chunk[kAC4SpecificBoxPayloadSize];
3285 ssize_t dsiSize = size - 8; // size of box - size and type fields
3286 if (dsiSize >= (ssize_t)kAC4SpecificBoxPayloadSize ||
3287 mDataSource->readAt(offset, chunk, dsiSize) != dsiSize) {
3288 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: bitstream fields");
3289 return ERROR_MALFORMED;
3290 }
3291 // + size-byte payload
3292 offset += dsiSize;
3293 ABitReader br(chunk, dsiSize);
3294 AC4DSIParser parser(br);
3295 if (!parser.parse()){
3296 ALOGE("MPEG4Extractor: error while parsing ac-4 specific block");
3297 return ERROR_MALFORMED;
3298 }
3299
3300 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC4);
3301 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3302 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3303
3304 AudioPresentationCollection presentations;
3305 // translate the AC4 presentation information to audio presentations for this track
3306 AC4DSIParser::AC4Presentations ac4Presentations = parser.getPresentations();
3307 if (!ac4Presentations.empty()) {
3308 for (const auto& ac4Presentation : ac4Presentations) {
3309 auto& presentation = ac4Presentation.second;
3310 if (!presentation.mEnabled) {
3311 continue;
3312 }
3313 AudioPresentationV1 ap;
3314 ap.mPresentationId = presentation.mGroupIndex;
3315 ap.mProgramId = presentation.mProgramID;
3316 ap.mLanguage = presentation.mLanguage;
3317 if (presentation.mPreVirtualized) {
3318 ap.mMasteringIndication = MASTERED_FOR_HEADPHONE;
3319 } else {
3320 switch (presentation.mChannelMode) {
3321 case AC4Parser::AC4Presentation::kChannelMode_Mono:
3322 case AC4Parser::AC4Presentation::kChannelMode_Stereo:
3323 ap.mMasteringIndication = MASTERED_FOR_STEREO;
3324 break;
3325 case AC4Parser::AC4Presentation::kChannelMode_3_0:
3326 case AC4Parser::AC4Presentation::kChannelMode_5_0:
3327 case AC4Parser::AC4Presentation::kChannelMode_5_1:
3328 case AC4Parser::AC4Presentation::kChannelMode_7_0_34:
3329 case AC4Parser::AC4Presentation::kChannelMode_7_1_34:
3330 case AC4Parser::AC4Presentation::kChannelMode_7_0_52:
3331 case AC4Parser::AC4Presentation::kChannelMode_7_1_52:
3332 ap.mMasteringIndication = MASTERED_FOR_SURROUND;
3333 break;
3334 case AC4Parser::AC4Presentation::kChannelMode_7_0_322:
3335 case AC4Parser::AC4Presentation::kChannelMode_7_1_322:
3336 case AC4Parser::AC4Presentation::kChannelMode_7_0_4:
3337 case AC4Parser::AC4Presentation::kChannelMode_7_1_4:
3338 case AC4Parser::AC4Presentation::kChannelMode_9_0_4:
3339 case AC4Parser::AC4Presentation::kChannelMode_9_1_4:
3340 case AC4Parser::AC4Presentation::kChannelMode_22_2:
3341 ap.mMasteringIndication = MASTERED_FOR_3D;
3342 break;
3343 default:
3344 ALOGE("Invalid channel mode in AC4 presentation");
3345 return ERROR_MALFORMED;
3346 }
3347 }
3348
3349 ap.mAudioDescriptionAvailable = (presentation.mContentClassifier ==
3350 AC4Parser::AC4Presentation::kVisuallyImpaired);
3351 ap.mSpokenSubtitlesAvailable = (presentation.mContentClassifier ==
3352 AC4Parser::AC4Presentation::kVoiceOver);
3353 ap.mDialogueEnhancementAvailable = presentation.mHasDialogEnhancements;
3354 if (!ap.mLanguage.empty()) {
3355 ap.mLabels.emplace(ap.mLanguage, presentation.mDescription);
3356 }
3357 presentations.push_back(std::move(ap));
3358 }
3359 }
3360
3361 if (presentations.empty()) {
3362 // Clear audio presentation info in metadata.
3363 AMediaFormat_setBuffer(
3364 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO, nullptr, 0);
3365 } else {
3366 std::ostringstream outStream(std::ios::out);
3367 serializeAudioPresentations(presentations, &outStream);
3368 AMediaFormat_setBuffer(
3369 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
3370 outStream.str().data(), outStream.str().size());
3371 }
3372 return OK;
3373 }
3374
parseEAC3SpecificBox(off64_t offset)3375 status_t MPEG4Extractor::parseEAC3SpecificBox(off64_t offset) {
3376 if (mLastTrack == NULL) {
3377 return ERROR_MALFORMED;
3378 }
3379
3380 uint16_t sampleRate, channels;
3381 status_t status;
3382 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3383 return status;
3384 }
3385 uint32_t size;
3386 // + 4-byte size
3387 // + 4-byte type
3388 // + 3-byte payload
3389 const uint32_t kEAC3SpecificBoxMinSize = 11;
3390 // 13 + 3 + (8 * (2 + 5 + 5 + 3 + 1 + 3 + 4 + (14 * 9 + 1))) bits == 152 bytes theoretical max
3391 // calculated from the required bits read below as well as the maximum number of independent
3392 // and dependant sub streams you can have
3393 const uint32_t kEAC3SpecificBoxMaxSize = 152;
3394 if (!mDataSource->getUInt32(offset, &size) ||
3395 size < kEAC3SpecificBoxMinSize ||
3396 size > kEAC3SpecificBoxMaxSize) {
3397 ALOGE("MPEG4Extractor: error while reading eac-3 block: cannot read specific box size");
3398 return ERROR_MALFORMED;
3399 }
3400
3401 offset += 4;
3402 uint32_t type;
3403 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dec3")) {
3404 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: header not dec3");
3405 return ERROR_MALFORMED;
3406 }
3407
3408 offset += 4;
3409 uint8_t* chunk = new (std::nothrow) uint8_t[size];
3410 if (chunk == NULL) {
3411 return ERROR_MALFORMED;
3412 }
3413
3414 if (mDataSource->readAt(offset, chunk, size) != (ssize_t)size) {
3415 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: bitstream fields");
3416 delete[] chunk;
3417 return ERROR_MALFORMED;
3418 }
3419
3420 ABitReader br(chunk, size);
3421 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3422 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3423
3424 if (br.numBitsLeft() < 16) {
3425 delete[] chunk;
3426 return ERROR_MALFORMED;
3427 }
3428 unsigned data_rate = br.getBits(13);
3429 ALOGV("EAC3 data rate = %d", data_rate);
3430
3431 unsigned num_ind_sub = br.getBits(3) + 1;
3432 ALOGV("EAC3 independant substreams = %d", num_ind_sub);
3433 if (br.numBitsLeft() < (num_ind_sub * 23)) {
3434 delete[] chunk;
3435 return ERROR_MALFORMED;
3436 }
3437
3438 unsigned channelCount = 0;
3439 for (unsigned i = 0; i < num_ind_sub; i++) {
3440 unsigned fscod = br.getBits(2);
3441 if (fscod == 3) {
3442 ALOGE("Incorrect fscod (3) in EAC3 header");
3443 delete[] chunk;
3444 return ERROR_MALFORMED;
3445 }
3446 unsigned boxSampleRate = sampleRateTable[fscod];
3447 if (boxSampleRate != sampleRate) {
3448 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3449 boxSampleRate, sampleRate);
3450 delete[] chunk;
3451 return ERROR_MALFORMED;
3452 }
3453
3454 unsigned bsid = br.getBits(5);
3455 if (bsid == 9 || bsid == 10) {
3456 ALOGW("EAC3 stream (bsid=%d) may be silenced by the decoder", bsid);
3457 } else if (bsid > 16) {
3458 ALOGE("EAC3 stream (bsid=%d) is not compatible with ETSI TS 102 366 v1.4.1", bsid);
3459 delete[] chunk;
3460 return ERROR_MALFORMED;
3461 }
3462
3463 // skip
3464 br.skipBits(2);
3465 unsigned bsmod = br.getBits(3);
3466 unsigned acmod = br.getBits(3);
3467 unsigned lfeon = br.getBits(1);
3468 // we currently only support the first stream
3469 if (i == 0)
3470 channelCount = channelCountTable[acmod] + lfeon;
3471 ALOGV("bsmod = %d, acmod = %d, lfeon = %d", bsmod, acmod, lfeon);
3472
3473 br.skipBits(3);
3474 unsigned num_dep_sub = br.getBits(4);
3475 ALOGV("EAC3 dependant substreams = %d", num_dep_sub);
3476 if (num_dep_sub != 0) {
3477 if (br.numBitsLeft() < 9) {
3478 delete[] chunk;
3479 return ERROR_MALFORMED;
3480 }
3481 static const char* chan_loc_tbl[] = { "Lc/Rc","Lrs/Rrs","Cs","Ts","Lsd/Rsd",
3482 "Lw/Rw","Lvh/Rvh","Cvh","Lfe2" };
3483 unsigned chan_loc = br.getBits(9);
3484 unsigned mask = 1;
3485 for (unsigned j = 0; j < 9; j++, mask <<= 1) {
3486 if ((chan_loc & mask) != 0) {
3487 // we currently only support the first stream
3488 if (i == 0) {
3489 channelCount++;
3490 // these are 2 channels in the mask
3491 if (j == 0 || j == 1 || j == 4 || j == 5 || j == 6) {
3492 channelCount++;
3493 }
3494 }
3495 ALOGV(" %s", chan_loc_tbl[j]);
3496 }
3497 }
3498 } else {
3499 if (br.numBitsLeft() == 0) {
3500 delete[] chunk;
3501 return ERROR_MALFORMED;
3502 }
3503 br.skipBits(1);
3504 }
3505 }
3506
3507 if (br.numBitsLeft() != 0) {
3508 if (br.numBitsLeft() < 8) {
3509 delete[] chunk;
3510 return ERROR_MALFORMED;
3511 }
3512 unsigned mask = br.getBits(8);
3513 for (unsigned i = 0; i < 8; i++) {
3514 if (((0x1 << i) & mask) == 0)
3515 continue;
3516
3517 if (br.numBitsLeft() < 8) {
3518 delete[] chunk;
3519 return ERROR_MALFORMED;
3520 }
3521 switch (i) {
3522 case 0: {
3523 unsigned complexity = br.getBits(8);
3524 ALOGV("Found a JOC stream with complexity = %d", complexity);
3525 }break;
3526 default: {
3527 br.skipBits(8);
3528 }break;
3529 }
3530 }
3531 }
3532 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_EAC3);
3533 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3534 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3535
3536 delete[] chunk;
3537 return OK;
3538 }
3539
parseAC3SpecificBox(off64_t offset)3540 status_t MPEG4Extractor::parseAC3SpecificBox(off64_t offset) {
3541 if (mLastTrack == NULL) {
3542 return ERROR_MALFORMED;
3543 }
3544
3545 uint16_t sampleRate, channels;
3546 status_t status;
3547 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3548 return status;
3549 }
3550 uint32_t size;
3551 // + 4-byte size
3552 // + 4-byte type
3553 // + 3-byte payload
3554 const uint32_t kAC3SpecificBoxSize = 11;
3555 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
3556 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
3557 return ERROR_MALFORMED;
3558 }
3559
3560 offset += 4;
3561 uint32_t type;
3562 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac3")) {
3563 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
3564 return ERROR_MALFORMED;
3565 }
3566
3567 offset += 4;
3568 const uint32_t kAC3SpecificBoxPayloadSize = 3;
3569 uint8_t chunk[kAC3SpecificBoxPayloadSize];
3570 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
3571 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
3572 return ERROR_MALFORMED;
3573 }
3574
3575 ABitReader br(chunk, sizeof(chunk));
3576 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3577 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3578
3579 unsigned fscod = br.getBits(2);
3580 if (fscod == 3) {
3581 ALOGE("Incorrect fscod (3) in AC3 header");
3582 return ERROR_MALFORMED;
3583 }
3584 unsigned boxSampleRate = sampleRateTable[fscod];
3585 if (boxSampleRate != sampleRate) {
3586 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3587 boxSampleRate, sampleRate);
3588 return ERROR_MALFORMED;
3589 }
3590
3591 unsigned bsid = br.getBits(5);
3592 if (bsid > 8) {
3593 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
3594 return ERROR_MALFORMED;
3595 }
3596
3597 // skip
3598 br.skipBits(3); // bsmod
3599
3600 unsigned acmod = br.getBits(3);
3601 unsigned lfeon = br.getBits(1);
3602 unsigned channelCount = channelCountTable[acmod] + lfeon;
3603
3604 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC3);
3605 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3606 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3607 return OK;
3608 }
3609
parseALACSampleEntry(off64_t * offset)3610 status_t MPEG4Extractor::parseALACSampleEntry(off64_t *offset) {
3611 // See 'external/alac/ALACMagicCookieDescription.txt for the detail'.
3612 // Store ALAC magic cookie (decoder needs it).
3613 uint8_t alacInfo[12];
3614 off64_t data_offset = *offset;
3615
3616 if (mDataSource->readAt(
3617 data_offset, alacInfo, sizeof(alacInfo)) < (ssize_t)sizeof(alacInfo)) {
3618 return ERROR_IO;
3619 }
3620 uint32_t size = U32_AT(&alacInfo[0]);
3621 if ((size != ALAC_SPECIFIC_INFO_SIZE) ||
3622 (U32_AT(&alacInfo[4]) != FOURCC("alac")) ||
3623 (U32_AT(&alacInfo[8]) != 0)) {
3624 ALOGV("Size:%u, U32_AT(&alacInfo[4]):%u, U32_AT(&alacInfo[8]):%u",
3625 size, U32_AT(&alacInfo[4]), U32_AT(&alacInfo[8]));
3626 return ERROR_MALFORMED;
3627 }
3628 data_offset += sizeof(alacInfo);
3629 uint8_t cookie[size - sizeof(alacInfo)];
3630 if (mDataSource->readAt(
3631 data_offset, cookie, sizeof(cookie)) < (ssize_t)sizeof(cookie)) {
3632 return ERROR_IO;
3633 }
3634
3635 uint8_t bitsPerSample = cookie[5];
3636 AMediaFormat_setInt32(mLastTrack->meta,
3637 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, bitsPerSample);
3638 AMediaFormat_setInt32(mLastTrack->meta,
3639 AMEDIAFORMAT_KEY_CHANNEL_COUNT, cookie[9]);
3640 AMediaFormat_setInt32(mLastTrack->meta,
3641 AMEDIAFORMAT_KEY_SAMPLE_RATE, U32_AT(&cookie[20]));
3642 AMediaFormat_setBuffer(mLastTrack->meta,
3643 AMEDIAFORMAT_KEY_CSD_0, cookie, sizeof(cookie));
3644 data_offset += sizeof(cookie);
3645 *offset = data_offset;
3646 return OK;
3647 }
3648
parseSegmentIndex(off64_t offset,size_t size)3649 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
3650 ALOGV("MPEG4Extractor::parseSegmentIndex");
3651
3652 if (size < 12) {
3653 return -EINVAL;
3654 }
3655
3656 uint32_t flags;
3657 if (!mDataSource->getUInt32(offset, &flags)) {
3658 return ERROR_MALFORMED;
3659 }
3660
3661 uint32_t version = flags >> 24;
3662 flags &= 0xffffff;
3663
3664 ALOGV("sidx version %d", version);
3665
3666 uint32_t referenceId;
3667 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
3668 return ERROR_MALFORMED;
3669 }
3670
3671 uint32_t timeScale;
3672 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
3673 return ERROR_MALFORMED;
3674 }
3675 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
3676 if (timeScale == 0)
3677 return ERROR_MALFORMED;
3678
3679 uint64_t earliestPresentationTime;
3680 uint64_t firstOffset;
3681
3682 offset += 12;
3683 size -= 12;
3684
3685 if (version == 0) {
3686 if (size < 8) {
3687 return -EINVAL;
3688 }
3689 uint32_t tmp;
3690 if (!mDataSource->getUInt32(offset, &tmp)) {
3691 return ERROR_MALFORMED;
3692 }
3693 earliestPresentationTime = tmp;
3694 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
3695 return ERROR_MALFORMED;
3696 }
3697 firstOffset = tmp;
3698 offset += 8;
3699 size -= 8;
3700 } else {
3701 if (size < 16) {
3702 return -EINVAL;
3703 }
3704 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
3705 return ERROR_MALFORMED;
3706 }
3707 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
3708 return ERROR_MALFORMED;
3709 }
3710 offset += 16;
3711 size -= 16;
3712 }
3713 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
3714
3715 if (size < 4) {
3716 return -EINVAL;
3717 }
3718
3719 uint16_t referenceCount;
3720 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
3721 return ERROR_MALFORMED;
3722 }
3723 offset += 4;
3724 size -= 4;
3725 ALOGV("refcount: %d", referenceCount);
3726
3727 if (size < referenceCount * 12) {
3728 return -EINVAL;
3729 }
3730
3731 uint64_t total_duration = 0;
3732 for (unsigned int i = 0; i < referenceCount; i++) {
3733 uint32_t d1, d2, d3;
3734
3735 if (!mDataSource->getUInt32(offset, &d1) || // size
3736 !mDataSource->getUInt32(offset + 4, &d2) || // duration
3737 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
3738 return ERROR_MALFORMED;
3739 }
3740
3741 if (d1 & 0x80000000) {
3742 ALOGW("sub-sidx boxes not supported yet");
3743 }
3744 bool sap = d3 & 0x80000000;
3745 uint32_t saptype = (d3 >> 28) & 7;
3746 if (!sap || (saptype != 1 && saptype != 2)) {
3747 // type 1 and 2 are sync samples
3748 ALOGW("not a stream access point, or unsupported type: %08x", d3);
3749 }
3750 total_duration += d2;
3751 offset += 12;
3752 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
3753 SidxEntry se;
3754 se.mSize = d1 & 0x7fffffff;
3755 se.mDurationUs = 1000000LL * d2 / timeScale;
3756 mSidxEntries.add(se);
3757 }
3758
3759 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
3760
3761 if (mLastTrack == NULL)
3762 return ERROR_MALFORMED;
3763
3764 int64_t metaDuration;
3765 if (!AMediaFormat_getInt64(mLastTrack->meta,
3766 AMEDIAFORMAT_KEY_DURATION, &metaDuration) || metaDuration == 0) {
3767 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, sidxDuration);
3768 }
3769 return OK;
3770 }
3771
parseQTMetaKey(off64_t offset,size_t size)3772 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
3773 if (size < 8) {
3774 return ERROR_MALFORMED;
3775 }
3776
3777 uint32_t count;
3778 if (!mDataSource->getUInt32(offset + 4, &count)) {
3779 return ERROR_MALFORMED;
3780 }
3781
3782 if (mMetaKeyMap.size() > 0) {
3783 ALOGW("'keys' atom seen again, discarding existing entries");
3784 mMetaKeyMap.clear();
3785 }
3786
3787 off64_t keyOffset = offset + 8;
3788 off64_t stopOffset = offset + size;
3789 for (size_t i = 1; i <= count; i++) {
3790 if (keyOffset + 8 > stopOffset) {
3791 return ERROR_MALFORMED;
3792 }
3793
3794 uint32_t keySize;
3795 if (!mDataSource->getUInt32(keyOffset, &keySize)
3796 || keySize < 8
3797 || keyOffset + keySize > stopOffset) {
3798 return ERROR_MALFORMED;
3799 }
3800
3801 uint32_t type;
3802 if (!mDataSource->getUInt32(keyOffset + 4, &type)
3803 || type != FOURCC("mdta")) {
3804 return ERROR_MALFORMED;
3805 }
3806
3807 keySize -= 8;
3808 keyOffset += 8;
3809
3810 auto keyData = heapbuffer<uint8_t>(keySize);
3811 if (keyData.get() == NULL) {
3812 return ERROR_MALFORMED;
3813 }
3814 if (mDataSource->readAt(
3815 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
3816 return ERROR_MALFORMED;
3817 }
3818
3819 AString key((const char *)keyData.get(), keySize);
3820 mMetaKeyMap.add(i, key);
3821
3822 keyOffset += keySize;
3823 }
3824 return OK;
3825 }
3826
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)3827 status_t MPEG4Extractor::parseQTMetaVal(
3828 int32_t keyId, off64_t offset, size_t size) {
3829 ssize_t index = mMetaKeyMap.indexOfKey(keyId);
3830 if (index < 0) {
3831 // corresponding key is not present, ignore
3832 return ERROR_MALFORMED;
3833 }
3834
3835 if (size <= 16) {
3836 return ERROR_MALFORMED;
3837 }
3838 uint32_t dataSize;
3839 if (!mDataSource->getUInt32(offset, &dataSize)
3840 || dataSize > size || dataSize <= 16) {
3841 return ERROR_MALFORMED;
3842 }
3843 uint32_t atomFourCC;
3844 if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
3845 || atomFourCC != FOURCC("data")) {
3846 return ERROR_MALFORMED;
3847 }
3848 uint32_t dataType;
3849 if (!mDataSource->getUInt32(offset + 8, &dataType)
3850 || ((dataType & 0xff000000) != 0)) {
3851 // not well-known type
3852 return ERROR_MALFORMED;
3853 }
3854
3855 dataSize -= 16;
3856 offset += 16;
3857
3858 if (dataType == 23 && dataSize >= 4) {
3859 // BE Float32
3860 uint32_t val;
3861 if (!mDataSource->getUInt32(offset, &val)) {
3862 return ERROR_MALFORMED;
3863 }
3864 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
3865 AMediaFormat_setFloat(mFileMetaData, AMEDIAFORMAT_KEY_CAPTURE_RATE, *(float *)&val);
3866 }
3867 } else if (dataType == 67 && dataSize >= 4) {
3868 // BE signed int32
3869 uint32_t val;
3870 if (!mDataSource->getUInt32(offset, &val)) {
3871 return ERROR_MALFORMED;
3872 }
3873 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
3874 AMediaFormat_setInt32(mFileMetaData,
3875 AMEDIAFORMAT_KEY_TEMPORAL_LAYER_COUNT, val);
3876 }
3877 } else {
3878 // add more keys if needed
3879 ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
3880 }
3881
3882 return OK;
3883 }
3884
parseTrackHeader(off64_t data_offset,off64_t data_size)3885 status_t MPEG4Extractor::parseTrackHeader(
3886 off64_t data_offset, off64_t data_size) {
3887 if (data_size < 4) {
3888 return ERROR_MALFORMED;
3889 }
3890
3891 uint8_t version;
3892 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
3893 return ERROR_IO;
3894 }
3895
3896 size_t dynSize = (version == 1) ? 36 : 24;
3897
3898 uint8_t buffer[36 + 60];
3899
3900 if (data_size != (off64_t)dynSize + 60) {
3901 return ERROR_MALFORMED;
3902 }
3903
3904 if (mDataSource->readAt(
3905 data_offset, buffer, data_size) < (ssize_t)data_size) {
3906 return ERROR_IO;
3907 }
3908
3909 int32_t id;
3910
3911 if (version == 1) {
3912 // we can get ctime value from U64_AT(&buffer[4])
3913 // we can get mtime value from U64_AT(&buffer[12])
3914 id = U32_AT(&buffer[20]);
3915 // we can get duration value from U64_AT(&buffer[28])
3916 } else if (version == 0) {
3917 // we can get ctime value from U32_AT(&buffer[4])
3918 // we can get mtime value from U32_AT(&buffer[8])
3919 id = U32_AT(&buffer[12]);
3920 // we can get duration value from U32_AT(&buffer[20])
3921 } else {
3922 return ERROR_UNSUPPORTED;
3923 }
3924
3925 if (mLastTrack == NULL)
3926 return ERROR_MALFORMED;
3927
3928 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_TRACK_ID, id);
3929
3930 size_t matrixOffset = dynSize + 16;
3931 int32_t a00 = U32_AT(&buffer[matrixOffset]);
3932 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
3933 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
3934 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
3935
3936 #if 0
3937 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
3938 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
3939
3940 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
3941 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
3942 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
3943 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
3944 #endif
3945
3946 uint32_t rotationDegrees;
3947
3948 static const int32_t kFixedOne = 0x10000;
3949 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
3950 // Identity, no rotation
3951 rotationDegrees = 0;
3952 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
3953 rotationDegrees = 90;
3954 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
3955 rotationDegrees = 270;
3956 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
3957 rotationDegrees = 180;
3958 } else {
3959 ALOGW("We only support 0,90,180,270 degree rotation matrices");
3960 rotationDegrees = 0;
3961 }
3962
3963 if (rotationDegrees != 0) {
3964 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_ROTATION, rotationDegrees);
3965 }
3966
3967 // Handle presentation display size, which could be different
3968 // from the image size indicated by AMEDIAFORMAT_KEY_WIDTH and AMEDIAFORMAT_KEY_HEIGHT.
3969 uint32_t width = U32_AT(&buffer[dynSize + 52]);
3970 uint32_t height = U32_AT(&buffer[dynSize + 56]);
3971 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_WIDTH, width >> 16);
3972 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_HEIGHT, height >> 16);
3973
3974 return OK;
3975 }
3976
parseITunesMetaData(off64_t offset,size_t size)3977 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
3978 if (size == 0) {
3979 return OK;
3980 }
3981
3982 if (size < 4 || size == SIZE_MAX) {
3983 return ERROR_MALFORMED;
3984 }
3985
3986 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3987 if (buffer == NULL) {
3988 return ERROR_MALFORMED;
3989 }
3990 if (mDataSource->readAt(
3991 offset, buffer, size) != (ssize_t)size) {
3992 delete[] buffer;
3993 buffer = NULL;
3994
3995 return ERROR_IO;
3996 }
3997
3998 uint32_t flags = U32_AT(buffer);
3999
4000 const char *metadataKey = nullptr;
4001 char chunk[5];
4002 MakeFourCCString(mPath[4], chunk);
4003 ALOGV("meta: %s @ %lld", chunk, (long long)offset);
4004 switch ((int32_t)mPath[4]) {
4005 case FOURCC("\251alb"):
4006 {
4007 metadataKey = AMEDIAFORMAT_KEY_ALBUM;
4008 break;
4009 }
4010 case FOURCC("\251ART"):
4011 {
4012 metadataKey = AMEDIAFORMAT_KEY_ARTIST;
4013 break;
4014 }
4015 case FOURCC("aART"):
4016 {
4017 metadataKey = AMEDIAFORMAT_KEY_ALBUMARTIST;
4018 break;
4019 }
4020 case FOURCC("\251day"):
4021 {
4022 metadataKey = AMEDIAFORMAT_KEY_YEAR;
4023 break;
4024 }
4025 case FOURCC("\251nam"):
4026 {
4027 metadataKey = AMEDIAFORMAT_KEY_TITLE;
4028 break;
4029 }
4030 case FOURCC("\251wrt"):
4031 {
4032 // various open source taggers agree that the "©wrt" tag is for composer, not writer
4033 metadataKey = AMEDIAFORMAT_KEY_COMPOSER;
4034 break;
4035 }
4036 case FOURCC("covr"):
4037 {
4038 metadataKey = AMEDIAFORMAT_KEY_ALBUMART;
4039 break;
4040 }
4041 case FOURCC("gnre"):
4042 case FOURCC("\251gen"):
4043 {
4044 metadataKey = AMEDIAFORMAT_KEY_GENRE;
4045 break;
4046 }
4047 case FOURCC("cpil"):
4048 {
4049 if (size == 9 && flags == 21) {
4050 char tmp[16];
4051 sprintf(tmp, "%d",
4052 (int)buffer[size - 1]);
4053
4054 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_COMPILATION, tmp);
4055 }
4056 break;
4057 }
4058 case FOURCC("trkn"):
4059 {
4060 if (size == 16 && flags == 0) {
4061 char tmp[16];
4062 uint16_t* pTrack = (uint16_t*)&buffer[10];
4063 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
4064 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
4065
4066 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4067 }
4068 break;
4069 }
4070 case FOURCC("disk"):
4071 {
4072 if ((size == 14 || size == 16) && flags == 0) {
4073 char tmp[16];
4074 uint16_t* pDisc = (uint16_t*)&buffer[10];
4075 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
4076 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
4077
4078 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DISCNUMBER, tmp);
4079 }
4080 break;
4081 }
4082 case FOURCC("----"):
4083 {
4084 buffer[size] = '\0';
4085 switch (mPath[5]) {
4086 case FOURCC("mean"):
4087 mLastCommentMean = ((const char *)buffer + 4);
4088 break;
4089 case FOURCC("name"):
4090 mLastCommentName = ((const char *)buffer + 4);
4091 break;
4092 case FOURCC("data"):
4093 if (size < 8) {
4094 delete[] buffer;
4095 buffer = NULL;
4096 ALOGE("b/24346430");
4097 return ERROR_MALFORMED;
4098 }
4099 mLastCommentData = ((const char *)buffer + 8);
4100 break;
4101 }
4102
4103 // Once we have a set of mean/name/data info, go ahead and process
4104 // it to see if its something we are interested in. Whether or not
4105 // were are interested in the specific tag, make sure to clear out
4106 // the set so we can be ready to process another tuple should one
4107 // show up later in the file.
4108 if ((mLastCommentMean.length() != 0) &&
4109 (mLastCommentName.length() != 0) &&
4110 (mLastCommentData.length() != 0)) {
4111
4112 if (mLastCommentMean == "com.apple.iTunes"
4113 && mLastCommentName == "iTunSMPB") {
4114 int32_t delay, padding;
4115 if (sscanf(mLastCommentData,
4116 " %*x %x %x %*x", &delay, &padding) == 2) {
4117 if (mLastTrack == NULL) {
4118 delete[] buffer;
4119 return ERROR_MALFORMED;
4120 }
4121
4122 AMediaFormat_setInt32(mLastTrack->meta,
4123 AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
4124 AMediaFormat_setInt32(mLastTrack->meta,
4125 AMEDIAFORMAT_KEY_ENCODER_PADDING, padding);
4126 }
4127 }
4128
4129 mLastCommentMean.clear();
4130 mLastCommentName.clear();
4131 mLastCommentData.clear();
4132 }
4133 break;
4134 }
4135
4136 default:
4137 break;
4138 }
4139
4140 void *tmpData;
4141 size_t tmpDataSize;
4142 const char *s;
4143 if (size >= 8 && metadataKey &&
4144 !AMediaFormat_getBuffer(mFileMetaData, metadataKey, &tmpData, &tmpDataSize) &&
4145 !AMediaFormat_getString(mFileMetaData, metadataKey, &s)) {
4146 if (!strcmp(metadataKey, "albumart")) {
4147 AMediaFormat_setBuffer(mFileMetaData, metadataKey,
4148 buffer + 8, size - 8);
4149 } else if (!strcmp(metadataKey, AMEDIAFORMAT_KEY_GENRE)) {
4150 if (flags == 0) {
4151 // uint8_t genre code, iTunes genre codes are
4152 // the standard id3 codes, except they start
4153 // at 1 instead of 0 (e.g. Pop is 14, not 13)
4154 // We use standard id3 numbering, so subtract 1.
4155 int genrecode = (int)buffer[size - 1];
4156 genrecode--;
4157 if (genrecode < 0) {
4158 genrecode = 255; // reserved for 'unknown genre'
4159 }
4160 char genre[10];
4161 sprintf(genre, "%d", genrecode);
4162
4163 AMediaFormat_setString(mFileMetaData, metadataKey, genre);
4164 } else if (flags == 1) {
4165 // custom genre string
4166 buffer[size] = '\0';
4167
4168 AMediaFormat_setString(mFileMetaData,
4169 metadataKey, (const char *)buffer + 8);
4170 }
4171 } else {
4172 buffer[size] = '\0';
4173
4174 AMediaFormat_setString(mFileMetaData,
4175 metadataKey, (const char *)buffer + 8);
4176 }
4177 }
4178
4179 delete[] buffer;
4180 buffer = NULL;
4181
4182 return OK;
4183 }
4184
parseColorInfo(off64_t offset,size_t size)4185 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
4186 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
4187 return ERROR_MALFORMED;
4188 }
4189
4190 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4191 if (buffer == NULL) {
4192 return ERROR_MALFORMED;
4193 }
4194 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4195 delete[] buffer;
4196 buffer = NULL;
4197
4198 return ERROR_IO;
4199 }
4200
4201 int32_t type = U32_AT(&buffer[0]);
4202 if ((type == FOURCC("nclx") && size >= 11)
4203 || (type == FOURCC("nclc") && size >= 10)) {
4204 // only store the first color specification
4205 int32_t existingColor;
4206 if (!AMediaFormat_getInt32(mLastTrack->meta,
4207 AMEDIAFORMAT_KEY_COLOR_RANGE, &existingColor)) {
4208 int32_t primaries = U16_AT(&buffer[4]);
4209 int32_t isotransfer = U16_AT(&buffer[6]);
4210 int32_t coeffs = U16_AT(&buffer[8]);
4211 bool fullRange = (type == FOURCC("nclx")) && (buffer[10] & 128);
4212
4213 int32_t range = 0;
4214 int32_t standard = 0;
4215 int32_t transfer = 0;
4216 ColorUtils::convertIsoColorAspectsToPlatformAspects(
4217 primaries, isotransfer, coeffs, fullRange,
4218 &range, &standard, &transfer);
4219
4220 if (range != 0) {
4221 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_RANGE, range);
4222 }
4223 if (standard != 0) {
4224 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_STANDARD, standard);
4225 }
4226 if (transfer != 0) {
4227 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_TRANSFER, transfer);
4228 }
4229 }
4230 }
4231
4232 delete[] buffer;
4233 buffer = NULL;
4234
4235 return OK;
4236 }
4237
parsePaspBox(off64_t offset,size_t size)4238 status_t MPEG4Extractor::parsePaspBox(off64_t offset, size_t size) {
4239 if (size < 8 || size == SIZE_MAX || mLastTrack == NULL) {
4240 return ERROR_MALFORMED;
4241 }
4242
4243 uint32_t data[2]; // hSpacing, vSpacing
4244 if (mDataSource->readAt(offset, data, 8) < 8) {
4245 return ERROR_IO;
4246 }
4247 uint32_t hSpacing = ntohl(data[0]);
4248 uint32_t vSpacing = ntohl(data[1]);
4249
4250 if (hSpacing != 0 && vSpacing != 0) {
4251 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_WIDTH, hSpacing);
4252 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_HEIGHT, vSpacing);
4253 }
4254
4255 return OK;
4256 }
4257
parse3GPPMetaData(off64_t offset,size_t size,int depth)4258 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
4259 if (size < 4 || size == SIZE_MAX) {
4260 return ERROR_MALFORMED;
4261 }
4262
4263 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4264 if (buffer == NULL) {
4265 return ERROR_MALFORMED;
4266 }
4267 if (mDataSource->readAt(
4268 offset, buffer, size) != (ssize_t)size) {
4269 delete[] buffer;
4270 buffer = NULL;
4271
4272 return ERROR_IO;
4273 }
4274
4275 const char *metadataKey = nullptr;
4276 switch (mPath[depth]) {
4277 case FOURCC("titl"):
4278 {
4279 metadataKey = "title";
4280 break;
4281 }
4282 case FOURCC("perf"):
4283 {
4284 metadataKey = "artist";
4285 break;
4286 }
4287 case FOURCC("auth"):
4288 {
4289 metadataKey = "writer";
4290 break;
4291 }
4292 case FOURCC("gnre"):
4293 {
4294 metadataKey = "genre";
4295 break;
4296 }
4297 case FOURCC("albm"):
4298 {
4299 if (buffer[size - 1] != '\0') {
4300 char tmp[4];
4301 sprintf(tmp, "%u", buffer[size - 1]);
4302
4303 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4304 }
4305
4306 metadataKey = "album";
4307 break;
4308 }
4309 case FOURCC("yrrc"):
4310 {
4311 if (size < 6) {
4312 delete[] buffer;
4313 buffer = NULL;
4314 ALOGE("b/62133227");
4315 android_errorWriteLog(0x534e4554, "62133227");
4316 return ERROR_MALFORMED;
4317 }
4318 char tmp[5];
4319 uint16_t year = U16_AT(&buffer[4]);
4320
4321 if (year < 10000) {
4322 sprintf(tmp, "%u", year);
4323
4324 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_YEAR, tmp);
4325 }
4326 break;
4327 }
4328
4329 default:
4330 break;
4331 }
4332
4333 if (metadataKey) {
4334 bool isUTF8 = true; // Common case
4335 char16_t *framedata = NULL;
4336 int len16 = 0; // Number of UTF-16 characters
4337
4338 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
4339 if (size < 6) {
4340 delete[] buffer;
4341 buffer = NULL;
4342 return ERROR_MALFORMED;
4343 }
4344
4345 if (size - 6 >= 4) {
4346 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
4347 framedata = (char16_t *)(buffer + 6);
4348 if (0xfffe == *framedata) {
4349 // endianness marker (BOM) doesn't match host endianness
4350 for (int i = 0; i < len16; i++) {
4351 framedata[i] = bswap_16(framedata[i]);
4352 }
4353 // BOM is now swapped to 0xfeff, we will execute next block too
4354 }
4355
4356 if (0xfeff == *framedata) {
4357 // Remove the BOM
4358 framedata++;
4359 len16--;
4360 isUTF8 = false;
4361 }
4362 // else normal non-zero-length UTF-8 string
4363 // we can't handle UTF-16 without BOM as there is no other
4364 // indication of encoding.
4365 }
4366
4367 if (isUTF8) {
4368 buffer[size] = 0;
4369 AMediaFormat_setString(mFileMetaData, metadataKey, (const char *)buffer + 6);
4370 } else {
4371 // Convert from UTF-16 string to UTF-8 string.
4372 String8 tmpUTF8str(framedata, len16);
4373 AMediaFormat_setString(mFileMetaData, metadataKey, tmpUTF8str.c_str());
4374 }
4375 }
4376
4377 delete[] buffer;
4378 buffer = NULL;
4379
4380 return OK;
4381 }
4382
parseID3v2MetaData(off64_t offset,uint64_t size)4383 void MPEG4Extractor::parseID3v2MetaData(off64_t offset, uint64_t size) {
4384 uint8_t *buffer = new (std::nothrow) uint8_t[size];
4385 if (buffer == NULL) {
4386 return;
4387 }
4388 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4389 delete[] buffer;
4390 buffer = NULL;
4391 return;
4392 }
4393
4394 ID3 id3(buffer, size, true /* ignorev1 */);
4395 delete[] buffer;
4396
4397 if (id3.isValid()) {
4398 struct Map {
4399 const char *key;
4400 const char *tag1;
4401 const char *tag2;
4402 };
4403 static const Map kMap[] = {
4404 { AMEDIAFORMAT_KEY_ALBUM, "TALB", "TAL" },
4405 { AMEDIAFORMAT_KEY_ARTIST, "TPE1", "TP1" },
4406 { AMEDIAFORMAT_KEY_ALBUMARTIST, "TPE2", "TP2" },
4407 { AMEDIAFORMAT_KEY_COMPOSER, "TCOM", "TCM" },
4408 { AMEDIAFORMAT_KEY_GENRE, "TCON", "TCO" },
4409 { AMEDIAFORMAT_KEY_TITLE, "TIT2", "TT2" },
4410 { AMEDIAFORMAT_KEY_YEAR, "TYE", "TYER" },
4411 { AMEDIAFORMAT_KEY_AUTHOR, "TXT", "TEXT" },
4412 { AMEDIAFORMAT_KEY_CDTRACKNUMBER, "TRK", "TRCK" },
4413 { AMEDIAFORMAT_KEY_DISCNUMBER, "TPA", "TPOS" },
4414 { AMEDIAFORMAT_KEY_COMPILATION, "TCP", "TCMP" },
4415 };
4416 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
4417
4418 for (size_t i = 0; i < kNumMapEntries; ++i) {
4419 const char *ss;
4420 if (!AMediaFormat_getString(mFileMetaData, kMap[i].key, &ss)) {
4421 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
4422 if (it->done()) {
4423 delete it;
4424 it = new ID3::Iterator(id3, kMap[i].tag2);
4425 }
4426
4427 if (it->done()) {
4428 delete it;
4429 continue;
4430 }
4431
4432 String8 s;
4433 it->getString(&s);
4434 delete it;
4435
4436 AMediaFormat_setString(mFileMetaData, kMap[i].key, s);
4437 }
4438 }
4439
4440 size_t dataSize;
4441 String8 mime;
4442 const void *data = id3.getAlbumArt(&dataSize, &mime);
4443
4444 if (data) {
4445 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_ALBUMART, data, dataSize);
4446 }
4447 }
4448 }
4449
getTrack(size_t index)4450 MediaTrackHelper *MPEG4Extractor::getTrack(size_t index) {
4451 status_t err;
4452 if ((err = readMetaData()) != OK) {
4453 return NULL;
4454 }
4455
4456 Track *track = mFirstTrack;
4457 while (index > 0) {
4458 if (track == NULL) {
4459 return NULL;
4460 }
4461
4462 track = track->next;
4463 --index;
4464 }
4465
4466 if (track == NULL) {
4467 return NULL;
4468 }
4469
4470
4471 Trex *trex = NULL;
4472 int32_t trackId;
4473 if (AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
4474 for (size_t i = 0; i < mTrex.size(); i++) {
4475 Trex *t = &mTrex.editItemAt(i);
4476 if (t->track_ID == (uint32_t) trackId) {
4477 trex = t;
4478 break;
4479 }
4480 }
4481 } else {
4482 ALOGE("b/21657957");
4483 return NULL;
4484 }
4485
4486 ALOGV("getTrack called, pssh: %zu", mPssh.size());
4487
4488 const char *mime;
4489 if (!AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)) {
4490 return NULL;
4491 }
4492 sp<ItemTable> itemTable;
4493 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4494 void *data;
4495 size_t size;
4496 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4497 return NULL;
4498 }
4499
4500 const uint8_t *ptr = (const uint8_t *)data;
4501
4502 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1
4503 return NULL;
4504 }
4505 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
4506 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4507 void *data;
4508 size_t size;
4509 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4510 return NULL;
4511 }
4512
4513 const uint8_t *ptr = (const uint8_t *)data;
4514
4515 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1
4516 return NULL;
4517 }
4518 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4519 itemTable = mItemTable;
4520 }
4521 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4522 void *data;
4523 size_t size;
4524 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)
4525 || size != 24) {
4526 return NULL;
4527 }
4528
4529 const uint8_t *ptr = (const uint8_t *)data;
4530 // dv_major.dv_minor Should be 1.0 or 2.1
4531 if ((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)) {
4532 return NULL;
4533 }
4534 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)
4535 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4536 void *data;
4537 size_t size;
4538 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4539 return NULL;
4540 }
4541
4542 const uint8_t *ptr = (const uint8_t *)data;
4543
4544 if (size < 4 || ptr[0] != 0x81) { // configurationVersion == 1
4545 return NULL;
4546 }
4547 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4548 itemTable = mItemTable;
4549 }
4550 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4551 void *data;
4552 size_t size;
4553 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4554 return NULL;
4555 }
4556
4557 const uint8_t *ptr = (const uint8_t *)data;
4558
4559 if (size < 5 || ptr[0] != 0x01) { // configurationVersion == 1
4560 return NULL;
4561 }
4562 }
4563
4564 ALOGV("track->elst_shift_start_ticks :%" PRIu64, track->elst_shift_start_ticks);
4565
4566 uint64_t elst_initial_empty_edit_ticks = 0;
4567 if (mHeaderTimescale != 0) {
4568 // Convert empty_edit_ticks from movie timescale to media timescale.
4569 uint64_t elst_initial_empty_edit_ticks_mul = 0, elst_initial_empty_edit_ticks_add = 0;
4570 if (__builtin_mul_overflow(track->elst_initial_empty_edit_ticks, track->timescale,
4571 &elst_initial_empty_edit_ticks_mul) ||
4572 __builtin_add_overflow(elst_initial_empty_edit_ticks_mul, (mHeaderTimescale / 2),
4573 &elst_initial_empty_edit_ticks_add)) {
4574 ALOGE("track->elst_initial_empty_edit_ticks overflow");
4575 return nullptr;
4576 }
4577 elst_initial_empty_edit_ticks = elst_initial_empty_edit_ticks_add / mHeaderTimescale;
4578 }
4579 ALOGV("elst_initial_empty_edit_ticks in MediaTimeScale :%" PRIu64,
4580 elst_initial_empty_edit_ticks);
4581
4582 MPEG4Source* source =
4583 new MPEG4Source(track->meta, mDataSource, track->timescale, track->sampleTable,
4584 mSidxEntries, trex, mMoofOffset, itemTable,
4585 track->elst_shift_start_ticks, elst_initial_empty_edit_ticks);
4586 if (source->init() != OK) {
4587 delete source;
4588 return NULL;
4589 }
4590 return source;
4591 }
4592
4593 // static
verifyTrack(Track * track)4594 status_t MPEG4Extractor::verifyTrack(Track *track) {
4595 const char *mime;
4596 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
4597
4598 void *data;
4599 size_t size;
4600 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4601 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4602 return ERROR_MALFORMED;
4603 }
4604 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
4605 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4606 return ERROR_MALFORMED;
4607 }
4608 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4609 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
4610 return ERROR_MALFORMED;
4611 }
4612 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4613 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4614 return ERROR_MALFORMED;
4615 }
4616 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4617 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4618 return ERROR_MALFORMED;
4619 }
4620 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
4621 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
4622 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
4623 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_ESDS, &data, &size)) {
4624 return ERROR_MALFORMED;
4625 }
4626 }
4627
4628 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
4629 // Make sure we have all the metadata we need.
4630 ALOGE("stbl atom missing/invalid.");
4631 return ERROR_MALFORMED;
4632 }
4633
4634 if (track->timescale == 0) {
4635 ALOGE("timescale invalid.");
4636 return ERROR_MALFORMED;
4637 }
4638
4639 return OK;
4640 }
4641
// Audio object type (AOT) codes. Only the values this file refers to are
// live enumerators; the remaining codes are kept as comments for reference.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,   Main profile
    AOT_AAC_LC = 2,             // Low Complexity object
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10,  (reserved)
    // AOT_RSVD_11          = 11,  (reserved)
    // AOT_TTSI             = 12,  TTSI Object
    // AOT_MAIN_SYNTH       = 13,  Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14,  Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15,  General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16,  Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC = 17,         // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18,  (reserved)
    // AOT_ER_AAC_LTP       = 19,  Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL = 20,       // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21,  Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC = 22,           // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD = 23,         // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24,  Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25,  Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26,  Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27,  Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28,  might become SSC
    AOT_PS = 29,                // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30,  MPEG Surround

    AOT_ESCAPE = 31,            // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32,  MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33,  MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34,  MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35,  might become DST
    // AOT_RSVD_36          = 36,  might become ALS
    // AOT_AAC_SLS          = 37,  AAC + SLS
    // AOT_SLS              = 38,  SLS
    // AOT_ER_AAC_ELD       = 39,  AAC Enhanced Low Delay

    AOT_USAC = 42,              // USAC
    // AOT_SAOC             = 43,  SAOC
    // AOT_LD_MPEGS         = 44,  Low Delay MPEG Surround

    // AOT_RSVD50           = 50,  Interim AOT for Rsvd50
};
4693
// Updates the current track (mLastTrack) from an ESDS descriptor.
//
// Depending on the descriptor's objectTypeIndication this either just relabels
// the track's MIME type (0xe1 -> QCELP, 0x6B/0x69 -> MPEG audio), extracts the
// Vorbis headers (0xdd), or parses the codec-specific data as a bitstream to
// obtain the audio object type, sample rate and channel count, which are then
// written to the track format.
//
// |esds_data| / |esds_size| describe the raw descriptor bytes handed to ESDS.
//
// Returns OK on success (including when there is no codec-specific data),
// ERROR_MALFORMED when the data is truncated or inconsistent or there is no
// current track where one is needed, and ERROR_UNSUPPORTED for channel
// configurations that are not handled. The CHECKs near the end abort if the
// track has no previous sample rate / channel count.
status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
        const void *esds_data, size_t esds_size) {
    ESDS esds(esds_data, esds_size);

    uint8_t objectTypeIndication;
    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
        return ERROR_MALFORMED;
    }

    if (objectTypeIndication == 0xe1) {
        // This isn't MPEG4 audio at all, it's QCELP 14k...
        if (mLastTrack == NULL)
            return ERROR_MALFORMED;

        AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_QCELP);
        return OK;
    }

    if (objectTypeIndication == 0x6B || objectTypeIndication == 0x69) {
        // mp3 audio
        if (mLastTrack == NULL)
            return ERROR_MALFORMED;

        AMediaFormat_setString(mLastTrack->meta,AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MPEG);
        return OK;
    }

    // Record the bitrates when there is a track to record them on. Values of
    // zero or >= INT32_MAX are skipped. The return value of getBitRate() is
    // not checked; the zero-initialised locals cover the failure case.
    if (mLastTrack != NULL) {
        uint32_t maxBitrate = 0;
        uint32_t avgBitrate = 0;
        esds.getBitRate(&maxBitrate, &avgBitrate);
        if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
            AMediaFormat_setInt32(mLastTrack->meta,
                    AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
        }
        if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
            AMediaFormat_setInt32(mLastTrack->meta,
                    AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
        }
    }

    const uint8_t *csd;
    size_t csd_size;
    if (esds.getCodecSpecificInfo(
                (const void **)&csd, &csd_size) != OK) {
        return ERROR_MALFORMED;
    }

    if (kUseHexDump) {
        printf("ESD of size %zu\n", csd_size);
        hexdump(csd, csd_size);
    }

    if (csd_size == 0) {
        // There's no further information, i.e. no codec specific data
        // Let's assume that the information provided in the mpeg4 headers
        // is accurate and hope for the best.

        return OK;
    }

    // Everything below reads at least the first two bytes of the data.
    if (csd_size < 2) {
        return ERROR_MALFORMED;
    }

    if (objectTypeIndication == 0xdd) {
        // vorbis audio
        if (csd[0] != 0x02) {
            return ERROR_MALFORMED;
        }

        // codecInfo starts with two lengths, len1 and len2, that are
        // "Xiph-style-lacing encoded"..

        // Each length is a run of 0xff bytes (255 each) followed by one final
        // byte; every addition is overflow-checked and a resulting length of
        // zero is rejected.
        size_t offset = 1;
        size_t len1 = 0;
        while (offset < csd_size && csd[offset] == 0xff) {
            if (__builtin_add_overflow(len1, 0xff, &len1)) {
                return ERROR_MALFORMED;
            }
            ++offset;
        }
        if (offset >= csd_size) {
            return ERROR_MALFORMED;
        }
        if (__builtin_add_overflow(len1, csd[offset], &len1)) {
            return ERROR_MALFORMED;
        }
        ++offset;
        if (len1 == 0) {
            return ERROR_MALFORMED;
        }

        size_t len2 = 0;
        while (offset < csd_size && csd[offset] == 0xff) {
            if (__builtin_add_overflow(len2, 0xff, &len2)) {
                return ERROR_MALFORMED;
            }
            ++offset;
        }
        if (offset >= csd_size) {
            return ERROR_MALFORMED;
        }
        if (__builtin_add_overflow(len2, csd[offset], &len2)) {
            return ERROR_MALFORMED;
        }
        ++offset;
        if (len2 == 0) {
            return ERROR_MALFORMED;
        }
        // The first block (len1 bytes) must fit in the data and start with
        // 0x01. The bounds test comes first, so csd[offset] is only read when
        // it is in range.
        if (offset + len1 > csd_size || csd[offset] != 0x01) {
            return ERROR_MALFORMED;
        }

        if (mLastTrack == NULL) {
            return ERROR_MALFORMED;
        }
        // formerly kKeyVorbisInfo
        // NOTE: this is stored before the remaining blocks are validated, so
        // a later ERROR_MALFORMED return leaves CSD_0 set on the track.
        AMediaFormat_setBuffer(mLastTrack->meta,
                AMEDIAFORMAT_KEY_CSD_0, &csd[offset], len1);

        // Skip the first block; the next one must start with 0x03.
        if (__builtin_add_overflow(offset, len1, &offset) ||
                offset >= csd_size || csd[offset] != 0x03) {
            return ERROR_MALFORMED;
        }

        // Skip the second block (len2 bytes); the last one must start with
        // 0x05 and runs to the end of the data.
        if (__builtin_add_overflow(offset, len2, &offset) ||
                offset >= csd_size || csd[offset] != 0x05) {
            return ERROR_MALFORMED;
        }

        // formerly kKeyVorbisBooks
        AMediaFormat_setBuffer(mLastTrack->meta,
                AMEDIAFORMAT_KEY_CSD_1, &csd[offset], csd_size - offset);
        AMediaFormat_setString(mLastTrack->meta,
                AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_VORBIS);

        return OK;
    }

    // Sample rates selected by a 4-bit frequency index of 0..12. Index 15
    // means an explicit 24-bit rate follows; 13 and 14 are rejected below.
    static uint32_t kSamplingRate[] = {
        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
        16000, 12000, 11025, 8000, 7350
    };

    // csd_size >= 2 gives at least 16 bits, which covers the unguarded reads
    // of the object type (5 bits, plus 6 when escaped) and the frequency
    // index (4 bits). Later reads are guarded with numBitsLeft().
    ABitReader br(csd, csd_size);
    uint32_t objectType = br.getBits(5);

    if (objectType == AOT_ESCAPE) {  // AAC-ELD => additional 6 bits
        objectType = 32 + br.getBits(6);
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    //keep AOT type
    AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_AAC_PROFILE, objectType);

    uint32_t freqIndex = br.getBits(4);

    int32_t sampleRate = 0;
    int32_t numChannels = 0;
    if (freqIndex == 15) {
        // Explicit 24-bit sample rate followed by the 4-bit channel field.
        if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
        sampleRate = br.getBits(24);
        numChannels = br.getBits(4);
    } else {
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        numChannels = br.getBits(4);

        if (freqIndex == 13 || freqIndex == 14) {
            return ERROR_MALFORMED;
        }

        sampleRate = kSamplingRate[freqIndex];
    }

    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 tbl 1.13
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        uint32_t extFreqIndex = br.getBits(4);
        if (extFreqIndex == 15) {
            if (csd_size < 8) {
                return ERROR_MALFORMED;
            }
            if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
            br.skipBits(24); // extSampleRate
        } else {
            if (extFreqIndex == 13 || extFreqIndex == 14) {
                return ERROR_MALFORMED;
            }
            //extSampleRate = kSamplingRate[extFreqIndex];
        }
        //TODO: save the extension sampling rate value in meta data =>
        //      AMediaFormat_setInt32(mLastTrack->meta, kKeyExtSampleRate, extSampleRate);
    }

    // Translate the 4-bit channel field into a channel count. Zero is left as
    // is and resolved from the element counts further down.
    switch (numChannels) {
        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
        case 0:
        case 1:// FC
        case 2:// FL FR
        case 3:// FC, FL FR
        case 4:// FC, FL FR, RC
        case 5:// FC, FL FR, SL SR
        case 6:// FC, FL FR, SL SR, LFE
            //numChannels already contains the right value
            break;
        case 11:// FC, FL FR, SL SR, RC, LFE
            numChannels = 7;
            break;
        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
        case 12:// FC, FL FR, SL SR, RL RR, LFE
        case 14:// FC, FL FR, SL SR, LFE, FHL FHR
            numChannels = 8;
            break;
        default:
            return ERROR_UNSUPPORTED;
    }

    {
        // For SBR/PS a second object type follows, read with the same
        // 5-bit (+6-bit escape) encoding as the first.
        if (objectType == AOT_SBR || objectType == AOT_PS) {
            if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
            objectType = br.getBits(5);

            if (objectType == AOT_ESCAPE) {
                if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
                objectType = 32 + br.getBits(6);
            }
        }
        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
                objectType == AOT_ER_BSAC) {
            if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
            br.skipBits(1); // frameLengthFlag

            const int32_t dependsOnCoreCoder = br.getBits(1);

            if (dependsOnCoreCoder ) {
                if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
                br.skipBits(14); // coreCoderDelay
            }

            // The flag is read when a bit is available; otherwise a value is
            // assumed from the object type. It is only used in the log line.
            int32_t extensionFlag = -1;
            if (br.numBitsLeft() > 0) {
                extensionFlag = br.getBits(1);
            } else {
                switch (objectType) {
                    // 14496-3 4.5.1.1 extensionFlag
                    case AOT_AAC_LC:
                        extensionFlag = 0;
                        break;
                    case AOT_ER_AAC_LC:
                    case AOT_ER_AAC_SCAL:
                    case AOT_ER_BSAC:
                    case AOT_ER_AAC_LD:
                        extensionFlag = 1;
                        break;
                    default:
                        return ERROR_MALFORMED;
                        break;
                }
                ALOGW("csd missing extension flag; assuming %d for object type %u.",
                        extensionFlag, objectType);
            }

            // A channel field of zero means the count has to be derived from
            // the element counts that follow in the bitstream.
            if (numChannels == 0) {
                int32_t channelsEffectiveNum = 0;
                int32_t channelsNum = 0;
                // The fixed fields up to and including MonoMixdownPresent
                // add up to exactly 32 bits.
                if (br.numBitsLeft() < 32) {
                    return ERROR_MALFORMED;
                }
                br.skipBits(4); // ElementInstanceTag
                br.skipBits(2); // Profile
                br.skipBits(4); // SamplingFrequencyIndex
                const int32_t NumFrontChannelElements = br.getBits(4);
                const int32_t NumSideChannelElements = br.getBits(4);
                const int32_t NumBackChannelElements = br.getBits(4);
                const int32_t NumLfeChannelElements = br.getBits(2);
                br.skipBits(3); // NumAssocDataElements
                br.skipBits(4); // NumValidCcElements

                const int32_t MonoMixdownPresent = br.getBits(1);

                if (MonoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    br.skipBits(4); // MonoMixdownElementNumber
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t StereoMixdownPresent = br.getBits(1);
                if (StereoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    br.skipBits(4); // StereoMixdownElementNumber
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
                if (MatrixMixdownIndexPresent != 0) {
                    if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
                    br.skipBits(2); // MatrixMixdownIndex
                    br.skipBits(1); // PseudoSurroundEnable
                }

                // Front, side and back elements each count as two channels
                // when the IsCpe bit is set and as one otherwise.
                int i;
                for (i=0; i < NumFrontChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t FrontElementIsCpe = br.getBits(1);
                    br.skipBits(4); // FrontElementTagSelect
                    channelsNum += FrontElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumSideChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t SideElementIsCpe = br.getBits(1);
                    br.skipBits(4); // SideElementTagSelect
                    channelsNum += SideElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumBackChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t BackElementIsCpe = br.getBits(1);
                    br.skipBits(4); // BackElementTagSelect
                    channelsNum += BackElementIsCpe ? 2 : 1;
                }
                // Count without the LFE elements; only used for logging.
                channelsEffectiveNum = channelsNum;

                for (i=0; i < NumLfeChannelElements; i++) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    br.skipBits(4); // LfeElementTagSelect
                    channelsNum += 1;
                }
                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
                numChannels = channelsNum;
            }
        }
    }

    // Still zero: either the object type is not one handled above or the
    // element counts added up to nothing.
    if (numChannels == 0) {
        return ERROR_UNSUPPORTED;
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    // The values parsed here overwrite what the track already has; the
    // previous values are only read in order to log a difference.
    int32_t prevSampleRate;
    CHECK(AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &prevSampleRate));

    if (prevSampleRate != sampleRate) {
        ALOGV("mpeg4 audio sample rate different from previous setting. "
             "was: %d, now: %d", prevSampleRate, sampleRate);
    }

    AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);

    int32_t prevChannelCount;
    CHECK(AMediaFormat_getInt32(mLastTrack->meta,
            AMEDIAFORMAT_KEY_CHANNEL_COUNT, &prevChannelCount));

    if (prevChannelCount != numChannels) {
        ALOGV("mpeg4 audio channel count different from previous setting. "
             "was: %d, now: %d", prevChannelCount, numChannels);
    }

    AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, numChannels);

    return OK;
}
5062
adjustRawDefaultFrameSize()5063 void MPEG4Extractor::adjustRawDefaultFrameSize() {
5064 int32_t chanCount = 0;
5065 int32_t bitWidth = 0;
5066 const char *mimeStr = NULL;
5067
5068 if(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mimeStr) &&
5069 !strcasecmp(mimeStr, MEDIA_MIMETYPE_AUDIO_RAW) &&
5070 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &chanCount) &&
5071 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitWidth)) {
5072 // samplesize in stsz may not right , so updade default samplesize
5073 mLastTrack->sampleTable->setPredictSampleSize(chanCount * bitWidth / 8);
5074 }
5075 }
5076
5077 ////////////////////////////////////////////////////////////////////////////////
5078
MPEG4Source(AMediaFormat * format,DataSourceHelper * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable,uint64_t elstShiftStartTicks,uint64_t elstInitialEmptyEditTicks)5079 MPEG4Source::MPEG4Source(
5080 AMediaFormat *format,
5081 DataSourceHelper *dataSource,
5082 int32_t timeScale,
5083 const sp<SampleTable> &sampleTable,
5084 Vector<SidxEntry> &sidx,
5085 const Trex *trex,
5086 off64_t firstMoofOffset,
5087 const sp<ItemTable> &itemTable,
5088 uint64_t elstShiftStartTicks,
5089 uint64_t elstInitialEmptyEditTicks)
5090 : mFormat(format),
5091 mDataSource(dataSource),
5092 mTimescale(timeScale),
5093 mSampleTable(sampleTable),
5094 mCurrentSampleIndex(0),
5095 mCurrentFragmentIndex(0),
5096 mSegments(sidx),
5097 mTrex(trex),
5098 mFirstMoofOffset(firstMoofOffset),
5099 mCurrentMoofOffset(firstMoofOffset),
5100 mCurrentMoofSize(0),
5101 mNextMoofOffset(-1),
5102 mCurrentTime(0),
5103 mDefaultEncryptedByteBlock(0),
5104 mDefaultSkipByteBlock(0),
5105 mCurrentSampleInfoAllocSize(0),
5106 mCurrentSampleInfoSizes(NULL),
5107 mCurrentSampleInfoOffsetsAllocSize(0),
5108 mCurrentSampleInfoOffsets(NULL),
5109 mIsAVC(false),
5110 mIsHEVC(false),
5111 mIsDolbyVision(false),
5112 mIsAC4(false),
5113 mIsPcm(false),
5114 mNALLengthSize(0),
5115 mStarted(false),
5116 mBuffer(NULL),
5117 mSrcBufferSize(0),
5118 mSrcBuffer(NULL),
5119 mItemTable(itemTable),
5120 mElstShiftStartTicks(elstShiftStartTicks),
5121 mElstInitialEmptyEditTicks(elstInitialEmptyEditTicks) {
5122
5123 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
5124
5125 AMediaFormat_getInt32(mFormat,
5126 AMEDIAFORMAT_KEY_CRYPTO_MODE, &mCryptoMode);
5127 mDefaultIVSize = 0;
5128 AMediaFormat_getInt32(mFormat,
5129 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &mDefaultIVSize);
5130 void *key;
5131 size_t keysize;
5132 if (AMediaFormat_getBuffer(mFormat,
5133 AMEDIAFORMAT_KEY_CRYPTO_KEY, &key, &keysize)) {
5134 CHECK(keysize <= 16);
5135 memset(mCryptoKey, 0, 16);
5136 memcpy(mCryptoKey, key, keysize);
5137 }
5138
5139 AMediaFormat_getInt32(mFormat,
5140 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, &mDefaultEncryptedByteBlock);
5141 AMediaFormat_getInt32(mFormat,
5142 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, &mDefaultSkipByteBlock);
5143
5144 const char *mime;
5145 bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
5146 CHECK(success);
5147
5148 mIsMpegH = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1) ||
5149 !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1);
5150 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
5151 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
5152 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
5153 mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
5154 mIsDolbyVision = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
5155 mIsHeif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) && mItemTable != NULL;
5156 mIsAvif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF) && mItemTable != NULL;
5157
5158 if (mIsAVC) {
5159 void *data;
5160 size_t size;
5161 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
5162
5163 mNALLengthSize = getNALLengthSizeFromAvcCsd((const uint8_t *)data, size);
5164 } else if (mIsHEVC) {
5165 void *data;
5166 size_t size;
5167 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
5168
5169 mNALLengthSize = getNALLengthSizeFromHevcCsd((const uint8_t *)data, size);
5170 } else if (mIsDolbyVision) {
5171 ALOGV("%s DolbyVision stream detected", __FUNCTION__);
5172 void *data;
5173 size_t size;
5174 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_2, &data, &size));
5175
5176 const uint8_t *ptr = (const uint8_t *)data;
5177
5178 CHECK(size == 24);
5179
5180 // dv_major.dv_minor Should be 1.0 or 2.1
5181 CHECK(!((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)));
5182
5183 const uint8_t profile = ptr[2] >> 1;
5184 // profile == (4,5,6,7,8) --> HEVC; profile == (9) --> AVC; profile == (10) --> AV1
5185 if (profile > 3 && profile < 9) {
5186 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
5187
5188 mNALLengthSize = getNALLengthSizeFromHevcCsd((const uint8_t *)data, size);
5189 } else if (9 == profile) {
5190 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
5191
5192 mNALLengthSize = getNALLengthSizeFromAvcCsd((const uint8_t *)data, size);
5193 } else if (10 == profile) {
5194 /* AV1 profile nothing to do */
5195 } else {
5196 if (AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
5197 mNALLengthSize = getNALLengthSizeFromHevcCsd((const uint8_t *)data, size);
5198 } else if (AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
5199 mNALLengthSize = getNALLengthSizeFromAvcCsd((const uint8_t *)data, size);
5200 } else {
5201 LOG_ALWAYS_FATAL("Invalid Dolby Vision profile = %d", profile);
5202 }
5203 }
5204 }
5205
5206 mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
5207 mIsAudio = !strncasecmp(mime, "audio/", 6);
5208
5209 int32_t aacObjectType = -1;
5210
5211 if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_AAC_PROFILE, &aacObjectType)) {
5212 mIsUsac = (aacObjectType == AOT_USAC);
5213 }
5214
5215 if (mIsPcm) {
5216 int32_t numChannels = 0;
5217 int32_t bitsPerSample = 0;
5218 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitsPerSample));
5219 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &numChannels));
5220
5221 int32_t bytesPerSample = bitsPerSample >> 3;
5222 int32_t pcmSampleSize = bytesPerSample * numChannels;
5223
5224 size_t maxSampleSize;
5225 status_t err = mSampleTable->getMaxSampleSize(&maxSampleSize);
5226 if (err != OK || maxSampleSize != static_cast<size_t>(pcmSampleSize)
5227 || bitsPerSample != 16) {
5228 // Not supported
5229 mIsPcm = false;
5230 } else {
5231 AMediaFormat_setInt32(mFormat,
5232 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, pcmSampleSize * kMaxPcmFrameSize);
5233 }
5234 }
5235
5236 CHECK(AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_TRACK_ID, &mTrackId));
5237 }
5238
init()5239 status_t MPEG4Source::init() {
5240 if (mFirstMoofOffset != 0) {
5241 off64_t offset = mFirstMoofOffset;
5242 return parseChunk(&offset);
5243 }
5244 return OK;
5245 }
5246
~MPEG4Source()5247 MPEG4Source::~MPEG4Source() {
5248 if (mStarted) {
5249 stop();
5250 }
5251 free(mCurrentSampleInfoSizes);
5252 free(mCurrentSampleInfoOffsets);
5253 }
5254
start()5255 media_status_t MPEG4Source::start() {
5256 Mutex::Autolock autoLock(mLock);
5257
5258 CHECK(!mStarted);
5259
5260 int32_t tmp;
5261 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &tmp));
5262 size_t max_size = tmp;
5263
5264 // A somewhat arbitrary limit that should be sufficient for 8k video frames
5265 // If you see the message below for a valid input stream: increase the limit
5266 const size_t kMaxBufferSize = 64 * 1024 * 1024;
5267 if (max_size > kMaxBufferSize) {
5268 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
5269 return AMEDIA_ERROR_MALFORMED;
5270 }
5271 if (max_size == 0) {
5272 ALOGE("zero max input size");
5273 return AMEDIA_ERROR_MALFORMED;
5274 }
5275
5276 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
5277 const size_t kInitialBuffers = 2;
5278 const size_t kMaxBuffers = 8;
5279 const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
5280 mBufferGroup->init(kInitialBuffers, max_size, realMaxBuffers);
5281 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
5282 if (mSrcBuffer == NULL) {
5283 // file probably specified a bad max size
5284 return AMEDIA_ERROR_MALFORMED;
5285 }
5286 mSrcBufferSize = max_size;
5287
5288 mStarted = true;
5289
5290 return AMEDIA_OK;
5291 }
5292
stop()5293 media_status_t MPEG4Source::stop() {
5294 Mutex::Autolock autoLock(mLock);
5295
5296 CHECK(mStarted);
5297
5298 if (mBuffer != NULL) {
5299 mBuffer->release();
5300 mBuffer = NULL;
5301 }
5302
5303 mSrcBufferSize = 0;
5304 delete[] mSrcBuffer;
5305 mSrcBuffer = NULL;
5306
5307 mStarted = false;
5308 mCurrentSampleIndex = 0;
5309
5310 return AMEDIA_OK;
5311 }
5312
parseChunk(off64_t * offset)5313 status_t MPEG4Source::parseChunk(off64_t *offset) {
5314 uint32_t hdr[2];
5315 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5316 return ERROR_IO;
5317 }
5318 uint64_t chunk_size = ntohl(hdr[0]);
5319 uint32_t chunk_type = ntohl(hdr[1]);
5320 off64_t data_offset = *offset + 8;
5321
5322 if (chunk_size == 1) {
5323 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5324 return ERROR_IO;
5325 }
5326 chunk_size = ntoh64(chunk_size);
5327 data_offset += 8;
5328
5329 if (chunk_size < 16) {
5330 // The smallest valid chunk is 16 bytes long in this case.
5331 return ERROR_MALFORMED;
5332 }
5333 } else if (chunk_size < 8) {
5334 // The smallest valid chunk is 8 bytes long.
5335 return ERROR_MALFORMED;
5336 }
5337
5338 char chunk[5];
5339 MakeFourCCString(chunk_type, chunk);
5340 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
5341
5342 off64_t chunk_data_size = *offset + chunk_size - data_offset;
5343
5344 switch(chunk_type) {
5345
5346 case FOURCC("traf"):
5347 case FOURCC("moof"): {
5348 off64_t stop_offset = *offset + chunk_size;
5349 *offset = data_offset;
5350 if (chunk_type == FOURCC("moof")) {
5351 mCurrentMoofSize = chunk_data_size;
5352 }
5353 while (*offset < stop_offset) {
5354 status_t err = parseChunk(offset);
5355 if (err != OK) {
5356 return err;
5357 }
5358 }
5359 if (chunk_type == FOURCC("moof")) {
5360 // *offset points to the box following this moof. Find the next moof from there.
5361
5362 while (true) {
5363 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5364 // no more box to the end of file.
5365 break;
5366 }
5367 chunk_size = ntohl(hdr[0]);
5368 chunk_type = ntohl(hdr[1]);
5369 if (chunk_size == 1) {
5370 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
5371 // which is defined in 4.2 Object Structure.
5372 // When chunk_size==1, 8 bytes follows as "largesize".
5373 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5374 return ERROR_IO;
5375 }
5376 chunk_size = ntoh64(chunk_size);
5377 if (chunk_size < 16) {
5378 // The smallest valid chunk is 16 bytes long in this case.
5379 return ERROR_MALFORMED;
5380 }
5381 } else if (chunk_size == 0) {
5382 // next box extends to end of file.
5383 } else if (chunk_size < 8) {
5384 // The smallest valid chunk is 8 bytes long in this case.
5385 return ERROR_MALFORMED;
5386 }
5387
5388 if (chunk_type == FOURCC("moof")) {
5389 mNextMoofOffset = *offset;
5390 break;
5391 } else if (chunk_type == FOURCC("mdat")) {
5392 parseChunk(offset);
5393 continue;
5394 } else if (chunk_size == 0) {
5395 break;
5396 }
5397 *offset += chunk_size;
5398 }
5399 }
5400 break;
5401 }
5402
5403 case FOURCC("tfhd"): {
5404 status_t err;
5405 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
5406 return err;
5407 }
5408 *offset += chunk_size;
5409 break;
5410 }
5411
5412 case FOURCC("trun"): {
5413 status_t err;
5414 if (mLastParsedTrackId == mTrackId) {
5415 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
5416 return err;
5417 }
5418 }
5419
5420 *offset += chunk_size;
5421 break;
5422 }
5423
5424 case FOURCC("saiz"): {
5425 status_t err;
5426 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
5427 return err;
5428 }
5429 *offset += chunk_size;
5430 break;
5431 }
5432 case FOURCC("saio"): {
5433 status_t err;
5434 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size))
5435 != OK) {
5436 return err;
5437 }
5438 *offset += chunk_size;
5439 break;
5440 }
5441
5442 case FOURCC("senc"): {
5443 status_t err;
5444 if ((err = parseSampleEncryption(data_offset, chunk_data_size)) != OK) {
5445 return err;
5446 }
5447 *offset += chunk_size;
5448 break;
5449 }
5450
5451 case FOURCC("mdat"): {
5452 // parse DRM info if present
5453 ALOGV("MPEG4Source::parseChunk mdat");
5454 // if saiz/saoi was previously observed, do something with the sampleinfos
5455 status_t err = OK;
5456 auto kv = mDrmOffsets.lower_bound(*offset);
5457 if (kv != mDrmOffsets.end()) {
5458 auto drmoffset = kv->first;
5459 auto flags = kv->second;
5460 mDrmOffsets.erase(kv);
5461 ALOGV("mdat chunk_size %" PRIu64 " drmoffset %" PRId64 " offset %" PRId64,
5462 chunk_size, drmoffset, *offset);
5463 if (chunk_size >= drmoffset - *offset) {
5464 err = parseClearEncryptedSizes(drmoffset, false, flags,
5465 chunk_size - (drmoffset - *offset));
5466 }
5467 }
5468 if (err != OK) {
5469 return err;
5470 }
5471 *offset += chunk_size;
5472 break;
5473 }
5474
5475 default: {
5476 *offset += chunk_size;
5477 break;
5478 }
5479 }
5480 return OK;
5481 }
5482
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t size)5483 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
5484 off64_t offset, off64_t size) {
5485 ALOGV("parseSampleAuxiliaryInformationSizes");
5486 if (size < 9) {
5487 return -EINVAL;
5488 }
5489 // 14496-12 8.7.12
5490 uint8_t version;
5491 if (mDataSource->readAt(
5492 offset, &version, sizeof(version))
5493 < (ssize_t)sizeof(version)) {
5494 return ERROR_IO;
5495 }
5496
5497 if (version != 0) {
5498 return ERROR_UNSUPPORTED;
5499 }
5500 offset++;
5501 size--;
5502
5503 uint32_t flags;
5504 if (!mDataSource->getUInt24(offset, &flags)) {
5505 return ERROR_IO;
5506 }
5507 offset += 3;
5508 size -= 3;
5509
5510 if (flags & 1) {
5511 if (size < 13) {
5512 return -EINVAL;
5513 }
5514 uint32_t tmp;
5515 if (!mDataSource->getUInt32(offset, &tmp)) {
5516 return ERROR_MALFORMED;
5517 }
5518 mCurrentAuxInfoType = tmp;
5519 offset += 4;
5520 size -= 4;
5521 if (!mDataSource->getUInt32(offset, &tmp)) {
5522 return ERROR_MALFORMED;
5523 }
5524 mCurrentAuxInfoTypeParameter = tmp;
5525 offset += 4;
5526 size -= 4;
5527 }
5528
5529 uint8_t defsize;
5530 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
5531 return ERROR_MALFORMED;
5532 }
5533 mCurrentDefaultSampleInfoSize = defsize;
5534 offset++;
5535 size--;
5536
5537 uint32_t smplcnt;
5538 if (!mDataSource->getUInt32(offset, &smplcnt)) {
5539 return ERROR_MALFORMED;
5540 }
5541 mCurrentSampleInfoCount = smplcnt;
5542 offset += 4;
5543 size -= 4;
5544 if (mCurrentDefaultSampleInfoSize != 0) {
5545 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
5546 return OK;
5547 }
5548 if(smplcnt > size) {
5549 ALOGW("b/124525515 - smplcnt(%u) > size(%ld)", (unsigned int)smplcnt, (unsigned long)size);
5550 android_errorWriteLog(0x534e4554, "124525515");
5551 return -EINVAL;
5552 }
5553 if (smplcnt > mCurrentSampleInfoAllocSize) {
5554 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
5555 if (newPtr == NULL) {
5556 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
5557 return NO_MEMORY;
5558 }
5559 mCurrentSampleInfoSizes = newPtr;
5560 mCurrentSampleInfoAllocSize = smplcnt;
5561 }
5562
5563 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
5564 return OK;
5565 }
5566
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t size)5567 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
5568 off64_t offset, off64_t size) {
5569 ALOGV("parseSampleAuxiliaryInformationOffsets");
5570 if (size < 8) {
5571 return -EINVAL;
5572 }
5573 // 14496-12 8.7.13
5574 uint8_t version;
5575 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
5576 return ERROR_IO;
5577 }
5578 offset++;
5579 size--;
5580
5581 uint32_t flags;
5582 if (!mDataSource->getUInt24(offset, &flags)) {
5583 return ERROR_IO;
5584 }
5585 offset += 3;
5586 size -= 3;
5587
5588 uint32_t entrycount;
5589 if (!mDataSource->getUInt32(offset, &entrycount)) {
5590 return ERROR_IO;
5591 }
5592 offset += 4;
5593 size -= 4;
5594 if (entrycount == 0) {
5595 return OK;
5596 }
5597 if (entrycount > UINT32_MAX / 8) {
5598 return ERROR_MALFORMED;
5599 }
5600
5601 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
5602 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
5603 if (newPtr == NULL) {
5604 ALOGE("failed to realloc %u -> %u",
5605 mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
5606 return NO_MEMORY;
5607 }
5608 mCurrentSampleInfoOffsets = newPtr;
5609 mCurrentSampleInfoOffsetsAllocSize = entrycount;
5610 }
5611 mCurrentSampleInfoOffsetCount = entrycount;
5612
5613 if (mCurrentSampleInfoOffsets == NULL) {
5614 return OK;
5615 }
5616
5617 for (size_t i = 0; i < entrycount; i++) {
5618 if (version == 0) {
5619 if (size < 4) {
5620 ALOGW("b/124526959");
5621 android_errorWriteLog(0x534e4554, "124526959");
5622 return -EINVAL;
5623 }
5624 uint32_t tmp;
5625 if (!mDataSource->getUInt32(offset, &tmp)) {
5626 return ERROR_IO;
5627 }
5628 mCurrentSampleInfoOffsets[i] = tmp;
5629 offset += 4;
5630 size -= 4;
5631 } else {
5632 if (size < 8) {
5633 ALOGW("b/124526959");
5634 android_errorWriteLog(0x534e4554, "124526959");
5635 return -EINVAL;
5636 }
5637 uint64_t tmp;
5638 if (!mDataSource->getUInt64(offset, &tmp)) {
5639 return ERROR_IO;
5640 }
5641 mCurrentSampleInfoOffsets[i] = tmp;
5642 offset += 8;
5643 size -= 8;
5644 }
5645 }
5646
5647 // parse clear/encrypted data
5648
5649 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
5650
5651 drmoffset += mCurrentMoofOffset;
5652 mDrmOffsets[drmoffset] = flags;
5653 ALOGV("saio drmoffset %" PRId64 " flags %u", drmoffset, flags);
5654
5655 return OK;
5656 }
5657
parseClearEncryptedSizes(off64_t offset,bool isSampleEncryption,uint32_t flags,off64_t size)5658 status_t MPEG4Source::parseClearEncryptedSizes(
5659 off64_t offset, bool isSampleEncryption, uint32_t flags, off64_t size) {
5660
5661 int32_t ivlength;
5662 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &ivlength)) {
5663 return ERROR_MALFORMED;
5664 }
5665
5666 // only 0, 8 and 16 byte initialization vectors are supported
5667 if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
5668 ALOGW("unsupported IV length: %d", ivlength);
5669 return ERROR_MALFORMED;
5670 }
5671
5672 uint32_t sampleCount = mCurrentSampleInfoCount;
5673 if (isSampleEncryption) {
5674 if (size < 4) {
5675 return ERROR_MALFORMED;
5676 }
5677 if (!mDataSource->getUInt32(offset, &sampleCount)) {
5678 return ERROR_IO;
5679 }
5680 offset += 4;
5681 size -= 4;
5682 }
5683
5684 // read CencSampleAuxiliaryDataFormats
5685 for (size_t i = 0; i < sampleCount; i++) {
5686 if (i >= mCurrentSamples.size()) {
5687 ALOGW("too few samples");
5688 break;
5689 }
5690 Sample *smpl = &mCurrentSamples.editItemAt(i);
5691 if (!smpl->clearsizes.isEmpty()) {
5692 continue;
5693 }
5694
5695 memset(smpl->iv, 0, 16);
5696 if (size < ivlength) {
5697 return ERROR_MALFORMED;
5698 }
5699 if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
5700 return ERROR_IO;
5701 }
5702
5703 offset += ivlength;
5704 size -= ivlength;
5705
5706 bool readSubsamples;
5707 if (isSampleEncryption) {
5708 readSubsamples = flags & 2;
5709 } else {
5710 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
5711 if (smplinfosize == 0) {
5712 smplinfosize = mCurrentSampleInfoSizes[i];
5713 }
5714 readSubsamples = smplinfosize > ivlength;
5715 }
5716
5717 if (readSubsamples) {
5718 uint16_t numsubsamples;
5719 if (size < 2) {
5720 return ERROR_MALFORMED;
5721 }
5722 if (!mDataSource->getUInt16(offset, &numsubsamples)) {
5723 return ERROR_IO;
5724 }
5725 offset += 2;
5726 size -= 2;
5727 for (size_t j = 0; j < numsubsamples; j++) {
5728 uint16_t numclear;
5729 uint32_t numencrypted;
5730 if (size < 6) {
5731 return ERROR_MALFORMED;
5732 }
5733 if (!mDataSource->getUInt16(offset, &numclear)) {
5734 return ERROR_IO;
5735 }
5736 offset += 2;
5737 if (!mDataSource->getUInt32(offset, &numencrypted)) {
5738 return ERROR_IO;
5739 }
5740 offset += 4;
5741 size -= 6;
5742 smpl->clearsizes.add(numclear);
5743 smpl->encryptedsizes.add(numencrypted);
5744 }
5745 } else {
5746 smpl->clearsizes.add(0);
5747 smpl->encryptedsizes.add(smpl->size);
5748 }
5749 }
5750
5751 return OK;
5752 }
5753
parseSampleEncryption(off64_t offset,off64_t chunk_data_size)5754 status_t MPEG4Source::parseSampleEncryption(off64_t offset, off64_t chunk_data_size) {
5755 uint32_t flags;
5756 if (chunk_data_size < 4) {
5757 return ERROR_MALFORMED;
5758 }
5759 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5760 return ERROR_MALFORMED;
5761 }
5762 return parseClearEncryptedSizes(offset + 4, true, flags, chunk_data_size - 4);
5763 }
5764
parseTrackFragmentHeader(off64_t offset,off64_t size)5765 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
5766
5767 if (size < 8) {
5768 return -EINVAL;
5769 }
5770
5771 uint32_t flags;
5772 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5773 return ERROR_MALFORMED;
5774 }
5775
5776 if (flags & 0xff000000) {
5777 return -EINVAL;
5778 }
5779
5780 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
5781 return ERROR_MALFORMED;
5782 }
5783
5784 if (mLastParsedTrackId != mTrackId) {
5785 // this is not the right track, skip it
5786 return OK;
5787 }
5788
5789 mTrackFragmentHeaderInfo.mFlags = flags;
5790 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
5791 offset += 8;
5792 size -= 8;
5793
5794 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
5795
5796 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
5797 if (size < 8) {
5798 return -EINVAL;
5799 }
5800
5801 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
5802 return ERROR_MALFORMED;
5803 }
5804 offset += 8;
5805 size -= 8;
5806 }
5807
5808 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
5809 if (size < 4) {
5810 return -EINVAL;
5811 }
5812
5813 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
5814 return ERROR_MALFORMED;
5815 }
5816 offset += 4;
5817 size -= 4;
5818 }
5819
5820 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5821 if (size < 4) {
5822 return -EINVAL;
5823 }
5824
5825 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
5826 return ERROR_MALFORMED;
5827 }
5828 offset += 4;
5829 size -= 4;
5830 }
5831
5832 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5833 if (size < 4) {
5834 return -EINVAL;
5835 }
5836
5837 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
5838 return ERROR_MALFORMED;
5839 }
5840 offset += 4;
5841 size -= 4;
5842 }
5843
5844 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5845 if (size < 4) {
5846 return -EINVAL;
5847 }
5848
5849 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
5850 return ERROR_MALFORMED;
5851 }
5852 offset += 4;
5853 size -= 4;
5854 }
5855
5856 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
5857 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
5858 }
5859
5860 mTrackFragmentHeaderInfo.mDataOffset = 0;
5861 return OK;
5862 }
5863
parseTrackFragmentRun(off64_t offset,off64_t size)5864 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
5865
5866 ALOGV("MPEG4Source::parseTrackFragmentRun");
5867 if (size < 8) {
5868 return -EINVAL;
5869 }
5870
5871 enum {
5872 kDataOffsetPresent = 0x01,
5873 kFirstSampleFlagsPresent = 0x04,
5874 kSampleDurationPresent = 0x100,
5875 kSampleSizePresent = 0x200,
5876 kSampleFlagsPresent = 0x400,
5877 kSampleCompositionTimeOffsetPresent = 0x800,
5878 };
5879
5880 uint32_t flags;
5881 if (!mDataSource->getUInt32(offset, &flags)) {
5882 return ERROR_MALFORMED;
5883 }
5884 // |version| only affects SampleCompositionTimeOffset field.
5885 // If version == 0, SampleCompositionTimeOffset is uint32_t;
5886 // Otherwise, SampleCompositionTimeOffset is int32_t.
5887 // Sample.compositionOffset is defined as int32_t.
5888 uint8_t version = flags >> 24;
5889 flags &= 0xffffff;
5890 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
5891
5892 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
5893 // These two shall not be used together.
5894 return -EINVAL;
5895 }
5896
5897 uint32_t sampleCount;
5898 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
5899 return ERROR_MALFORMED;
5900 }
5901 offset += 8;
5902 size -= 8;
5903
5904 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
5905
5906 uint32_t firstSampleFlags = 0;
5907
5908 if (flags & kDataOffsetPresent) {
5909 if (size < 4) {
5910 return -EINVAL;
5911 }
5912
5913 uint32_t dataOffsetDelta;
5914 if (!mDataSource->getUInt32(offset, &dataOffsetDelta)) {
5915 return ERROR_MALFORMED;
5916 }
5917
5918 if (__builtin_add_overflow(
5919 mTrackFragmentHeaderInfo.mBaseDataOffset, dataOffsetDelta, &dataOffset)) {
5920 ALOGW("b/232242894 mBaseDataOffset(%" PRIu64 ") + dataOffsetDelta(%u) overflows uint64",
5921 mTrackFragmentHeaderInfo.mBaseDataOffset, dataOffsetDelta);
5922 android_errorWriteLog(0x534e4554, "232242894");
5923 return ERROR_MALFORMED;
5924 }
5925
5926 offset += 4;
5927 size -= 4;
5928 }
5929
5930 if (flags & kFirstSampleFlagsPresent) {
5931 if (size < 4) {
5932 return -EINVAL;
5933 }
5934
5935 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
5936 return ERROR_MALFORMED;
5937 }
5938 offset += 4;
5939 size -= 4;
5940 }
5941
5942 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
5943 sampleCtsOffset = 0;
5944
5945 size_t bytesPerSample = 0;
5946 if (flags & kSampleDurationPresent) {
5947 bytesPerSample += 4;
5948 } else if (mTrackFragmentHeaderInfo.mFlags
5949 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5950 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
5951 } else if (mTrex) {
5952 sampleDuration = mTrex->default_sample_duration;
5953 }
5954
5955 if (flags & kSampleSizePresent) {
5956 bytesPerSample += 4;
5957 } else {
5958 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5959 #ifdef VERY_VERY_VERBOSE_LOGGING
5960 // We don't expect this, but also want to avoid spamming the log if
5961 // we hit this case.
5962 if (!(mTrackFragmentHeaderInfo.mFlags
5963 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent)) {
5964 ALOGW("No sample size specified");
5965 }
5966 #endif
5967 }
5968
5969 if (flags & kSampleFlagsPresent) {
5970 bytesPerSample += 4;
5971 } else {
5972 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5973 #ifdef VERY_VERY_VERBOSE_LOGGING
5974 // We don't expect this, but also want to avoid spamming the log if
5975 // we hit this case.
5976 if (!(mTrackFragmentHeaderInfo.mFlags
5977 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent)) {
5978 ALOGW("No sample flags specified");
5979 }
5980 #endif
5981 }
5982
5983 if (flags & kSampleCompositionTimeOffsetPresent) {
5984 bytesPerSample += 4;
5985 } else {
5986 sampleCtsOffset = 0;
5987 }
5988
5989 if (bytesPerSample != 0) {
5990 if (size < (off64_t)sampleCount * bytesPerSample) {
5991 return -EINVAL;
5992 }
5993 } else {
5994 if (sampleDuration == 0) {
5995 ALOGW("b/123389881 sampleDuration == 0");
5996 android_errorWriteLog(0x534e4554, "124389881 zero");
5997 return -EINVAL;
5998 }
5999
6000 // apply some quick (vs strict legality) checks
6001 //
6002 static constexpr uint32_t kMaxTrunSampleCount = 10000;
6003 if (sampleCount > kMaxTrunSampleCount) {
6004 ALOGW("b/123389881 sampleCount(%u) > kMaxTrunSampleCount(%u)",
6005 sampleCount, kMaxTrunSampleCount);
6006 android_errorWriteLog(0x534e4554, "124389881 count");
6007 return -EINVAL;
6008 }
6009 }
6010
6011 Sample tmp;
6012 for (uint32_t i = 0; i < sampleCount; ++i) {
6013 if (flags & kSampleDurationPresent) {
6014 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
6015 return ERROR_MALFORMED;
6016 }
6017 offset += 4;
6018 }
6019
6020 if (flags & kSampleSizePresent) {
6021 if (!mDataSource->getUInt32(offset, &sampleSize)) {
6022 return ERROR_MALFORMED;
6023 }
6024 offset += 4;
6025 }
6026
6027 if (flags & kSampleFlagsPresent) {
6028 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
6029 return ERROR_MALFORMED;
6030 }
6031 offset += 4;
6032 }
6033
6034 if (flags & kSampleCompositionTimeOffsetPresent) {
6035 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
6036 return ERROR_MALFORMED;
6037 }
6038 offset += 4;
6039 }
6040
6041 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
6042 " flags 0x%08x ctsOffset %" PRIu32, i + 1,
6043 dataOffset, sampleSize, sampleDuration,
6044 (flags & kFirstSampleFlagsPresent) && i == 0
6045 ? firstSampleFlags : sampleFlags, sampleCtsOffset);
6046 tmp.offset = dataOffset;
6047 tmp.size = sampleSize;
6048 tmp.duration = sampleDuration;
6049 tmp.compositionOffset = sampleCtsOffset;
6050 memset(tmp.iv, 0, sizeof(tmp.iv));
6051 if (mCurrentSamples.add(tmp) < 0) {
6052 ALOGW("b/123389881 failed saving sample(n=%zu)", mCurrentSamples.size());
6053 android_errorWriteLog(0x534e4554, "124389881 allocation");
6054 mCurrentSamples.clear();
6055 return NO_MEMORY;
6056 }
6057
6058 if (__builtin_add_overflow(dataOffset, sampleSize, &dataOffset)) {
6059 ALOGW("b/232242894 dataOffset(%" PRIu64 ") + sampleSize(%u) overflows uint64",
6060 dataOffset, sampleSize);
6061 android_errorWriteLog(0x534e4554, "232242894");
6062 return ERROR_MALFORMED;
6063 }
6064 }
6065
6066 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
6067
6068 return OK;
6069 }
6070
getFormat(AMediaFormat * meta)6071 media_status_t MPEG4Source::getFormat(AMediaFormat *meta) {
6072 Mutex::Autolock autoLock(mLock);
6073 AMediaFormat_copy(meta, mFormat);
6074 return AMEDIA_OK;
6075 }
6076
parseNALSize(const uint8_t * data) const6077 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
6078 switch (mNALLengthSize) {
6079 case 1:
6080 return *data;
6081 case 2:
6082 return U16_AT(data);
6083 case 3:
6084 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
6085 case 4:
6086 return U32_AT(data);
6087 }
6088
6089 // This cannot happen, mNALLengthSize springs to life by adding 1 to
6090 // a 2-bit integer.
6091 CHECK(!"Should not be here.");
6092
6093 return 0;
6094 }
6095
parseHEVCLayerId(const uint8_t * data,size_t size)6096 int32_t MPEG4Source::parseHEVCLayerId(const uint8_t *data, size_t size) {
6097 if (data == nullptr || size < mNALLengthSize + 2) {
6098 return -1;
6099 }
6100
6101 // HEVC NAL-header (16-bit)
6102 // 1 6 6 3
6103 // |-|uuuuuu|------|iii|
6104 // ^ ^
6105 // NAL_type layer_id + 1
6106 //
6107 // Layer-id is non-zero only for Temporal Sub-layer Access pictures (TSA)
6108 enum {
6109 TSA_N = 2,
6110 TSA_R = 3,
6111 STSA_N = 4,
6112 STSA_R = 5,
6113 };
6114
6115 data += mNALLengthSize;
6116 uint16_t nalHeader = data[0] << 8 | data[1];
6117
6118 uint16_t nalType = (nalHeader >> 9) & 0x3Fu;
6119 if (nalType == TSA_N || nalType == TSA_R || nalType == STSA_N || nalType == STSA_R) {
6120 int32_t layerIdPlusOne = nalHeader & 0x7u;
6121 ALOGD_IF(layerIdPlusOne == 0, "got layerId 0 for TSA picture");
6122 return layerIdPlusOne - 1;
6123 }
6124 return 0;
6125 }
6126
getNALLengthSizeFromAvcCsd(const uint8_t * data,const size_t size) const6127 size_t MPEG4Source::getNALLengthSizeFromAvcCsd(const uint8_t *data, const size_t size) const {
6128 CHECK(data != nullptr);
6129 CHECK(size >= 7);
6130 CHECK_EQ((unsigned)data[0], 1u); // configurationVersion == 1
6131
6132 // The number of bytes used to encode the length of a NAL unit.
6133 return 1 + (data[4] & 3);
6134 }
6135
getNALLengthSizeFromHevcCsd(const uint8_t * data,const size_t size) const6136 size_t MPEG4Source::getNALLengthSizeFromHevcCsd(const uint8_t *data, const size_t size) const {
6137 CHECK(data != nullptr);
6138 CHECK(size >= 22);
6139 CHECK_EQ((unsigned)data[0], 1u); // configurationVersion == 1
6140
6141 // The number of bytes used to encode the length of a NAL unit.
6142 return 1 + (data[14 + 7] & 3);
6143 }
6144
read(MediaBufferHelper ** out,const ReadOptions * options)6145 media_status_t MPEG4Source::read(
6146 MediaBufferHelper **out, const ReadOptions *options) {
6147 Mutex::Autolock autoLock(mLock);
6148
6149 CHECK(mStarted);
6150
6151 if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) {
6152 *out = nullptr;
6153 return AMEDIA_ERROR_WOULD_BLOCK;
6154 }
6155
6156 if (mFirstMoofOffset > 0) {
6157 return fragmentedRead(out, options);
6158 }
6159
6160 *out = NULL;
6161
6162 int64_t targetSampleTimeUs = -1;
6163
6164 int64_t seekTimeUs;
6165 ReadOptions::SeekMode mode;
6166
6167 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6168 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6169 if (mIsHeif || mIsAvif) {
6170 CHECK(mSampleTable == NULL);
6171 CHECK(mItemTable != NULL);
6172 int32_t imageIndex;
6173 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_TRACK_ID, &imageIndex)) {
6174 return AMEDIA_ERROR_MALFORMED;
6175 }
6176
6177 status_t err;
6178 if (seekTimeUs >= 0) {
6179 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
6180 } else {
6181 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
6182 }
6183 if (err != OK) {
6184 return AMEDIA_ERROR_UNKNOWN;
6185 }
6186 } else {
6187 uint32_t findFlags = 0;
6188 switch (mode) {
6189 case ReadOptions::SEEK_PREVIOUS_SYNC:
6190 findFlags = SampleTable::kFlagBefore;
6191 break;
6192 case ReadOptions::SEEK_NEXT_SYNC:
6193 findFlags = SampleTable::kFlagAfter;
6194 break;
6195 case ReadOptions::SEEK_CLOSEST_SYNC:
6196 case ReadOptions::SEEK_CLOSEST:
6197 findFlags = SampleTable::kFlagClosest;
6198 break;
6199 case ReadOptions::SEEK_FRAME_INDEX:
6200 findFlags = SampleTable::kFlagFrameIndex;
6201 break;
6202 default:
6203 CHECK(!"Should not be here.");
6204 break;
6205 }
6206 if( mode != ReadOptions::SEEK_FRAME_INDEX) {
6207 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6208 if (mElstInitialEmptyEditTicks > 0) {
6209 elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6210 mTimescale;
6211 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6212 * Hence, lower bound on seekTimeUs is 0.
6213 */
6214 seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6215 }
6216 if (mElstShiftStartTicks > 0) {
6217 elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6218 seekTimeUs += elstShiftStartUs;
6219 }
6220 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6221 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6222 elstShiftStartUs);
6223 }
6224
6225 uint32_t sampleIndex;
6226 status_t err = mSampleTable->findSampleAtTime(
6227 seekTimeUs, 1000000, mTimescale,
6228 &sampleIndex, findFlags);
6229
6230 if (mode == ReadOptions::SEEK_CLOSEST
6231 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6232 // We found the closest sample already, now we want the sync
6233 // sample preceding it (or the sample itself of course), even
6234 // if the subsequent sync sample is closer.
6235 findFlags = SampleTable::kFlagBefore;
6236 }
6237
6238 uint32_t syncSampleIndex = sampleIndex;
6239 // assume every non-USAC/non-MPEGH audio sample is a sync sample.
6240 // This works around
6241 // seek issues with files that were incorrectly written with an
6242 // empty or single-sample stss block for the audio track
6243 if (err == OK && (!mIsAudio || mIsUsac || mIsMpegH)) {
6244 err = mSampleTable->findSyncSampleNear(
6245 sampleIndex, &syncSampleIndex, findFlags);
6246 }
6247
6248 uint64_t sampleTime;
6249 if (err == OK) {
6250 err = mSampleTable->getMetaDataForSample(
6251 sampleIndex, NULL, NULL, &sampleTime);
6252 }
6253
6254 if (err != OK) {
6255 if (err == ERROR_OUT_OF_RANGE) {
6256 // An attempt to seek past the end of the stream would
6257 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
6258 // this all the way to the MediaPlayer would cause abnormal
6259 // termination. Legacy behaviour appears to be to behave as if
6260 // we had seeked to the end of stream, ending normally.
6261 return AMEDIA_ERROR_END_OF_STREAM;
6262 }
6263 ALOGV("end of stream");
6264 return AMEDIA_ERROR_UNKNOWN;
6265 }
6266
6267 if (mode == ReadOptions::SEEK_CLOSEST
6268 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6269 if (mElstInitialEmptyEditTicks > 0) {
6270 sampleTime += mElstInitialEmptyEditTicks;
6271 }
6272 if (mElstShiftStartTicks > 0){
6273 if (sampleTime > mElstShiftStartTicks) {
6274 sampleTime -= mElstShiftStartTicks;
6275 } else {
6276 sampleTime = 0;
6277 }
6278 }
6279 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
6280 }
6281
6282 #if 0
6283 uint32_t syncSampleTime;
6284 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
6285 syncSampleIndex, NULL, NULL, &syncSampleTime));
6286
6287 ALOGI("seek to time %lld us => sample at time %lld us, "
6288 "sync sample at time %lld us",
6289 seekTimeUs,
6290 sampleTime * 1000000ll / mTimescale,
6291 syncSampleTime * 1000000ll / mTimescale);
6292 #endif
6293
6294 mCurrentSampleIndex = syncSampleIndex;
6295 }
6296
6297 if (mBuffer != NULL) {
6298 mBuffer->release();
6299 mBuffer = NULL;
6300 }
6301
6302 // fall through
6303 }
6304
6305 off64_t offset = 0;
6306 size_t size = 0;
6307 int64_t cts;
6308 uint64_t stts;
6309 bool isSyncSample;
6310 bool newBuffer = false;
6311 if (mBuffer == NULL) {
6312 newBuffer = true;
6313
6314 status_t err;
6315 if (!mIsHeif && !mIsAvif) {
6316 err = mSampleTable->getMetaDataForSample(mCurrentSampleIndex, &offset, &size,
6317 (uint64_t*)&cts, &isSyncSample, &stts);
6318 if(err == OK) {
6319 if (mElstInitialEmptyEditTicks > 0) {
6320 cts += mElstInitialEmptyEditTicks;
6321 }
6322 if (mElstShiftStartTicks > 0) {
6323 // cts can be negative. for example, initial audio samples for gapless playback.
6324 cts -= (int64_t)mElstShiftStartTicks;
6325 }
6326 }
6327 } else {
6328 err = mItemTable->getImageOffsetAndSize(
6329 options && options->getSeekTo(&seekTimeUs, &mode) ?
6330 &mCurrentSampleIndex : NULL, &offset, &size);
6331
6332 cts = stts = 0;
6333 isSyncSample = 0;
6334 ALOGV("image offset %lld, size %zu", (long long)offset, size);
6335 }
6336
6337 if (err != OK) {
6338 if (err == ERROR_END_OF_STREAM) {
6339 return AMEDIA_ERROR_END_OF_STREAM;
6340 }
6341 return AMEDIA_ERROR_UNKNOWN;
6342 }
6343
6344 err = mBufferGroup->acquire_buffer(&mBuffer);
6345
6346 if (err != OK || mBuffer == nullptr) {
6347 CHECK(mBuffer == NULL);
6348 return AMEDIA_ERROR_UNKNOWN;
6349 }
6350 if (size > mBuffer->size()) {
6351 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6352 mBuffer->release();
6353 mBuffer = NULL;
6354 return AMEDIA_ERROR_UNKNOWN; // ERROR_BUFFER_TOO_SMALL
6355 }
6356 }
6357
6358 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize) && !mIsAC4) {
6359 if (newBuffer) {
6360 if (mIsPcm) {
6361 // The twos' PCM block reader assumes that all samples has the same size.
6362 uint32_t lastSampleIndexInChunk = mSampleTable->getLastSampleIndexInChunk();
6363 if (lastSampleIndexInChunk < mCurrentSampleIndex) {
6364 mBuffer->release();
6365 mBuffer = nullptr;
6366 return AMEDIA_ERROR_UNKNOWN;
6367 }
6368 uint32_t samplesToRead = lastSampleIndexInChunk - mCurrentSampleIndex + 1;
6369 if (samplesToRead > kMaxPcmFrameSize) {
6370 samplesToRead = kMaxPcmFrameSize;
6371 }
6372
6373 ALOGV("Reading %d PCM frames of size %zu at index %d to stop of chunk at %d",
6374 samplesToRead, size, mCurrentSampleIndex,
6375 mSampleTable->getLastSampleIndexInChunk());
6376
6377 size_t totalSize = samplesToRead * size;
6378 if (mBuffer->size() < totalSize) {
6379 mBuffer->release();
6380 mBuffer = nullptr;
6381 return AMEDIA_ERROR_UNKNOWN;
6382 }
6383 uint8_t* buf = (uint8_t *)mBuffer->data();
6384 ssize_t bytesRead = mDataSource->readAt(offset, buf, totalSize);
6385 if (bytesRead < (ssize_t)totalSize) {
6386 mBuffer->release();
6387 mBuffer = NULL;
6388 return AMEDIA_ERROR_IO;
6389 }
6390
6391 AMediaFormat *meta = mBuffer->meta_data();
6392 AMediaFormat_clear(meta);
6393 AMediaFormat_setInt64(
6394 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6395 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6396
6397 int32_t byteOrder = 0;
6398 bool isGetBigEndian = AMediaFormat_getInt32(mFormat,
6399 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, &byteOrder);
6400
6401 if (isGetBigEndian && byteOrder == 1) {
6402 // Big-endian -> little-endian
6403 uint16_t *dstData = (uint16_t *)buf;
6404 uint16_t *srcData = (uint16_t *)buf;
6405
6406 for (size_t j = 0; j < bytesRead / sizeof(uint16_t); j++) {
6407 dstData[j] = ntohs(srcData[j]);
6408 }
6409 }
6410
6411 mCurrentSampleIndex += samplesToRead;
6412 mBuffer->set_range(0, totalSize);
6413 } else {
6414 ssize_t num_bytes_read =
6415 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6416
6417 if (num_bytes_read < (ssize_t)size) {
6418 mBuffer->release();
6419 mBuffer = NULL;
6420
6421 return AMEDIA_ERROR_IO;
6422 }
6423
6424 CHECK(mBuffer != NULL);
6425 mBuffer->set_range(0, size);
6426 AMediaFormat *meta = mBuffer->meta_data();
6427 AMediaFormat_clear(meta);
6428 AMediaFormat_setInt64(
6429 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6430 AMediaFormat_setInt64(
6431 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6432
6433 if (targetSampleTimeUs >= 0) {
6434 AMediaFormat_setInt64(
6435 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6436 }
6437
6438 if (isSyncSample) {
6439 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6440 }
6441
6442 AMediaFormat_setInt64(
6443 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/,
6444 offset);
6445
6446 if (mSampleTable != nullptr &&
6447 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6448 AMediaFormat_setInt64(
6449 meta,
6450 "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6451 mSampleTable->getLastSampleIndexInChunk());
6452 }
6453
6454 ++mCurrentSampleIndex;
6455 }
6456 }
6457
6458 *out = mBuffer;
6459 mBuffer = NULL;
6460
6461 return AMEDIA_OK;
6462
6463 } else if (mIsAC4) {
6464 CHECK(mBuffer != NULL);
6465 // Make sure there is enough space to write the sync header and the raw frame
6466 if (mBuffer->range_length() < (7 + size)) {
6467 mBuffer->release();
6468 mBuffer = NULL;
6469
6470 return AMEDIA_ERROR_IO;
6471 }
6472
6473 uint8_t *dstData = (uint8_t *)mBuffer->data();
6474 size_t dstOffset = 0;
6475 // Add AC-4 sync header to MPEG4 encapsulated AC-4 raw frame
6476 // AC40 sync word, meaning no CRC at the end of the frame
6477 dstData[dstOffset++] = 0xAC;
6478 dstData[dstOffset++] = 0x40;
6479 dstData[dstOffset++] = 0xFF;
6480 dstData[dstOffset++] = 0xFF;
6481 dstData[dstOffset++] = (uint8_t)((size >> 16) & 0xFF);
6482 dstData[dstOffset++] = (uint8_t)((size >> 8) & 0xFF);
6483 dstData[dstOffset++] = (uint8_t)((size >> 0) & 0xFF);
6484
6485 ssize_t numBytesRead = mDataSource->readAt(offset, dstData + dstOffset, size);
6486 if (numBytesRead != (ssize_t)size) {
6487 mBuffer->release();
6488 mBuffer = NULL;
6489
6490 return AMEDIA_ERROR_IO;
6491 }
6492
6493 mBuffer->set_range(0, dstOffset + size);
6494 AMediaFormat *meta = mBuffer->meta_data();
6495 AMediaFormat_clear(meta);
6496 AMediaFormat_setInt64(
6497 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6498 AMediaFormat_setInt64(
6499 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6500
6501 if (targetSampleTimeUs >= 0) {
6502 AMediaFormat_setInt64(
6503 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6504 }
6505
6506 if (isSyncSample) {
6507 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6508 }
6509
6510 void *presentationsData;
6511 size_t presentationsSize;
6512 if (AMediaFormat_getBuffer(
6513 mFormat, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
6514 &presentationsData, &presentationsSize)) {
6515 AMediaFormat_setBuffer(
6516 meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
6517 presentationsData, presentationsSize);
6518 }
6519
6520 ++mCurrentSampleIndex;
6521
6522 *out = mBuffer;
6523 mBuffer = NULL;
6524
6525 return AMEDIA_OK;
6526 } else {
6527 // Whole NAL units are returned but each fragment is prefixed by
6528 // the start code (0x00 00 00 01).
6529 ssize_t num_bytes_read = 0;
6530 bool mSrcBufferFitsDataToRead = size <= mSrcBufferSize;
6531 if (mSrcBufferFitsDataToRead) {
6532 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
6533 } else {
6534 // We are trying to read a sample larger than the expected max sample size.
6535 // Fall through and let the failure be handled by the following if.
6536 android_errorWriteLog(0x534e4554, "188893559");
6537 }
6538
6539 if (num_bytes_read < (ssize_t)size) {
6540 mBuffer->release();
6541 mBuffer = NULL;
6542 return mSrcBufferFitsDataToRead ? AMEDIA_ERROR_IO : AMEDIA_ERROR_MALFORMED;
6543 }
6544
6545 uint8_t *dstData = (uint8_t *)mBuffer->data();
6546 size_t srcOffset = 0;
6547 size_t dstOffset = 0;
6548
6549 while (srcOffset < size) {
6550 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6551 size_t nalLength = 0;
6552 if (!isMalFormed) {
6553 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6554 srcOffset += mNALLengthSize;
6555 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
6556 }
6557
6558 if (isMalFormed) {
6559 //if nallength abnormal,ignore it.
6560 ALOGW("abnormal nallength, ignore this NAL");
6561 srcOffset = size;
6562 break;
6563 }
6564
6565 if (nalLength == 0) {
6566 continue;
6567 }
6568
6569 if (dstOffset > SIZE_MAX - 4 ||
6570 dstOffset + 4 > SIZE_MAX - nalLength ||
6571 dstOffset + 4 + nalLength > mBuffer->size()) {
6572 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
6573 android_errorWriteLog(0x534e4554, "27208621");
6574 mBuffer->release();
6575 mBuffer = NULL;
6576 return AMEDIA_ERROR_MALFORMED;
6577 }
6578
6579 dstData[dstOffset++] = 0;
6580 dstData[dstOffset++] = 0;
6581 dstData[dstOffset++] = 0;
6582 dstData[dstOffset++] = 1;
6583 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6584 srcOffset += nalLength;
6585 dstOffset += nalLength;
6586 }
6587 CHECK_EQ(srcOffset, size);
6588 CHECK(mBuffer != NULL);
6589 mBuffer->set_range(0, dstOffset);
6590
6591 AMediaFormat *meta = mBuffer->meta_data();
6592 AMediaFormat_clear(meta);
6593 AMediaFormat_setInt64(
6594 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6595 AMediaFormat_setInt64(
6596 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6597
6598 if (targetSampleTimeUs >= 0) {
6599 AMediaFormat_setInt64(
6600 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6601 }
6602
6603 if (mIsAVC) {
6604 uint32_t layerId = FindAVCLayerId(
6605 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6606 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6607 } else if (mIsHEVC) {
6608 int32_t layerId = parseHEVCLayerId(
6609 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6610 if (layerId >= 0) {
6611 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6612 }
6613 }
6614
6615 if (isSyncSample) {
6616 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6617 }
6618
6619 AMediaFormat_setInt64(
6620 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/, offset);
6621
6622 if (mSampleTable != nullptr &&
6623 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6624 AMediaFormat_setInt64(
6625 meta,
6626 "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6627 mSampleTable->getLastSampleIndexInChunk());
6628 }
6629
6630 ++mCurrentSampleIndex;
6631
6632 *out = mBuffer;
6633 mBuffer = NULL;
6634
6635 return AMEDIA_OK;
6636 }
6637 }
6638
fragmentedRead(MediaBufferHelper ** out,const ReadOptions * options)6639 media_status_t MPEG4Source::fragmentedRead(
6640 MediaBufferHelper **out, const ReadOptions *options) {
6641
6642 ALOGV("MPEG4Source::fragmentedRead");
6643
6644 CHECK(mStarted);
6645
6646 *out = NULL;
6647
6648 int64_t targetSampleTimeUs = -1;
6649
6650 int64_t seekTimeUs;
6651 ReadOptions::SeekMode mode;
6652 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6653 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6654 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6655 if (mElstInitialEmptyEditTicks > 0) {
6656 elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6657 mTimescale;
6658 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6659 * Hence, lower bound on seekTimeUs is 0.
6660 */
6661 seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6662 }
6663 if (mElstShiftStartTicks > 0){
6664 elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6665 seekTimeUs += elstShiftStartUs;
6666 }
6667 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6668 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6669 elstShiftStartUs);
6670
6671 int numSidxEntries = mSegments.size();
6672 if (numSidxEntries != 0) {
6673 int64_t totalTime = 0;
6674 off64_t totalOffset = mFirstMoofOffset;
6675 for (int i = 0; i < numSidxEntries; i++) {
6676 const SidxEntry *se = &mSegments[i];
6677 if (totalTime + se->mDurationUs > seekTimeUs) {
6678 // The requested time is somewhere in this segment
6679 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
6680 (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
6681 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
6682 // requested next sync, or closest sync and it was closer to the end of
6683 // this segment
6684 totalTime += se->mDurationUs;
6685 totalOffset += se->mSize;
6686 }
6687 break;
6688 }
6689 totalTime += se->mDurationUs;
6690 totalOffset += se->mSize;
6691 }
6692 mCurrentMoofOffset = totalOffset;
6693 mNextMoofOffset = -1;
6694 mCurrentSamples.clear();
6695 mCurrentSampleIndex = 0;
6696 status_t err = parseChunk(&totalOffset);
6697 if (err != OK) {
6698 return AMEDIA_ERROR_UNKNOWN;
6699 }
6700 mCurrentTime = totalTime * mTimescale / 1000000ll;
6701 } else {
6702 // without sidx boxes, we can only seek to 0
6703 mCurrentMoofOffset = mFirstMoofOffset;
6704 mNextMoofOffset = -1;
6705 mCurrentSamples.clear();
6706 mCurrentSampleIndex = 0;
6707 off64_t tmp = mCurrentMoofOffset;
6708 status_t err = parseChunk(&tmp);
6709 if (err != OK) {
6710 return AMEDIA_ERROR_UNKNOWN;
6711 }
6712 mCurrentTime = 0;
6713 }
6714
6715 if (mBuffer != NULL) {
6716 mBuffer->release();
6717 mBuffer = NULL;
6718 }
6719
6720 // fall through
6721 }
6722
6723 off64_t offset = 0;
6724 size_t size = 0;
6725 int64_t cts = 0;
6726 bool isSyncSample = false;
6727 bool newBuffer = false;
6728 if (mBuffer == NULL || mCurrentSampleIndex >= mCurrentSamples.size()) {
6729 newBuffer = true;
6730
6731 if (mBuffer != NULL) {
6732 mBuffer->release();
6733 mBuffer = NULL;
6734 }
6735 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6736 // move to next fragment if there is one
6737 if (mNextMoofOffset <= mCurrentMoofOffset) {
6738 return AMEDIA_ERROR_END_OF_STREAM;
6739 }
6740 off64_t nextMoof = mNextMoofOffset;
6741 mCurrentMoofOffset = nextMoof;
6742 mCurrentSamples.clear();
6743 mCurrentSampleIndex = 0;
6744 status_t err = parseChunk(&nextMoof);
6745 if (err != OK) {
6746 return AMEDIA_ERROR_UNKNOWN;
6747 }
6748 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6749 return AMEDIA_ERROR_END_OF_STREAM;
6750 }
6751 }
6752
6753 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6754 offset = smpl->offset;
6755 size = smpl->size;
6756 cts = (int64_t)mCurrentTime + (int64_t)smpl->compositionOffset;
6757
6758 if (mElstInitialEmptyEditTicks > 0) {
6759 cts += mElstInitialEmptyEditTicks;
6760 }
6761 if (mElstShiftStartTicks > 0) {
6762 // cts can be negative. for example, initial audio samples for gapless playback.
6763 cts -= (int64_t)mElstShiftStartTicks;
6764 }
6765
6766 mCurrentTime += smpl->duration;
6767 isSyncSample = (mCurrentSampleIndex == 0);
6768
6769 status_t err = mBufferGroup->acquire_buffer(&mBuffer);
6770
6771 if (err != OK) {
6772 CHECK(mBuffer == NULL);
6773 ALOGV("acquire_buffer returned %d", err);
6774 return AMEDIA_ERROR_UNKNOWN;
6775 }
6776 if (size > mBuffer->size()) {
6777 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6778 mBuffer->release();
6779 mBuffer = NULL;
6780 return AMEDIA_ERROR_UNKNOWN;
6781 }
6782 }
6783
6784 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6785 AMediaFormat *bufmeta = mBuffer->meta_data();
6786 AMediaFormat_clear(bufmeta);
6787 if (smpl->encryptedsizes.size()) {
6788 // store clear/encrypted lengths in metadata
6789 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_PLAIN_SIZES,
6790 smpl->clearsizes.array(), smpl->clearsizes.size() * sizeof(uint32_t));
6791 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_SIZES,
6792 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * sizeof(uint32_t));
6793 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, mDefaultIVSize);
6794 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mCryptoMode);
6795 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_KEY, mCryptoKey, 16);
6796 AMediaFormat_setInt32(bufmeta,
6797 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, mDefaultEncryptedByteBlock);
6798 AMediaFormat_setInt32(bufmeta,
6799 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, mDefaultSkipByteBlock);
6800
6801 void *iv = NULL;
6802 size_t ivlength = 0;
6803 if (!AMediaFormat_getBuffer(mFormat,
6804 "crypto-iv", &iv, &ivlength)) {
6805 iv = (void *) smpl->iv;
6806 ivlength = 16; // use 16 or the actual size?
6807 }
6808 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
6809 }
6810
6811 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize)) {
6812 if (newBuffer) {
6813 if (!isInRange((size_t)0u, mBuffer->size(), size)) {
6814 mBuffer->release();
6815 mBuffer = NULL;
6816
6817 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
6818 return AMEDIA_ERROR_MALFORMED;
6819 }
6820
6821 ssize_t num_bytes_read =
6822 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6823
6824 if (num_bytes_read < (ssize_t)size) {
6825 mBuffer->release();
6826 mBuffer = NULL;
6827
6828 ALOGE("i/o error");
6829 return AMEDIA_ERROR_IO;
6830 }
6831
6832 CHECK(mBuffer != NULL);
6833 mBuffer->set_range(0, size);
6834 AMediaFormat_setInt64(bufmeta,
6835 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6836 AMediaFormat_setInt64(bufmeta,
6837 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6838
6839 if (targetSampleTimeUs >= 0) {
6840 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6841 }
6842
6843 if (mIsAVC) {
6844 uint32_t layerId = FindAVCLayerId(
6845 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6846 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6847 } else if (mIsHEVC) {
6848 int32_t layerId = parseHEVCLayerId(
6849 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6850 if (layerId >= 0) {
6851 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6852 }
6853 }
6854
6855 if (isSyncSample) {
6856 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6857 }
6858
6859 ++mCurrentSampleIndex;
6860 }
6861
6862 *out = mBuffer;
6863 mBuffer = NULL;
6864
6865 return AMEDIA_OK;
6866
6867 } else {
6868 ALOGV("whole NAL");
6869 // Whole NAL units are returned but each fragment is prefixed by
6870 // the start code (0x00 00 00 01).
6871 ssize_t num_bytes_read = 0;
6872 void *data = NULL;
6873 bool isMalFormed = false;
6874 int32_t max_size;
6875 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &max_size)
6876 || !isInRange((size_t)0u, (size_t)max_size, size)) {
6877 isMalFormed = true;
6878 } else {
6879 data = mSrcBuffer;
6880 }
6881
6882 if (isMalFormed || data == NULL) {
6883 ALOGE("isMalFormed size %zu", size);
6884 if (mBuffer != NULL) {
6885 mBuffer->release();
6886 mBuffer = NULL;
6887 }
6888 return AMEDIA_ERROR_MALFORMED;
6889 }
6890 num_bytes_read = mDataSource->readAt(offset, data, size);
6891
6892 if (num_bytes_read < (ssize_t)size) {
6893 mBuffer->release();
6894 mBuffer = NULL;
6895
6896 ALOGE("i/o error");
6897 return AMEDIA_ERROR_IO;
6898 }
6899
6900 uint8_t *dstData = (uint8_t *)mBuffer->data();
6901 size_t srcOffset = 0;
6902 size_t dstOffset = 0;
6903
6904 while (srcOffset < size) {
6905 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6906 size_t nalLength = 0;
6907 if (!isMalFormed) {
6908 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6909 srcOffset += mNALLengthSize;
6910 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
6911 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
6912 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
6913 }
6914
6915 if (isMalFormed) {
6916 ALOGE("Video is malformed; nalLength %zu", nalLength);
6917 mBuffer->release();
6918 mBuffer = NULL;
6919 return AMEDIA_ERROR_MALFORMED;
6920 }
6921
6922 if (nalLength == 0) {
6923 continue;
6924 }
6925
6926 if (dstOffset > SIZE_MAX - 4 ||
6927 dstOffset + 4 > SIZE_MAX - nalLength ||
6928 dstOffset + 4 + nalLength > mBuffer->size()) {
6929 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
6930 android_errorWriteLog(0x534e4554, "26365349");
6931 mBuffer->release();
6932 mBuffer = NULL;
6933 return AMEDIA_ERROR_MALFORMED;
6934 }
6935
6936 dstData[dstOffset++] = 0;
6937 dstData[dstOffset++] = 0;
6938 dstData[dstOffset++] = 0;
6939 dstData[dstOffset++] = 1;
6940 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6941 srcOffset += nalLength;
6942 dstOffset += nalLength;
6943 }
6944 CHECK_EQ(srcOffset, size);
6945 CHECK(mBuffer != NULL);
6946 mBuffer->set_range(0, dstOffset);
6947
6948 AMediaFormat *bufmeta = mBuffer->meta_data();
6949 AMediaFormat_setInt64(bufmeta,
6950 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6951 AMediaFormat_setInt64(bufmeta,
6952 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6953
6954 if (targetSampleTimeUs >= 0) {
6955 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6956 }
6957
6958 if (isSyncSample) {
6959 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6960 }
6961
6962 ++mCurrentSampleIndex;
6963
6964 *out = mBuffer;
6965 mBuffer = NULL;
6966
6967 return AMEDIA_OK;
6968 }
6969
6970 return AMEDIA_OK;
6971 }
6972
findTrackByMimePrefix(const char * mimePrefix)6973 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
6974 const char *mimePrefix) {
6975 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
6976 const char *mime;
6977 if (AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)
6978 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
6979 return track;
6980 }
6981 }
6982
6983 return NULL;
6984 }
6985
LegacySniffMPEG4(DataSourceHelper * source,float * confidence)6986 static bool LegacySniffMPEG4(DataSourceHelper *source, float *confidence) {
6987 uint8_t header[8];
6988
6989 ssize_t n = source->readAt(4, header, sizeof(header));
6990 if (n < (ssize_t)sizeof(header)) {
6991 return false;
6992 }
6993
6994 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
6995 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
6996 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
6997 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
6998 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
6999 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
7000 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
7001 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)
7002 || !memcmp(header, "ftypavif", 8) || !memcmp(header, "ftypavis", 8)) {
7003 *confidence = 0.4;
7004
7005 return true;
7006 }
7007
7008 return false;
7009 }
7010
isCompatibleBrand(uint32_t fourcc)7011 static bool isCompatibleBrand(uint32_t fourcc) {
7012 static const uint32_t kCompatibleBrands[] = {
7013 FOURCC("isom"),
7014 FOURCC("iso2"),
7015 FOURCC("avc1"),
7016 FOURCC("hvc1"),
7017 FOURCC("hev1"),
7018 FOURCC("av01"),
7019 FOURCC("vp09"),
7020 FOURCC("3gp4"),
7021 FOURCC("mp41"),
7022 FOURCC("mp42"),
7023 FOURCC("dash"),
7024 FOURCC("nvr1"),
7025
7026 // Won't promise that the following file types can be played.
7027 // Just give these file types a chance.
7028 FOURCC("qt "), // Apple's QuickTime
7029 FOURCC("MSNV"), // Sony's PSP
7030 FOURCC("wmf "),
7031
7032 FOURCC("3g2a"), // 3GPP2
7033 FOURCC("3g2b"),
7034 FOURCC("mif1"), // HEIF image
7035 FOURCC("heic"), // HEIF image
7036 FOURCC("msf1"), // HEIF image sequence
7037 FOURCC("hevc"), // HEIF image sequence
7038 FOURCC("avif"), // AVIF image
7039 FOURCC("avis"), // AVIF image sequence
7040 };
7041
7042 for (size_t i = 0;
7043 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
7044 ++i) {
7045 if (kCompatibleBrands[i] == fourcc) {
7046 return true;
7047 }
7048 }
7049
7050 return false;
7051 }
7052
7053 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
7054 // compatible brand is present.
7055 // Also try to identify where this file's metadata ends
7056 // (end of the 'moov' atom) and report it to the caller as part of
7057 // the metadata.
BetterSniffMPEG4(DataSourceHelper * source,float * confidence)7058 static bool BetterSniffMPEG4(DataSourceHelper *source, float *confidence) {
7059 // We scan up to 128 bytes to identify this file as an MP4.
7060 static const off64_t kMaxScanOffset = 128ll;
7061
7062 off64_t offset = 0ll;
7063 bool foundGoodFileType = false;
7064 off64_t moovAtomEndOffset = -1ll;
7065 bool done = false;
7066
7067 while (!done && offset < kMaxScanOffset) {
7068 uint32_t hdr[2];
7069 if (source->readAt(offset, hdr, 8) < 8) {
7070 return false;
7071 }
7072
7073 uint64_t chunkSize = ntohl(hdr[0]);
7074 uint32_t chunkType = ntohl(hdr[1]);
7075 off64_t chunkDataOffset = offset + 8;
7076
7077 if (chunkSize == 1) {
7078 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
7079 return false;
7080 }
7081
7082 chunkSize = ntoh64(chunkSize);
7083 chunkDataOffset += 8;
7084
7085 if (chunkSize < 16) {
7086 // The smallest valid chunk is 16 bytes long in this case.
7087 return false;
7088 }
7089 if (chunkSize > INT64_MAX) {
7090 // reject overly large chunk sizes that could
7091 // be interpreted as negative
7092 ALOGE("chunk size too large");
7093 return false;
7094 }
7095
7096 } else if (chunkSize < 8) {
7097 // The smallest valid chunk is 8 bytes long.
7098 return false;
7099 }
7100
7101 // (data_offset - offset) is either 8 or 16
7102 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
7103 if (chunkDataSize < 0) {
7104 ALOGE("b/23540914");
7105 return false;
7106 }
7107
7108 char chunkstring[5];
7109 MakeFourCCString(chunkType, chunkstring);
7110 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
7111 chunkstring, chunkSize, (long long)offset);
7112 switch (chunkType) {
7113 case FOURCC("ftyp"):
7114 {
7115 if (chunkDataSize < 8) {
7116 return false;
7117 }
7118
7119 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
7120 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
7121 if (i == 1) {
7122 // Skip this index, it refers to the minorVersion,
7123 // not a brand.
7124 continue;
7125 }
7126
7127 uint32_t brand;
7128 if (source->readAt(
7129 chunkDataOffset + 4 * i, &brand, 4) < 4) {
7130 return false;
7131 }
7132
7133 brand = ntohl(brand);
7134
7135 if (isCompatibleBrand(brand)) {
7136 foundGoodFileType = true;
7137 break;
7138 }
7139 }
7140
7141 if (!foundGoodFileType) {
7142 return false;
7143 }
7144
7145 break;
7146 }
7147
7148 case FOURCC("moov"):
7149 {
7150 if (__builtin_add_overflow(offset, chunkSize, &moovAtomEndOffset)) {
7151 ALOGE("chunk size + offset would overflow");
7152 return false;
7153 }
7154
7155 done = true;
7156 break;
7157 }
7158
7159 default:
7160 break;
7161 }
7162
7163 if (__builtin_add_overflow(offset, chunkSize, &offset)) {
7164 ALOGE("chunk size + offset would overflow");
7165 return false;
7166 }
7167 }
7168
7169 if (!foundGoodFileType) {
7170 return false;
7171 }
7172
7173 *confidence = 0.4f;
7174
7175 return true;
7176 }
7177
CreateExtractor(CDataSource * source,void *)7178 static CMediaExtractor* CreateExtractor(CDataSource *source, void *) {
7179 return wrap(new MPEG4Extractor(new DataSourceHelper(source)));
7180 }
7181
Sniff(CDataSource * source,float * confidence,void **,FreeMetaFunc *)7182 static CreatorFunc Sniff(
7183 CDataSource *source, float *confidence, void **,
7184 FreeMetaFunc *) {
7185 DataSourceHelper helper(source);
7186 if (BetterSniffMPEG4(&helper, confidence)) {
7187 return CreateExtractor;
7188 }
7189
7190 if (LegacySniffMPEG4(&helper, confidence)) {
7191 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
7192 return CreateExtractor;
7193 }
7194
7195 return NULL;
7196 }
7197
// NULL-terminated list of file-name extensions (written without a leading
// dot) that GETEXTRACTORDEF() passes along together with Sniff.
static const char *extensions[] = {
    "3g2",
    "3ga",
    "3gp",
    "3gpp",
    "3gpp2",
    "m4a",
    "m4r",
    "m4v",
    "mov",
    "mp4",
    "qt",
    NULL
};
7212
extern "C" {
// This is the only symbol that needs to be exported
//
// Returns the ExtractorDef for this plugin. The aggregate carries, in order:
// EXTRACTORDEF_VERSION, the plugin UUID, the plugin version (2), the display
// name "MP4 Extractor", and the v3 entry pairing the Sniff function with the
// extensions table.
__attribute__ ((visibility ("default")))
ExtractorDef GETEXTRACTORDEF() {
    return {
        EXTRACTORDEF_VERSION,
        UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
        2, // version
        "MP4 Extractor",
        { .v3 = {Sniff, extensions} },
    };
}

} // extern "C"
7227
7228 } // namespace android
7229