1 // Copyright (C) 2021 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #pragma once
16 
17 #include "host-common/H264NaluParser.h"
18 #include "host-common/MediaFfmpegVideoHelper.h"
19 #include "host-common/MediaSnapshotState.h"
20 #include "host-common/MediaTexturePool.h"
21 #include "host-common/MediaVideoHelper.h"
22 #include "host-common/YuvConverter.h"
23 
24 // this is apple's video tool box header
25 #include <VideoToolbox/VideoToolbox.h>
26 
27 #ifndef kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder
28 #define kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder \
29     CFSTR("RequireHardwareAcceleratedVideoDecoder")
30 #endif
31 
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#include <cstdint>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
43 
44 namespace android {
45 namespace emulation {
46 
47 class MediaVideoToolBoxVideoHelper : public MediaVideoHelper {
48 public:
49     enum class FrameStorageMode {
50         USE_BYTE_BUFFER = 1,
51         USE_GPU_TEXTURE = 2,
52     };
53 
54     enum class OutputTreatmentMode {
55         IGNORE_RESULT = 1,
56         SAVE_RESULT = 2,
57     };
58 
59     MediaVideoToolBoxVideoHelper(int w,
60                                  int h,
61                                  OutputTreatmentMode outMode,
62                                  FrameStorageMode fMode);
63     ~MediaVideoToolBoxVideoHelper() override;
64 
65     // return true if success; false otherwise
66     bool init() override;
67     void decode(const uint8_t* frame,
68                 size_t szBytes,
69                 uint64_t inputPts) override;
70     void flush() override;
71     void deInit() override;
72 
73     void resetTexturePool(MediaTexturePool* pool = nullptr) {
74         mTexturePool = pool;
75     }
76 
error()77     virtual int error() const override { return mErrorCode; }
good()78     virtual bool good() const override { return mIsGood; }
fatal()79     virtual bool fatal() const override { return false; }
80 
81 private:
82     // static
83     static void videoToolboxDecompressCallback(void* opaque,
84                                                void* sourceFrameRefCon,
85                                                OSStatus status,
86                                                VTDecodeInfoFlags flags,
87                                                CVPixelBufferRef image_buffer,
88                                                CMTime pts,
89                                                CMTime duration);
90     static CFDictionaryRef createOutputBufferAttributes(int width,
91                                                         int height,
92                                                         OSType pix_fmt);
93     static CMSampleBufferRef createSampleBuffer(CMFormatDescriptionRef fmtDesc,
94                                                 void* buffer,
95                                                 size_t sz);
96 
97     void copyFrame();
98     void copyFrameToTextures();
99     void copyFrameToCPU();
100     void createCMFormatDescription();
101     void recreateDecompressionSession();
102     void getOutputWH();
103     void resetDecoderSession();
104     void resetFormatDesc();
105 
106     struct InputFrame {
InputFrameInputFrame107         InputFrame(H264NaluParser::H264NaluType in_type,
108                    const uint8_t* in_data,
109                    int in_size)
110             : type(in_type), data(in_data), size(in_size) {}
111 
112         H264NaluParser::H264NaluType type;
113         const uint8_t* data;
114         int size;
115     };
116 
117     std::vector<InputFrame> mInputFrames;
118 
119     bool parseInputFrames(const uint8_t* frame, size_t sz);
120 
121     // returns the remaining part of the frame, nullptr if none
122     const uint8_t* parseOneFrame(const uint8_t* frame, size_t szBytes);
123 
124     void handleIDRFrame(const uint8_t* ptr, size_t szBytes, uint64_t pts);
125 
126     std::vector<uint8_t> mSPS;  // sps NALU
127     std::vector<uint8_t> mPPS;  // pps NALU
128 
129     // turn on gpu texture mode
130     bool mUseGpuTexture = true;
131     MediaTexturePool* mTexturePool = nullptr;
132 
133     uint64_t mNumInputFrame{0};
134     uint64_t mNumOutputFrame{0};
135     int mErrorCode = 0;
136     bool mIsGood = true;
137     unsigned int mHeight = 0;
138     unsigned int mWidth = 0;
139     unsigned int mOutputHeight = 0;
140     unsigned int mOutputWidth = 0;
141     unsigned int mSurfaceHeight = 0;
142     unsigned int mBPP = 0;
143     unsigned int mSurfaceWidth = 0;
144     unsigned int mLumaWidth = 0;
145     unsigned int mLumaHeight = 0;
146     unsigned int mChromaHeight = 0;
147     unsigned int mOutBufferSize = 0;
148 
149     uint64_t mOutputPts = 0;
150     bool mImageReady = false;
151 
152     // used in vtb callback to saved decoded frame
153     std::vector<uint8_t> mSavedDecodedFrame;
154 
155     // TODO: get color aspects from some where
156     MediaSnapshotState::ColorAspects mColorAspects{0};
157 
158     std::mutex mFrameLock;
159 
160     // this is only set to true after video session is created without errors
161     // it is reset to false when new sps/pps comes
162     bool mVtbReady = false;
163     // videotoolbox stuff
164     // the fmt, this will be recreated each time
165     // we get a sps+pps frames
166     CMFormatDescriptionRef mCmFmtDesc = nullptr;
167     // The VideoToolbox decoder session: this could fail to
168     // create due to incompatible formats coming from android guest
169     VTDecompressionSessionRef mDecoderSession = nullptr;
170     // where the decoded frame is stored
171     CVPixelBufferRef mDecodedFrame = nullptr;
172 
173     // need this ffmpeg helper to get the w/h/colorspace info
174     // TODO: replace it with webrtc h264 parser once it is built
175     // for all platforms
176     std::unique_ptr<MediaFfmpegVideoHelper> mFfmpegVideoHelper;
177     void extractFrameInfo();
178 
179     uint64_t mTotalFrames = 0;
180     // vtb decoder does not reorder output frames, that means
181     // the video could see jumps all the times
182     int mVtbBufferSize = 8;
183     using PtsPair = std::pair<uint64_t, uint64_t>;
184     std::map<PtsPair, MediaSnapshotState::FrameInfo> mVtbBufferMap;
185 
186 };  // MediaVideoToolBoxVideoHelper
187 
188 }  // namespace emulation
189 }  // namespace android
190