1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "host-common/MediaCudaVideoHelper.h"
16 #include "host-common/MediaCudaDriverHelper.h"
17 #include "host-common/MediaCudaUtils.h"
18 #include "host-common/YuvConverter.h"
19 #include "android/utils/debug.h"
20
21 extern "C" {
22 #define INIT_CUDA_GL 1
23 #include "host-common/dynlink_cuda.h"
24 #include "host-common/dynlink_cudaGL.h"
25 #include "host-common/dynlink_nvcuvid.h"
26 }
// Set to 1 to compile in verbose tracing to stderr through CUDA_DPRINT.
#define MEDIA_CUDA_DEBUG 0

// CUDA_DPRINT(fmt, ...): printf-style trace line prefixed with the calling
// function name and line number. The expansion is a single statement that
// does NOT carry its own trailing semicolon, so it can be used safely in an
// unbraced if/else. When MEDIA_CUDA_DEBUG is 0 it expands to an empty
// statement and its arguments are not evaluated.
#if MEDIA_CUDA_DEBUG
#define CUDA_DPRINT(fmt, ...)                                                 \
    do {                                                                      \
        fprintf(stderr, "media-cuda-video-helper: %s:%d " fmt "\n", __func__, \
                __LINE__, ##__VA_ARGS__);                                     \
    } while (0)
#else
#define CUDA_DPRINT(fmt, ...) \
    do {                      \
    } while (0)
#endif

// NVDEC_API_CALL(call): evaluates `call` exactly once, treats its value as a
// CUresult and traces it (debug builds only) when it differs from
// CUDA_SUCCESS. The failure is only logged; it is not propagated to the
// caller. The internal variable uses an unusual name so that it cannot collide
// with identifiers appearing in `call`.
#define NVDEC_API_CALL(cuvidAPI)                                     \
    do {                                                             \
        const CUresult nvdecApiCallResult_ = (cuvidAPI);             \
        if (nvdecApiCallResult_ != CUDA_SUCCESS) {                   \
            CUDA_DPRINT("%s failed with error code %d\n", #cuvidAPI, \
                        (int)nvdecApiCallResult_);                   \
        }                                                            \
    } while (0)
45
46 namespace android {
47 namespace emulation {
48
// Flag shared by all instances. init() clears it when the CUDA drivers, the
// device, the device name or the context cannot be obtained, and from then on
// init() fails immediately without retrying.
bool MediaCudaVideoHelper::s_isCudaDecoderGood = true;

// Short local names for types nested in the texture pool and snapshot state.
using TextureFrame = MediaTexturePool::TextureFrame;
using FrameInfo = MediaSnapshotState::FrameInfo;
using ColorAspects = MediaSnapshotState::ColorAspects;
54
MediaCudaVideoHelper(OutputTreatmentMode oMode,FrameStorageMode fMode,cudaVideoCodec cudaVideoCodecType)55 MediaCudaVideoHelper::MediaCudaVideoHelper(OutputTreatmentMode oMode,
56 FrameStorageMode fMode,
57 cudaVideoCodec cudaVideoCodecType)
58 : mUseGpuTexture(fMode == FrameStorageMode::USE_GPU_TEXTURE),
59 mCudaVideoCodecType(cudaVideoCodecType) {
60 mIgnoreDecoderOutput = (oMode == OutputTreatmentMode::IGNORE_RESULT);
61 }
62
~MediaCudaVideoHelper()63 MediaCudaVideoHelper::~MediaCudaVideoHelper() {
64 deInit();
65 }
66
deInit()67 void MediaCudaVideoHelper::deInit() {
68 CUDA_DPRINT("deInit calling");
69
70 mSavedDecodedFrames.clear();
71 if (mCudaContext != nullptr) {
72 NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
73 if (mCudaParser != nullptr) {
74 NVDEC_API_CALL(cuvidDestroyVideoParser(mCudaParser));
75 mCudaParser = nullptr;
76 }
77
78 if (mCudaDecoder != nullptr) {
79 NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
80 mCudaDecoder = nullptr;
81 }
82 NVDEC_API_CALL(cuCtxPopCurrent(NULL));
83 NVDEC_API_CALL(cuvidCtxLockDestroy(mCtxLock));
84 }
85
86 if (mCudaContext != nullptr) {
87 CUresult myres = cuCtxDestroy(mCudaContext);
88 if (myres != CUDA_SUCCESS) {
89 CUDA_DPRINT("Failed to destroy cuda context; error code %d",
90 (int)myres);
91 }
92 mCudaContext = nullptr;
93 }
94 }
95
init()96 bool MediaCudaVideoHelper::init() {
97 if (!s_isCudaDecoderGood) {
98 CUDA_DPRINT(
99 "Already verified: cuda decoder does not work on this host");
100 return false;
101 }
102 if (!MediaCudaDriverHelper::initCudaDrivers()) {
103 CUDA_DPRINT("Failed to initCudaDrivers");
104 mIsGood = false;
105 mErrorCode = 1;
106 s_isCudaDecoderGood = false;
107 return false;
108 }
109
110 if (mCudaContext != nullptr) {
111 deInit();
112 }
113
114 // cudat stuff
115 const int gpuIndex = 0;
116 const int cudaFlags = 0;
117 CUdevice cudaDevice = 0;
118 CUresult myres = cuDeviceGet(&cudaDevice, gpuIndex);
119 if (myres != CUDA_SUCCESS) {
120 mIsGood = false;
121 mErrorCode = 2;
122 s_isCudaDecoderGood = false;
123 CUDA_DPRINT("Failed to get cuda device, error code %d", (int)myres);
124 return false;
125 }
126
127 char buf[1024];
128 myres = cuDeviceGetName(buf, sizeof(buf), cudaDevice);
129 if (myres != CUDA_SUCCESS) {
130 mIsGood = false;
131 mErrorCode = 3;
132 s_isCudaDecoderGood = false;
133 CUDA_DPRINT("Failed to get gpu device name, error code %d", (int)myres);
134 return false;
135 }
136
137 CUDA_DPRINT("using gpu device %s", buf);
138
139 myres = cuCtxCreate(&mCudaContext, cudaFlags, cudaDevice);
140 if (myres != CUDA_SUCCESS) {
141 mIsGood = false;
142 s_isCudaDecoderGood = false;
143 CUDA_DPRINT("Failed to create cuda context, error code %d", (int)myres);
144 return false;
145 }
146
147 NVDEC_API_CALL(cuvidCtxLockCreate(&mCtxLock, mCudaContext));
148
149 CUVIDPARSERPARAMS videoParserParameters = {};
150 // videoParserParameters.CodecType = (mType == MediaCodecType::VP8Codec) ?
151 // cudaVideoCodec_VP8 : cudaVideoCodec_VP9;
152 videoParserParameters.CodecType = mCudaVideoCodecType;
153
154 videoParserParameters.ulMaxNumDecodeSurfaces = 1;
155 videoParserParameters.ulMaxDisplayDelay = 1;
156 videoParserParameters.pUserData = this;
157 videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
158 videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
159 videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
160 NVDEC_API_CALL(
161 cuvidCreateVideoParser(&mCudaParser, &videoParserParameters));
162
163 CUDA_DPRINT("Successfully created cuda context %p", mCudaContext);
164 dprint("successfully created cuda video decoder for %s, with gpu texture "
165 "mode %s",
166 mCudaVideoCodecType == cudaVideoCodec_H264
167 ? "H264"
168 : (mCudaVideoCodecType == cudaVideoCodec_VP8 ? "VP8"
169 : "VP9"),
170 mUseGpuTexture ? "on" : "off");
171
172 return true;
173 }
174
decode(const uint8_t * frame,size_t szBytes,uint64_t inputPts)175 void MediaCudaVideoHelper::decode(const uint8_t* frame,
176 size_t szBytes,
177 uint64_t inputPts) {
178 CUDA_DPRINT("%s(frame=%p, sz=%zu)", __func__, frame, szBytes);
179
180 CUVIDSOURCEDATAPACKET packet = {0};
181 packet.payload = frame;
182 packet.payload_size = szBytes;
183 packet.flags = CUVID_PKT_TIMESTAMP;
184 packet.timestamp = inputPts;
185 if (!frame || szBytes == 0) {
186 packet.flags |= CUVID_PKT_ENDOFSTREAM;
187 } else {
188 ++mNumInputFrame;
189 }
190 NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
191 }
192
flush()193 void MediaCudaVideoHelper::flush() {
194 CUDA_DPRINT("started flushing");
195 CUVIDSOURCEDATAPACKET packet = {0};
196 packet.payload = NULL;
197 packet.payload_size = 0;
198 packet.flags |= CUVID_PKT_ENDOFSTREAM;
199 NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
200 CUDA_DPRINT("done one flushing");
201 }
202
HandleVideoSequence(CUVIDEOFORMAT * pVideoFormat)203 int MediaCudaVideoHelper::HandleVideoSequence(CUVIDEOFORMAT* pVideoFormat) {
204 int nDecodeSurface = 8; // need 8 for 4K video
205
206 CUVIDDECODECAPS decodecaps;
207 memset(&decodecaps, 0, sizeof(decodecaps));
208
209 decodecaps.eCodecType = pVideoFormat->codec;
210 decodecaps.eChromaFormat = pVideoFormat->chroma_format;
211 decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
212
213 NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
214 NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
215 NVDEC_API_CALL(cuCtxPopCurrent(NULL));
216
217 if (!decodecaps.bIsSupported) {
218 mIsGood = false;
219 mErrorCode = 4;
220 CUDA_DPRINT("Codec not supported on this GPU.");
221 return nDecodeSurface;
222 }
223
224 if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) ||
225 (pVideoFormat->coded_height > decodecaps.nMaxHeight)) {
226 CUDA_DPRINT("Resolution not supported on this GPU");
227 mIsGood = false;
228 mErrorCode = 5;
229 return nDecodeSurface;
230 }
231
232 if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) >
233 decodecaps.nMaxMBCount) {
234 CUDA_DPRINT("MBCount not supported on this GPU");
235 mIsGood = false;
236 mErrorCode = 6;
237 return nDecodeSurface;
238 }
239
240 mLumaWidth =
241 pVideoFormat->display_area.right - pVideoFormat->display_area.left;
242 mLumaHeight =
243 pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
244 mChromaHeight = mLumaHeight * 0.5; // NV12
245 mBPP = pVideoFormat->bit_depth_luma_minus8 > 0 ? 2 : 1;
246
247 if (mCudaVideoCodecType == cudaVideoCodec_H264) {
248 if (pVideoFormat->video_signal_description.video_full_range_flag)
249 mColorRange = 2;
250 else
251 mColorRange = 0;
252
253 mColorPrimaries =
254 pVideoFormat->video_signal_description.color_primaries;
255 mColorTransfer =
256 pVideoFormat->video_signal_description.transfer_characteristics;
257 mColorSpace =
258 pVideoFormat->video_signal_description.matrix_coefficients;
259 }
260
261 CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
262 videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
263 videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
264 videoDecodeCreateInfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
265 CUDA_DPRINT("output format is %d", videoDecodeCreateInfo.OutputFormat);
266 videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
267 if (pVideoFormat->progressive_sequence)
268 videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
269 else
270 videoDecodeCreateInfo.DeinterlaceMode =
271 cudaVideoDeinterlaceMode_Adaptive;
272 videoDecodeCreateInfo.ulNumOutputSurfaces = 1;
273 // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by
274 // NVDEC hardware
275 videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
276 videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
277 videoDecodeCreateInfo.vidLock = mCtxLock;
278 videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
279 videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
280 if (mOutputHeight != mLumaHeight || mOutputWidth != mLumaWidth) {
281 CUDA_DPRINT("old width %d old height %d", mOutputWidth, mOutputHeight);
282 mOutputWidth = mLumaWidth;
283 mOutputHeight = mLumaHeight;
284 CUDA_DPRINT("new width %d new height %d", mOutputWidth, mOutputHeight);
285 unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
286 if (mOutBufferSize < newOutBufferSize) {
287 mOutBufferSize = newOutBufferSize;
288 }
289 }
290
291 videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
292 videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;
293
294 mSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
295 mSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;
296
297 NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
298 if (mCudaDecoder != nullptr) {
299 NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
300 mCudaDecoder = nullptr;
301 }
302 {
303 size_t free, total;
304 cuMemGetInfo(&free, &total);
305 CUDA_DPRINT("free memory %g M, total %g M", free / 1048576.0,
306 total / 1048576.0);
307 }
308 NVDEC_API_CALL(cuCtxPopCurrent(NULL));
309 NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
310 NVDEC_API_CALL(cuvidCreateDecoder(&mCudaDecoder, &videoDecodeCreateInfo));
311 NVDEC_API_CALL(cuCtxPopCurrent(NULL));
312 CUDA_DPRINT("successfully called. decoder %p", mCudaDecoder);
313 return nDecodeSurface;
314 }
315
HandlePictureDecode(CUVIDPICPARAMS * pPicParams)316 int MediaCudaVideoHelper::HandlePictureDecode(CUVIDPICPARAMS* pPicParams) {
317 NVDEC_API_CALL(cuvidDecodePicture(mCudaDecoder, pPicParams));
318 CUDA_DPRINT("successfully called.");
319 return 1;
320 }
321
HandlePictureDisplay(CUVIDPARSERDISPINFO * pDispInfo)322 int MediaCudaVideoHelper::HandlePictureDisplay(CUVIDPARSERDISPINFO* pDispInfo) {
323 if (mIgnoreDecoderOutput) {
324 return 1;
325 }
326 constexpr int MAX_NUM_INPUT_WITHOUT_OUTPUT = 16;
327 if (mNumOutputFrame == 0 && mNumInputFrame > MAX_NUM_INPUT_WITHOUT_OUTPUT) {
328 // after more than 16 inputs, there is still no output,
329 // probably corrupted stream, ignore everything from now on
330 dprint("WARNING: %d frames decoded witout any output, possibly bad "
331 "input stream. Ignore output frames (they might be corrupted) "
332 "from now on.",
333 MAX_NUM_INPUT_WITHOUT_OUTPUT);
334 return 0;
335 }
336
337 CUVIDPROCPARAMS videoProcessingParameters = {};
338 videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
339 videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
340 videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
341 videoProcessingParameters.unpaired_field =
342 pDispInfo->repeat_first_field < 0;
343 videoProcessingParameters.output_stream = 0;
344 uint64_t myOutputPts = pDispInfo->timestamp;
345
346 CUdeviceptr dpSrcFrame = 0;
347 unsigned int nSrcPitch = 0;
348 CUresult errorCode = cuvidMapVideoFrame(mCudaDecoder, pDispInfo->picture_index,
349 &dpSrcFrame, &nSrcPitch,
350 &videoProcessingParameters);
351 if (errorCode != CUDA_SUCCESS) {
352 CUDA_DPRINT("failed to call cuvidMapVideoFrame with error code %d\n", (int)errorCode);
353 return 0;
354 }
355
356 NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
357 unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
358 std::vector<uint8_t> myFrame;
359 TextureFrame texFrame;
360 if (mUseGpuTexture && mTexturePool != nullptr) {
361 media_cuda_utils_copy_context my_copy_context{
362 .src_frame = dpSrcFrame,
363 .src_pitch = nSrcPitch,
364 .src_surface_height = mSurfaceHeight,
365 .dest_width = mOutputWidth,
366 .dest_height = mOutputHeight,
367 };
368 texFrame = mTexturePool->getTextureFrame(mOutputWidth, mOutputHeight);
369 mTexturePool->saveDecodedFrameToTexture(
370 texFrame, &my_copy_context,
371 (void*)media_cuda_utils_nv12_updater);
372 } else {
373 myFrame.resize(newOutBufferSize);
374 uint8_t* pDecodedFrame = &(myFrame[0]);
375
376 CUDA_MEMCPY2D m = {0};
377 m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
378 m.srcDevice = dpSrcFrame;
379 m.srcPitch = nSrcPitch;
380 m.dstMemoryType = CU_MEMORYTYPE_HOST;
381 m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
382 m.dstPitch = mOutputWidth * mBPP;
383 m.WidthInBytes = mOutputWidth * mBPP;
384 m.Height = mLumaHeight;
385 CUDA_DPRINT("dstDevice %p, dstPitch %d, WidthInBytes %d Height %d",
386 m.dstHost, (int)m.dstPitch, (int)m.WidthInBytes,
387 (int)m.Height);
388
389 NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));
390
391 m.srcDevice = (CUdeviceptr)((uint8_t*)dpSrcFrame +
392 m.srcPitch * mSurfaceHeight);
393 m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
394 m.dstPitch * mLumaHeight);
395 m.Height = mChromaHeight;
396 NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));
397 YuvConverter<uint8_t> convert8(mOutputWidth, mOutputHeight);
398 convert8.UVInterleavedToPlanar(pDecodedFrame);
399 }
400
401 NVDEC_API_CALL(cuStreamSynchronize(0));
402 NVDEC_API_CALL(cuCtxPopCurrent(NULL));
403
404 NVDEC_API_CALL(cuvidUnmapVideoFrame(mCudaDecoder, dpSrcFrame));
405 {
406 std::lock_guard<std::mutex> g(mFrameLock);
407
408 mSavedDecodedFrames.push_back(MediaSnapshotState::FrameInfo{
409 std::move(myFrame),
410 std::vector<uint32_t>{texFrame.Ytex, texFrame.UVtex},
411 (int)mOutputWidth, (int)mOutputHeight, myOutputPts,
412 ColorAspects{mColorPrimaries, mColorRange, mColorTransfer,
413 mColorSpace}});
414 }
415 ++mNumOutputFrame;
416 CUDA_DPRINT("successfully called.");
417 return 1;
418 }
419
420 } // namespace emulation
421 } // namespace android
422