1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "host-common/MediaCudaVideoHelper.h"
16 #include "host-common/MediaCudaDriverHelper.h"
17 #include "host-common/MediaCudaUtils.h"
18 #include "host-common/YuvConverter.h"
19 #include "android/utils/debug.h"
20 
21 extern "C" {
22 #define INIT_CUDA_GL 1
23 #include "host-common/dynlink_cuda.h"
24 #include "host-common/dynlink_cudaGL.h"
25 #include "host-common/dynlink_nvcuvid.h"
26 }
// Set to 1 to compile in verbose tracing on stderr.
#define MEDIA_CUDA_DEBUG 0

// CUDA_DPRINT(fmt, ...): printf-style trace line, prefixed with the calling
// function and line number; a trailing newline is appended automatically, so
// format strings should not end with one. When MEDIA_CUDA_DEBUG is 0 the macro
// is an empty statement and its arguments are NOT evaluated.
//
// Both variants expand to a single `do { } while (0)` statement (no trailing
// semicolon) so that `if (x) CUDA_DPRINT(...); else ...` parses correctly.
#if MEDIA_CUDA_DEBUG
#define CUDA_DPRINT(fmt, ...)                                                 \
    do {                                                                      \
        fprintf(stderr, "media-cuda-video-helper: %s:%d " fmt "\n", __func__, \
                __LINE__, ##__VA_ARGS__);                                     \
    } while (0)
#else
#define CUDA_DPRINT(fmt, ...) \
    do {                      \
    } while (0)
#endif

// NVDEC_API_CALL(call): evaluates `call` exactly once and, if it does not
// return CUDA_SUCCESS, traces the failing expression and its error code.
// The failure is only logged; execution continues (best effort).
// The result variable has a deliberately unusual name so that it cannot
// capture an identifier used inside `call`.
#define NVDEC_API_CALL(cuvidAPI)                                   \
    do {                                                           \
        CUresult nvdecApiCallResult_ = cuvidAPI;                   \
        if (nvdecApiCallResult_ != CUDA_SUCCESS) {                 \
            CUDA_DPRINT("%s failed with error code %d", #cuvidAPI, \
                        static_cast<int>(nvdecApiCallResult_));    \
        }                                                          \
    } while (0)
45 
46 namespace android {
47 namespace emulation {
48 
49 bool MediaCudaVideoHelper::s_isCudaDecoderGood = true;
50 
51 using TextureFrame = MediaTexturePool::TextureFrame;
52 using FrameInfo = MediaSnapshotState::FrameInfo;
53 using ColorAspects = MediaSnapshotState::ColorAspects;
54 
MediaCudaVideoHelper(OutputTreatmentMode oMode,FrameStorageMode fMode,cudaVideoCodec cudaVideoCodecType)55 MediaCudaVideoHelper::MediaCudaVideoHelper(OutputTreatmentMode oMode,
56                                            FrameStorageMode fMode,
57                                            cudaVideoCodec cudaVideoCodecType)
58     : mUseGpuTexture(fMode == FrameStorageMode::USE_GPU_TEXTURE),
59       mCudaVideoCodecType(cudaVideoCodecType) {
60     mIgnoreDecoderOutput = (oMode == OutputTreatmentMode::IGNORE_RESULT);
61 }
62 
~MediaCudaVideoHelper()63 MediaCudaVideoHelper::~MediaCudaVideoHelper() {
64     deInit();
65 }
66 
deInit()67 void MediaCudaVideoHelper::deInit() {
68     CUDA_DPRINT("deInit calling");
69 
70     mSavedDecodedFrames.clear();
71     if (mCudaContext != nullptr) {
72         NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
73         if (mCudaParser != nullptr) {
74             NVDEC_API_CALL(cuvidDestroyVideoParser(mCudaParser));
75             mCudaParser = nullptr;
76         }
77 
78         if (mCudaDecoder != nullptr) {
79             NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
80             mCudaDecoder = nullptr;
81         }
82         NVDEC_API_CALL(cuCtxPopCurrent(NULL));
83         NVDEC_API_CALL(cuvidCtxLockDestroy(mCtxLock));
84     }
85 
86     if (mCudaContext != nullptr) {
87         CUresult myres = cuCtxDestroy(mCudaContext);
88         if (myres != CUDA_SUCCESS) {
89             CUDA_DPRINT("Failed to destroy cuda context; error code %d",
90                         (int)myres);
91         }
92         mCudaContext = nullptr;
93     }
94 }
95 
init()96 bool MediaCudaVideoHelper::init() {
97     if (!s_isCudaDecoderGood) {
98         CUDA_DPRINT(
99                 "Already verified: cuda decoder does not work on this host");
100         return false;
101     }
102     if (!MediaCudaDriverHelper::initCudaDrivers()) {
103         CUDA_DPRINT("Failed to initCudaDrivers");
104         mIsGood = false;
105         mErrorCode = 1;
106         s_isCudaDecoderGood = false;
107         return false;
108     }
109 
110     if (mCudaContext != nullptr) {
111         deInit();
112     }
113 
114     // cudat stuff
115     const int gpuIndex = 0;
116     const int cudaFlags = 0;
117     CUdevice cudaDevice = 0;
118     CUresult myres = cuDeviceGet(&cudaDevice, gpuIndex);
119     if (myres != CUDA_SUCCESS) {
120         mIsGood = false;
121         mErrorCode = 2;
122         s_isCudaDecoderGood = false;
123         CUDA_DPRINT("Failed to get cuda device, error code %d", (int)myres);
124         return false;
125     }
126 
127     char buf[1024];
128     myres = cuDeviceGetName(buf, sizeof(buf), cudaDevice);
129     if (myres != CUDA_SUCCESS) {
130         mIsGood = false;
131         mErrorCode = 3;
132         s_isCudaDecoderGood = false;
133         CUDA_DPRINT("Failed to get gpu device name, error code %d", (int)myres);
134         return false;
135     }
136 
137     CUDA_DPRINT("using gpu device %s", buf);
138 
139     myres = cuCtxCreate(&mCudaContext, cudaFlags, cudaDevice);
140     if (myres != CUDA_SUCCESS) {
141         mIsGood = false;
142         s_isCudaDecoderGood = false;
143         CUDA_DPRINT("Failed to create cuda context, error code %d", (int)myres);
144         return false;
145     }
146 
147     NVDEC_API_CALL(cuvidCtxLockCreate(&mCtxLock, mCudaContext));
148 
149     CUVIDPARSERPARAMS videoParserParameters = {};
150     // videoParserParameters.CodecType = (mType == MediaCodecType::VP8Codec) ?
151     // cudaVideoCodec_VP8 : cudaVideoCodec_VP9;
152     videoParserParameters.CodecType = mCudaVideoCodecType;
153 
154     videoParserParameters.ulMaxNumDecodeSurfaces = 1;
155     videoParserParameters.ulMaxDisplayDelay = 1;
156     videoParserParameters.pUserData = this;
157     videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
158     videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
159     videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
160     NVDEC_API_CALL(
161             cuvidCreateVideoParser(&mCudaParser, &videoParserParameters));
162 
163     CUDA_DPRINT("Successfully created cuda context %p", mCudaContext);
164     dprint("successfully created cuda video decoder for %s, with gpu texture "
165            "mode %s",
166            mCudaVideoCodecType == cudaVideoCodec_H264
167                    ? "H264"
168                    : (mCudaVideoCodecType == cudaVideoCodec_VP8 ? "VP8"
169                                                                 : "VP9"),
170            mUseGpuTexture ? "on" : "off");
171 
172     return true;
173 }
174 
decode(const uint8_t * frame,size_t szBytes,uint64_t inputPts)175 void MediaCudaVideoHelper::decode(const uint8_t* frame,
176                                   size_t szBytes,
177                                   uint64_t inputPts) {
178     CUDA_DPRINT("%s(frame=%p, sz=%zu)", __func__, frame, szBytes);
179 
180     CUVIDSOURCEDATAPACKET packet = {0};
181     packet.payload = frame;
182     packet.payload_size = szBytes;
183     packet.flags = CUVID_PKT_TIMESTAMP;
184     packet.timestamp = inputPts;
185     if (!frame || szBytes == 0) {
186         packet.flags |= CUVID_PKT_ENDOFSTREAM;
187     } else {
188         ++mNumInputFrame;
189     }
190     NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
191 }
192 
flush()193 void MediaCudaVideoHelper::flush() {
194     CUDA_DPRINT("started flushing");
195     CUVIDSOURCEDATAPACKET packet = {0};
196     packet.payload = NULL;
197     packet.payload_size = 0;
198     packet.flags |= CUVID_PKT_ENDOFSTREAM;
199     NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
200     CUDA_DPRINT("done one flushing");
201 }
202 
HandleVideoSequence(CUVIDEOFORMAT * pVideoFormat)203 int MediaCudaVideoHelper::HandleVideoSequence(CUVIDEOFORMAT* pVideoFormat) {
204     int nDecodeSurface = 8;  // need 8 for 4K video
205 
206     CUVIDDECODECAPS decodecaps;
207     memset(&decodecaps, 0, sizeof(decodecaps));
208 
209     decodecaps.eCodecType = pVideoFormat->codec;
210     decodecaps.eChromaFormat = pVideoFormat->chroma_format;
211     decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
212 
213     NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
214     NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
215     NVDEC_API_CALL(cuCtxPopCurrent(NULL));
216 
217     if (!decodecaps.bIsSupported) {
218         mIsGood = false;
219         mErrorCode = 4;
220         CUDA_DPRINT("Codec not supported on this GPU.");
221         return nDecodeSurface;
222     }
223 
224     if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) ||
225         (pVideoFormat->coded_height > decodecaps.nMaxHeight)) {
226         CUDA_DPRINT("Resolution not supported on this GPU");
227         mIsGood = false;
228         mErrorCode = 5;
229         return nDecodeSurface;
230     }
231 
232     if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) >
233         decodecaps.nMaxMBCount) {
234         CUDA_DPRINT("MBCount not supported on this GPU");
235         mIsGood = false;
236         mErrorCode = 6;
237         return nDecodeSurface;
238     }
239 
240     mLumaWidth =
241             pVideoFormat->display_area.right - pVideoFormat->display_area.left;
242     mLumaHeight =
243             pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
244     mChromaHeight = mLumaHeight * 0.5;  // NV12
245     mBPP = pVideoFormat->bit_depth_luma_minus8 > 0 ? 2 : 1;
246 
247     if (mCudaVideoCodecType == cudaVideoCodec_H264) {
248         if (pVideoFormat->video_signal_description.video_full_range_flag)
249             mColorRange = 2;
250         else
251             mColorRange = 0;
252 
253         mColorPrimaries =
254                 pVideoFormat->video_signal_description.color_primaries;
255         mColorTransfer =
256                 pVideoFormat->video_signal_description.transfer_characteristics;
257         mColorSpace =
258                 pVideoFormat->video_signal_description.matrix_coefficients;
259     }
260 
261     CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
262     videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
263     videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
264     videoDecodeCreateInfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
265     CUDA_DPRINT("output format is %d", videoDecodeCreateInfo.OutputFormat);
266     videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
267     if (pVideoFormat->progressive_sequence)
268         videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
269     else
270         videoDecodeCreateInfo.DeinterlaceMode =
271                 cudaVideoDeinterlaceMode_Adaptive;
272     videoDecodeCreateInfo.ulNumOutputSurfaces = 1;
273     // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by
274     // NVDEC hardware
275     videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
276     videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
277     videoDecodeCreateInfo.vidLock = mCtxLock;
278     videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
279     videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
280     if (mOutputHeight != mLumaHeight || mOutputWidth != mLumaWidth) {
281         CUDA_DPRINT("old width %d old height %d", mOutputWidth, mOutputHeight);
282         mOutputWidth = mLumaWidth;
283         mOutputHeight = mLumaHeight;
284         CUDA_DPRINT("new width %d new height %d", mOutputWidth, mOutputHeight);
285         unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
286         if (mOutBufferSize < newOutBufferSize) {
287             mOutBufferSize = newOutBufferSize;
288         }
289     }
290 
291     videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
292     videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;
293 
294     mSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
295     mSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;
296 
297     NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
298     if (mCudaDecoder != nullptr) {
299         NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
300         mCudaDecoder = nullptr;
301     }
302     {
303         size_t free, total;
304         cuMemGetInfo(&free, &total);
305         CUDA_DPRINT("free memory %g M, total %g M", free / 1048576.0,
306                     total / 1048576.0);
307     }
308     NVDEC_API_CALL(cuCtxPopCurrent(NULL));
309     NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
310     NVDEC_API_CALL(cuvidCreateDecoder(&mCudaDecoder, &videoDecodeCreateInfo));
311     NVDEC_API_CALL(cuCtxPopCurrent(NULL));
312     CUDA_DPRINT("successfully called. decoder %p", mCudaDecoder);
313     return nDecodeSurface;
314 }
315 
HandlePictureDecode(CUVIDPICPARAMS * pPicParams)316 int MediaCudaVideoHelper::HandlePictureDecode(CUVIDPICPARAMS* pPicParams) {
317     NVDEC_API_CALL(cuvidDecodePicture(mCudaDecoder, pPicParams));
318     CUDA_DPRINT("successfully called.");
319     return 1;
320 }
321 
HandlePictureDisplay(CUVIDPARSERDISPINFO * pDispInfo)322 int MediaCudaVideoHelper::HandlePictureDisplay(CUVIDPARSERDISPINFO* pDispInfo) {
323     if (mIgnoreDecoderOutput) {
324         return 1;
325     }
326     constexpr int MAX_NUM_INPUT_WITHOUT_OUTPUT = 16;
327     if (mNumOutputFrame == 0 && mNumInputFrame > MAX_NUM_INPUT_WITHOUT_OUTPUT) {
328         // after more than 16 inputs, there is still no output,
329         // probably corrupted stream, ignore everything from now on
330         dprint("WARNING: %d frames decoded witout any output, possibly bad "
331                "input stream. Ignore output frames (they might be corrupted) "
332                "from now on.",
333                MAX_NUM_INPUT_WITHOUT_OUTPUT);
334         return 0;
335     }
336 
337     CUVIDPROCPARAMS videoProcessingParameters = {};
338     videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
339     videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
340     videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
341     videoProcessingParameters.unpaired_field =
342             pDispInfo->repeat_first_field < 0;
343     videoProcessingParameters.output_stream = 0;
344     uint64_t myOutputPts = pDispInfo->timestamp;
345 
346     CUdeviceptr dpSrcFrame = 0;
347     unsigned int nSrcPitch = 0;
348     CUresult errorCode = cuvidMapVideoFrame(mCudaDecoder, pDispInfo->picture_index,
349                                       &dpSrcFrame, &nSrcPitch,
350                                       &videoProcessingParameters);
351     if (errorCode != CUDA_SUCCESS) {
352         CUDA_DPRINT("failed to call cuvidMapVideoFrame with error code %d\n", (int)errorCode);
353         return 0;
354     }
355 
356     NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
357     unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
358     std::vector<uint8_t> myFrame;
359     TextureFrame texFrame;
360     if (mUseGpuTexture && mTexturePool != nullptr) {
361         media_cuda_utils_copy_context my_copy_context{
362                 .src_frame = dpSrcFrame,
363                 .src_pitch = nSrcPitch,
364                 .src_surface_height = mSurfaceHeight,
365                 .dest_width = mOutputWidth,
366                 .dest_height = mOutputHeight,
367         };
368         texFrame = mTexturePool->getTextureFrame(mOutputWidth, mOutputHeight);
369         mTexturePool->saveDecodedFrameToTexture(
370                 texFrame, &my_copy_context,
371                 (void*)media_cuda_utils_nv12_updater);
372     } else {
373         myFrame.resize(newOutBufferSize);
374         uint8_t* pDecodedFrame = &(myFrame[0]);
375 
376         CUDA_MEMCPY2D m = {0};
377         m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
378         m.srcDevice = dpSrcFrame;
379         m.srcPitch = nSrcPitch;
380         m.dstMemoryType = CU_MEMORYTYPE_HOST;
381         m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
382         m.dstPitch = mOutputWidth * mBPP;
383         m.WidthInBytes = mOutputWidth * mBPP;
384         m.Height = mLumaHeight;
385         CUDA_DPRINT("dstDevice %p, dstPitch %d, WidthInBytes %d Height %d",
386                     m.dstHost, (int)m.dstPitch, (int)m.WidthInBytes,
387                     (int)m.Height);
388 
389         NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));
390 
391         m.srcDevice = (CUdeviceptr)((uint8_t*)dpSrcFrame +
392                                     m.srcPitch * mSurfaceHeight);
393         m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
394                                                 m.dstPitch * mLumaHeight);
395         m.Height = mChromaHeight;
396         NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));
397         YuvConverter<uint8_t> convert8(mOutputWidth, mOutputHeight);
398         convert8.UVInterleavedToPlanar(pDecodedFrame);
399     }
400 
401     NVDEC_API_CALL(cuStreamSynchronize(0));
402     NVDEC_API_CALL(cuCtxPopCurrent(NULL));
403 
404     NVDEC_API_CALL(cuvidUnmapVideoFrame(mCudaDecoder, dpSrcFrame));
405     {
406         std::lock_guard<std::mutex> g(mFrameLock);
407 
408         mSavedDecodedFrames.push_back(MediaSnapshotState::FrameInfo{
409                 std::move(myFrame),
410                 std::vector<uint32_t>{texFrame.Ytex, texFrame.UVtex},
411                 (int)mOutputWidth, (int)mOutputHeight, myOutputPts,
412                 ColorAspects{mColorPrimaries, mColorRange, mColorTransfer,
413                              mColorSpace}});
414     }
415     ++mNumOutputFrame;
416     CUDA_DPRINT("successfully called.");
417     return 1;
418 }
419 
420 }  // namespace emulation
421 }  // namespace android
422