1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "RenderThread.h"
17 
18 #include "ChannelStream.h"
19 #include "FrameBuffer.h"
20 #include "ReadBuffer.h"
21 #include "RenderChannelImpl.h"
22 #include "RenderThreadInfo.h"
23 #include "RingStream.h"
24 #include "VkDecoderContext.h"
25 #include "aemu/base/HealthMonitor.h"
26 #include "aemu/base/Metrics.h"
27 #include "aemu/base/files/StreamSerializing.h"
28 #include "aemu/base/synchronization/Lock.h"
29 #include "aemu/base/synchronization/MessageChannel.h"
30 #include "aemu/base/system/System.h"
31 #include "apigen-codec-common/ChecksumCalculatorThreadInfo.h"
32 #include "host-common/logging.h"
33 #include "vulkan/VkCommonOperations.h"
34 
35 #if GFXSTREAM_ENABLE_HOST_GLES
36 #include "RenderControl.h"
37 #endif
38 
39 #define EMUGL_DEBUG_LEVEL 0
40 #include "host-common/debug.h"
41 
42 #ifndef _WIN32
43 #include <unistd.h>
44 #endif
45 
46 #include <assert.h>
47 #include <string.h>
48 
49 #include <unordered_map>
50 
51 namespace gfxstream {
52 
53 using android::base::AutoLock;
54 using android::base::EventHangMetadata;
55 using android::base::MessageChannel;
56 using emugl::GfxApiLogger;
57 using vk::VkDecoderContext;
58 
59 struct RenderThread::SnapshotObjects {
60     RenderThreadInfo* threadInfo;
61     ChecksumCalculator* checksumCalc;
62     ChannelStream* channelStream;
63     RingStream* ringStream;
64     ReadBuffer* readBuffer;
65 };
66 
getBenchmarkEnabledFromEnv()67 static bool getBenchmarkEnabledFromEnv() {
68     auto threadEnabled = android::base::getEnvironmentVariable("ANDROID_EMUGL_RENDERTHREAD_STATS");
69     if (threadEnabled == "1") return true;
70     return false;
71 }
72 
73 // Start with a smaller buffer to not waste memory on a low-used render threads.
74 static constexpr int kStreamBufferSize = 128 * 1024;
75 
76 // Requires this many threads on the system available to run unlimited.
77 static constexpr int kMinThreadsToRunUnlimited = 5;
78 
79 // A thread run limiter that limits render threads to run one slice at a time.
80 static android::base::Lock sThreadRunLimiter;
81 
RenderThread(RenderChannelImpl * channel,android::base::Stream * loadStream,uint32_t virtioGpuContextId)82 RenderThread::RenderThread(RenderChannelImpl* channel,
83                            android::base::Stream* loadStream,
84                            uint32_t virtioGpuContextId)
85     : android::base::Thread(android::base::ThreadFlags::MaskSignals, 2 * 1024 * 1024),
86       mChannel(channel),
87       mRunInLimitedMode(android::base::getCpuCoreCount() < kMinThreadsToRunUnlimited),
88       mContextId(virtioGpuContextId)
89 {
90     if (loadStream) {
91         const bool success = loadStream->getByte();
92         if (success) {
93             mStream.emplace(0);
94             android::base::loadStream(loadStream, &*mStream);
95             mState = SnapshotState::StartLoading;
96         } else {
97             mFinished.store(true, std::memory_order_relaxed);
98         }
99     }
100 }
101 
RenderThread(struct asg_context context,android::base::Stream * loadStream,android::emulation::asg::ConsumerCallbacks callbacks,uint32_t contextId,uint32_t capsetId,std::optional<std::string> nameOpt)102 RenderThread::RenderThread(
103         struct asg_context context,
104         android::base::Stream* loadStream,
105         android::emulation::asg::ConsumerCallbacks callbacks,
106         uint32_t contextId, uint32_t capsetId,
107         std::optional<std::string> nameOpt)
108     : android::base::Thread(android::base::ThreadFlags::MaskSignals, 2 * 1024 * 1024,
109                             std::move(nameOpt)),
110       mRingStream(
111           new RingStream(context, callbacks, kStreamBufferSize)),
112       mContextId(contextId), mCapsetId(capsetId) {
113     if (loadStream) {
114         const bool success = loadStream->getByte();
115         if (success) {
116             mStream.emplace(0);
117             android::base::loadStream(loadStream, &*mStream);
118             mState = SnapshotState::StartLoading;
119         } else {
120             mFinished.store(true, std::memory_order_relaxed);
121         }
122     }
123 }
124 
125 // Note: the RenderThread destructor might be called from a different thread
126 // than from RenderThread::main() so thread specific cleanup likely belongs at
127 // the end of RenderThread::main().
128 RenderThread::~RenderThread() = default;
129 
pausePreSnapshot()130 void RenderThread::pausePreSnapshot() {
131     AutoLock lock(mLock);
132     assert(mState == SnapshotState::Empty);
133     mStream.emplace();
134     mState = SnapshotState::StartSaving;
135     if (mRingStream) {
136         mRingStream->pausePreSnapshot();
137         // mCondVar.broadcastAndUnlock(&lock);
138     }
139     if (mChannel) {
140         mChannel->pausePreSnapshot();
141         mCondVar.broadcastAndUnlock(&lock);
142     }
143 }
144 
resume(bool waitForSave)145 void RenderThread::resume(bool waitForSave) {
146     AutoLock lock(mLock);
147     // This function can be called for a thread from pre-snapshot loading
148     // state; it doesn't need to do anything.
149     if (mState == SnapshotState::Empty) {
150         return;
151     }
152     if (mRingStream) mRingStream->resume();
153     if (waitForSave) {
154         waitForSnapshotCompletion(&lock);
155     }
156     mNeedReloadProcessResources = true;
157     mStream.clear();
158     mState = SnapshotState::Empty;
159     if (mChannel) mChannel->resume();
160     if (mRingStream) mRingStream->resume();
161     mCondVar.broadcastAndUnlock(&lock);
162 }
163 
save(android::base::Stream * stream)164 void RenderThread::save(android::base::Stream* stream) {
165     bool success;
166     {
167         AutoLock lock(mLock);
168         assert(mState == SnapshotState::StartSaving ||
169                mState == SnapshotState::InProgress ||
170                mState == SnapshotState::Finished);
171         waitForSnapshotCompletion(&lock);
172         success = mState == SnapshotState::Finished;
173     }
174 
175     if (success) {
176         assert(mStream);
177         stream->putByte(1);
178         android::base::saveStream(stream, *mStream);
179     } else {
180         stream->putByte(0);
181     }
182 }
183 
waitForSnapshotCompletion(AutoLock * lock)184 void RenderThread::waitForSnapshotCompletion(AutoLock* lock) {
185     while (mState != SnapshotState::Finished &&
186            !mFinished.load(std::memory_order_relaxed)) {
187         mCondVar.wait(lock);
188     }
189 }
190 
191 template <class OpImpl>
snapshotOperation(AutoLock * lock,OpImpl && implFunc)192 void RenderThread::snapshotOperation(AutoLock* lock, OpImpl&& implFunc) {
193     assert(isPausedForSnapshotLocked());
194     mState = SnapshotState::InProgress;
195     mCondVar.broadcastAndUnlock(lock);
196 
197     implFunc();
198 
199     lock->lock();
200 
201     mState = SnapshotState::Finished;
202     mCondVar.broadcast();
203 
204     // Only return after we're allowed to proceed.
205     while (isPausedForSnapshotLocked()) {
206         mCondVar.wait(lock);
207     }
208 }
209 
loadImpl(AutoLock * lock,const SnapshotObjects & objects)210 void RenderThread::loadImpl(AutoLock* lock, const SnapshotObjects& objects) {
211     snapshotOperation(lock, [this, &objects] {
212         objects.readBuffer->onLoad(&*mStream);
213         if (objects.channelStream) objects.channelStream->load(&*mStream);
214         if (objects.ringStream) objects.ringStream->load(&*mStream);
215         objects.checksumCalc->load(&*mStream);
216         objects.threadInfo->onLoad(&*mStream);
217     });
218 }
219 
saveImpl(AutoLock * lock,const SnapshotObjects & objects)220 void RenderThread::saveImpl(AutoLock* lock, const SnapshotObjects& objects) {
221     snapshotOperation(lock, [this, &objects] {
222         objects.readBuffer->onSave(&*mStream);
223         if (objects.channelStream) objects.channelStream->save(&*mStream);
224         if (objects.ringStream) objects.ringStream->save(&*mStream);
225         objects.checksumCalc->save(&*mStream);
226         objects.threadInfo->onSave(&*mStream);
227     });
228 }
229 
isPausedForSnapshotLocked() const230 bool RenderThread::isPausedForSnapshotLocked() const {
231     return mState != SnapshotState::Empty;
232 }
233 
doSnapshotOperation(const SnapshotObjects & objects,SnapshotState state)234 bool RenderThread::doSnapshotOperation(const SnapshotObjects& objects,
235                                        SnapshotState state) {
236     AutoLock lock(mLock);
237     if (mState == state) {
238         switch (state) {
239             case SnapshotState::StartLoading:
240                 loadImpl(&lock, objects);
241                 return true;
242             case SnapshotState::StartSaving:
243                 saveImpl(&lock, objects);
244                 return true;
245             default:
246                 return false;
247         }
248     }
249     return false;
250 }
251 
setFinished()252 void RenderThread::setFinished() {
253     // Make sure it never happens that we wait forever for the thread to
254     // save to snapshot while it was not even going to.
255     AutoLock lock(mLock);
256     mFinished.store(true, std::memory_order_relaxed);
257     if (mState != SnapshotState::Empty) {
258         mCondVar.broadcastAndUnlock(&lock);
259     }
260 }
261 
main()262 intptr_t RenderThread::main() {
263     if (mFinished.load(std::memory_order_relaxed)) {
264         ERR("Error: fail loading a RenderThread @%p", this);
265         return 0;
266     }
267 
268     RenderThreadInfo tInfo;
269     ChecksumCalculatorThreadInfo tChecksumInfo;
270     ChecksumCalculator& checksumCalc = tChecksumInfo.get();
271     bool needRestoreFromSnapshot = false;
272 
273     //
274     // initialize decoders
275 #if GFXSTREAM_ENABLE_HOST_GLES
276     if (!FrameBuffer::getFB()->getFeatures().GuestUsesAngle.enabled) {
277         tInfo.initGl();
278     }
279 
280     initRenderControlContext(&tInfo.m_rcDec);
281 #endif
282 
283     if (!mChannel && !mRingStream) {
284         GL_LOG("Exited a loader RenderThread @%p", this);
285         mFinished.store(true, std::memory_order_relaxed);
286         return 0;
287     }
288 
289     ChannelStream stream(mChannel, RenderChannel::Buffer::kSmallSize);
290     IOStream* ioStream =
291         mChannel ? (IOStream*)&stream : (IOStream*)mRingStream.get();
292 
293     ReadBuffer readBuf(kStreamBufferSize);
294     if (mRingStream) {
295         readBuf.setNeededFreeTailSize(0);
296     }
297 
298     const SnapshotObjects snapshotObjects = {
299         &tInfo, &checksumCalc, &stream, mRingStream.get(), &readBuf,
300     };
301 
302     // Framebuffer initialization is asynchronous, so we need to make sure
303     // it's completely initialized before running any GL commands.
304     FrameBuffer::waitUntilInitialized();
305     if (vk::getGlobalVkEmulation()) {
306         tInfo.m_vkInfo.emplace();
307     }
308 
309 #if GFXSTREAM_ENABLE_HOST_MAGMA
310     tInfo.m_magmaInfo.emplace(mContextId);
311 #endif
312 
313     // This is the only place where we try loading from snapshot.
314     // But the context bind / restoration will be delayed after receiving
315     // the first GL command.
316     if (doSnapshotOperation(snapshotObjects, SnapshotState::StartLoading)) {
317         GL_LOG("Loaded RenderThread @%p from snapshot", this);
318         needRestoreFromSnapshot = true;
319     } else {
320         // Not loading from a snapshot: continue regular startup, read
321         // the |flags|.
322         uint32_t flags = 0;
323         while (ioStream->read(&flags, sizeof(flags)) != sizeof(flags)) {
324             // Stream read may fail because of a pending snapshot.
325             if (!doSnapshotOperation(snapshotObjects, SnapshotState::StartSaving)) {
326                 setFinished();
327                 GL_LOG("Exited a RenderThread @%p early", this);
328                 return 0;
329             }
330         }
331 
332         // |flags| used to mean something, now they're not used.
333         (void)flags;
334     }
335 
336     int stats_totalBytes = 0;
337     uint64_t stats_progressTimeUs = 0;
338     auto stats_t0 = android::base::getHighResTimeUs() / 1000;
339     bool benchmarkEnabled = getBenchmarkEnabledFromEnv();
340 
341     //
342     // open dump file if RENDER_DUMP_DIR is defined
343     //
344     const char* dump_dir = getenv("RENDERER_DUMP_DIR");
345     FILE* dumpFP = nullptr;
346     if (dump_dir) {
347         // size_t bsize = strlen(dump_dir) + 32;
348         // char* fname = new char[bsize];
349         // snprintf(fname, bsize, "%s" PATH_SEP "stream_%p", dump_dir, this);
350         // dumpFP = android_fopen(fname, "wb");
351         // if (!dumpFP) {
352         //     fprintf(stderr, "Warning: stream dump failed to open file %s\n",
353         //             fname);
354         // }
355         // delete[] fname;
356     }
357 
358     GfxApiLogger gfxLogger;
359     auto& metricsLogger = FrameBuffer::getFB()->getMetricsLogger();
360 
361     const ProcessResources* processResources = nullptr;
362     bool anyProgress = false;
363     while (true) {
364         // Let's make sure we read enough data for at least some processing.
365         uint32_t packetSize;
366         if (readBuf.validData() >= 8) {
367             // We know that packet size is the second int32_t from the start.
368             packetSize = *(uint32_t*)(readBuf.buf() + 4);
369             if (!packetSize) {
370                 // Emulator will get live-stuck here if packet size is read to be zero;
371                 // crash right away so we can see these events.
372                 // emugl::emugl_crash_reporter(
373                 //     "Guest should never send a size-0 GL packet\n");
374             }
375         } else {
376             // Read enough data to at least be able to get the packet size next
377             // time.
378             packetSize = 8;
379         }
380         if (!anyProgress) {
381             // If we didn't make any progress last time, then make sure we read at least one
382             // extra byte.
383             packetSize = std::max(packetSize, static_cast<uint32_t>(readBuf.validData() + 1));
384         }
385         int stat = 0;
386         if (packetSize > readBuf.validData()) {
387             stat = readBuf.getData(ioStream, packetSize);
388             if (stat <= 0) {
389                 if (doSnapshotOperation(snapshotObjects, SnapshotState::StartSaving)) {
390                     continue;
391                 } else {
392                     D("Warning: render thread could not read data from stream");
393                     break;
394                 }
395             } else if (needRestoreFromSnapshot) {
396                 // If we're using RingStream that might load before FrameBuffer
397                 // restores the contexts from the handles, so check again here.
398 
399                 tInfo.postLoadRefreshCurrentContextSurfacePtrs();
400                 needRestoreFromSnapshot = false;
401             }
402             if (mNeedReloadProcessResources) {
403                 processResources = nullptr;
404                 mNeedReloadProcessResources = false;
405             }
406         }
407 
408         DD("render thread read %i bytes, op %i, packet size %i",
409            readBuf.validData(), *(uint32_t*)readBuf.buf(),
410            *(uint32_t*)(readBuf.buf() + 4));
411 
412         //
413         // log received bandwidth statistics
414         //
415         if (benchmarkEnabled) {
416             stats_totalBytes += readBuf.validData();
417             auto dt = android::base::getHighResTimeUs() / 1000 - stats_t0;
418             if (dt > 1000) {
419                 float dts = (float)dt / 1000.0f;
420                 printf("Used Bandwidth %5.3f MB/s, time in progress %f ms total %f ms\n", ((float)stats_totalBytes / dts) / (1024.0f*1024.0f),
421                         stats_progressTimeUs / 1000.0f,
422                         (float)dt);
423                 readBuf.printStats();
424                 stats_t0 = android::base::getHighResTimeUs() / 1000;
425                 stats_progressTimeUs = 0;
426                 stats_totalBytes = 0;
427             }
428         }
429 
430         //
431         // dump stream to file if needed
432         //
433         if (dumpFP) {
434             int skip = readBuf.validData() - stat;
435             fwrite(readBuf.buf() + skip, 1, readBuf.validData() - skip, dumpFP);
436             fflush(dumpFP);
437         }
438 
439         bool progress = false;
440         anyProgress = false;
441         do {
442             anyProgress |= progress;
443             std::unique_ptr<EventHangMetadata::HangAnnotations> renderThreadData =
444                 std::make_unique<EventHangMetadata::HangAnnotations>();
445 
446             const char* contextName = nullptr;
447             if (mNameOpt) {
448                 contextName = (*mNameOpt).c_str();
449             }
450 
451             auto* healthMonitor = FrameBuffer::getFB()->getHealthMonitor();
452             if (healthMonitor) {
453                 if (contextName) {
454                     renderThreadData->insert(
455                         {{"renderthread_guest_process", contextName}});
456                 }
457                 if (readBuf.validData() >= 4) {
458                     renderThreadData->insert(
459                         {{"first_opcode", std::to_string(*(uint32_t*)readBuf.buf())},
460                          {"buffer_length", std::to_string(readBuf.validData())}});
461                 }
462             }
463             auto watchdog = WATCHDOG_BUILDER(healthMonitor, "RenderThread decode operation")
464                                 .setHangType(EventHangMetadata::HangType::kRenderThread)
465                                 .setAnnotations(std::move(renderThreadData))
466                                 .build();
467 
468             if (!tInfo.m_puid) {
469                 tInfo.m_puid = mContextId;
470             }
471 
472             if (!processResources && tInfo.m_puid && tInfo.m_puid != INVALID_CONTEXT_ID) {
473                 processResources = FrameBuffer::getFB()->getProcessResources(tInfo.m_puid);
474             }
475 
476             progress = false;
477             size_t last;
478 
479             //
480             // try to process some of the command buffer using the
481             // Vulkan decoder
482             //
483             // Note: It's risky to limit Vulkan decoding to one thread,
484             // so we do it outside the limiter
485             if (tInfo.m_vkInfo) {
486                 tInfo.m_vkInfo->ctx_id = mContextId;
487                 VkDecoderContext context = {
488                     .processName = contextName,
489                     .gfxApiLogger = &gfxLogger,
490                     .healthMonitor = FrameBuffer::getFB()->getHealthMonitor(),
491                     .metricsLogger = &metricsLogger,
492                 };
493                 last = tInfo.m_vkInfo->m_vkDec.decode(readBuf.buf(), readBuf.validData(), ioStream,
494                                                       processResources, context);
495                 if (last > 0) {
496                     if (!processResources) {
497                         ERR("Processed some Vulkan packets without process resources created. "
498                             "That's problematic.");
499                     }
500                     readBuf.consume(last);
501                     progress = true;
502                 }
503             }
504 
505             if (mRunInLimitedMode) {
506                 sThreadRunLimiter.lock();
507             }
508 
509             // try to process some of the command buffer using the GLESv1
510             // decoder
511             //
512             // DRIVER WORKAROUND:
513             // On Linux with NVIDIA GPU's at least, we need to avoid performing
514             // GLES ops while someone else holds the FrameBuffer write lock.
515             //
516             // To be more specific, on Linux with NVIDIA Quadro K2200 v361.xx,
517             // we get a segfault in the NVIDIA driver when glTexSubImage2D
518             // is called at the same time as glXMake(Context)Current.
519             //
520             // To fix, this driver workaround avoids calling
521             // any sort of GLES call when we are creating/destroying EGL
522             // contexts.
523             {
524                 FrameBuffer::getFB()->lockContextStructureRead();
525             }
526 
527 #if GFXSTREAM_ENABLE_HOST_GLES
528             if (tInfo.m_glInfo) {
529                 {
530                     last = tInfo.m_glInfo->m_glDec.decode(
531                             readBuf.buf(), readBuf.validData(), ioStream, &checksumCalc);
532                     if (last > 0) {
533                         progress = true;
534                         readBuf.consume(last);
535                     }
536                 }
537 
538                 //
539                 // try to process some of the command buffer using the GLESv2
540                 // decoder
541                 //
542                 {
543                     last = tInfo.m_glInfo->m_gl2Dec.decode(readBuf.buf(), readBuf.validData(),
544                                                            ioStream, &checksumCalc);
545 
546                     if (last > 0) {
547                         progress = true;
548                         readBuf.consume(last);
549                     }
550                 }
551             }
552 #endif
553 
554             FrameBuffer::getFB()->unlockContextStructureRead();
555             //
556             // try to process some of the command buffer using the
557             // renderControl decoder
558             //
559 #if GFXSTREAM_ENABLE_HOST_GLES
560             {
561                 last = tInfo.m_rcDec.decode(readBuf.buf(), readBuf.validData(),
562                                             ioStream, &checksumCalc);
563                 if (last > 0) {
564                     readBuf.consume(last);
565                     progress = true;
566                 }
567             }
568 #endif
569 
570             //
571             // try to process some of the command buffer using the Magma
572             // decoder
573             //
574 #if GFXSTREAM_ENABLE_HOST_MAGMA
575             if (tInfo.m_magmaInfo && tInfo.m_magmaInfo->mMagmaDec)
576             {
577                 last = tInfo.m_magmaInfo->mMagmaDec->decode(readBuf.buf(), readBuf.validData(),
578                                                             ioStream, &checksumCalc);
579                 if (last > 0) {
580                     readBuf.consume(last);
581                     progress = true;
582                 }
583             }
584 #endif
585 
586             if (mRunInLimitedMode) {
587                 sThreadRunLimiter.unlock();
588             }
589 
590         } while (progress);
591     }
592 
593     if (dumpFP) {
594         fclose(dumpFP);
595     }
596 
597 #if GFXSTREAM_ENABLE_HOST_GLES
598     if (tInfo.m_glInfo) {
599         FrameBuffer::getFB()->drainGlRenderThreadResources();
600     }
601 #endif
602 
603     setFinished();
604 
605     GL_LOG("Exited a RenderThread @%p", this);
606     return 0;
607 }
608 
609 }  // namespace gfxstream
610