/* * Copyright (C) 2011 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "RenderThread.h" #include "ChannelStream.h" #include "FrameBuffer.h" #include "ReadBuffer.h" #include "RenderChannelImpl.h" #include "RenderThreadInfo.h" #include "RingStream.h" #include "VkDecoderContext.h" #include "aemu/base/HealthMonitor.h" #include "aemu/base/Metrics.h" #include "aemu/base/files/StreamSerializing.h" #include "aemu/base/synchronization/Lock.h" #include "aemu/base/synchronization/MessageChannel.h" #include "aemu/base/system/System.h" #include "apigen-codec-common/ChecksumCalculatorThreadInfo.h" #include "host-common/logging.h" #include "vulkan/VkCommonOperations.h" #if GFXSTREAM_ENABLE_HOST_GLES #include "RenderControl.h" #endif #define EMUGL_DEBUG_LEVEL 0 #include "host-common/debug.h" #ifndef _WIN32 #include #endif #include #include #include namespace gfxstream { using android::base::AutoLock; using android::base::EventHangMetadata; using android::base::MessageChannel; using emugl::GfxApiLogger; using vk::VkDecoderContext; struct RenderThread::SnapshotObjects { RenderThreadInfo* threadInfo; ChecksumCalculator* checksumCalc; ChannelStream* channelStream; RingStream* ringStream; ReadBuffer* readBuffer; }; static bool getBenchmarkEnabledFromEnv() { auto threadEnabled = android::base::getEnvironmentVariable("ANDROID_EMUGL_RENDERTHREAD_STATS"); if (threadEnabled == "1") return true; return false; } // Start with a smaller buffer to not waste memory on a low-used render threads. static constexpr int kStreamBufferSize = 128 * 1024; // Requires this many threads on the system available to run unlimited. static constexpr int kMinThreadsToRunUnlimited = 5; // A thread run limiter that limits render threads to run one slice at a time. static android::base::Lock sThreadRunLimiter; RenderThread::RenderThread(RenderChannelImpl* channel, android::base::Stream* loadStream, uint32_t virtioGpuContextId) : android::base::Thread(android::base::ThreadFlags::MaskSignals, 2 * 1024 * 1024), mChannel(channel), mRunInLimitedMode(android::base::getCpuCoreCount() < kMinThreadsToRunUnlimited), mContextId(virtioGpuContextId) { if (loadStream) { const bool success = loadStream->getByte(); if (success) { mStream.emplace(0); android::base::loadStream(loadStream, &*mStream); mState = SnapshotState::StartLoading; } else { mFinished.store(true, std::memory_order_relaxed); } } } RenderThread::RenderThread( struct asg_context context, android::base::Stream* loadStream, android::emulation::asg::ConsumerCallbacks callbacks, uint32_t contextId, uint32_t capsetId, std::optional nameOpt) : android::base::Thread(android::base::ThreadFlags::MaskSignals, 2 * 1024 * 1024, std::move(nameOpt)), mRingStream( new RingStream(context, callbacks, kStreamBufferSize)), mContextId(contextId), mCapsetId(capsetId) { if (loadStream) { const bool success = loadStream->getByte(); if (success) { mStream.emplace(0); android::base::loadStream(loadStream, &*mStream); mState = SnapshotState::StartLoading; } else { mFinished.store(true, std::memory_order_relaxed); } } } // Note: the RenderThread destructor might be called from a different thread // than from RenderThread::main() so thread specific cleanup likely belongs at // the end of RenderThread::main(). RenderThread::~RenderThread() = default; void RenderThread::pausePreSnapshot() { AutoLock lock(mLock); assert(mState == SnapshotState::Empty); mStream.emplace(); mState = SnapshotState::StartSaving; if (mRingStream) { mRingStream->pausePreSnapshot(); // mCondVar.broadcastAndUnlock(&lock); } if (mChannel) { mChannel->pausePreSnapshot(); mCondVar.broadcastAndUnlock(&lock); } } void RenderThread::resume(bool waitForSave) { AutoLock lock(mLock); // This function can be called for a thread from pre-snapshot loading // state; it doesn't need to do anything. if (mState == SnapshotState::Empty) { return; } if (mRingStream) mRingStream->resume(); if (waitForSave) { waitForSnapshotCompletion(&lock); } mNeedReloadProcessResources = true; mStream.clear(); mState = SnapshotState::Empty; if (mChannel) mChannel->resume(); if (mRingStream) mRingStream->resume(); mCondVar.broadcastAndUnlock(&lock); } void RenderThread::save(android::base::Stream* stream) { bool success; { AutoLock lock(mLock); assert(mState == SnapshotState::StartSaving || mState == SnapshotState::InProgress || mState == SnapshotState::Finished); waitForSnapshotCompletion(&lock); success = mState == SnapshotState::Finished; } if (success) { assert(mStream); stream->putByte(1); android::base::saveStream(stream, *mStream); } else { stream->putByte(0); } } void RenderThread::waitForSnapshotCompletion(AutoLock* lock) { while (mState != SnapshotState::Finished && !mFinished.load(std::memory_order_relaxed)) { mCondVar.wait(lock); } } template void RenderThread::snapshotOperation(AutoLock* lock, OpImpl&& implFunc) { assert(isPausedForSnapshotLocked()); mState = SnapshotState::InProgress; mCondVar.broadcastAndUnlock(lock); implFunc(); lock->lock(); mState = SnapshotState::Finished; mCondVar.broadcast(); // Only return after we're allowed to proceed. while (isPausedForSnapshotLocked()) { mCondVar.wait(lock); } } void RenderThread::loadImpl(AutoLock* lock, const SnapshotObjects& objects) { snapshotOperation(lock, [this, &objects] { objects.readBuffer->onLoad(&*mStream); if (objects.channelStream) objects.channelStream->load(&*mStream); if (objects.ringStream) objects.ringStream->load(&*mStream); objects.checksumCalc->load(&*mStream); objects.threadInfo->onLoad(&*mStream); }); } void RenderThread::saveImpl(AutoLock* lock, const SnapshotObjects& objects) { snapshotOperation(lock, [this, &objects] { objects.readBuffer->onSave(&*mStream); if (objects.channelStream) objects.channelStream->save(&*mStream); if (objects.ringStream) objects.ringStream->save(&*mStream); objects.checksumCalc->save(&*mStream); objects.threadInfo->onSave(&*mStream); }); } bool RenderThread::isPausedForSnapshotLocked() const { return mState != SnapshotState::Empty; } bool RenderThread::doSnapshotOperation(const SnapshotObjects& objects, SnapshotState state) { AutoLock lock(mLock); if (mState == state) { switch (state) { case SnapshotState::StartLoading: loadImpl(&lock, objects); return true; case SnapshotState::StartSaving: saveImpl(&lock, objects); return true; default: return false; } } return false; } void RenderThread::setFinished() { // Make sure it never happens that we wait forever for the thread to // save to snapshot while it was not even going to. AutoLock lock(mLock); mFinished.store(true, std::memory_order_relaxed); if (mState != SnapshotState::Empty) { mCondVar.broadcastAndUnlock(&lock); } } intptr_t RenderThread::main() { if (mFinished.load(std::memory_order_relaxed)) { ERR("Error: fail loading a RenderThread @%p", this); return 0; } RenderThreadInfo tInfo; ChecksumCalculatorThreadInfo tChecksumInfo; ChecksumCalculator& checksumCalc = tChecksumInfo.get(); bool needRestoreFromSnapshot = false; // // initialize decoders #if GFXSTREAM_ENABLE_HOST_GLES if (!FrameBuffer::getFB()->getFeatures().GuestUsesAngle.enabled) { tInfo.initGl(); } initRenderControlContext(&tInfo.m_rcDec); #endif if (!mChannel && !mRingStream) { GL_LOG("Exited a loader RenderThread @%p", this); mFinished.store(true, std::memory_order_relaxed); return 0; } ChannelStream stream(mChannel, RenderChannel::Buffer::kSmallSize); IOStream* ioStream = mChannel ? (IOStream*)&stream : (IOStream*)mRingStream.get(); ReadBuffer readBuf(kStreamBufferSize); if (mRingStream) { readBuf.setNeededFreeTailSize(0); } const SnapshotObjects snapshotObjects = { &tInfo, &checksumCalc, &stream, mRingStream.get(), &readBuf, }; // Framebuffer initialization is asynchronous, so we need to make sure // it's completely initialized before running any GL commands. FrameBuffer::waitUntilInitialized(); if (vk::getGlobalVkEmulation()) { tInfo.m_vkInfo.emplace(); } #if GFXSTREAM_ENABLE_HOST_MAGMA tInfo.m_magmaInfo.emplace(mContextId); #endif // This is the only place where we try loading from snapshot. // But the context bind / restoration will be delayed after receiving // the first GL command. if (doSnapshotOperation(snapshotObjects, SnapshotState::StartLoading)) { GL_LOG("Loaded RenderThread @%p from snapshot", this); needRestoreFromSnapshot = true; } else { // Not loading from a snapshot: continue regular startup, read // the |flags|. uint32_t flags = 0; while (ioStream->read(&flags, sizeof(flags)) != sizeof(flags)) { // Stream read may fail because of a pending snapshot. if (!doSnapshotOperation(snapshotObjects, SnapshotState::StartSaving)) { setFinished(); GL_LOG("Exited a RenderThread @%p early", this); return 0; } } // |flags| used to mean something, now they're not used. (void)flags; } int stats_totalBytes = 0; uint64_t stats_progressTimeUs = 0; auto stats_t0 = android::base::getHighResTimeUs() / 1000; bool benchmarkEnabled = getBenchmarkEnabledFromEnv(); // // open dump file if RENDER_DUMP_DIR is defined // const char* dump_dir = getenv("RENDERER_DUMP_DIR"); FILE* dumpFP = nullptr; if (dump_dir) { // size_t bsize = strlen(dump_dir) + 32; // char* fname = new char[bsize]; // snprintf(fname, bsize, "%s" PATH_SEP "stream_%p", dump_dir, this); // dumpFP = android_fopen(fname, "wb"); // if (!dumpFP) { // fprintf(stderr, "Warning: stream dump failed to open file %s\n", // fname); // } // delete[] fname; } GfxApiLogger gfxLogger; auto& metricsLogger = FrameBuffer::getFB()->getMetricsLogger(); const ProcessResources* processResources = nullptr; bool anyProgress = false; while (true) { // Let's make sure we read enough data for at least some processing. uint32_t packetSize; if (readBuf.validData() >= 8) { // We know that packet size is the second int32_t from the start. packetSize = *(uint32_t*)(readBuf.buf() + 4); if (!packetSize) { // Emulator will get live-stuck here if packet size is read to be zero; // crash right away so we can see these events. // emugl::emugl_crash_reporter( // "Guest should never send a size-0 GL packet\n"); } } else { // Read enough data to at least be able to get the packet size next // time. packetSize = 8; } if (!anyProgress) { // If we didn't make any progress last time, then make sure we read at least one // extra byte. packetSize = std::max(packetSize, static_cast(readBuf.validData() + 1)); } int stat = 0; if (packetSize > readBuf.validData()) { stat = readBuf.getData(ioStream, packetSize); if (stat <= 0) { if (doSnapshotOperation(snapshotObjects, SnapshotState::StartSaving)) { continue; } else { D("Warning: render thread could not read data from stream"); break; } } else if (needRestoreFromSnapshot) { // If we're using RingStream that might load before FrameBuffer // restores the contexts from the handles, so check again here. tInfo.postLoadRefreshCurrentContextSurfacePtrs(); needRestoreFromSnapshot = false; } if (mNeedReloadProcessResources) { processResources = nullptr; mNeedReloadProcessResources = false; } } DD("render thread read %i bytes, op %i, packet size %i", readBuf.validData(), *(uint32_t*)readBuf.buf(), *(uint32_t*)(readBuf.buf() + 4)); // // log received bandwidth statistics // if (benchmarkEnabled) { stats_totalBytes += readBuf.validData(); auto dt = android::base::getHighResTimeUs() / 1000 - stats_t0; if (dt > 1000) { float dts = (float)dt / 1000.0f; printf("Used Bandwidth %5.3f MB/s, time in progress %f ms total %f ms\n", ((float)stats_totalBytes / dts) / (1024.0f*1024.0f), stats_progressTimeUs / 1000.0f, (float)dt); readBuf.printStats(); stats_t0 = android::base::getHighResTimeUs() / 1000; stats_progressTimeUs = 0; stats_totalBytes = 0; } } // // dump stream to file if needed // if (dumpFP) { int skip = readBuf.validData() - stat; fwrite(readBuf.buf() + skip, 1, readBuf.validData() - skip, dumpFP); fflush(dumpFP); } bool progress = false; anyProgress = false; do { anyProgress |= progress; std::unique_ptr renderThreadData = std::make_unique(); const char* contextName = nullptr; if (mNameOpt) { contextName = (*mNameOpt).c_str(); } auto* healthMonitor = FrameBuffer::getFB()->getHealthMonitor(); if (healthMonitor) { if (contextName) { renderThreadData->insert( {{"renderthread_guest_process", contextName}}); } if (readBuf.validData() >= 4) { renderThreadData->insert( {{"first_opcode", std::to_string(*(uint32_t*)readBuf.buf())}, {"buffer_length", std::to_string(readBuf.validData())}}); } } auto watchdog = WATCHDOG_BUILDER(healthMonitor, "RenderThread decode operation") .setHangType(EventHangMetadata::HangType::kRenderThread) .setAnnotations(std::move(renderThreadData)) .build(); if (!tInfo.m_puid) { tInfo.m_puid = mContextId; } if (!processResources && tInfo.m_puid && tInfo.m_puid != INVALID_CONTEXT_ID) { processResources = FrameBuffer::getFB()->getProcessResources(tInfo.m_puid); } progress = false; size_t last; // // try to process some of the command buffer using the // Vulkan decoder // // Note: It's risky to limit Vulkan decoding to one thread, // so we do it outside the limiter if (tInfo.m_vkInfo) { tInfo.m_vkInfo->ctx_id = mContextId; VkDecoderContext context = { .processName = contextName, .gfxApiLogger = &gfxLogger, .healthMonitor = FrameBuffer::getFB()->getHealthMonitor(), .metricsLogger = &metricsLogger, }; last = tInfo.m_vkInfo->m_vkDec.decode(readBuf.buf(), readBuf.validData(), ioStream, processResources, context); if (last > 0) { if (!processResources) { ERR("Processed some Vulkan packets without process resources created. " "That's problematic."); } readBuf.consume(last); progress = true; } } if (mRunInLimitedMode) { sThreadRunLimiter.lock(); } // try to process some of the command buffer using the GLESv1 // decoder // // DRIVER WORKAROUND: // On Linux with NVIDIA GPU's at least, we need to avoid performing // GLES ops while someone else holds the FrameBuffer write lock. // // To be more specific, on Linux with NVIDIA Quadro K2200 v361.xx, // we get a segfault in the NVIDIA driver when glTexSubImage2D // is called at the same time as glXMake(Context)Current. // // To fix, this driver workaround avoids calling // any sort of GLES call when we are creating/destroying EGL // contexts. { FrameBuffer::getFB()->lockContextStructureRead(); } #if GFXSTREAM_ENABLE_HOST_GLES if (tInfo.m_glInfo) { { last = tInfo.m_glInfo->m_glDec.decode( readBuf.buf(), readBuf.validData(), ioStream, &checksumCalc); if (last > 0) { progress = true; readBuf.consume(last); } } // // try to process some of the command buffer using the GLESv2 // decoder // { last = tInfo.m_glInfo->m_gl2Dec.decode(readBuf.buf(), readBuf.validData(), ioStream, &checksumCalc); if (last > 0) { progress = true; readBuf.consume(last); } } } #endif FrameBuffer::getFB()->unlockContextStructureRead(); // // try to process some of the command buffer using the // renderControl decoder // #if GFXSTREAM_ENABLE_HOST_GLES { last = tInfo.m_rcDec.decode(readBuf.buf(), readBuf.validData(), ioStream, &checksumCalc); if (last > 0) { readBuf.consume(last); progress = true; } } #endif // // try to process some of the command buffer using the Magma // decoder // #if GFXSTREAM_ENABLE_HOST_MAGMA if (tInfo.m_magmaInfo && tInfo.m_magmaInfo->mMagmaDec) { last = tInfo.m_magmaInfo->mMagmaDec->decode(readBuf.buf(), readBuf.validData(), ioStream, &checksumCalc); if (last > 0) { readBuf.consume(last); progress = true; } } #endif if (mRunInLimitedMode) { sThreadRunLimiter.unlock(); } } while (progress); } if (dumpFP) { fclose(dumpFP); } #if GFXSTREAM_ENABLE_HOST_GLES if (tInfo.m_glInfo) { FrameBuffer::getFB()->drainGlRenderThreadResources(); } #endif setFinished(); GL_LOG("Exited a RenderThread @%p", this); return 0; } } // namespace gfxstream