// Copyright 2022 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
#include "AstcTexture.h"

#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <optional>
#include <vector>

#include "aemu/base/HealthMonitor.h"
#include "host-common/logging.h"
#include "vulkan/vk_util.h"
26 
27 namespace gfxstream {
28 namespace vk {
29 namespace {
30 
using std::chrono::milliseconds;

// Number of decompressed pixels after which the running statistics are logged (and reset).
constexpr uint64_t kProcessedPixelsLogInterval = 10'000'000;

// Running statistics, shared by every AstcTexture in the process.
std::atomic<uint64_t> pixels_processed{0};  // pixels decompressed since the last log line
std::atomic<uint64_t> ms_elapsed{0};        // milliseconds spent since the last log line
std::atomic<int64_t> bytes_used{0};         // bytes currently accounted to decompression buffers
39 
// Returns the extent of one dimension at mip level `mipLevel`, given the extent `size` of that
// dimension at level 0. Each level halves the previous one (rounding down); the result is never
// smaller than 1.
uint32_t mipmapSize(uint32_t size, uint32_t mipLevel) {
    // Shifting a 32-bit value by 32 or more bits is undefined behaviour. Any such level has
    // necessarily collapsed to the 1-texel minimum already.
    if (mipLevel >= 32) {
        return 1;
    }
    return std::max<uint32_t>(size >> mipLevel, 1);
}
43 
isRegionValid(const VkBufferImageCopy & region,uint32_t width,uint32_t height)44 bool isRegionValid(const VkBufferImageCopy& region, uint32_t width, uint32_t height) {
45     // TODO(gregschlom) deal with those cases. See details at:
46     // https://registry.khronos.org/vulkan/specs/1.0-extensions/html/chap20.html#copies-buffers-images-addressing
47     // https://stackoverflow.com/questions/46501832/vulkan-vkbufferimagecopy-for-partial-transfer
48 
49     if (region.bufferRowLength != 0 || region.bufferImageHeight != 0) {
50         WARN("ASTC CPU decompression skipped: non-packed buffer");
51         return false;
52     }
53     if (region.imageOffset.x != 0 || region.imageOffset.y != 0) {
54         WARN("ASTC CPU decompression skipped: imageOffset is non-zero");
55         return false;
56     }
57     if (region.imageExtent.width != width || region.imageExtent.height != height) {
58         WARN("ASTC CPU decompression skipped: imageExtent is less than the entire image");
59         return false;
60     }
61     return true;
62 }
63 
64 }  // namespace
65 
AstcTexture(VulkanDispatch * vk,VkDevice device,VkPhysicalDevice physicalDevice,VkExtent3D imgSize,uint32_t blockWidth,uint32_t blockHeight,AstcCpuDecompressor * decompressor)66 AstcTexture::AstcTexture(VulkanDispatch* vk, VkDevice device, VkPhysicalDevice physicalDevice,
67                          VkExtent3D imgSize, uint32_t blockWidth, uint32_t blockHeight,
68                          AstcCpuDecompressor* decompressor)
69     : mVk(vk),
70       mDevice(device),
71       mPhysicalDevice(physicalDevice),
72       mImgSize(imgSize),
73       mBlockWidth(blockWidth),
74       mBlockHeight(blockHeight),
75       mDecompressor(decompressor) {}
76 
~AstcTexture()77 AstcTexture::~AstcTexture() { destroyVkBuffer(); }
78 
canDecompressOnCpu() const79 bool AstcTexture::canDecompressOnCpu() const { return mDecompressor->available(); }
80 
createVkBufferAndMapMemory(size_t bufferSize)81 uint8_t* AstcTexture::createVkBufferAndMapMemory(size_t bufferSize) {
82     VkResult res;
83     mBufferSize = bufferSize;  // Save the buffer size, for statistics purpose only
84     bytes_used += bufferSize;
85 
86     if (mDecompBuffer || mDecompBufferMemory) {
87         WARN("ASTC CPU decompression failed: tried to decompress same image more than once.");
88         return nullptr;
89     }
90 
91     VkBufferCreateInfo bufferInfo = {
92         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
93         .size = bufferSize,
94         .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
95         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
96     };
97     res = mVk->vkCreateBuffer(mDevice, &bufferInfo, nullptr, &mDecompBuffer);
98     if (res != VK_SUCCESS) {
99         WARN("ASTC CPU decompression: vkCreateBuffer failed: %d", res);
100         mDecompBuffer = VK_NULL_HANDLE;
101         return nullptr;
102     }
103 
104     VkMemoryRequirements memRequirements;
105     mVk->vkGetBufferMemoryRequirements(mDevice, mDecompBuffer, &memRequirements);
106 
107     std::optional<uint32_t> memIndex = vk_util::findMemoryType(
108         mVk, mPhysicalDevice, memRequirements.memoryTypeBits,
109         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
110             VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
111     if (!memIndex) {
112         // Do it again, but without VK_MEMORY_PROPERTY_HOST_CACHED_BIT this time
113         memIndex = vk_util::findMemoryType(
114             mVk, mPhysicalDevice, memRequirements.memoryTypeBits,
115             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
116     }
117     if (!memIndex) {
118         WARN("ASTC CPU decompression: no suitable memory type to decompress the image");
119         return nullptr;
120     }
121 
122     VkMemoryAllocateInfo allocInfo = {
123         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
124         .allocationSize = memRequirements.size,
125         .memoryTypeIndex = *memIndex,
126     };
127     res = mVk->vkAllocateMemory(mDevice, &allocInfo, nullptr, &mDecompBufferMemory);
128     if (res != VK_SUCCESS) {
129         WARN("ASTC CPU decompression: vkAllocateMemory failed: %d", res);
130         mDecompBufferMemory = VK_NULL_HANDLE;
131         return nullptr;
132     }
133 
134     res = mVk->vkBindBufferMemory(mDevice, mDecompBuffer, mDecompBufferMemory, 0);
135     if (res != VK_SUCCESS) {
136         WARN("ASTC CPU decompression: vkBindBufferMemory failed: %d", res);
137         return nullptr;
138     }
139 
140     uint8_t* decompData;
141     res = mVk->vkMapMemory(mDevice, mDecompBufferMemory, 0, bufferSize, 0, (void**)&decompData);
142     if (res != VK_SUCCESS) {
143         WARN("ASTC CPU decompression: vkMapMemory failed: %d", res);
144         return nullptr;
145     }
146 
147     return decompData;
148 }
149 
destroyVkBuffer()150 void AstcTexture::destroyVkBuffer() {
151     bytes_used -= mBufferSize;
152     if (mVk && mDevice) {
153         mVk->vkDestroyBuffer(mDevice, mDecompBuffer, nullptr);
154         mVk->vkFreeMemory(mDevice, mDecompBufferMemory, nullptr);
155         mDecompBuffer = VK_NULL_HANDLE;
156         mDecompBufferMemory = VK_NULL_HANDLE;
157     }
158 }
159 
160 template<typename T>
on_vkCmdCopyBufferToImageImpl(VkCommandBuffer commandBuffer,uint8_t * srcAstcData,size_t astcDataSize,VkImage dstImage,VkImageLayout dstImageLayout,uint32_t regionCount,const T * pRegions,const VkDecoderContext & context)161 void AstcTexture::on_vkCmdCopyBufferToImageImpl(VkCommandBuffer commandBuffer, uint8_t* srcAstcData,
162                                             size_t astcDataSize, VkImage dstImage,
163                                             VkImageLayout dstImageLayout, uint32_t regionCount,
164                                             const T* pRegions,
165                                             const VkDecoderContext& context) {
166     auto watchdog =
167         WATCHDOG_BUILDER(context.healthMonitor, "AstcTexture::on_vkCmdCopyBufferToImageImpl").build();
168     auto start_time = std::chrono::steady_clock::now();
169     mSuccess = false;
170     size_t decompSize = 0;  // How many bytes we need to hold the decompressed data
171 
172     // Holds extra data about the region
173     struct RegionInfo {
174         uint32_t width;           // actual width (ie: mipmap width)
175         uint32_t height;          // actual height (ie: mipmap height)
176         uint32_t compressedSize;  // size of ASTC data for that region
177     };
178 
179     std::vector<RegionInfo> regionInfos;
180     regionInfos.reserve(regionCount);
181 
182     // Make a copy of the regions and update the buffer offset of each to reflect the
183     // correct location of the decompressed data
184     std::vector<VkBufferImageCopy> decompRegions(regionCount);
185     for (size_t i = 0; i < regionCount; ++i) {
186         decompRegions[i] = VkBufferImageCopy {
187             pRegions[i].bufferOffset,
188             pRegions[i].bufferRowLength,
189             pRegions[i].bufferImageHeight,
190             pRegions[i].imageSubresource,
191             pRegions[i].imageOffset,
192             pRegions[i].imageExtent
193         };
194     }
195     for (auto& decompRegion : decompRegions) {
196         const uint32_t mipLevel = decompRegion.imageSubresource.mipLevel;
197         const uint32_t width = mipmapSize(mImgSize.width, mipLevel);
198         const uint32_t height = mipmapSize(mImgSize.height, mipLevel);
199         const uint32_t numAstcBlocks = ((width + mBlockWidth - 1) / mBlockWidth) *
200                                        ((height + mBlockHeight - 1) / mBlockHeight);
201         const uint32_t compressedSize = numAstcBlocks * 16;
202         // We haven't updated decompRegion.bufferOffset yet, so it's still the _compressed_ offset.
203         const uint32_t compressedDataOffset = decompRegion.bufferOffset;
204 
205         // Do all the precondition checks
206         if (!isRegionValid(decompRegion, width, height)) return;
207         if (compressedDataOffset + compressedSize > astcDataSize) {
208             WARN("ASTC CPU decompression: data out of bounds. Offset: %llu, Size: %llu, Total %llu",
209                  compressedDataOffset, compressedSize, astcDataSize);
210             return;
211         }
212 
213         decompRegion.bufferOffset = decompSize;
214         decompSize += width * height * 4;
215         regionInfos.push_back({width, height, compressedSize});
216     }
217 
218     // Create a new VkBuffer to hold the decompressed data
219     uint8_t* decompData = createVkBufferAndMapMemory(decompSize);
220     if (!decompData) {
221         destroyVkBuffer();  // The destructor would have done it anyway, but may as well do it early
222         return;
223     }
224 
225     // Decompress each region
226     for (int i = 0; i < regionCount; i++) {
227         const auto& compRegion = pRegions[i];
228         const auto& decompRegion = decompRegions[i];
229         const auto& regionInfo = regionInfos[i];
230 
231         int32_t status = mDecompressor->decompress(
232             regionInfo.width, regionInfo.height, mBlockWidth, mBlockHeight,
233             srcAstcData + compRegion.bufferOffset, regionInfo.compressedSize,
234             decompData + decompRegion.bufferOffset);
235 
236         if (status != 0) {
237             WARN("ASTC CPU decompression failed: %s.", mDecompressor->getStatusString(status));
238             mVk->vkUnmapMemory(mDevice, mDecompBufferMemory);
239             destroyVkBuffer();
240             return;
241         }
242     }
243 
244     mVk->vkUnmapMemory(mDevice, mDecompBufferMemory);
245 
246     // Finally, actually copy the buffer to the image
247     mVk->vkCmdCopyBufferToImage(commandBuffer, mDecompBuffer, dstImage, dstImageLayout,
248                                 decompRegions.size(), decompRegions.data());
249 
250     mSuccess = true;
251     auto end_time = std::chrono::steady_clock::now();
252 
253     // Compute stats
254     pixels_processed += decompSize / 4;
255     ms_elapsed += std::chrono::duration_cast<milliseconds>(end_time - start_time).count();
256 
257     uint64_t total_pixels = pixels_processed.load();
258     uint64_t total_time = ms_elapsed.load();
259 
260     if (total_pixels >= kProcessedPixelsLogInterval && total_time > 0) {
261         pixels_processed.store(0);
262         ms_elapsed.store(0);
263         INFO("ASTC CPU decompression: %.2f Mpix in %.2f seconds (%.2f Mpix/s). Total mem: %.2f MB",
264              total_pixels / 1'000'000.0, total_time / 1000.0,
265              (float)total_pixels / total_time / 1000.0, bytes_used / 1000000.0);
266     }
267 }
268 
on_vkCmdCopyBufferToImage(VkCommandBuffer commandBuffer,uint8_t * srcAstcData,size_t astcDataSize,VkImage dstImage,VkImageLayout dstImageLayout,uint32_t regionCount,const VkBufferImageCopy * pRegions,const VkDecoderContext & context)269 void AstcTexture::on_vkCmdCopyBufferToImage(VkCommandBuffer commandBuffer, uint8_t* srcAstcData,
270                                 size_t astcDataSize, VkImage dstImage,
271                                 VkImageLayout dstImageLayout, uint32_t regionCount,
272                                 const VkBufferImageCopy* pRegions,
273                                 const VkDecoderContext& context) {
274     on_vkCmdCopyBufferToImageImpl(commandBuffer, srcAstcData, astcDataSize, dstImage, dstImageLayout, regionCount, pRegions, context);
275 }
276 
on_vkCmdCopyBufferToImage2(VkCommandBuffer commandBuffer,uint8_t * srcAstcData,size_t astcDataSize,const VkCopyBufferToImageInfo2 * pCopyBufferToImageInfo,const VkDecoderContext & context)277 void AstcTexture::on_vkCmdCopyBufferToImage2(VkCommandBuffer commandBuffer, uint8_t* srcAstcData,
278                                 size_t astcDataSize, const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo,
279                                 const VkDecoderContext& context) {
280     on_vkCmdCopyBufferToImageImpl(commandBuffer,
281                                   srcAstcData,
282                                   astcDataSize,
283                                   pCopyBufferToImageInfo->dstImage,
284                                   pCopyBufferToImageInfo->dstImageLayout,
285                                   pCopyBufferToImageInfo->regionCount,
286                                   pCopyBufferToImageInfo->pRegions,
287                                   context);
288 }
289 
290 }  // namespace vk
291 }  // namespace gfxstream
292