1 // Copyright 2022 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "AstcTexture.h"
16
17 #include <atomic>
18 #include <chrono>
19 #include <cstring>
20 #include <optional>
21 #include <vector>
22
23 #include "aemu/base/HealthMonitor.h"
24 #include "host-common/logging.h"
25 #include "vulkan/vk_util.h"
26
27 namespace gfxstream {
28 namespace vk {
29 namespace {
30
// Unit used when accumulating decompression time.
using milliseconds = std::chrono::milliseconds;

// A stats line is logged once at least this many pixels have been decompressed.
constexpr uint64_t kProcessedPixelsLogInterval = 10'000'000;

// Running statistics shared by every AstcTexture instance.
std::atomic<uint64_t> pixels_processed{0};  // pixels decompressed since the last stats log
std::atomic<uint64_t> ms_elapsed{0};        // milliseconds spent since the last stats log
std::atomic<int64_t> bytes_used{0};         // bytes currently accounted to decompression buffers
39
// Returns the extent of one image dimension at the given mip level.
//
// The base `size` is halved once per level (rounding down) and never drops below 1.
// Shifting a 32-bit value by 32 or more bits is undefined behaviour, so any such
// `mipLevel` is clamped to the smallest possible extent instead of being shifted.
uint32_t mipmapSize(uint32_t size, uint32_t mipLevel) {
    if (mipLevel >= 32) {
        return 1;
    }
    const uint32_t scaled = size >> mipLevel;
    return scaled != 0 ? scaled : 1;
}
43
isRegionValid(const VkBufferImageCopy & region,uint32_t width,uint32_t height)44 bool isRegionValid(const VkBufferImageCopy& region, uint32_t width, uint32_t height) {
45 // TODO(gregschlom) deal with those cases. See details at:
46 // https://registry.khronos.org/vulkan/specs/1.0-extensions/html/chap20.html#copies-buffers-images-addressing
47 // https://stackoverflow.com/questions/46501832/vulkan-vkbufferimagecopy-for-partial-transfer
48
49 if (region.bufferRowLength != 0 || region.bufferImageHeight != 0) {
50 WARN("ASTC CPU decompression skipped: non-packed buffer");
51 return false;
52 }
53 if (region.imageOffset.x != 0 || region.imageOffset.y != 0) {
54 WARN("ASTC CPU decompression skipped: imageOffset is non-zero");
55 return false;
56 }
57 if (region.imageExtent.width != width || region.imageExtent.height != height) {
58 WARN("ASTC CPU decompression skipped: imageExtent is less than the entire image");
59 return false;
60 }
61 return true;
62 }
63
64 } // namespace
65
AstcTexture(VulkanDispatch * vk,VkDevice device,VkPhysicalDevice physicalDevice,VkExtent3D imgSize,uint32_t blockWidth,uint32_t blockHeight,AstcCpuDecompressor * decompressor)66 AstcTexture::AstcTexture(VulkanDispatch* vk, VkDevice device, VkPhysicalDevice physicalDevice,
67 VkExtent3D imgSize, uint32_t blockWidth, uint32_t blockHeight,
68 AstcCpuDecompressor* decompressor)
69 : mVk(vk),
70 mDevice(device),
71 mPhysicalDevice(physicalDevice),
72 mImgSize(imgSize),
73 mBlockWidth(blockWidth),
74 mBlockHeight(blockHeight),
75 mDecompressor(decompressor) {}
76
~AstcTexture()77 AstcTexture::~AstcTexture() { destroyVkBuffer(); }
78
canDecompressOnCpu() const79 bool AstcTexture::canDecompressOnCpu() const { return mDecompressor->available(); }
80
createVkBufferAndMapMemory(size_t bufferSize)81 uint8_t* AstcTexture::createVkBufferAndMapMemory(size_t bufferSize) {
82 VkResult res;
83 mBufferSize = bufferSize; // Save the buffer size, for statistics purpose only
84 bytes_used += bufferSize;
85
86 if (mDecompBuffer || mDecompBufferMemory) {
87 WARN("ASTC CPU decompression failed: tried to decompress same image more than once.");
88 return nullptr;
89 }
90
91 VkBufferCreateInfo bufferInfo = {
92 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
93 .size = bufferSize,
94 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
95 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
96 };
97 res = mVk->vkCreateBuffer(mDevice, &bufferInfo, nullptr, &mDecompBuffer);
98 if (res != VK_SUCCESS) {
99 WARN("ASTC CPU decompression: vkCreateBuffer failed: %d", res);
100 mDecompBuffer = VK_NULL_HANDLE;
101 return nullptr;
102 }
103
104 VkMemoryRequirements memRequirements;
105 mVk->vkGetBufferMemoryRequirements(mDevice, mDecompBuffer, &memRequirements);
106
107 std::optional<uint32_t> memIndex = vk_util::findMemoryType(
108 mVk, mPhysicalDevice, memRequirements.memoryTypeBits,
109 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
110 VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
111 if (!memIndex) {
112 // Do it again, but without VK_MEMORY_PROPERTY_HOST_CACHED_BIT this time
113 memIndex = vk_util::findMemoryType(
114 mVk, mPhysicalDevice, memRequirements.memoryTypeBits,
115 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
116 }
117 if (!memIndex) {
118 WARN("ASTC CPU decompression: no suitable memory type to decompress the image");
119 return nullptr;
120 }
121
122 VkMemoryAllocateInfo allocInfo = {
123 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
124 .allocationSize = memRequirements.size,
125 .memoryTypeIndex = *memIndex,
126 };
127 res = mVk->vkAllocateMemory(mDevice, &allocInfo, nullptr, &mDecompBufferMemory);
128 if (res != VK_SUCCESS) {
129 WARN("ASTC CPU decompression: vkAllocateMemory failed: %d", res);
130 mDecompBufferMemory = VK_NULL_HANDLE;
131 return nullptr;
132 }
133
134 res = mVk->vkBindBufferMemory(mDevice, mDecompBuffer, mDecompBufferMemory, 0);
135 if (res != VK_SUCCESS) {
136 WARN("ASTC CPU decompression: vkBindBufferMemory failed: %d", res);
137 return nullptr;
138 }
139
140 uint8_t* decompData;
141 res = mVk->vkMapMemory(mDevice, mDecompBufferMemory, 0, bufferSize, 0, (void**)&decompData);
142 if (res != VK_SUCCESS) {
143 WARN("ASTC CPU decompression: vkMapMemory failed: %d", res);
144 return nullptr;
145 }
146
147 return decompData;
148 }
149
destroyVkBuffer()150 void AstcTexture::destroyVkBuffer() {
151 bytes_used -= mBufferSize;
152 if (mVk && mDevice) {
153 mVk->vkDestroyBuffer(mDevice, mDecompBuffer, nullptr);
154 mVk->vkFreeMemory(mDevice, mDecompBufferMemory, nullptr);
155 mDecompBuffer = VK_NULL_HANDLE;
156 mDecompBufferMemory = VK_NULL_HANDLE;
157 }
158 }
159
160 template<typename T>
on_vkCmdCopyBufferToImageImpl(VkCommandBuffer commandBuffer,uint8_t * srcAstcData,size_t astcDataSize,VkImage dstImage,VkImageLayout dstImageLayout,uint32_t regionCount,const T * pRegions,const VkDecoderContext & context)161 void AstcTexture::on_vkCmdCopyBufferToImageImpl(VkCommandBuffer commandBuffer, uint8_t* srcAstcData,
162 size_t astcDataSize, VkImage dstImage,
163 VkImageLayout dstImageLayout, uint32_t regionCount,
164 const T* pRegions,
165 const VkDecoderContext& context) {
166 auto watchdog =
167 WATCHDOG_BUILDER(context.healthMonitor, "AstcTexture::on_vkCmdCopyBufferToImageImpl").build();
168 auto start_time = std::chrono::steady_clock::now();
169 mSuccess = false;
170 size_t decompSize = 0; // How many bytes we need to hold the decompressed data
171
172 // Holds extra data about the region
173 struct RegionInfo {
174 uint32_t width; // actual width (ie: mipmap width)
175 uint32_t height; // actual height (ie: mipmap height)
176 uint32_t compressedSize; // size of ASTC data for that region
177 };
178
179 std::vector<RegionInfo> regionInfos;
180 regionInfos.reserve(regionCount);
181
182 // Make a copy of the regions and update the buffer offset of each to reflect the
183 // correct location of the decompressed data
184 std::vector<VkBufferImageCopy> decompRegions(regionCount);
185 for (size_t i = 0; i < regionCount; ++i) {
186 decompRegions[i] = VkBufferImageCopy {
187 pRegions[i].bufferOffset,
188 pRegions[i].bufferRowLength,
189 pRegions[i].bufferImageHeight,
190 pRegions[i].imageSubresource,
191 pRegions[i].imageOffset,
192 pRegions[i].imageExtent
193 };
194 }
195 for (auto& decompRegion : decompRegions) {
196 const uint32_t mipLevel = decompRegion.imageSubresource.mipLevel;
197 const uint32_t width = mipmapSize(mImgSize.width, mipLevel);
198 const uint32_t height = mipmapSize(mImgSize.height, mipLevel);
199 const uint32_t numAstcBlocks = ((width + mBlockWidth - 1) / mBlockWidth) *
200 ((height + mBlockHeight - 1) / mBlockHeight);
201 const uint32_t compressedSize = numAstcBlocks * 16;
202 // We haven't updated decompRegion.bufferOffset yet, so it's still the _compressed_ offset.
203 const uint32_t compressedDataOffset = decompRegion.bufferOffset;
204
205 // Do all the precondition checks
206 if (!isRegionValid(decompRegion, width, height)) return;
207 if (compressedDataOffset + compressedSize > astcDataSize) {
208 WARN("ASTC CPU decompression: data out of bounds. Offset: %llu, Size: %llu, Total %llu",
209 compressedDataOffset, compressedSize, astcDataSize);
210 return;
211 }
212
213 decompRegion.bufferOffset = decompSize;
214 decompSize += width * height * 4;
215 regionInfos.push_back({width, height, compressedSize});
216 }
217
218 // Create a new VkBuffer to hold the decompressed data
219 uint8_t* decompData = createVkBufferAndMapMemory(decompSize);
220 if (!decompData) {
221 destroyVkBuffer(); // The destructor would have done it anyway, but may as well do it early
222 return;
223 }
224
225 // Decompress each region
226 for (int i = 0; i < regionCount; i++) {
227 const auto& compRegion = pRegions[i];
228 const auto& decompRegion = decompRegions[i];
229 const auto& regionInfo = regionInfos[i];
230
231 int32_t status = mDecompressor->decompress(
232 regionInfo.width, regionInfo.height, mBlockWidth, mBlockHeight,
233 srcAstcData + compRegion.bufferOffset, regionInfo.compressedSize,
234 decompData + decompRegion.bufferOffset);
235
236 if (status != 0) {
237 WARN("ASTC CPU decompression failed: %s.", mDecompressor->getStatusString(status));
238 mVk->vkUnmapMemory(mDevice, mDecompBufferMemory);
239 destroyVkBuffer();
240 return;
241 }
242 }
243
244 mVk->vkUnmapMemory(mDevice, mDecompBufferMemory);
245
246 // Finally, actually copy the buffer to the image
247 mVk->vkCmdCopyBufferToImage(commandBuffer, mDecompBuffer, dstImage, dstImageLayout,
248 decompRegions.size(), decompRegions.data());
249
250 mSuccess = true;
251 auto end_time = std::chrono::steady_clock::now();
252
253 // Compute stats
254 pixels_processed += decompSize / 4;
255 ms_elapsed += std::chrono::duration_cast<milliseconds>(end_time - start_time).count();
256
257 uint64_t total_pixels = pixels_processed.load();
258 uint64_t total_time = ms_elapsed.load();
259
260 if (total_pixels >= kProcessedPixelsLogInterval && total_time > 0) {
261 pixels_processed.store(0);
262 ms_elapsed.store(0);
263 INFO("ASTC CPU decompression: %.2f Mpix in %.2f seconds (%.2f Mpix/s). Total mem: %.2f MB",
264 total_pixels / 1'000'000.0, total_time / 1000.0,
265 (float)total_pixels / total_time / 1000.0, bytes_used / 1000000.0);
266 }
267 }
268
on_vkCmdCopyBufferToImage(VkCommandBuffer commandBuffer,uint8_t * srcAstcData,size_t astcDataSize,VkImage dstImage,VkImageLayout dstImageLayout,uint32_t regionCount,const VkBufferImageCopy * pRegions,const VkDecoderContext & context)269 void AstcTexture::on_vkCmdCopyBufferToImage(VkCommandBuffer commandBuffer, uint8_t* srcAstcData,
270 size_t astcDataSize, VkImage dstImage,
271 VkImageLayout dstImageLayout, uint32_t regionCount,
272 const VkBufferImageCopy* pRegions,
273 const VkDecoderContext& context) {
274 on_vkCmdCopyBufferToImageImpl(commandBuffer, srcAstcData, astcDataSize, dstImage, dstImageLayout, regionCount, pRegions, context);
275 }
276
on_vkCmdCopyBufferToImage2(VkCommandBuffer commandBuffer,uint8_t * srcAstcData,size_t astcDataSize,const VkCopyBufferToImageInfo2 * pCopyBufferToImageInfo,const VkDecoderContext & context)277 void AstcTexture::on_vkCmdCopyBufferToImage2(VkCommandBuffer commandBuffer, uint8_t* srcAstcData,
278 size_t astcDataSize, const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo,
279 const VkDecoderContext& context) {
280 on_vkCmdCopyBufferToImageImpl(commandBuffer,
281 srcAstcData,
282 astcDataSize,
283 pCopyBufferToImageInfo->dstImage,
284 pCopyBufferToImageInfo->dstImageLayout,
285 pCopyBufferToImageInfo->regionCount,
286 pCopyBufferToImageInfo->pRegions,
287 context);
288 }
289
290 } // namespace vk
291 } // namespace gfxstream
292