1 // Copyright 2022 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <array>
16 #include <future>
17 #include <unordered_map>
18
19 #include "AstcCpuDecompressor.h"
20 #include "astcenc.h"
21
22 namespace gfxstream {
23 namespace vk {
24 namespace {
25
// Number of worker threads that cooperate on decoding one image. Every astcenc context is
// allocated for this many threads as well.
constexpr uint32_t kNumThreads{2};
27
28 const astcenc_swizzle kSwizzle = {ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A};
29
30 // Used by std::unique_ptr to release the context when the pointer is destroyed
31 struct AstcencContextDeleter {
operator ()gfxstream::vk::__anon5855e3120111::AstcencContextDeleter32 void operator()(astcenc_context* c) { astcenc_context_free(c); }
33 };
34
35 using AstcencContextUniquePtr = std::unique_ptr<astcenc_context, AstcencContextDeleter>;
36
37 // Creates a new astcenc_context and wraps it in a smart pointer.
38 // It is not needed to call astcenc_context_free() on the returned pointer.
39 // blockWith, blockSize: ASTC block size for the context
40 // Error: (output param) Where to put the error status. Must not be null.
41 // Returns nullptr in case of error.
makeDecoderContext(uint32_t blockWidth,uint32_t blockHeight,astcenc_error * error)42 AstcencContextUniquePtr makeDecoderContext(uint32_t blockWidth, uint32_t blockHeight,
43 astcenc_error* error) {
44 astcenc_config config = {};
45 *error =
46 // TODO(gregschlom): Do we need to pass ASTCENC_PRF_LDR_SRGB here?
47 astcenc_config_init(ASTCENC_PRF_LDR, blockWidth, blockHeight, 1, ASTCENC_PRE_FASTEST,
48 ASTCENC_FLG_DECOMPRESS_ONLY, &config);
49 if (*error != ASTCENC_SUCCESS) {
50 return nullptr;
51 }
52
53 astcenc_context* context;
54 *error = astcenc_context_alloc(&config, kNumThreads, &context);
55 if (*error != ASTCENC_SUCCESS) {
56 return nullptr;
57 }
58 return AstcencContextUniquePtr(context);
59 }
60
61
// Reports whether the CPU advertises AVX2 support (CPUID leaf 7, sub-leaf 0, EBX bit 5).
// One implementation is selected per toolchain / architecture. Targets that have no x86 CPUID
// path (aarch64 and any other non-x86 architecture) always report false.
// NOTE(review): the headers below are included from inside the enclosing namespaces; consider
// hoisting them to the top of the file.
#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_X64) || defined(_M_IX86))
// AVX2 support detection for Visual Studio (x86 / x64 targets only)
#include <intrin.h>
bool cpuSupportsAvx2() {
    int data[4];
    __cpuid(data, 0);
    // Leaf 0 reports the highest supported leaf in EAX; only query leaf 7 if it exists.
    if (data[0] >= 7) {
        __cpuidex(data, 7, 0);
        return (data[1] & (1 << 5)) != 0;  // AVX2 = Bank 7, EBX, bit 5
    }
    return false;
}
#elif defined(__x86_64__) || defined(__i386__)
// AVX2 support detection for GCC and Clang (x86 / x64 targets only)
#include <cpuid.h>
bool cpuSupportsAvx2() {
    unsigned int data[4];
    // __get_cpuid_count() returns 0 when the requested leaf is not supported.
    if (__get_cpuid_count(7, 0, &data[0], &data[1], &data[2], &data[3])) {
        return (data[1] & (1u << 5)) != 0;  // AVX2 = Bank 7, EBX, bit 5
    }
    return false;
}
#else
// Not an x86 target: there is no CPUID instruction and no AVX2.
bool cpuSupportsAvx2() {
    return false;
}
#endif
92
93 // Returns whether the ASTC decoder can be used on this machine. It might not be available if the
94 // CPU doesn't support AVX2 instructions for example. Since this call is a bit expensive and never
95 // changes, the result should be cached.
isAstcDecoderAvailable()96 bool isAstcDecoderAvailable() {
97 if (!cpuSupportsAvx2()) return false;
98 astcenc_error error;
99 // Try getting an arbitrary context. If it works, the decoder is available.
100 auto context = makeDecoderContext(5, 5, &error);
101 return context != nullptr;
102 }
103
104 // Caches and manages astcenc_context objects.
105 //
106 // Each context is fairly large (around 30 MB) and takes a while to construct, so it's important to
107 // reuse them as much as possible.
108 //
109 // While context objects can be reused across multiple threads, they must be used sequentially. To
110 // avoid having to lock and manage access between threads, we keep one cache per thread. This avoids
111 // any concurrency issues, at the cost of extra memory.
112 //
113 // Currently, there is no eviction strategy. Each cache could grow to a maximum of ~400 MB in size
114 // since they are 13 possible ASTC block sizes.
115 //
116 // Thread-safety: not thread safe.
117 class AstcDecoderContextCache {
118 public:
119 // Returns a context object for a given ASTC block size, along with the error code if the
120 // context initialization failed.
121 // In this case, the context will be null, and the status code will be non-zero.
get(uint32_t blockWidth,uint32_t blockHeight)122 std::pair<astcenc_context*, astcenc_error> get(uint32_t blockWidth, uint32_t blockHeight) {
123 Value& value = mContexts[{blockWidth, blockHeight}];
124 if (value.context == nullptr) {
125 value.context = makeDecoderContext(blockWidth, blockHeight, &value.error);
126 }
127 return {value.context.get(), value.error};
128 }
129
130 private:
131 // Holds the data we use as the cache key
132 struct Key {
133 uint32_t blockWidth;
134 uint32_t blockHeight;
135
operator ==gfxstream::vk::__anon5855e3120111::AstcDecoderContextCache::Key136 bool operator==(const Key& other) const {
137 return blockWidth == other.blockWidth && blockHeight == other.blockHeight;
138 }
139 };
140
141 struct Value {
142 AstcencContextUniquePtr context = nullptr;
143 astcenc_error error = ASTCENC_SUCCESS;
144 };
145
146 // Computes the hash of a Key
147 struct KeyHash {
operator ()gfxstream::vk::__anon5855e3120111::AstcDecoderContextCache::KeyHash148 std::size_t operator()(const Key& k) const {
149 // blockWidth and blockHeight are < 256 (actually, < 16), so this is safe
150 return k.blockWidth << 8 | k.blockHeight;
151 }
152 };
153
154 std::unordered_map<Key, Value, KeyHash> mContexts;
155 };
156
157 // Thread-safety: all public methods are thread-safe
158 class WorkerThread {
159 public:
WorkerThread()160 explicit WorkerThread() : mThread(&WorkerThread::main, this) {}
161
162 // Terminates the thread. Call wait() to wait until the thread fully exits.
terminate()163 void terminate() {
164 std::lock_guard lock(mWorkerMutex);
165 mTerminated = true;
166 mWorkerCondition.notify_one();
167 }
168
169 // Blocks until the thread exits.
wait()170 void wait() { mThread.join(); }
171
decompress(astcenc_context * context,uint32_t threadIndex,const uint8_t * data,size_t dataLength,astcenc_image * image)172 std::future<astcenc_error> decompress(astcenc_context* context, uint32_t threadIndex,
173 const uint8_t* data, size_t dataLength,
174 astcenc_image* image) {
175 std::lock_guard lock(mWorkerMutex);
176 mTask = std::packaged_task<astcenc_error()>{[=] {
177 return astcenc_decompress_image(context, data, dataLength, image, &kSwizzle,
178 threadIndex);
179 }};
180 mWorkerCondition.notify_one();
181 return mTask.get_future();
182 }
183
184 private:
185 // Thread's main loop
main()186 void main() {
187 while (true) {
188 std::packaged_task<astcenc_error()> task;
189 {
190 std::unique_lock lock(mWorkerMutex);
191 mWorkerCondition.wait(lock, [this] { return mTask.valid() || mTerminated; });
192 if (mTerminated) return;
193 task = std::move(mTask);
194 }
195 task();
196 }
197 }
198
199 bool mTerminated = false;
200 std::condition_variable mWorkerCondition = {}; // Signals availability of work
201 std::mutex mWorkerMutex = {}; // Mutex used with mWorkerCondition.
202 std::packaged_task<astcenc_error()> mTask = {};
203 std::thread mThread = {};
204 };
205
206 // Performs ASTC decompression of an image on the CPU
207 class AstcCpuDecompressorImpl : public AstcCpuDecompressor {
208 public:
AstcCpuDecompressorImpl()209 AstcCpuDecompressorImpl()
210 : AstcCpuDecompressor(), mContextCache(std::make_unique<AstcDecoderContextCache>()) {}
211
~AstcCpuDecompressorImpl()212 ~AstcCpuDecompressorImpl() override {
213 // Stop the worker threads, otherwise the process would hang upon exit.
214 std::lock_guard global_lock(mMutex);
215 for (auto& worker : mWorkerThreads) {
216 worker.terminate();
217 worker.wait();
218 }
219 }
220
available() const221 bool available() const override {
222 static bool available = isAstcDecoderAvailable();
223 return available;
224 }
225
decompress(const uint32_t imgWidth,const uint32_t imgHeight,const uint32_t blockWidth,const uint32_t blockHeight,const uint8_t * astcData,size_t astcDataLength,uint8_t * output)226 int32_t decompress(const uint32_t imgWidth, const uint32_t imgHeight, const uint32_t blockWidth,
227 const uint32_t blockHeight, const uint8_t* astcData, size_t astcDataLength,
228 uint8_t* output) override {
229 std::array<std::future<astcenc_error>, kNumThreads> futures;
230
231 std::lock_guard global_lock(mMutex);
232
233 auto [context, context_status] = mContextCache->get(blockWidth, blockHeight);
234 if (context_status != ASTCENC_SUCCESS) return context_status;
235
236 astcenc_image image = {
237 .dim_x = imgWidth,
238 .dim_y = imgHeight,
239 .dim_z = 1,
240 .data_type = ASTCENC_TYPE_U8,
241 .data = reinterpret_cast<void**>(&output),
242 };
243
244 for (uint32_t i = 0; i < kNumThreads; ++i) {
245 futures[i] = mWorkerThreads[i].decompress(context, i, astcData, astcDataLength, &image);
246 }
247
248 astcenc_error result = ASTCENC_SUCCESS;
249
250 // Wait for all threads to be done
251 for (auto& future : futures) {
252 astcenc_error status = future.get();
253 if (status != ASTCENC_SUCCESS) {
254 result = status;
255 }
256 }
257
258 astcenc_decompress_reset(context);
259
260 return result;
261 }
262
getStatusString(int32_t statusCode) const263 const char* getStatusString(int32_t statusCode) const override {
264 const char* msg = astcenc_get_error_string((astcenc_error)statusCode);
265 return msg ? msg : "ASTCENC_UNKNOWN_STATUS";
266 }
267
268 private:
269 std::unique_ptr<AstcDecoderContextCache> mContextCache;
270 std::mutex mMutex; // Locked while calling `decompress()`
271 std::array<WorkerThread, kNumThreads> mWorkerThreads;
272 };
273
274 } // namespace
275
get()276 AstcCpuDecompressor& AstcCpuDecompressor::get() {
277 static AstcCpuDecompressorImpl instance;
278 return instance;
279 }
280
281 } // namespace vk
282 } // namespace gfxstream
283