1 // Copyright 2022 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <array>
16 #include <future>
17 #include <unordered_map>
18 
19 #include "AstcCpuDecompressor.h"
20 #include "astcenc.h"
21 
22 namespace gfxstream {
23 namespace vk {
24 namespace {
25 
// Number of worker threads that cooperate on a single decompress() call. The same value is passed
// to astcenc_context_alloc() as the thread count of every context created in this file.
constexpr uint32_t kNumThreads = 2;

// Swizzle handed to astcenc_decompress_image(). Channels are listed in R, G, B, A order.
const astcenc_swizzle kSwizzle = {ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A};
29 
30 // Used by std::unique_ptr to release the context when the pointer is destroyed
31 struct AstcencContextDeleter {
operator ()gfxstream::vk::__anon5855e3120111::AstcencContextDeleter32     void operator()(astcenc_context* c) { astcenc_context_free(c); }
33 };
34 
35 using AstcencContextUniquePtr = std::unique_ptr<astcenc_context, AstcencContextDeleter>;
36 
37 // Creates a new astcenc_context and wraps it in a smart pointer.
38 // It is not needed to call astcenc_context_free() on the returned pointer.
39 // blockWith, blockSize: ASTC block size for the context
40 // Error: (output param) Where to put the error status. Must not be null.
41 // Returns nullptr in case of error.
makeDecoderContext(uint32_t blockWidth,uint32_t blockHeight,astcenc_error * error)42 AstcencContextUniquePtr makeDecoderContext(uint32_t blockWidth, uint32_t blockHeight,
43                                            astcenc_error* error) {
44     astcenc_config config = {};
45     *error =
46         // TODO(gregschlom): Do we need to pass ASTCENC_PRF_LDR_SRGB here?
47         astcenc_config_init(ASTCENC_PRF_LDR, blockWidth, blockHeight, 1, ASTCENC_PRE_FASTEST,
48                             ASTCENC_FLG_DECOMPRESS_ONLY, &config);
49     if (*error != ASTCENC_SUCCESS) {
50         return nullptr;
51     }
52 
53     astcenc_context* context;
54     *error = astcenc_context_alloc(&config, kNumThreads, &context);
55     if (*error != ASTCENC_SUCCESS) {
56         return nullptr;
57     }
58     return AstcencContextUniquePtr(context);
59 }
60 
61 
#if !defined(__clang__) && defined(_MSC_VER)
// AVX2 support detection for Visual Studio
#include <intrin.h>
bool cpuSupportsAvx2() {
    int regs[4];  // EAX, EBX, ECX, EDX
    __cpuid(regs, 0);
    // Leaf 7 can only be queried when leaf 0 reports it as supported.
    if (regs[0] < 7) {
        return false;
    }
    __cpuidex(regs, 7, 0);
    const int avx2Bit = 1 << 5;  // AVX2 = Bank 7, EBX, bit 5
    return (regs[1] & avx2Bit) != 0;
}
#elif defined(__aarch64__)
// aarch64 builds: AVX2 is never reported as available.
bool cpuSupportsAvx2() { return false; }
#else
// AVX2 support detection for GCC and Clang
#include <cpuid.h>
bool cpuSupportsAvx2() {
    unsigned int eax = 0;
    unsigned int ebx = 0;
    unsigned int ecx = 0;
    unsigned int edx = 0;
    // __get_cpuid_count() returns 0 when the requested leaf is not available.
    if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    const unsigned int avx2Bit = 1u << 5;  // AVX2 = Bank 7, EBX, bit 5
    return (ebx & avx2Bit) != 0;
}
#endif
92 
93 // Returns whether the ASTC decoder can be used on this machine. It might not be available if the
94 // CPU doesn't support AVX2 instructions for example. Since this call is a bit expensive and never
95 // changes, the result should be cached.
isAstcDecoderAvailable()96 bool isAstcDecoderAvailable() {
97     if (!cpuSupportsAvx2()) return false;
98     astcenc_error error;
99     // Try getting an arbitrary context. If it works, the decoder is available.
100     auto context = makeDecoderContext(5, 5, &error);
101     return context != nullptr;
102 }
103 
104 // Caches and manages astcenc_context objects.
105 //
106 // Each context is fairly large (around 30 MB) and takes a while to construct, so it's important to
107 // reuse them as much as possible.
108 //
109 // While context objects can be reused across multiple threads, they must be used sequentially. To
110 // avoid having to lock and manage access between threads, we keep one cache per thread. This avoids
111 // any concurrency issues, at the cost of extra memory.
112 //
113 // Currently, there is no eviction strategy. Each cache could grow to a maximum of ~400 MB in size
114 // since they are 13 possible ASTC block sizes.
115 //
116 // Thread-safety: not thread safe.
117 class AstcDecoderContextCache {
118    public:
119     // Returns a context object for a given ASTC block size, along with the error code if the
120     // context initialization failed.
121     // In this case, the context will be null, and the status code will be non-zero.
get(uint32_t blockWidth,uint32_t blockHeight)122     std::pair<astcenc_context*, astcenc_error> get(uint32_t blockWidth, uint32_t blockHeight) {
123         Value& value = mContexts[{blockWidth, blockHeight}];
124         if (value.context == nullptr) {
125             value.context = makeDecoderContext(blockWidth, blockHeight, &value.error);
126         }
127         return {value.context.get(), value.error};
128     }
129 
130    private:
131     // Holds the data we use as the cache key
132     struct Key {
133         uint32_t blockWidth;
134         uint32_t blockHeight;
135 
operator ==gfxstream::vk::__anon5855e3120111::AstcDecoderContextCache::Key136         bool operator==(const Key& other) const {
137             return blockWidth == other.blockWidth && blockHeight == other.blockHeight;
138         }
139     };
140 
141     struct Value {
142         AstcencContextUniquePtr context = nullptr;
143         astcenc_error error = ASTCENC_SUCCESS;
144     };
145 
146     // Computes the hash of a Key
147     struct KeyHash {
operator ()gfxstream::vk::__anon5855e3120111::AstcDecoderContextCache::KeyHash148         std::size_t operator()(const Key& k) const {
149             // blockWidth and blockHeight are < 256 (actually, < 16), so this is safe
150             return k.blockWidth << 8 | k.blockHeight;
151         }
152     };
153 
154     std::unordered_map<Key, Value, KeyHash> mContexts;
155 };
156 
157 // Thread-safety: all public methods are thread-safe
158 class WorkerThread {
159    public:
WorkerThread()160     explicit WorkerThread() : mThread(&WorkerThread::main, this) {}
161 
162     // Terminates the thread. Call wait() to wait until the thread fully exits.
terminate()163     void terminate() {
164         std::lock_guard lock(mWorkerMutex);
165         mTerminated = true;
166         mWorkerCondition.notify_one();
167     }
168 
169     // Blocks until the thread exits.
wait()170     void wait() { mThread.join(); }
171 
decompress(astcenc_context * context,uint32_t threadIndex,const uint8_t * data,size_t dataLength,astcenc_image * image)172     std::future<astcenc_error> decompress(astcenc_context* context, uint32_t threadIndex,
173                                           const uint8_t* data, size_t dataLength,
174                                           astcenc_image* image) {
175         std::lock_guard lock(mWorkerMutex);
176         mTask = std::packaged_task<astcenc_error()>{[=] {
177             return astcenc_decompress_image(context, data, dataLength, image, &kSwizzle,
178                                             threadIndex);
179         }};
180         mWorkerCondition.notify_one();
181         return mTask.get_future();
182     }
183 
184    private:
185     // Thread's main loop
main()186     void main() {
187         while (true) {
188             std::packaged_task<astcenc_error()> task;
189             {
190                 std::unique_lock lock(mWorkerMutex);
191                 mWorkerCondition.wait(lock, [this] { return mTask.valid() || mTerminated; });
192                 if (mTerminated) return;
193                 task = std::move(mTask);
194             }
195             task();
196         }
197     }
198 
199     bool mTerminated = false;
200     std::condition_variable mWorkerCondition = {};  // Signals availability of work
201     std::mutex mWorkerMutex = {};                   // Mutex used with mWorkerCondition.
202     std::packaged_task<astcenc_error()> mTask = {};
203     std::thread mThread = {};
204 };
205 
206 // Performs ASTC decompression of an image on the CPU
207 class AstcCpuDecompressorImpl : public AstcCpuDecompressor {
208    public:
AstcCpuDecompressorImpl()209     AstcCpuDecompressorImpl()
210         : AstcCpuDecompressor(), mContextCache(std::make_unique<AstcDecoderContextCache>()) {}
211 
~AstcCpuDecompressorImpl()212     ~AstcCpuDecompressorImpl() override {
213         // Stop the worker threads, otherwise the process would hang upon exit.
214         std::lock_guard global_lock(mMutex);
215         for (auto& worker : mWorkerThreads) {
216             worker.terminate();
217             worker.wait();
218         }
219     }
220 
available() const221     bool available() const override {
222         static bool available = isAstcDecoderAvailable();
223         return available;
224     }
225 
decompress(const uint32_t imgWidth,const uint32_t imgHeight,const uint32_t blockWidth,const uint32_t blockHeight,const uint8_t * astcData,size_t astcDataLength,uint8_t * output)226     int32_t decompress(const uint32_t imgWidth, const uint32_t imgHeight, const uint32_t blockWidth,
227                        const uint32_t blockHeight, const uint8_t* astcData, size_t astcDataLength,
228                        uint8_t* output) override {
229         std::array<std::future<astcenc_error>, kNumThreads> futures;
230 
231         std::lock_guard global_lock(mMutex);
232 
233         auto [context, context_status] = mContextCache->get(blockWidth, blockHeight);
234         if (context_status != ASTCENC_SUCCESS) return context_status;
235 
236         astcenc_image image = {
237             .dim_x = imgWidth,
238             .dim_y = imgHeight,
239             .dim_z = 1,
240             .data_type = ASTCENC_TYPE_U8,
241             .data = reinterpret_cast<void**>(&output),
242         };
243 
244         for (uint32_t i = 0; i < kNumThreads; ++i) {
245             futures[i] = mWorkerThreads[i].decompress(context, i, astcData, astcDataLength, &image);
246         }
247 
248         astcenc_error result = ASTCENC_SUCCESS;
249 
250         // Wait for all threads to be done
251         for (auto& future : futures) {
252             astcenc_error status = future.get();
253             if (status != ASTCENC_SUCCESS) {
254                 result = status;
255             }
256         }
257 
258         astcenc_decompress_reset(context);
259 
260         return result;
261     }
262 
getStatusString(int32_t statusCode) const263     const char* getStatusString(int32_t statusCode) const override {
264         const char* msg = astcenc_get_error_string((astcenc_error)statusCode);
265         return msg ? msg : "ASTCENC_UNKNOWN_STATUS";
266     }
267 
268    private:
269     std::unique_ptr<AstcDecoderContextCache> mContextCache;
270     std::mutex mMutex;  // Locked while calling `decompress()`
271     std::array<WorkerThread, kNumThreads> mWorkerThreads;
272 };
273 
274 }  // namespace
275 
get()276 AstcCpuDecompressor& AstcCpuDecompressor::get() {
277     static AstcCpuDecompressorImpl instance;
278     return instance;
279 }
280 
281 }  // namespace vk
282 }  // namespace gfxstream
283