1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2020-2021 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Platform-specific function implementations.
20  *
21  * This module contains functions for querying the host extended ISA support.
22  */
23 
24 // Include before the defines below to pick up any auto-setup based on compiler
25 // built-in config, if not being set explicitly by the build system
26 #include "astcenc_internal.h"
27 
28 #if (ASTCENC_SSE > 0)    || (ASTCENC_AVX > 0) || \
29     (ASTCENC_POPCNT > 0) || (ASTCENC_F16C > 0)
30 
/** Has ISA detection run? Set to true by detect_cpu_isa() after the flags below are written. */
static bool g_init { false };

/** Does this CPU support SSE 4.1? Defaults to false until detect_cpu_isa() has run. */
static bool g_cpu_has_sse41 { false };

/** Does this CPU support AVX2? Defaults to false until detect_cpu_isa() has run. */
static bool g_cpu_has_avx2 { false };

/** Does this CPU support POPCNT? Defaults to false until detect_cpu_isa() has run. */
static bool g_cpu_has_popcnt { false };

/** Does this CPU support F16C? Defaults to false until detect_cpu_isa() has run. */
static bool g_cpu_has_f16c { false };
44 
45 /* ============================================================================
46    Platform code for Visual Studio
47 ============================================================================ */
48 #if !defined(__clang__) && defined(_MSC_VER)
49 #define WIN32_LEAN_AND_MEAN
50 #include <Windows.h>
51 #include <intrin.h>
52 
53 /**
54  * @brief Detect platform CPU ISA support and update global trackers.
55  */
detect_cpu_isa()56 static void detect_cpu_isa()
57 {
58 	int data[4];
59 
60 	__cpuid(data, 0);
61 	int num_id = data[0];
62 
63 	if (num_id >= 1)
64 	{
65 		__cpuidex(data, 1, 0);
66 		// SSE41 = Bank 1, ECX, bit 19
67 		g_cpu_has_sse41 = data[2] & (1 << 19) ? true : false;
68 		// POPCNT = Bank 1, ECX, bit 23
69 		g_cpu_has_popcnt = data[2] & (1 << 23) ? true : false;
70 		// F16C = Bank 1, ECX, bit 29
71 		g_cpu_has_f16c = data[2] & (1 << 29) ? true : false;
72 	}
73 
74 	if (num_id >= 7)
75 	{
76 		__cpuidex(data, 7, 0);
77 		// AVX2 = Bank 7, EBX, bit 5
78 		g_cpu_has_avx2 = data[1] & (1 << 5) ? true : false;
79 	}
80 
81 	// Ensure state bits are updated before init flag is updated
82 	MemoryBarrier();
83 	g_init = true;
84 }
85 
86 /* ============================================================================
87    Platform code for GCC and Clang
88 ============================================================================ */
89 #else
90 #include <cpuid.h>
91 
92 /**
93  * @brief Detect platform CPU ISA support and update global trackers.
94  */
detect_cpu_isa()95 static void detect_cpu_isa()
96 {
97 	unsigned int data[4];
98 
99 	if (__get_cpuid_count(1, 0, &data[0], &data[1], &data[2], &data[3]))
100 	{
101 		// SSE41 = Bank 1, ECX, bit 19
102 		g_cpu_has_sse41 = data[2] & (1 << 19) ? true : false;
103 		// POPCNT = Bank 1, ECX, bit 23
104 		g_cpu_has_popcnt = data[2] & (1 << 23) ? true : false;
105 		// F16C = Bank 1, ECX, bit 29
106 		g_cpu_has_f16c = data[2] & (1 << 29) ? true : false;
107 	}
108 
109 	g_cpu_has_avx2 = 0;
110 	if (__get_cpuid_count(7, 0, &data[0], &data[1], &data[2], &data[3]))
111 	{
112 		// AVX2 = Bank 7, EBX, bit 5
113 		g_cpu_has_avx2 = data[1] & (1 << 5) ? true : false;
114 	}
115 
116 	// Ensure state bits are updated before init flag is updated
117 	__sync_synchronize();
118 	g_init = true;
119 }
120 #endif
121 
122 /* See header for documentation. */
cpu_supports_popcnt()123 bool cpu_supports_popcnt()
124 {
125 	if (!g_init)
126 	{
127 		detect_cpu_isa();
128 	}
129 
130 	return g_cpu_has_popcnt;
131 }
132 
133 /* See header for documentation. */
cpu_supports_f16c()134 bool cpu_supports_f16c()
135 {
136 	if (!g_init)
137 	{
138 		detect_cpu_isa();
139 	}
140 
141 	return g_cpu_has_f16c;
142 }
143 
144 /* See header for documentation. */
cpu_supports_sse41()145 bool cpu_supports_sse41()
146 {
147 	if (!g_init)
148 	{
149 		detect_cpu_isa();
150 	}
151 
152 	return g_cpu_has_sse41;
153 }
154 
155 /* See header for documentation. */
cpu_supports_avx2()156 bool cpu_supports_avx2()
157 {
158 	if (!g_init)
159 	{
160 		detect_cpu_isa();
161 	}
162 
163 	return g_cpu_has_avx2;
164 }
165 
166 #endif
167