1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package other;
18 
19 /**
20  * Tests for dot product idiom vectorization: byte case.
21  */
22 public class TestByte {
23 
24   public static final int ARRAY_SIZE = 1024;  // Presumably the length of the test input arrays; unused in the visible code -- TODO confirm.
25 
26   /// CHECK-START: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (before)
27   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
28   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
29   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
30   /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
31   /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
32   /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
33   /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Get2>>]                               loop:<<Loop>>      outer_loop:none
34   /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
35   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
36 
37   /// CHECK-START-ARM64: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (after)
38   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
39   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
40   /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
41   //
42   ///     CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>,{{j\d+}}]                             loop:none
43   ///     CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                                       loop:<<Loop:B\d+>> outer_loop:none
44   ///     CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                          loop:<<Loop>>      outer_loop:none
45   ///     CHECK-DAG: <<LoopP:j\d+>>   VecPredWhile [<<Phi1>>,{{i\d+}}]                                loop:<<Loop>>      outer_loop:none
46   ///     CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
47   ///     CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
48   ///     CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>,<<LoopP>>] type:Int8   loop:<<Loop>>      outer_loop:none
49   ///     CHECK-DAG:                  Add [<<Phi1>>,{{i\d+}}]                                         loop:<<Loop>>      outer_loop:none
50   //
51   ///     CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>,{{j\d+}}]                                   loop:none
52   ///     CHECK-DAG:                  VecExtractScalar [<<Reduce>>,{{j\d+}}]                          loop:none
53   //
54   /// CHECK-ELSE:
55   //
56   ///     CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
57   ///     CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
58   ///     CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
59   ///     CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
60   ///     CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
61   ///     CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
62   ///     CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8   loop:<<Loop>>      outer_loop:none
63   ///     CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
64   //
65   ///     CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
66   ///     CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
67   //
68   /// CHECK-FI:
69 
70 
71   /// CHECK-START-ARM64: int other.TestByte.testDotProdSimple(byte[], byte[]) disassembly (after)
72   /// CHECK:        VecDotProd
73   /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
74   ///               CHECK:        sdot z{{\d+}}.s, z{{\d+}}.b, z{{\d+}}.b
75   /// CHECK-ELIF:   hasIsaFeature("dotprod")
76   ///               CHECK-NEXT:   sdot v{{\d+}}.4s, v{{\d+}}.16b, v{{\d+}}.16b
77   /// CHECK-ELSE:
78   ///               CHECK-NOT:    sdot
79   ///               CHECK-NOT:    udot
80   /// CHECK-FI:
testDotProdSimple(byte[] a, byte[] b)81   public static final int testDotProdSimple(byte[] a, byte[] b) {
       // Signed byte dot product: sums a[i] * b[i] for every index i of b and returns the sum.
       // Each byte operand is promoted to int (sign-extended) before the multiply.
       // The loop bound is b.length only, so a must be at least as long as b or the
       // a[i] access throws ArrayIndexOutOfBoundsException.
       //
       // The accumulator starts at 1 and the 1 is subtracted on return, so the result is the
       // plain dot product. The checker directives preceding this method expect the reduction
       // Phi / VecSetScalars to be seeded with the constant 1, so keep the seed and the exact
       // statement shape unchanged.
82     int s = 1;
83     for (int i = 0; i < b.length; i++) {
84       int temp = a[i] * b[i];
85       s += temp;
86     }
       // Remove the initial seed of 1.
87     return s - 1;
88   }
89 
90   /// CHECK-START: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (before)
91   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
92   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
93   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
94   /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
95   /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
96   /// CHECK-DAG: <<AddC1:i\d+>>   Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
97   /// CHECK-DAG: <<TypeC1:b\d+>>  TypeConversion [<<AddC1>>]                            loop:<<Loop>>      outer_loop:none
98   /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
99   /// CHECK-DAG: <<AddC2:i\d+>>   Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
100   /// CHECK-DAG: <<TypeC2:b\d+>>  TypeConversion [<<AddC2>>]                            loop:<<Loop>>      outer_loop:none
101   /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none
102   /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
103   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
104 
105   /// CHECK-START-ARM64: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (after)
106   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
107   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
108   /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
109   //
110   ///     CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>,{{j\d+}}]                        loop:none
111   ///     CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>,{{j\d+}}]                             loop:none
112   ///     CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                                       loop:<<Loop:B\d+>> outer_loop:none
113   ///     CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                          loop:<<Loop>>      outer_loop:none
114   ///     CHECK-DAG: <<LoopP:j\d+>>   VecPredWhile [<<Phi1>>,{{i\d+}}]                                loop:<<Loop>>      outer_loop:none
115   ///     CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
116   ///     CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
117   ///     CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
118   ///     CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
119   ///     CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>,<<LoopP>>] type:Int8   loop:<<Loop>>      outer_loop:none
120   ///     CHECK-DAG:                  Add [<<Phi1>>,{{i\d+}}]                                         loop:<<Loop>>      outer_loop:none
121   //
122   ///     CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>,{{j\d+}}]                                   loop:none
123   ///     CHECK-DAG:                  VecExtractScalar [<<Reduce>>,{{j\d+}}]                          loop:none
124   //
125   /// CHECK-ELSE:
126   //
127   ///     CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
128   ///     CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none
129   ///     CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
130   ///     CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
131   ///     CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
132   ///     CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
133   ///     CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
134   ///     CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
135   ///     CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
136   ///     CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8   loop:<<Loop>>      outer_loop:none
137   ///     CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
138   //
139   ///     CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
140   ///     CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
141   //
142   /// CHECK-FI:
testDotProdComplex(byte[] a, byte[] b)143   public static final int testDotProdComplex(byte[] a, byte[] b) {
        // Signed byte dot product of (a[i] + 1) and (b[i] + 1) for every index i of b.
        // The loop bound is b.length only, so a must be at least as long as b.
        //
        // The accumulator starts at 1 and the 1 is subtracted on return; the checker directives
        // preceding this method expect the reduction to be seeded with the constant 1.
144     int s = 1;
145     for (int i = 0; i < b.length; i++) {
          // Each operand is incremented in int arithmetic and then narrowed back to byte,
          // so 127 + 1 wraps to -128 before the multiply.
146       int temp = ((byte)(a[i] + 1)) * ((byte)(b[i] + 1));
147       s += temp;
148     }
        // Remove the initial seed of 1.
149     return s - 1;
150   }
151 
152   /// CHECK-START: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (before)
153   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
154   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
155   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
156   /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
157   /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
158   /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
159   /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Get2>>]                               loop:<<Loop>>      outer_loop:none
160   /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
161   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
162 
163   /// CHECK-START-ARM64: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (after)
164   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
165   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
166   /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
167   //
168   ///     CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>,{{j\d+}}]                             loop:none
169   ///     CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                                       loop:<<Loop:B\d+>> outer_loop:none
170   ///     CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                          loop:<<Loop>>      outer_loop:none
171   ///     CHECK-DAG: <<LoopP:j\d+>>   VecPredWhile [<<Phi1>>,{{i\d+}}]                                loop:<<Loop>>      outer_loop:none
172   ///     CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
173   ///     CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
174   ///     CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>,<<LoopP>>] type:Uint8  loop:<<Loop>>      outer_loop:none
175   ///     CHECK-DAG:                  Add [<<Phi1>>,{{i\d+}}]                                         loop:<<Loop>>      outer_loop:none
176   //
177   ///     CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>,{{j\d+}}]                                   loop:none
178   ///     CHECK-DAG:                  VecExtractScalar [<<Reduce>>,{{j\d+}}]                          loop:none
179   //
180   /// CHECK-ELSE:
181   //
182   ///     CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
183   ///     CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
184   ///     CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
185   ///     CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
186   ///     CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
187   ///     CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
188   ///     CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint8  loop:<<Loop>>      outer_loop:none
189   ///     CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
190   //
191   ///     CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
192   ///     CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
193   //
194   /// CHECK-FI:
195 
196   /// CHECK-START-ARM64: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) disassembly (after)
197   /// CHECK:        VecDotProd
198   /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
199   ///               CHECK:        udot z{{\d+}}.s, z{{\d+}}.b, z{{\d+}}.b
200   /// CHECK-ELIF:   hasIsaFeature("dotprod")
201   ///               CHECK-NEXT:   udot v{{\d+}}.4s, v{{\d+}}.16b, v{{\d+}}.16b
202   /// CHECK-ELSE:
203   ///               CHECK-NOT:    sdot
204   ///               CHECK-NOT:    udot
205   /// CHECK-FI:
testDotProdSimpleUnsigned(byte[] a, byte[] b)206   public static final int testDotProdSimpleUnsigned(byte[] a, byte[] b) {
        // Unsigned byte dot product: sums (a[i] & 0xff) * (b[i] & 0xff) for every index i of b.
        // The loop bound is b.length only, so a must be at least as long as b.
        //
        // The accumulator starts at 1 and the 1 is subtracted on return; the checker directives
        // preceding this method expect the reduction to be seeded with the constant 1 and the
        // dot product to use the Uint8 element type.
207     int s = 1;
208     for (int i = 0; i < b.length; i++) {
          // Masking with 0xff reinterprets each byte as a value in 0..255.
209       int temp = (a[i] & 0xff) * (b[i] & 0xff);
210       s += temp;
211     }
        // Remove the initial seed of 1.
212     return s - 1;
213   }
214 
215   /// CHECK-START: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (before)
216   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
217   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
218   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
219   /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
220   /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
221   /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
222   /// CHECK-DAG: <<TypeC1:a\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none
223   /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
224   /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
225   /// CHECK-DAG: <<TypeC2:a\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none
226   /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none
227   /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
228   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
229 
230   /// CHECK-START-ARM64: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (after)
231   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
232   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
233   /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
234   //
235   ///     CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>,{{j\d+}}]                        loop:none
236   ///     CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>,{{j\d+}}]                             loop:none
237   ///     CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                                       loop:<<Loop:B\d+>> outer_loop:none
238   ///     CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                          loop:<<Loop>>      outer_loop:none
239   ///     CHECK-DAG: <<LoopP:j\d+>>   VecPredWhile [<<Phi1>>,{{i\d+}}]                                loop:<<Loop>>      outer_loop:none
240   ///     CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
241   ///     CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
242   ///     CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
243   ///     CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
244   ///     CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>,<<LoopP>>] type:Uint8  loop:<<Loop>>      outer_loop:none
245   ///     CHECK-DAG:                  Add [<<Phi1>>,{{i\d+}}]                                         loop:<<Loop>>      outer_loop:none
246   //
247   ///     CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>,{{j\d+}}]                                   loop:none
248   ///     CHECK-DAG:                  VecExtractScalar [<<Reduce>>,{{j\d+}}]                          loop:none
249   //
250   /// CHECK-ELSE:
251   //
252   ///     CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
253   ///     CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none
254   ///     CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
255   ///     CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
256   ///     CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
257   ///     CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
258   ///     CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
259   ///     CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
260   ///     CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
261   ///     CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8  loop:<<Loop>>      outer_loop:none
262   ///     CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
263   //
264   ///     CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
265   ///     CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
266   //
267   /// CHECK-FI:
testDotProdComplexUnsigned(byte[] a, byte[] b)268   public static final int testDotProdComplexUnsigned(byte[] a, byte[] b) {
        // Unsigned byte dot product of the incremented elements for every index i of b.
        // The loop bound is b.length only, so a must be at least as long as b.
        //
        // The accumulator starts at 1 and the 1 is subtracted on return; the checker directives
        // preceding this method expect the reduction to be seeded with the constant 1 and the
        // dot product to use the Uint8 element type.
269     int s = 1;
270     for (int i = 0; i < b.length; i++) {
          // Each operand is read as 0..255, incremented, and masked back to 8 bits,
          // so 255 + 1 wraps to 0 before the multiply.
271       int temp = (((a[i] & 0xff) + 1) & 0xff) * (((b[i] & 0xff) + 1) & 0xff);
272       s += temp;
273     }
        // Remove the initial seed of 1.
274     return s - 1;
275   }
276 
277   /// CHECK-START: int other.TestByte.testDotProdComplexUnsignedCastToSigned(byte[], byte[]) loop_optimization (before)
278   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
279   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
280   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
281   /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
282   /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
283   /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
284   /// CHECK-DAG: <<TypeC1:b\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none
285   /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
286   /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
287   /// CHECK-DAG: <<TypeC2:b\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none
288   /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none
289   /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
290   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
291 
292   /// CHECK-START-ARM64: int other.TestByte.testDotProdComplexUnsignedCastToSigned(byte[], byte[]) loop_optimization (after)
293   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
294   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
295   /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
296   //
297   ///     CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>,{{j\d+}}]                        loop:none
298   ///     CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>,{{j\d+}}]                             loop:none
299   ///     CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                                       loop:<<Loop:B\d+>> outer_loop:none
300   ///     CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                          loop:<<Loop>>      outer_loop:none
301   ///     CHECK-DAG: <<LoopP:j\d+>>   VecPredWhile [<<Phi1>>,{{i\d+}}]                                loop:<<Loop>>      outer_loop:none
302   ///     CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
303   ///     CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
304   ///     CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
305   ///     CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
306   ///     CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>,<<LoopP>>] type:Int8   loop:<<Loop>>      outer_loop:none
307   ///     CHECK-DAG:                  Add [<<Phi1>>,{{i\d+}}]                                         loop:<<Loop>>      outer_loop:none
308   //
309   ///     CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>,{{j\d+}}]                                   loop:none
310   ///     CHECK-DAG:                  VecExtractScalar [<<Reduce>>,{{j\d+}}]                          loop:none
311   //
312   /// CHECK-ELSE:
313   //
314   ///     CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
315   ///     CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none
316   ///     CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
317   ///     CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
318   ///     CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
319   ///     CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
320   ///     CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
321   ///     CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
322   ///     CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
323   ///     CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8   loop:<<Loop>>      outer_loop:none
324   ///     CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
325   //
326   ///     CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
327   ///     CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
328   //
329   /// CHECK-FI:
testDotProdComplexUnsignedCastToSigned(byte[] a, byte[] b)330   public static final int testDotProdComplexUnsignedCastToSigned(byte[] a, byte[] b) {
        // Dot product where each element is read as unsigned, incremented, and then cast to a
        // signed byte, computed for every index i of b.
        // The loop bound is b.length only, so a must be at least as long as b.
        //
        // The accumulator starts at 1 and the 1 is subtracted on return; the checker directives
        // preceding this method expect the reduction to be seeded with the constant 1 and the
        // dot product to use the signed Int8 element type despite the unsigned loads.
331     int s = 1;
332     for (int i = 0; i < b.length; i++) {
          // (x & 0xff) + 1 lies in 1..256; the (byte) cast keeps the low 8 bits and
          // sign-extends them, so the multiply operates on signed values.
333       int temp = ((byte)((a[i] & 0xff) + 1)) * ((byte)((b[i] & 0xff) + 1));
334       s += temp;
335     }
        // Remove the initial seed of 1.
336     return s - 1;
337   }
338 
339   /// CHECK-START: int other.TestByte.testDotProdComplexSignedCastToUnsigned(byte[], byte[]) loop_optimization (before)
340   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
341   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
342   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
343   /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
344   /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
345   /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
346   /// CHECK-DAG: <<TypeC1:a\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none
347   /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
348   /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
349   /// CHECK-DAG: <<TypeC2:a\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none
350   /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none
351   /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
352   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
353 
354   /// CHECK-START-ARM64: int other.TestByte.testDotProdComplexSignedCastToUnsigned(byte[], byte[]) loop_optimization (after)
355   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
356   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
357   /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
358   //
359   ///     CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>,{{j\d+}}]                        loop:none
360   ///     CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>,{{j\d+}}]                             loop:none
361   ///     CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                                       loop:<<Loop:B\d+>> outer_loop:none
362   ///     CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                          loop:<<Loop>>      outer_loop:none
363   ///     CHECK-DAG: <<LoopP:j\d+>>   VecPredWhile [<<Phi1>>,{{i\d+}}]                                loop:<<Loop>>      outer_loop:none
364   ///     CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
365   ///     CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
366   ///     CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
367   ///     CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>,<<LoopP>>]                           loop:<<Loop>>      outer_loop:none
368   ///     CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>,<<LoopP>>] type:Uint8  loop:<<Loop>>      outer_loop:none
369   ///     CHECK-DAG:                  Add [<<Phi1>>,{{i\d+}}]                                         loop:<<Loop>>      outer_loop:none
370   //
371   ///     CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>,{{j\d+}}]                                   loop:none
372   ///     CHECK-DAG:                  VecExtractScalar [<<Reduce>>,{{j\d+}}]                          loop:none
373   //
374   /// CHECK-ELSE:
375   //
376   ///     CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
377   ///     CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none
378   ///     CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
379   ///     CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
380   ///     CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
381   ///     CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
382   ///     CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
383   ///     CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
384   ///     CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
385   ///     CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8  loop:<<Loop>>      outer_loop:none
386   ///     CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
387   //
388   ///     CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
389   ///     CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
390   //
391   /// CHECK-FI:
testDotProdComplexSignedCastToUnsigned(byte[] a, byte[] b)392   public static final int testDotProdComplexSignedCastToUnsigned(byte[] a, byte[] b) {
393     int s = 1;
394     for (int i = 0; i < b.length; i++) {
395       int temp = ((a[i] + 1) & 0xff) * ((b[i] + 1) & 0xff);
396       s += temp;
397     }
398     return s - 1;
399   }
400 
401   /// CHECK-START-ARM64: int other.TestByte.testDotProdSignedWidening(byte[], byte[]) loop_optimization (after)
402   /// CHECK-DAG:                  VecDotProd type:Int8
testDotProdSignedWidening(byte[] a, byte[] b)403   public static final int testDotProdSignedWidening(byte[] a, byte[] b) {
404     int s = 1;
405     for (int i = 0; i < b.length; i++) {
406       int temp = ((short)(a[i])) * ((short)(b[i]));
407       s += temp;
408     }
409     return s - 1;
410   }
411 
412   /// CHECK-START-ARM64: int other.TestByte.testDotProdParamSigned(int, byte[]) loop_optimization (after)
413   /// CHECK-DAG:                  VecDotProd type:Int8
testDotProdParamSigned(int x, byte[] b)414   public static final int testDotProdParamSigned(int x, byte[] b) {
415     int s = 1;
416     for (int i = 0; i < b.length; i++) {
417       int temp = (byte)(x) * b[i];
418       s += temp;
419     }
420     return s - 1;
421   }
422 
423   /// CHECK-START-ARM64: int other.TestByte.testDotProdParamUnsigned(int, byte[]) loop_optimization (after)
424   /// CHECK-DAG:                  VecDotProd type:Uint8
testDotProdParamUnsigned(int x, byte[] b)425   public static final int testDotProdParamUnsigned(int x, byte[] b) {
426     int s = 1;
427     for (int i = 0; i < b.length; i++) {
428       int temp = (x & 0xff) * (b[i] & 0xff);
429       s += temp;
430     }
431     return s - 1;
432   }
433 
434   // No DOTPROD cases.
435 
436   /// CHECK-START: int other.TestByte.testDotProdIntParam(int, byte[]) loop_optimization (after)
437   /// CHECK-NOT:                  VecDotProd
testDotProdIntParam(int x, byte[] b)438   public static final int testDotProdIntParam(int x, byte[] b) {
439     int s = 1;
440     for (int i = 0; i < b.length; i++) {
441       int temp = b[i] * (x);
442       s += temp;
443     }
444     return s - 1;
445   }
446 
447   /// CHECK-START: int other.TestByte.testDotProdSignedToChar(byte[], byte[]) loop_optimization (after)
448   /// CHECK-NOT:                  VecDotProd
testDotProdSignedToChar(byte[] a, byte[] b)449   public static final int testDotProdSignedToChar(byte[] a, byte[] b) {
450     int s = 1;
451     for (int i = 0; i < b.length; i++) {
452       int temp = ((char)(a[i])) * ((char)(b[i]));
453       s += temp;
454     }
455     return s - 1;
456   }
457 
458   // Cases when result of Mul is type-converted are not supported.
459 
460   /// CHECK-START: int other.TestByte.testDotProdSimpleCastToSignedByte(byte[], byte[]) loop_optimization (after)
461   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleCastToSignedByte(byte[] a, byte[] b)462   public static final int testDotProdSimpleCastToSignedByte(byte[] a, byte[] b) {
463     int s = 1;
464     for (int i = 0; i < b.length; i++) {
465       byte temp = (byte)(a[i] * b[i]);
466       s += temp;
467     }
468     return s - 1;
469   }
470 
471   /// CHECK-START: int other.TestByte.testDotProdSimpleCastToUnsignedByte(byte[], byte[]) loop_optimization (after)
472   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleCastToUnsignedByte(byte[] a, byte[] b)473   public static final int testDotProdSimpleCastToUnsignedByte(byte[] a, byte[] b) {
474     int s = 1;
475     for (int i = 0; i < b.length; i++) {
476       s += (a[i] * b[i]) & 0xff;
477     }
478     return s - 1;
479   }
480 
481   /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastToSignedByte(byte[], byte[]) loop_optimization (after)
482   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleUnsignedCastToSignedByte(byte[] a, byte[] b)483   public static final int testDotProdSimpleUnsignedCastToSignedByte(byte[] a, byte[] b) {
484     int s = 1;
485     for (int i = 0; i < b.length; i++) {
486       byte temp = (byte)((a[i] & 0xff) * (b[i] & 0xff));
487       s += temp;
488     }
489     return s - 1;
490   }
491 
492   /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastToUnsignedByte(byte[], byte[]) loop_optimization (after)
493   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleUnsignedCastToUnsignedByte(byte[] a, byte[] b)494   public static final int testDotProdSimpleUnsignedCastToUnsignedByte(byte[] a, byte[] b) {
495     int s = 1;
496     for (int i = 0; i < b.length; i++) {
497       s += ((a[i] & 0xff) * (b[i] & 0xff)) & 0xff;
498     }
499     return s - 1;
500   }
501 
502   /// CHECK-START: int other.TestByte.testDotProdSimpleCastToShort(byte[], byte[]) loop_optimization (after)
503   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleCastToShort(byte[] a, byte[] b)504   public static final int testDotProdSimpleCastToShort(byte[] a, byte[] b) {
505     int s = 1;
506     for (int i = 0; i < b.length; i++) {
507       short temp = (short)(a[i] * b[i]);
508       s += temp;
509     }
510     return s - 1;
511   }
512 
513   /// CHECK-START: int other.TestByte.testDotProdSimpleCastToChar(byte[], byte[]) loop_optimization (after)
514   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleCastToChar(byte[] a, byte[] b)515   public static final int testDotProdSimpleCastToChar(byte[] a, byte[] b) {
516     int s = 1;
517     for (int i = 0; i < b.length; i++) {
518       char temp = (char)(a[i] * b[i]);
519       s += temp;
520     }
521     return s - 1;
522   }
523 
524   /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastToShort(byte[], byte[]) loop_optimization (after)
525   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleUnsignedCastToShort(byte[] a, byte[] b)526   public static final int testDotProdSimpleUnsignedCastToShort(byte[] a, byte[] b) {
527     int s = 1;
528     for (int i = 0; i < b.length; i++) {
529       short temp = (short)((a[i] & 0xff) * (b[i] & 0xff));
530       s += temp;
531     }
532     return s - 1;
533   }
534 
535   /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastToChar(byte[], byte[]) loop_optimization (after)
536   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleUnsignedCastToChar(byte[] a, byte[] b)537   public static final int testDotProdSimpleUnsignedCastToChar(byte[] a, byte[] b) {
538     int s = 1;
539     for (int i = 0; i < b.length; i++) {
540       char temp = (char)((a[i] & 0xff) * (b[i] & 0xff));
541       s += temp;
542     }
543     return s - 1;
544   }
545 
546   /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastToLong(byte[], byte[]) loop_optimization (after)
547   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleUnsignedCastToLong(byte[] a, byte[] b)548   public static final int testDotProdSimpleUnsignedCastToLong(byte[] a, byte[] b) {
549     int s = 1;
550     for (int i = 0; i < b.length; i++) {
551       long temp = (long)((a[i] & 0xff) * (b[i] & 0xff));
552       s += temp;
553     }
554     return s - 1;
555   }
556 
557   /// CHECK-START: int other.TestByte.testDotProdUnsignedSigned(byte[], byte[]) loop_optimization (after)
558   /// CHECK-NOT:                  VecDotProd
testDotProdUnsignedSigned(byte[] a, byte[] b)559   public static final int testDotProdUnsignedSigned(byte[] a, byte[] b) {
560     int s = 1;
561     for (int i = 0; i < b.length; i++) {
562       int temp = (a[i] & 0xff) * b[i];
563       s += temp;
564     }
565     return s - 1;
566   }
567 
expectEquals(int expected, int result)568   private static void expectEquals(int expected, int result) {
569     if (expected != result) {
570       throw new Error("Expected: " + expected + ", found: " + result);
571     }
572   }
573 
testDotProd(byte[] b1, byte[] b2, int[] results)574   private static void testDotProd(byte[] b1, byte[] b2, int[] results) {
575     expectEquals(results[0], testDotProdSimple(b1, b2));
576     expectEquals(results[1], testDotProdComplex(b1, b2));
577     expectEquals(results[2], testDotProdSimpleUnsigned(b1, b2));
578     expectEquals(results[3], testDotProdComplexUnsigned(b1, b2));
579     expectEquals(results[4], testDotProdComplexUnsignedCastToSigned(b1, b2));
580     expectEquals(results[5], testDotProdComplexSignedCastToUnsigned(b1, b2));
581     expectEquals(results[6], testDotProdSignedWidening(b1, b2));
582     expectEquals(results[7], testDotProdParamSigned(-128, b2));
583     expectEquals(results[8], testDotProdParamUnsigned(-128, b2));
584     expectEquals(results[9], testDotProdIntParam(-128, b2));
585     expectEquals(results[10], testDotProdSignedToChar(b1, b2));
586     expectEquals(results[11], testDotProdSimpleCastToSignedByte(b1, b2));
587     expectEquals(results[12], testDotProdSimpleCastToUnsignedByte(b1, b2));
588     expectEquals(results[13], testDotProdSimpleUnsignedCastToSignedByte(b1, b2));
589     expectEquals(results[14], testDotProdSimpleUnsignedCastToUnsignedByte(b1, b2));
590     expectEquals(results[15], testDotProdSimpleCastToShort(b1, b2));
591     expectEquals(results[16], testDotProdSimpleCastToChar(b1, b2));
592     expectEquals(results[17], testDotProdSimpleUnsignedCastToShort(b1, b2));
593     expectEquals(results[18], testDotProdSimpleUnsignedCastToChar(b1, b2));
594     expectEquals(results[19], testDotProdSimpleUnsignedCastToLong(b1, b2));
595     expectEquals(results[20], testDotProdUnsignedSigned(b1, b2));
596   }
597 
run()598   public static void run() {
599     byte[] b1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
600     byte[] b2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
601     int[] results_1 = { 64516, 65548, 64516, 65548, 65548, 65548, 64516, -65024, 65024, -65024,
602                         64516, 4, 4, 4, 4, 64516, 64516, 64516, 64516, 64516, 64516 };
603     testDotProd(b1_1, b2_1, results_1);
604 
605     byte[] b1_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
606     byte[] b2_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
607     int[] results_2 = { 80645, 81931, 80645, 81931, 81931, 81931, 80645, -81280, 81280, -81280,
608                         80645, 5, 5, 5, 5, 80645, 80645, 80645, 80645, 80645, 80645 };
609     testDotProd(b1_2, b2_2, results_2);
610 
611     byte[] b1_3 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 };
612     byte[] b2_3 = {  127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  127,  127,  127,  127 };
613     int[] results_3 = { -81280, 81291, 81280, 82571, 81291, 82571, -81280, -81280, 81280, -81280,
614                         41534080, -640, 640, -640, 640, -81280, 246400, 81280, 81280, 81280, 81280 };
615     testDotProd(b1_3, b2_3, results_3);
616 
617     byte[] b1_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 };
618     byte[] b2_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 };
619     int[] results_4 = { 81920, 80656, 81920, 83216, 80656, 83216, 81920, 81920, 81920, 81920,
620                        -83804160, 0, 0, 0, 0, 81920, 81920, 81920, 81920, 81920, -81920 };
621     testDotProd(b1_4, b2_4, results_4);
622   }
623 
main(String[] args)624   public static void main(String[] args) {
625     run();
626   }
627 }
628