1 /**
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "Quantiser.h"
18 
BsearchLL(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)19 XBT_INLINE_ int32_t BsearchLL(const int32_t absDiffSignalShifted,
20                               const int32_t delta,
21                               const int32_t* dqbitTablePrt) {
22   int32_t qCode = 0;
23   reg64_t tmp_acc;
24   int32_t tmp = 0;
25   int32_t lc_delta = delta << 8;
26 
27   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[128];
28   tmp_acc.s32.h -= absDiffSignalShifted;
29   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
30   if (tmp <= 0) {
31     qCode = 128;
32   }
33 
34   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 64];
35   tmp_acc.s32.h -= absDiffSignalShifted;
36   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
37   if (tmp <= 0) {
38     qCode += 64;
39   }
40 
41   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 32];
42   tmp_acc.s32.h -= absDiffSignalShifted;
43   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
44   if (tmp <= 0) {
45     qCode += 32;
46   }
47 
48   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 16];
49   tmp_acc.s32.h -= absDiffSignalShifted;
50   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
51   if (tmp <= 0) {
52     qCode += 16;
53   }
54   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 8];
55   tmp_acc.s32.h -= absDiffSignalShifted;
56   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
57   if (tmp <= 0) {
58     qCode += 8;
59   }
60 
61   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 4];
62   tmp_acc.s32.h -= absDiffSignalShifted;
63   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
64   if (tmp <= 0) {
65     qCode += 4;
66   }
67 
68   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
69   tmp_acc.s32.h -= absDiffSignalShifted;
70   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
71   if (tmp <= 0) {
72     qCode += 2;
73   }
74 
75   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
76   tmp_acc.s32.h -= absDiffSignalShifted;
77   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
78   if (tmp <= 0) {
79     qCode++;
80   }
81 
82   return (qCode);
83 }
84 
BsearchHL(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)85 XBT_INLINE_ int32_t BsearchHL(const int32_t absDiffSignalShifted,
86                               const int32_t delta,
87                               const int32_t* dqbitTablePrt) {
88   int32_t qCode = 0;
89   reg64_t tmp_acc;
90   int32_t tmp = 0;
91   int32_t lc_delta = delta << 8;
92 
93   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[4];
94   tmp_acc.s32.h -= absDiffSignalShifted;
95   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
96   if (tmp <= 0) {
97     qCode = 4;
98   }
99 
100   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
101   tmp_acc.s32.h -= absDiffSignalShifted;
102   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
103   if (tmp <= 0) {
104     qCode += 2;
105   }
106 
107   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
108   tmp_acc.s32.h -= absDiffSignalShifted;
109   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
110   if (tmp <= 0) {
111     qCode++;
112   }
113 
114   return (qCode);
115 }
116 
BsearchHH(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)117 XBT_INLINE_ int32_t BsearchHH(const int32_t absDiffSignalShifted,
118                               const int32_t delta,
119                               const int32_t* dqbitTablePrt) {
120   int32_t qCode = 0;
121   reg64_t tmp_acc;
122   int32_t tmp = 0;
123   int32_t lc_delta = delta << 8;
124 
125   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[8];
126   tmp_acc.s32.h -= absDiffSignalShifted;
127   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
128   if (tmp <= 0) {
129     qCode = 8;
130   }
131 
132   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 4];
133   tmp_acc.s32.h -= absDiffSignalShifted;
134   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
135   if (tmp <= 0) {
136     qCode += 4;
137   }
138 
139   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
140   tmp_acc.s32.h -= absDiffSignalShifted;
141   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
142   if (tmp <= 0) {
143     qCode += 2;
144   }
145 
146   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
147   tmp_acc.s32.h -= absDiffSignalShifted;
148   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
149   if (tmp <= 0) {
150     qCode++;
151   }
152 
153   return (qCode);
154 }
155 
quantiseDifference_HDHL(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)156 void quantiseDifference_HDHL(const int32_t diffSignal, const int32_t ditherVal,
157                              const int32_t delta, Quantiser_data* qdata_pt) {
158   int32_t absDiffSignal = 0;
159   int32_t absDiffSignalShifted = 0;
160   int32_t index = 0;
161   int32_t dithSquared = 0;
162   int32_t minusLambdaD = 0;
163   int32_t acc = 0;
164   int32_t threshDiff = 0;
165   reg64_t tmp_acc;
166   reg64_t tmp_reg64;
167   int32_t tmp_accL = 0;
168   int32_t tmp_qCode = 0;
169   int32_t tmp_altQcode = 0;
170   uint32_t tmp_round0 = 0;
171   int32_t _delta = 0;
172 
173   /* Form the absolute value of the difference signal and maintain a version
174    * that is right-shifted 4 places for delta scaling. */
175   absDiffSignal = -diffSignal;
176   if (diffSignal >= 0) {
177     absDiffSignal = diffSignal;
178   }
179   absDiffSignal = ssat24(absDiffSignal);
180   absDiffSignalShifted = absDiffSignal >> deltaScale;
181 
182   /* Binary search for the quantised code. This search terminates with the
183    * table index of the LARGEST threshold table value for which
184    * absDiffSignalShifted >= (delta * threshold)
185    */
186   index =
187       BsearchHL(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
188 
189   /* We actually wanted the SMALLEST magnitude quantised code for which
190    * absDiffSignalShifted < (delta * threshold)
191    * i.e. the code with the next highest magnitude than the one we actually
192    * found. We could add +1 to the code magnitude to do this, but we need to
193    * subtract 1 from the code magnitude to compensate for the "phantom
194    * element" at the base of the quantisation table. These two effects cancel
195    * out, so we leave the value of code alone. However, we need to form code+1
196    * to get the proper index into the both the threshold and dither tables,
197    * since we must skip over the phantom element at the base. */
198   qdata_pt->qCode = index;
199 
200   /* Square the dither and get the value back from the ALU
201    * (saturated/rounded). */
202   tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
203 
204   acc = tmp_acc.s32.h;
205   tmp_round0 = (uint32_t)acc << 8;
206 
207   acc = (acc >> 6) + 1;
208   acc >>= 1;
209   if (tmp_round0 == 0x40000000L) {
210     acc--;
211   }
212   acc = ssat24(acc);
213 
214   dithSquared = acc;
215 
216   /* Form the negative difference of the dither values at index and index-1.
217    * Load the accumulator with this value divided by 2. Ensure saturation is
218    * applied to the difference calculation. */
219   minusLambdaD = qdata_pt->minusLambdaDTable[index];
220 
221   tmp_accL = (1 << 23) - dithSquared;
222   tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
223 
224   tmp_round0 = tmp_acc.s32.l << 8;
225 
226   acc = (tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
227   acc++;
228   acc >>= 1;
229   if (tmp_round0 == 0x40000000L) {
230     acc--;
231   }
232 
233   /* Add the threshold table values at index and index-1 to the accumulated
234    * value. */
235   acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
236   //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
237   acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
238   //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF
239 
240   // saturation required
241   acc = ssat24(acc);
242 
243   /* Form the threshold table difference at index and index-1. Ensure
244    * saturation is applied to the difference calculation. */
245   threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] -
246                qdata_pt->thresholdTablePtr_sl1[index];
247 
248   /* Based on the sign of the difference signal, either add or subtract the
249    * threshold table difference from the accumulated value. Recover the final
250    * accumulated value (saturated/rounded) */
251   if (diffSignal < 0) {
252     threshDiff = -threshDiff;
253   }
254   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
255 
256   tmp_reg64.s32.h += acc;
257   acc = tmp_reg64.s32.h;
258 
259   if (tmp_reg64.u32.l >= 0x80000000) {
260     acc++;
261   }
262   tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
263 
264   acc = ssat24(acc);
265 
266   if (tmp_round0 == 0x40000000L) {
267     acc--;
268   }
269   _delta = -delta << 8;
270 
271   acc = (int32_t)((uint32_t)acc << 4);
272 
273   /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
274    * signal used to determine if dithering alters the quantised code value or
275    * not. */
276   // worst case value for delta is 0x7d400
277   tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
278   tmp_reg64.s32.h += absDiffSignal;
279   tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
280   acc = tmp_reg64.s32.h + (1 << 2);
281   acc >>= 3;
282   if (tmp_round0 == 0x40000000L) {
283     acc--;
284   }
285 
286   tmp_qCode = qdata_pt->qCode;
287   tmp_altQcode = tmp_qCode - 1;
288   /* Check the sign of the distance penalty. Get the sign from the
289    * full-precision accumulator, as done in the Kalimba code. */
290   if (tmp_reg64.s32.h < 0) {
291     /* The distance is -ve. The optimum code needs decremented by 1 and the
292      * alternative code is 1 greater than this. Get the rounded version of the
293      * -ve distance penalty and negate this (form distance magnitude) before
294      *  writing the value out */
295     tmp_qCode = tmp_altQcode;
296     tmp_altQcode++;
297     acc = -acc;
298   }
299 
300   qdata_pt->distPenalty = acc;
301   /* If the difference signal is negative, bitwise invert the code (restores
302    * sign to the magnitude). */
303   if (diffSignal < 0) {
304     tmp_qCode = ~tmp_qCode;
305     tmp_altQcode = ~tmp_altQcode;
306   }
307   qdata_pt->altQcode = tmp_altQcode;
308   qdata_pt->qCode = tmp_qCode;
309 }
310 
quantiseDifference_HDHH(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)311 void quantiseDifference_HDHH(const int32_t diffSignal, const int32_t ditherVal,
312                              const int32_t delta, Quantiser_data* qdata_pt) {
313   int32_t absDiffSignal;
314   int32_t absDiffSignalShifted;
315   int32_t index;
316   int32_t dithSquared;
317   int32_t minusLambdaD;
318   int32_t acc;
319   int32_t threshDiff;
320   reg64_t tmp_acc;
321   reg64_t tmp_reg64;
322   int32_t tmp_accL;
323   int32_t tmp_qCode;
324   int32_t tmp_altQcode;
325   uint32_t tmp_round0;
326   int32_t _delta;
327 
328   /* Form the absolute value of the difference signal and maintain a version
329    * that is right-shifted 4 places for delta scaling. */
330   absDiffSignal = -diffSignal;
331   if (diffSignal >= 0) {
332     absDiffSignal = diffSignal;
333   }
334   absDiffSignal = ssat24(absDiffSignal);
335   absDiffSignalShifted = absDiffSignal >> deltaScale;
336 
337   /* Binary search for the quantised code. This search terminates with the
338    * table index of the LARGEST threshold table value for which
339    * absDiffSignalShifted >= (delta * threshold)
340    */
341   index =
342       BsearchHH(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
343 
344   /* We actually wanted the SMALLEST magnitude quantised code for which
345    * absDiffSignalShifted < (delta * threshold)
346    * i.e. the code with the next highest magnitude than the one we actually
347    * found. We could add +1 to the code magnitude to do this, but we need to
348    * subtract 1 from the code magnitude to compensate for the "phantom
349    * element" at the base of the quantisation table. These two effects cancel
350    * out, so we leave the value of code alone. However, we need to form code+1
351    * to get the proper index into the both the threshold and dither tables,
352    * since we must skip over the phantom element at the base. */
353   qdata_pt->qCode = index;
354 
355   /* Square the dither and get the value back from the ALU
356    * (saturated/rounded). */
357   tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
358 
359   acc = tmp_acc.s32.h;
360   tmp_round0 = (uint32_t)acc << 8;
361 
362   acc = (acc >> 6) + 1;
363   acc >>= 1;
364   if (tmp_round0 == 0x40000000L) {
365     acc--;
366   }
367 
368   acc = ssat24(acc);
369 
370   dithSquared = acc;
371 
372   /* Form the negative difference of the dither values at index and index-1.
373    * Load the accumulator with this value divided by 2. Ensure saturation is
374    * applied to the difference calculation. */
375   minusLambdaD = qdata_pt->minusLambdaDTable[index];
376 
377   tmp_accL = (1 << 23) - dithSquared;
378   tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
379 
380   tmp_round0 = tmp_acc.s32.l << 8;
381 
382   acc = (tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
383   acc++;
384   acc >>= 1;
385   if (tmp_round0 == 0x40000000L) {
386     acc--;
387   }
388 
389   /* Add the threshold table values at index and index-1 to the accumulated
390    * value. */
391   acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
392   //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
393   acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
394   //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF
395 
396   // saturation required
397   acc = ssat24(acc);
398 
399   /* Form the threshold table difference at index and index-1. Ensure
400    * saturation is applied to the difference calculation. */
401   threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] -
402                qdata_pt->thresholdTablePtr_sl1[index];
403 
404   /* Based on the sign of the difference signal, either add or subtract the
405    * threshold table difference from the accumulated value. Recover the final
406    * accumulated value (saturated/rounded) */
407   if (diffSignal < 0) {
408     threshDiff = -threshDiff;
409   }
410   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
411   tmp_reg64.s32.h += acc;
412   acc = tmp_reg64.s32.h;
413 
414   if (tmp_reg64.u32.l >= 0x80000000) {
415     acc++;
416   }
417   tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
418 
419   acc = ssat24(acc);
420 
421   if (tmp_round0 == 0x40000000L) {
422     acc--;
423   }
424   _delta = -delta << 8;
425 
426   acc = (int32_t)((uint32_t)acc << 4);
427 
428   /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
429    * signal used to determine if dithering alters the quantised code value or
430    * not. */
431   // worst case value for delta is 0x7d400
432   tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
433   tmp_reg64.s32.h += absDiffSignal;
434   tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
435   acc = tmp_reg64.s32.h + (1 << 2);
436   acc >>= 3;
437   if (tmp_round0 == 0x40000000L) {
438     acc--;
439   }
440 
441   tmp_qCode = qdata_pt->qCode;
442   tmp_altQcode = tmp_qCode - 1;
443   /* Check the sign of the distance penalty. Get the sign from the
444    * full-precision accumulator, as done in the Kalimba code. */
445   if (tmp_reg64.s32.h < 0) {
446     /* The distance is -ve. The optimum code needs decremented by 1 and the
447      * alternative code is 1 greater than this. Get the rounded version of the
448      * -ve distance penalty and negate this (form distance magnitude) before
449      *  writing the value out */
450     tmp_qCode = tmp_altQcode;
451     tmp_altQcode++;
452     acc = -acc;
453   }
454 
455   qdata_pt->distPenalty = acc;
456   /* If the difference signal is negative, bitwise invert the code (restores
457    * sign to the magnitude). */
458   if (diffSignal < 0) {
459     tmp_qCode = ~tmp_qCode;
460     tmp_altQcode = ~tmp_altQcode;
461   }
462   qdata_pt->altQcode = tmp_altQcode;
463   qdata_pt->qCode = tmp_qCode;
464 }
465 
quantiseDifference_HDLL(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)466 void quantiseDifference_HDLL(const int32_t diffSignal, const int32_t ditherVal,
467                              const int32_t delta, Quantiser_data* qdata_pt) {
468   int32_t absDiffSignal;
469   int32_t absDiffSignalShifted;
470   int32_t index;
471   int32_t dithSquared;
472   int32_t minusLambdaD;
473   int32_t acc;
474   int32_t threshDiff;
475   reg64_t tmp_acc;
476   reg64_t tmp_reg64;
477   int32_t tmp_accL;
478   int32_t tmp_qCode;
479   int32_t tmp_altQcode;
480   uint32_t tmp_round0;
481   int32_t _delta;
482 
483   /* Form the absolute value of the difference signal and maintain a version
484    * that is right-shifted 4 places for delta scaling. */
485   absDiffSignal = -diffSignal;
486   if (diffSignal >= 0) {
487     absDiffSignal = diffSignal;
488   }
489   absDiffSignal = ssat24(absDiffSignal);
490   absDiffSignalShifted = absDiffSignal >> deltaScale;
491 
492   /* Binary search for the quantised code. This search terminates with the
493    * table index of the LARGEST threshold table value for which
494    * absDiffSignalShifted >= (delta * threshold)
495    */
496   index =
497       BsearchLL(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
498 
499   /* We actually wanted the SMALLEST magnitude quantised code for which
500    * absDiffSignalShifted < (delta * threshold)
501    * i.e. the code with the next highest magnitude than the one we actually
502    * found. We could add +1 to the code magnitude to do this, but we need to
503    * subtract 1 from the code magnitude to compensate for the "phantom
504    * element" at the base of the quantisation table. These two effects cancel
505    * out, so we leave the value of code alone. However, we need to form code+1
506    * to get the proper index into the both the threshold and dither tables,
507    * since we must skip over the phantom element at the base. */
508   qdata_pt->qCode = index;
509 
510   /* Square the dither and get the value back from the ALU
511    * (saturated/rounded). */
512 
513   tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
514 
515   acc = tmp_acc.s32.h;
516   tmp_round0 = (uint32_t)acc << 8;
517 
518   acc = (acc >> 6) + 1;
519   acc >>= 1;
520   if (tmp_round0 == 0x40000000L) {
521     acc--;
522   }
523 
524   acc = ssat24(acc);
525 
526   dithSquared = acc;
527 
528   /* Form the negative difference of the dither values at index and index-1.
529    * Load the accumulator with this value divided by 2. Ensure saturation is
530    * applied to the difference calculation. */
531   minusLambdaD = qdata_pt->minusLambdaDTable[index];
532 
533   tmp_accL = (1 << 23) - dithSquared;
534   tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
535 
536   tmp_round0 = tmp_acc.s32.l << 8;
537 
538   acc = (tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
539   acc++;
540   acc >>= 1;
541   if (tmp_round0 == 0x40000000L) {
542     acc--;
543   }
544 
545   /* Add the threshold table values at index and index-1 to the accumulated
546    * value. */
547 
548   acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
549   //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
550   acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
551   //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF
552   // saturation required
553   acc = ssat24(acc);
554 
555   /* Form the threshold table difference at index and index-1. Ensure
556    * saturation is applied to the difference calculation. */
557   threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] -
558                qdata_pt->thresholdTablePtr_sl1[index];
559 
560   /* Based on the sign of the difference signal, either add or subtract the
561    * threshold table difference from the accumulated value. Recover the final
562    * accumulated value (saturated/rounded) */
563 
564   if (diffSignal < 0) {
565     threshDiff = -threshDiff;
566   }
567   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
568   tmp_reg64.s32.h += acc;
569   acc = tmp_reg64.s32.h;
570 
571   if (tmp_reg64.u32.l >= 0x80000000) {
572     acc++;
573   }
574   tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
575 
576   acc = ssat24(acc);
577 
578   if (tmp_round0 == 0x40000000L) {
579     acc--;
580   }
581   _delta = -delta << 8;
582 
583   acc = (int32_t)((uint32_t)acc << 4);
584 
585   /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
586    * signal used to determine if dithering alters the quantised code value or
587    * not. */
588   // worst case value for delta is 0x7d400
589 
590   tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
591   tmp_reg64.s32.h += absDiffSignal;
592   tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
593   acc = tmp_reg64.s32.h + (1 << 2);
594   acc >>= 3;
595   if (tmp_round0 == 0x40000000L) {
596     acc--;
597   }
598 
599   tmp_qCode = qdata_pt->qCode;
600   tmp_altQcode = tmp_qCode - 1;
601   /* Check the sign of the distance penalty. Get the sign from the
602    * full-precision accumulator, as done in the Kalimba code. */
603 
604   if (tmp_reg64.s32.h < 0) {
605     /* The distance is -ve. The optimum code needs decremented by 1 and the
606      * alternative code is 1 greater than this. Get the rounded version of the
607      * -ve distance penalty and negate this (form distance magnitude) before
608      *  writing the value out */
609     tmp_qCode = tmp_altQcode;
610     tmp_altQcode++;
611     acc = -acc;
612   }
613 
614   qdata_pt->distPenalty = acc;
615   /* If the difference signal is negative, bitwise invert the code (restores
616    * sign to the magnitude). */
617   if (diffSignal < 0) {
618     tmp_qCode = ~tmp_qCode;
619     tmp_altQcode = ~tmp_altQcode;
620   }
621   qdata_pt->altQcode = tmp_altQcode;
622   qdata_pt->qCode = tmp_qCode;
623 }
624 
BsearchLH(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)625 static int32_t BsearchLH(const int32_t absDiffSignalShifted,
626                          const int32_t delta, const int32_t* dqbitTablePrt) {
627   int32_t qCode;
628   reg64_t tmp_acc;
629   int32_t tmp;
630   int32_t lc_delta = delta << 8;
631 
632   qCode = 0;
633 
634   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[16];
635   tmp_acc.s32.h -= absDiffSignalShifted;
636   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
637   if (tmp <= 0) {
638     qCode = 16;
639   }
640 
641   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 8];
642   tmp_acc.s32.h -= absDiffSignalShifted;
643   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
644   if (tmp <= 0) {
645     qCode += 8;
646   }
647 
648   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 4];
649   tmp_acc.s32.h -= absDiffSignalShifted;
650   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
651   if (tmp <= 0) {
652     qCode += 4;
653   }
654 
655   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
656   tmp_acc.s32.h -= absDiffSignalShifted;
657   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
658   if (tmp <= 0) {
659     qCode += 2;
660   }
661 
662   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
663   tmp_acc.s32.h -= absDiffSignalShifted;
664   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
665   if (tmp <= 0) {
666     qCode++;
667   }
668 
669   return (qCode);
670 }
671 
quantiseDifference_HDLH(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)672 void quantiseDifference_HDLH(const int32_t diffSignal, const int32_t ditherVal,
673                              const int32_t delta, Quantiser_data* qdata_pt) {
674   int32_t absDiffSignal = 0;
675   int32_t absDiffSignalShifted = 0;
676   int32_t index = 0;
677   int32_t dithSquared = 0;
678   int32_t minusLambdaD = 0;
679   int32_t acc = 0;
680   int32_t threshDiff = 0;
681   reg64_t tmp_acc;
682   reg64_t tmp_reg64;
683   int32_t tmp_accL = 0;
684   int32_t tmp_qCode = 0;
685   int32_t tmp_altQcode = 0;
686 
687   uint32_t tmp_round0 = 0;
688   int32_t _delta = 0;
689 
690   /* Form the absolute value of the difference signal and maintain a version
691    * that is right-shifted 4 places for delta scaling. */
692   absDiffSignal = -diffSignal;
693   if (diffSignal >= 0) {
694     absDiffSignal = diffSignal;
695   }
696   absDiffSignal = ssat24(absDiffSignal);
697   absDiffSignalShifted = absDiffSignal >> deltaScale;
698 
699   /* Binary search for the quantised code. This search terminates with the
700    * table index of the LARGEST threshold table value for which
701    * absDiffSignalShifted >= (delta * threshold)
702    */
703 
704   /* first iteration */
705   index =
706       BsearchLH(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
707 
708   /* We actually wanted the SMALLEST magnitude quantised code for which
709    * absDiffSignalShifted < (delta * threshold)
710    * i.e. the code with the next highest magnitude than the one we actually
711    * found. We could add +1 to the code magnitude to do this, but we need to
712    * subtract 1 from the code magnitude to compensate for the "phantom
713    * element" at the base of the quantisation table. These two effects cancel
714    * out, so we leave the value of code alone. However, we need to form code+1
715    * to get the proper index into the both the threshold and dither tables,
716    * since we must skip over the phantom element at the base. */
717   qdata_pt->qCode = index;
718 
719   /* Square the dither and get the value back from the ALU
720    * (saturated/rounded). */
721 
722   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
723 
724   acc = tmp_reg64.s32.h;
725 
726   tmp_round0 = (uint32_t)acc << 8;
727 
728   acc = (acc >> 6) + 1;
729   acc >>= 1;
730   if (tmp_round0 == 0x40000000L) {
731     acc--;
732   }
733   acc = ssat24(acc);
734 
735   dithSquared = acc;
736 
737   /* Form the negative difference of the dither values at index and index-1.
738    * Load the accumulator with this value divided by 2. Ensure saturation is
739    * applied to the difference calculation. */
740 
741   minusLambdaD = qdata_pt->minusLambdaDTable[index];
742 
743   tmp_accL = (1 << 23) - dithSquared;
744   tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
745 
746   tmp_round0 = tmp_acc.s32.l << 8;
747 
748   acc = (int32_t)(tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
749   if (tmp_round0 == 0x40000000L) {
750     acc -= 2;
751   }
752   acc++;
753 
754   /* Add the threshold table values at index and index-1 to the accumulated
755    * value. */
756 
757   acc += qdata_pt->thresholdTablePtr_sl1[index + 1];
758   //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
759   acc += qdata_pt->thresholdTablePtr_sl1[index];
760   acc >>= 1;
761 
762   // saturation required
763   acc = ssat24(acc);
764 
765   /* Form the threshold table difference at index and index-1. Ensure
766    * saturation is applied to the difference calculation. */
767   threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] -
768                qdata_pt->thresholdTablePtr_sl1[index];
769 
770   /* Based on the sign of the difference signal, either add or subtract the
771    * threshold table difference from the accumulated value. Recover the final
772    * accumulated value (saturated/rounded) */
773 
774   if (diffSignal < 0) {
775     threshDiff = -threshDiff;
776   }
777   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
778 
779   tmp_reg64.s32.h += acc;
780   acc = tmp_reg64.s32.h;
781 
782   if (tmp_reg64.u32.l >= 0x80000000) {
783     acc++;
784   }
785   tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
786 
787   acc = ssat24(acc);
788 
789   if (tmp_round0 == 0x40000000L) {
790     acc--;
791   }
792   _delta = -delta << 8;
793 
794   acc = (int32_t)((uint32_t)acc << 4);
795 
796   /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
797    * signal used to determine if dithering alters the quantised code value or
798    * not. */
799   // worst case value for delta is 0x7d400
800 
801   tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
802   tmp_reg64.s32.h += absDiffSignal;
803   tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
804   acc = tmp_reg64.s32.h + (1 << 2);
805   acc >>= 3;
806   if (tmp_round0 == 0x40000000L) {
807     acc--;
808   }
809 
810   tmp_qCode = qdata_pt->qCode;
811   tmp_altQcode = tmp_qCode - 1;
812   /* Check the sign of the distance penalty. Get the sign from the
813    * full-precision accumulator, as done in the Kalimba code. */
814 
815   if (tmp_reg64.s32.h < 0) {
816     /* The distance is -ve. The optimum code needs decremented by 1 and the
817      * alternative code is 1 greater than this. Get the rounded version of the
818      * -ve distance penalty and negate this (form distance magnitude) before
819      *  writing the value out */
820     tmp_qCode = tmp_altQcode;
821     tmp_altQcode++;
822     acc = -acc;
823   }
824 
825   qdata_pt->distPenalty = acc;
826   /* If the difference signal is negative, bitwise invert the code (restores
827    * sign to the magnitude). */
828   if (diffSignal < 0) {
829     tmp_qCode = ~tmp_qCode;
830     tmp_altQcode = ~tmp_altQcode;
831   }
832   qdata_pt->altQcode = tmp_altQcode;
833   qdata_pt->qCode = tmp_qCode;
834 }
835