1 /**
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /*------------------------------------------------------------------------------
17  *
18  *  Subband processing consists of:
19  *  inverse quantisation (defined in a separate file),
20  *  predictor coefficient update (Pole and Zero Coeff update),
21  *  predictor filtering.
22  *
23  *----------------------------------------------------------------------------*/
24 
25 #ifndef SUBBANDFUNCTIONSCOMMON_H
26 #define SUBBANDFUNCTIONSCOMMON_H
27 
28 enum reg64_reg { reg64_H = 1, reg64_L = 0 };
29 
30 void processSubband(const int32_t qCode, const int32_t ditherVal,
31                     Subband_data* SubbandDataPt, IQuantiser_data* iqDataPt);
32 void processSubbandLL(const int32_t qCode, const int32_t ditherVal,
33                       Subband_data* SubbandDataPt, IQuantiser_data* iqDataPt);
34 void processSubbandHL(const int32_t qCode, const int32_t ditherVal,
35                       Subband_data* SubbandDataPt, IQuantiser_data* iqDataPt);
36 
37 /* Function to carry out inverse quantisation for LL, LH and HH subband types */
invertQuantisation(const int32_t qCode,const int32_t ditherVal,IQuantiser_data * iqdata_pt)38 XBT_INLINE_ void invertQuantisation(const int32_t qCode,
39                                     const int32_t ditherVal,
40                                     IQuantiser_data* iqdata_pt) {
41   int32_t invQ;
42   int32_t index;
43   int32_t acc;
44   reg64_t tmp_r64;
45   int64_t tmp_acc;
46   int32_t tmp_accL;
47   int32_t tmp_accH;
48   uint32_t tmp_round0;
49   uint32_t tmp_round1;
50 
51   unsigned u16t;
52   /* log delta leak value (Q23) */
53   const uint32_t logDeltaLeakVal = 0x7F6CL;
54 
55   /* Turn the quantised code back into an index into the threshold table. This
56    * involves bitwise inversion of the code (if -ve) and adding 1 (phantom
57    * element at table base). Then set invQ to be +/- the threshold value,
58    * depending on the code sign. */
59   index = qCode;
60   if (qCode < 0) {
61     index = (~index);
62   }
63   index = index + 1;
64   invQ = iqdata_pt->thresholdTablePtr_sl1[index];
65   if (qCode < 0) {
66     invQ = -invQ;
67   }
68 
69   /* Load invQ into the accumulator. Add the product of the dither value times
70    * the indexed dither table value. Then get the result back from the
71    * accumulator as an updated invQ. */
72   tmp_r64.s64 = ((int64_t)ditherVal * iqdata_pt->ditherTablePtr_sf1[index]);
73   tmp_r64.s32.h += invQ >> 1;
74 
75   acc = tmp_r64.s32.h;
76 
77   tmp_round1 = tmp_r64.s32.h & 0x00000001L;
78   if (tmp_r64.u32.l >= 0x80000000) {
79     acc++;
80   }
81   if (tmp_round1 == 0 && tmp_r64.s32.l == (int32_t)0x80000000L) {
82     acc--;
83   }
84   acc = ssat24(acc);
85 
86   invQ = acc;
87 
88   /* Scale invQ by the current delta value. Left-shift the result (in the
89    * accumulator) by 4 positions for the delta scaling. Get the updated invQ
90    * back from the accumulator. */
91 
92   u16t = iqdata_pt->logDelta;
93   tmp_acc = ((int64_t)invQ * iqdata_pt->delta);
94   tmp_accL = u16t * logDeltaLeakVal;
95   tmp_accH = iqdata_pt->incrTablePtr[index];
96   acc = (int32_t)(tmp_acc >> (23 - deltaScale));
97   invQ = ssat24(acc);
98 
99   /* Now update the value of logDelta. Load the accumulator with the index
100    * value of the logDelta increment table. Add the product of the current
101    * logDelta scaled by a leaky coefficient (16310 in Q14). Get the value back
102    * from the accumulator. */
103   tmp_accH += tmp_accL >> (32 - 17);
104 
105   acc = tmp_accH;
106 
107   tmp_r64.u32.l = ((uint32_t)tmp_accL << 17);
108   tmp_r64.s32.h = tmp_accH;
109 
110   tmp_round0 = tmp_r64.u32.l;
111   tmp_round1 = (int32_t)(tmp_r64.u64 >> 1);
112   if (tmp_round0 >= 0x80000000L) {
113     acc++;
114   }
115   if (tmp_round1 == 0x40000000L) {
116     acc--;
117   }
118 
119   /* Limit the updated logDelta between 0 and its subband-specific maximum. */
120   if (acc < 0) {
121     acc = 0;
122   }
123   if (acc > iqdata_pt->maxLogDelta) {
124     acc = iqdata_pt->maxLogDelta;
125   }
126 
127   iqdata_pt->logDelta = (uint16_t)acc;
128 
129   /* The updated value of delta is the logTable output (indexed by 5 bits from
130    * the updated logDelta) shifted by a value involving the logDelta minimum
131    * and the updated logDelta itself. */
132   iqdata_pt->delta = iqdata_pt->iquantTableLogPtr[(acc >> 3) & 0x1f] >>
133                      (22 - 25 - iqdata_pt->minLogDelta - (acc >> 8));
134 
135   iqdata_pt->invQ = invQ;
136 }
137 
138 /* Function to carry out inverse quantisation for a HL subband type */
invertQuantisationHL(const int32_t qCode,const int32_t ditherVal,IQuantiser_data * iqdata_pt)139 XBT_INLINE_ void invertQuantisationHL(const int32_t qCode,
140                                       const int32_t ditherVal,
141                                       IQuantiser_data* iqdata_pt) {
142   int32_t invQ;
143   int32_t index;
144   int32_t acc;
145   reg64_t tmp_r64;
146   int64_t tmp_acc;
147   int32_t tmp_accL;
148   int32_t tmp_accH;
149   uint32_t tmp_round0;
150   uint32_t tmp_round1;
151 
152   unsigned u16t;
153   /* log delta leak value (Q23) */
154   const uint32_t logDeltaLeakVal = 0x7F6CL;
155 
156   /* Turn the quantised code back into an index into the threshold table. This
157    * involves bitwise inversion of the code (if -ve) and adding 1 (phantom
158    * element at table base). Then set invQ to be +/- the threshold value,
159    * depending on the code sign. */
160   index = qCode;
161   if (qCode < 0) {
162     index = (~index);
163   }
164   index = index + 1;
165   invQ = iqdata_pt->thresholdTablePtr_sl1[index];
166   if (qCode < 0) {
167     invQ = -invQ;
168   }
169 
170   /* Load invQ into the accumulator. Add the product of the dither value times
171    * the indexed dither table value. Then get the result back from the
172    * accumulator as an updated invQ. */
173   tmp_r64.s64 = ((int64_t)ditherVal * iqdata_pt->ditherTablePtr_sf1[index]);
174   tmp_r64.s32.h += invQ >> 1;
175 
176   acc = tmp_r64.s32.h;
177 
178   tmp_round1 = tmp_r64.s32.h & 0x00000001L;
179   if (tmp_r64.u32.l >= 0x80000000) {
180     acc++;
181   }
182   if (tmp_round1 == 0 && tmp_r64.u32.l == 0x80000000L) {
183     acc--;
184   }
185   acc = ssat24(acc);
186 
187   invQ = acc;
188 
189   /* Scale invQ by the current delta value. Left-shift the result (in the
190    * accumulator) by 4 positions for the delta scaling. Get the updated invQ
191    * back from the accumulator. */
192   u16t = iqdata_pt->logDelta;
193   tmp_acc = ((int64_t)invQ * iqdata_pt->delta);
194   tmp_accL = u16t * logDeltaLeakVal;
195   tmp_accH = iqdata_pt->incrTablePtr[index];
196   acc = (int32_t)(tmp_acc >> (23 - deltaScale));
197   invQ = acc;
198 
199   /* Now update the value of logDelta. Load the accumulator with the index
200    * value of the logDelta increment table. Add the product of the current
201    * logDelta scaled by a leaky coefficient (16310 in Q14). Get the value back
202    * from the accumulator. */
203   tmp_accH += tmp_accL >> (32 - 17);
204 
205   acc = tmp_accH;
206 
207   tmp_r64.u32.l = ((uint32_t)tmp_accL << 17);
208   tmp_r64.s32.h = tmp_accH;
209 
210   tmp_round0 = tmp_r64.u32.l;
211   tmp_round1 = (int32_t)(tmp_r64.u64 >> 1);
212   if (tmp_round0 >= 0x80000000L) {
213     acc++;
214   }
215   if (tmp_round1 == 0x40000000L) {
216     acc--;
217   }
218 
219   /* Limit the updated logDelta between 0 and its subband-specific maximum. */
220   if (acc < 0) {
221     acc = 0;
222   }
223   if (acc > iqdata_pt->maxLogDelta) {
224     acc = iqdata_pt->maxLogDelta;
225   }
226 
227   iqdata_pt->logDelta = (uint16_t)acc;
228 
229   /* The updated value of delta is the logTable output (indexed by 5 bits from
230    * the updated logDelta) shifted by a value involving the logDelta minimum
231    * and the updated logDelta itself. */
232   iqdata_pt->delta = iqdata_pt->iquantTableLogPtr[(acc >> 3) & 0x1f] >>
233                      (22 - 25 - iqdata_pt->minLogDelta - (acc >> 8));
234 
235   iqdata_pt->invQ = invQ;
236 }
237 
238 /* Function to carry out prediction ARMA filtering for the current subband
239  * performPredictionFiltering should only be used for HH and LH subband! */
performPredictionFiltering(const int32_t invQ,Subband_data * SubbandDataPt)240 XBT_INLINE_ void performPredictionFiltering(const int32_t invQ,
241                                             Subband_data* SubbandDataPt) {
242   int32_t poleVal;
243   int32_t acc;
244   int64_t accL;
245   uint32_t pointer;
246   int32_t poleDelayLine;
247   int32_t predVal;
248   int32_t* zeroCoeffPt = SubbandDataPt->m_ZeroCoeffData.m_zeroCoeff;
249   int32_t* poleCoeff = SubbandDataPt->m_PoleCoeffData.m_poleCoeff;
250   int32_t zData0;
251   int32_t* cbuf_pt;
252   int32_t invQincr_pos;
253   int32_t invQincr_neg;
254   int32_t k;
255   int32_t oldZData;
256   /* Pole coefficient and data indices */
257   enum { a1 = 0, a2 = 1 };
258 
259   /* Write the newest pole input sample to the pole delay line.
260    * Ensure the sum of the current dequantised error and the previous
261    * predictor output is saturated if necessary. */
262   poleDelayLine = invQ + SubbandDataPt->m_predData.m_predVal;
263 
264   poleDelayLine = ssat24(poleDelayLine);
265 
266   /* Pole filter convolution. Shift convolution result 1 place to the left
267    * before retrieving it, since the pole coefficients are Q22 (data is Q23)
268    * and we want a Q23 result */
269   accL = ((int64_t)poleCoeff[a2] *
270           (int64_t)SubbandDataPt->m_predData.m_poleDelayLine[a2]);
271   /* Update the pole delay line for the next pass by writing the new input
272    * sample into the 2nd element */
273   SubbandDataPt->m_predData.m_poleDelayLine[a2] = poleDelayLine;
274   accL += ((int64_t)poleCoeff[a1] * (int64_t)poleDelayLine);
275   poleVal = (int32_t)(accL >> 22);
276   poleVal = ssat24(poleVal);
277 
278   /* Create (2^(-7)) * sgn(invQ) in Q22 format. */
279   if (invQ == 0) {
280     invQincr_pos = 0L;
281   } else {
282     invQincr_pos = 0x800000;
283   }
284   if (invQ < 0L) {
285     invQincr_pos = -invQincr_pos;
286   }
287 
288   invQincr_neg = 0x0080 - invQincr_pos;
289   invQincr_pos += 0x0080;
290 
291   pointer = (SubbandDataPt->m_predData.m_zeroDelayLine.pointer++) + 12;
292   cbuf_pt = &SubbandDataPt->m_predData.m_zeroDelayLine.buffer[pointer];
293   /* partial manual unrolling to improve performance */
294   if (SubbandDataPt->m_predData.m_zeroDelayLine.pointer >= 12) {
295     SubbandDataPt->m_predData.m_zeroDelayLine.pointer = 0;
296   }
297 
298   SubbandDataPt->m_predData.m_zeroDelayLine.modulo = invQ;
299 
300   /* Iterate over the number of coefficients for this subband */
301   oldZData = invQ;
302   accL = 0;
303   for (k = 0; k < 12; k++) {
304     uint32_t tmp_round0;
305     int32_t coeffValue;
306 
307     zData0 = (*(cbuf_pt--));
308     coeffValue = *(zeroCoeffPt + k);
309     if (zData0 < 0L) {
310       acc = invQincr_neg - coeffValue;
311     } else {
312       acc = invQincr_pos - coeffValue;
313     }
314     tmp_round0 = acc;
315     acc = (acc >> 8) + coeffValue;
316     if (((tmp_round0 << 23) ^ 0x80000000) == 0) {
317       acc--;
318     }
319     accL += (int64_t)acc * (int64_t)(oldZData);
320     oldZData = zData0;
321     *(zeroCoeffPt + k) = acc;
322   }
323 
324   acc = (int32_t)(accL >> 22);
325   acc = ssat24(acc);
326   /* Predictor output is the sum of the pole and zero filter outputs. Ensure
327    * this is saturated, if necessary. */
328   predVal = acc + poleVal;
329   predVal = ssat24(predVal);
330   SubbandDataPt->m_predData.m_zeroVal = acc;
331   SubbandDataPt->m_predData.m_predVal = predVal;
332 
333   /* Update the zero filter delay line by writing the new input sample to the
334    * circular buffer. */
335   SubbandDataPt->m_predData.m_zeroDelayLine
336       .buffer[SubbandDataPt->m_predData.m_zeroDelayLine.pointer] =
337       SubbandDataPt->m_predData.m_zeroDelayLine.modulo;
338   SubbandDataPt->m_predData.m_zeroDelayLine
339       .buffer[SubbandDataPt->m_predData.m_zeroDelayLine.pointer + 12] =
340       SubbandDataPt->m_predData.m_zeroDelayLine.modulo;
341 }
342 
performPredictionFilteringLL(const int32_t invQ,Subband_data * SubbandDataPt)343 XBT_INLINE_ void performPredictionFilteringLL(const int32_t invQ,
344                                               Subband_data* SubbandDataPt) {
345   int32_t poleVal;
346   int32_t acc;
347   int64_t accL;
348   uint32_t pointer;
349   int32_t poleDelayLine;
350   int32_t predVal;
351   int32_t* zeroCoeffPt = SubbandDataPt->m_ZeroCoeffData.m_zeroCoeff;
352   int32_t* poleCoeff = SubbandDataPt->m_PoleCoeffData.m_poleCoeff;
353   int32_t* cbuf_pt;
354   int32_t invQincr_pos;
355   int32_t invQincr_neg;
356   int32_t k;
357   int32_t oldZData;
358   /* Pole coefficient and data indices */
359   enum { a1 = 0, a2 = 1 };
360 
361   /* Write the newest pole input sample to the pole delay line.
362    * Ensure the sum of the current dequantised error and the previous
363    * predictor output is saturated if necessary. */
364   poleDelayLine = invQ + SubbandDataPt->m_predData.m_predVal;
365 
366   poleDelayLine = ssat24(poleDelayLine);
367 
368   /* Pole filter convolution. Shift convolution result 1 place to the left
369    * before retrieving it, since the pole coefficients are Q22 (data is Q23)
370    * and we want a Q23 result */
371   accL = ((int64_t)poleCoeff[a2] *
372           (int64_t)SubbandDataPt->m_predData.m_poleDelayLine[a2]);
373   /* Update the pole delay line for the next pass by writing the new input
374    * sample into the 2nd element */
375   SubbandDataPt->m_predData.m_poleDelayLine[a2] = poleDelayLine;
376   accL += ((int64_t)poleCoeff[a1] * (int64_t)poleDelayLine);
377   poleVal = (int32_t)(accL >> 22);
378   poleVal = ssat24(poleVal);
379   // store poleVal to free one register.
380   SubbandDataPt->m_predData.m_predVal = poleVal;
381 
382   /* Create (2^(-7)) * sgn(invQ) in Q22 format. */
383   if (invQ == 0) {
384     invQincr_pos = 0L;
385   } else {
386     invQincr_pos = 0x800000;
387   }
388   if (invQ < 0L) {
389     invQincr_pos = -invQincr_pos;
390   }
391 
392   invQincr_neg = 0x0080 - invQincr_pos;
393   invQincr_pos += 0x0080;
394 
395   pointer = (SubbandDataPt->m_predData.m_zeroDelayLine.pointer++) + 24;
396   cbuf_pt = &SubbandDataPt->m_predData.m_zeroDelayLine.buffer[pointer];
397   /* partial manual unrolling to improve performance */
398   if (SubbandDataPt->m_predData.m_zeroDelayLine.pointer >= 24) {
399     SubbandDataPt->m_predData.m_zeroDelayLine.pointer = 0;
400   }
401 
402   SubbandDataPt->m_predData.m_zeroDelayLine.modulo = invQ;
403 
404   /* Iterate over the number of coefficients for this subband */
405 
406   oldZData = invQ;
407   accL = 0;
408   for (k = 0; k < 24; k++) {
409     int32_t zData0;
410     int32_t coeffValue;
411 
412     zData0 = (*(cbuf_pt--));
413     coeffValue = *(zeroCoeffPt + k);
414     if (zData0 < 0L) {
415       acc = invQincr_neg - coeffValue;
416     } else {
417       acc = invQincr_pos - coeffValue;
418     }
419     if (((acc << 23) ^ 0x80000000) == 0) {
420       coeffValue--;
421     }
422     acc = (acc >> 8) + coeffValue;
423     accL += (int64_t)acc * (int64_t)(oldZData);
424     oldZData = zData0;
425     *(zeroCoeffPt + k) = acc;
426   }
427 
428   acc = (int32_t)(accL >> 22);
429   acc = ssat24(acc);
430   /* Predictor output is the sum of the pole and zero filter outputs. Ensure
431    * this is saturated, if necessary. */
432   // recover value of PoleVal stored at beginning of routine...
433   predVal = acc + SubbandDataPt->m_predData.m_predVal;
434   predVal = ssat24(predVal);
435   SubbandDataPt->m_predData.m_zeroVal = acc;
436   SubbandDataPt->m_predData.m_predVal = predVal;
437 
438   /* Update the zero filter delay line by writing the new input sample to the
439    * circular buffer. */
440   SubbandDataPt->m_predData.m_zeroDelayLine
441       .buffer[SubbandDataPt->m_predData.m_zeroDelayLine.pointer] =
442       SubbandDataPt->m_predData.m_zeroDelayLine.modulo;
443   SubbandDataPt->m_predData.m_zeroDelayLine
444       .buffer[SubbandDataPt->m_predData.m_zeroDelayLine.pointer + 24] =
445       SubbandDataPt->m_predData.m_zeroDelayLine.modulo;
446 }
447 
performPredictionFilteringHL(const int32_t invQ,Subband_data * SubbandDataPt)448 XBT_INLINE_ void performPredictionFilteringHL(const int32_t invQ,
449                                               Subband_data* SubbandDataPt) {
450   int32_t poleVal;
451   int32_t acc;
452   int64_t accL;
453   uint32_t pointer;
454   int32_t poleDelayLine;
455   int32_t predVal;
456   int32_t* zeroCoeffPt = SubbandDataPt->m_ZeroCoeffData.m_zeroCoeff;
457   int32_t* poleCoeff = SubbandDataPt->m_PoleCoeffData.m_poleCoeff;
458   int32_t zData0;
459   int32_t* cbuf_pt;
460   int32_t invQincr_pos;
461   int32_t invQincr_neg;
462   int32_t k;
463   int32_t oldZData;
464   const int32_t roundCte = 0x80000000;
465   /* Pole coefficient and data indices */
466   enum { a1 = 0, a2 = 1 };
467 
468   /* Write the newest pole input sample to the pole delay line.
469    * Ensure the sum of the current dequantised error and the previous
470    * predictor output is saturated if necessary. */
471   poleDelayLine = invQ + SubbandDataPt->m_predData.m_predVal;
472 
473   poleDelayLine = ssat24(poleDelayLine);
474 
475   /* Pole filter convolution. Shift convolution result 1 place to the left
476    * before retrieving it, since the pole coefficients are Q22 (data is Q23)
477    * and we want a Q23 result */
478   accL = ((int64_t)poleCoeff[a2] *
479           (int64_t)SubbandDataPt->m_predData.m_poleDelayLine[a2]);
480   /* Update the pole delay line for the next pass by writing the new input
481    * sample into the 2nd element */
482   SubbandDataPt->m_predData.m_poleDelayLine[a2] = poleDelayLine;
483   accL += ((int64_t)poleCoeff[a1] * (int64_t)poleDelayLine);
484   poleVal = (int32_t)(accL >> 22);
485   poleVal = ssat24(poleVal);
486 
487   /* Create (2^(-7)) * sgn(invQ) in Q22 format. */
488   invQincr_pos = 0L;
489   if (invQ != 0) {
490     invQincr_pos = 0x800000;
491   }
492   if (invQ < 0L) {
493     invQincr_pos = -invQincr_pos;
494   }
495 
496   invQincr_neg = 0x0080 - invQincr_pos;
497   invQincr_pos += 0x0080;
498 
499   pointer = (SubbandDataPt->m_predData.m_zeroDelayLine.pointer++) + 6;
500   cbuf_pt = &SubbandDataPt->m_predData.m_zeroDelayLine.buffer[pointer];
501   /* partial manual unrolling to improve performance */
502   if (SubbandDataPt->m_predData.m_zeroDelayLine.pointer >= 6) {
503     SubbandDataPt->m_predData.m_zeroDelayLine.pointer = 0;
504   }
505 
506   SubbandDataPt->m_predData.m_zeroDelayLine.modulo = invQ;
507 
508   /* Iterate over the number of coefficients for this subband */
509   oldZData = invQ;
510   accL = 0;
511 
512   for (k = 0; k < 6; k++) {
513     uint32_t tmp_round0;
514     int32_t coeffValue;
515 
516     zData0 = (*(cbuf_pt--));
517     coeffValue = *(zeroCoeffPt + k);
518     if (zData0 < 0L) {
519       acc = invQincr_neg - coeffValue;
520     } else {
521       acc = invQincr_pos - coeffValue;
522     }
523     tmp_round0 = acc;
524     acc = (acc >> 8) + coeffValue;
525     if (((tmp_round0 << 23) ^ roundCte) == 0) {
526       acc--;
527     }
528     accL += (int64_t)acc * (int64_t)(oldZData);
529     oldZData = zData0;
530     *(zeroCoeffPt + k) = acc;
531   }
532 
533   acc = (int32_t)(accL >> 22);
534   acc = ssat24(acc);
535   /* Predictor output is the sum of the pole and zero filter outputs. Ensure
536    * this is saturated, if necessary. */
537   predVal = acc + poleVal;
538   predVal = ssat24(predVal);
539   SubbandDataPt->m_predData.m_zeroVal = acc;
540   SubbandDataPt->m_predData.m_predVal = predVal;
541 
542   /* Update the zero filter delay line by writing the new input sample to the
543    * circular buffer. */
544   SubbandDataPt->m_predData.m_zeroDelayLine
545       .buffer[SubbandDataPt->m_predData.m_zeroDelayLine.pointer] =
546       SubbandDataPt->m_predData.m_zeroDelayLine.modulo;
547   SubbandDataPt->m_predData.m_zeroDelayLine
548       .buffer[SubbandDataPt->m_predData.m_zeroDelayLine.pointer + 6] =
549       SubbandDataPt->m_predData.m_zeroDelayLine.modulo;
550 }
551 
552 #endif  // SUBBANDFUNCTIONSCOMMON_H
553