/**
 * Copyright (C) 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*------------------------------------------------------------------------------
 *
 *  This file includes convolution functions required for the Qmf.
 *
 *----------------------------------------------------------------------------*/

#include "Qmf.h"

/* Number of taps accumulated by each convolution. Both entry points read
 * exactly this many coefficients and this many samples from each delay line.
 * The original hand-unrolled code carried the warning that it assumes
 * m_qmfDelayLineLength == 16; that assumption still holds here. */
enum { QMF_CONV_TAP_COUNT = 16 };

/* Bounds of the signed 24-bit range every result is clamped to. */
enum { QMF_CONV_MAX_24 = 8388607, QMF_CONV_MIN_24 = -8388608 };

/* Clamp a 32-bit value into the signed 24-bit range. */
static int32_t QmfConvSaturate24(int32_t value) {
  if (value > QMF_CONV_MAX_24) {
    return QMF_CONV_MAX_24;
  }
  if (value < QMF_CONV_MIN_24) {
    return QMF_CONV_MIN_24;
  }
  return value;
}

/* Scale a 64-bit accumulator down by 2^shift, rounding to nearest with exact
 * halves resolved toward the even result, then clamp to signed 24 bits.
 *
 * A tie is the case where the discarded fraction is exactly one half and the
 * lowest kept bit is 0; adding the half would then round up to an odd value,
 * so the result is decremented back to the even neighbour.
 *
 * The tie test masks the low (shift + 1) bits of the 64-bit accumulator. It
 * deliberately avoids left-shifting a signed 32-bit copy of the accumulator:
 * shifting a negative value, or shifting set bits out of a signed type, is
 * undefined behaviour in C. */
static int32_t QmfConvRoundShift(int64_t acc, unsigned int shift) {
  const int64_t half = (int64_t)1 << (shift - 1);
  const int64_t tieMask = ((int64_t)1 << (shift + 1)) - 1;
  const int isTie = ((acc & tieMask) == half);
  int32_t rounded = (int32_t)((acc + half) >> shift);

  if (isTie) {
    rounded--;
  }
  return QmfConvSaturate24(rounded);
}

/**
 * 16-tap dual convolution over 16-bit delay lines.
 *
 * Two sums of products are formed with the same 16 coefficients:
 *   - phase 0 multiplies coeffPtr[k] by p1dl_buffPtr[-k] (walking backward),
 *   - phase 1 multiplies coeffPtr[k] by p2dl_buffPtr[+k] (walking forward),
 * for k = 0..15. Each sum is accumulated in 64 bits, shifted right by 15 with
 * round-half-to-even, and clamped to the signed 24-bit range.
 *
 * @param p1dl_buffPtr  Sample read at offsets 0 down to -15; the 15 elements
 *                      before this pointer must therefore be readable.
 * @param p2dl_buffPtr  Sample read at offsets 0 up to +15.
 * @param coeffPtr      16 filter coefficients.
 * @param convSumDiff   Output: convSumDiff[0] receives the clamped sum
 *                      (phase 1 + phase 0) and convSumDiff[2] receives the
 *                      clamped difference (phase 1 - phase 0).
 *                      NOTE(review): the difference lands two slots after the
 *                      sum here but one slot after it in AsmQmfConvI;
 *                      presumably this matches the caller's buffer layout -
 *                      confirm against the caller.
 */
void AsmQmfConvO(const int16_t* p1dl_buffPtr, const int16_t* p2dl_buffPtr,
                 const int32_t* coeffPtr, int32_t* convSumDiff) {
  int64_t accBackward = 0;
  int64_t accForward = 0;

  /* Plain loop instead of 16 hand-unrolled steps; the trip count is a
   * compile-time constant, so the accumulation order is unchanged. */
  for (int tap = 0; tap < QMF_CONV_TAP_COUNT; tap++) {
    const int64_t coeff = coeffPtr[tap];

    accBackward += coeff * p1dl_buffPtr[-tap];
    accForward += coeff * p2dl_buffPtr[tap];
  }

  const int32_t phase0 = QmfConvRoundShift(accBackward, 15);
  const int32_t phase1 = QmfConvRoundShift(accForward, 15);

  /* Both phases are already within 24 bits, so the 32-bit sum and difference
   * cannot overflow before being clamped. */
  convSumDiff[0] = QmfConvSaturate24(phase1 + phase0);
  convSumDiff[2] = QmfConvSaturate24(phase1 - phase0);
}

/**
 * 16-tap dual convolution over 32-bit delay lines.
 *
 * Same structure as AsmQmfConvO, but the samples are 32-bit, the accumulators
 * are shifted right by 23 (round-half-to-even) before being clamped to the
 * signed 24-bit range, and the two results are stored in adjacent slots.
 *
 * @param p1dl_buffPtr   Sample read at offsets 0 down to -15; the 15 elements
 *                       before this pointer must therefore be readable.
 * @param p2dl_buffPtr   Sample read at offsets 0 up to +15.
 * @param coeffPtr       16 filter coefficients.
 * @param filterOutputs  Output: filterOutputs[0] receives the clamped sum
 *                       (phase 1 + phase 0) and filterOutputs[1] receives the
 *                       clamped difference (phase 1 - phase 0).
 */
void AsmQmfConvI(const int32_t* p1dl_buffPtr, const int32_t* p2dl_buffPtr,
                 const int32_t* coeffPtr, int32_t* filterOutputs) {
  int64_t accBackward = 0;
  int64_t accForward = 0;

  for (int tap = 0; tap < QMF_CONV_TAP_COUNT; tap++) {
    const int64_t coeff = coeffPtr[tap];

    accBackward += coeff * p1dl_buffPtr[-tap];
    accForward += coeff * p2dl_buffPtr[tap];
  }

  const int32_t phase0 = QmfConvRoundShift(accBackward, 23);
  const int32_t phase1 = QmfConvRoundShift(accForward, 23);

  filterOutputs[0] = QmfConvSaturate24(phase1 + phase0);
  filterOutputs[1] = QmfConvSaturate24(phase1 - phase0);
}