1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 /****************************************************************************************
19 Portions of this file are derived from the following 3GPP standard:
20 
21     3GPP TS 26.073
22     ANSI-C code for the Adaptive Multi-Rate (AMR) speech codec
23     Available from http://www.3gpp.org
24 
25 (C) 2004, 3GPP Organizational Partners (ARIB, ATIS, CCSA, ETSI, TTA, TTC)
26 Permission to distribute, modify and use this file under the standard license
27 terms listed above has been obtained from the copyright holder.
28 ****************************************************************************************/
29 /*
30 ------------------------------------------------------------------------------
31 
32  Pathname: ./audio/gsm-amr/c/src/bgnscd.c
33  Functions:
34            Bgn_scd_reset
35            Bgn_scd
36 
37 ------------------------------------------------------------------------------
38  MODULE DESCRIPTION
39 
40  Background noise source characteristic detector (SCD)
41 
42 ------------------------------------------------------------------------------
43 */
44 
45 
46 /*----------------------------------------------------------------------------
47 ; INCLUDES
48 ----------------------------------------------------------------------------*/
49 #include <string.h>
50 
51 #include    "bgnscd.h"
52 #include    "typedef.h"
53 #include    "basic_op.h"
54 #include    "cnst.h"
55 #include    "copy.h"
56 #include    "gmed_n.h"
57 #include    "sqrt_l.h"
58 
59 /*----------------------------------------------------------------------------
60 ; MACROS
61 ; Define module specific macros here
62 ----------------------------------------------------------------------------*/
63 
64 
65 /*----------------------------------------------------------------------------
66 ; DEFINES
67 ; Include all pre-processor statements here. Include conditional
68 ; compile variables also.
69 ----------------------------------------------------------------------------*/
70 #define TRUE  1
71 #define FALSE 0
72 
73 /*----------------------------------------------------------------------------
74 ; LOCAL FUNCTION DEFINITIONS
75 ; Function Prototype declaration
76 ----------------------------------------------------------------------------*/
77 
78 /*----------------------------------------------------------------------------
79 ; LOCAL VARIABLE DEFINITIONS
80 ; Variable declaration - defined here and used outside this module
81 ----------------------------------------------------------------------------*/
82 
83 
84 /*
85 ------------------------------------------------------------------------------
86  FUNCTION NAME: Bgn_scd_reset
87 ------------------------------------------------------------------------------
88  INPUT AND OUTPUT DEFINITIONS
89 
90  Inputs:
91     state = points to memory of type Bgn_scdState.
92 
93  Outputs:
94     The memory of type Bgn_scdState pointed to by state is set to all
95         zeros.
96 
97  Returns:
98     Returns 0 if memory was successfully initialized,
99         otherwise returns -1.
100 
101  Global Variables Used:
102     None.
103 
104  Local Variables Needed:
105     None.
106 
107 ------------------------------------------------------------------------------
108  FUNCTION DESCRIPTION
109 
110  Resets state memory.
111 
112 ------------------------------------------------------------------------------
113  REQUIREMENTS
114 
115  None
116 
117 ------------------------------------------------------------------------------
118  REFERENCES
119 
120  bgnscd.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
121 
122 ------------------------------------------------------------------------------
123  PSEUDO-CODE
124 
125 Word16 Bgn_scd_reset (Bgn_scdState *state)
126 {
127    if (state == (Bgn_scdState *) NULL){
128       fprintf(stderr, "Bgn_scd_reset: invalid parameter\n");
129       return -1;
130    }
131 
132    // Static vectors to zero
133    Set_zero (state->frameEnergyHist, L_ENERGYHIST);
134 
135    // Initialize hangover handling
136    state->bgHangover = 0;
137 
138    return 0;
139 }
140 
141 ------------------------------------------------------------------------------
142  RESOURCES USED [optional]
143 
144  When the code is written for a specific target processor,
145  the resources used should be documented below.
146 
147  HEAP MEMORY USED: x bytes
148 
149  STACK MEMORY USED: x bytes
150 
151  CLOCK CYCLES: (cycle count equation for this function) + (variable
152                 used to represent cycle count for each subroutine
153                 called)
154      where: (cycle count variable) = cycle count for [subroutine
155                                      name]
156 
157 ------------------------------------------------------------------------------
158  CAUTION [optional]
159  [State any special notes, constraints or cautions for users of this function]
160 
161 ------------------------------------------------------------------------------
162 */
163 
Bgn_scd_reset(Bgn_scdState * state)164 Word16  Bgn_scd_reset(Bgn_scdState *state)
165 {
166     if (state == (Bgn_scdState *) NULL)
167     {
168         /* fprintf(stderr, "Bgn_scd_reset: invalid parameter\n");  */
169         return(-1);
170     }
171 
172     /* Static vectors to zero */
173     memset(state->frameEnergyHist, 0, L_ENERGYHIST*sizeof(Word16));
174 
175     /* Initialize hangover handling */
176     state->bgHangover = 0;
177 
178     return(0);
179 }
180 
181 /****************************************************************************/
182 
183 /*
184 ------------------------------------------------------------------------------
185  FUNCTION NAME: Bgn_scd
186 ------------------------------------------------------------------------------
187  INPUT AND OUTPUT DEFINITIONS
188 
189  Inputs:
190     st = pointer to state variables of type Bgn_scdState
191     ltpGainHist[] = LTP gain history (Word16)
192     speech[] = synthesis speech frame (Word16)
193     voicedHangover = pointer to # of frames after last voiced frame (Word16)
194     pOverflow      = pointer to overflow indicator (Flag)
195 
196  Outputs:
197     st = function updates the state variables of type Bgn_scdState
198         pointed to by st.
199     voicedHangover = function updates the # of frames after last voiced
200         frame pointed to by voicedHangover.
201     pOverflow = 1 if the basic math function L_add() results in saturation.
202                   else pOverflow is zero.
203 
204  Returns:
205     inbgNoise = flag if background noise is present (Word16)
206 
207  Global Variables Used:
208     None.
209 
210  Local Variables Needed:
211     None.
212 
213 ------------------------------------------------------------------------------
214  FUNCTION DESCRIPTION
215 
216  Characterize synthesis speech and detect background noise.
217 
218 ------------------------------------------------------------------------------
219  REQUIREMENTS
220 
221  None
222 
223 ------------------------------------------------------------------------------
224  REFERENCES
225 
226  bgnscd.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
227 
228 ------------------------------------------------------------------------------
229  PSEUDO-CODE
230 
231 Word16 Bgn_scd (Bgn_scdState *st,      // i : State variables for bgn SCD
232                 Word16 ltpGainHist[],  // i : LTP gain history
233                 Word16 speech[],       // o : synthesis speech frame
234                 Word16 *voicedHangover // o : # of frames after last
235                                               voiced frame
236                 )
237 {
238    Word16 i;
239    Word16 prevVoiced, inbgNoise;
240    Word16 temp;
241    Word16 ltpLimit, frameEnergyMin;
242    Word16 currEnergy, noiseFloor, maxEnergy, maxEnergyLastPart;
243    Word32 s;
244 
245    // Update the inBackgroundNoise flag (valid for use in next frame if BFI)
246    // it now works as a energy detector floating on top
247    // not as good as a VAD.
248 
249    currEnergy = 0;
250    s = (Word32) 0;
251 
252    for (i = 0; i < L_FRAME; i++)
253    {
254        s = L_mac (s, speech[i], speech[i]);
255    }
256 
257    s = L_shl(s, 2);
258 
259    currEnergy = extract_h (s);
260 
261    frameEnergyMin = 32767;
262 
263    for (i = 0; i < L_ENERGYHIST; i++)
264    {
265       if (sub(st->frameEnergyHist[i], frameEnergyMin) < 0)
266          frameEnergyMin = st->frameEnergyHist[i];
267    }
268 
269    noiseFloor = shl (frameEnergyMin, 4); // Frame Energy Margin of 16
270 
271    maxEnergy = st->frameEnergyHist[0];
272    for (i = 1; i < L_ENERGYHIST-4; i++)
273    {
274       if ( sub (maxEnergy, st->frameEnergyHist[i]) < 0)
275       {
276          maxEnergy = st->frameEnergyHist[i];
277       }
278    }
279 
280    maxEnergyLastPart = st->frameEnergyHist[2*L_ENERGYHIST/3];
281    for (i = 2*L_ENERGYHIST/3+1; i < L_ENERGYHIST; i++)
282    {
283       if ( sub (maxEnergyLastPart, st->frameEnergyHist[i] ) < 0)
284       {
285          maxEnergyLastPart = st->frameEnergyHist[i];
286       }
287    }
288 
289    inbgNoise = 0;        // false
290 
291    // Do not consider silence as noise
292    // Do not consider continuous high volume as noise
293    // Or if the current noise level is very low
294    // Mark as noise if under current noise limit
295    // OR if the maximum energy is below the upper limit
296 
297    if ( (sub(maxEnergy, LOWERNOISELIMIT) > 0) &&
298         (sub(currEnergy, FRAMEENERGYLIMIT) < 0) &&
299         (sub(currEnergy, LOWERNOISELIMIT) > 0) &&
300         ( (sub(currEnergy, noiseFloor) < 0) ||
301           (sub(maxEnergyLastPart, UPPERNOISELIMIT) < 0)))
302    {
303       if (sub(add(st->bgHangover, 1), 30) > 0)
304       {
305          st->bgHangover = 30;
306       } else
307       {
308          st->bgHangover = add(st->bgHangover, 1);
309       }
310    }
311    else
312    {
313       st->bgHangover = 0;
314    }
315 
316    // make final decision about frame state , act somewhat cautiosly
317    if (sub(st->bgHangover,1) > 0)
318       inbgNoise = 1;       // true
319 
320    for (i = 0; i < L_ENERGYHIST-1; i++)
321    {
322       st->frameEnergyHist[i] = st->frameEnergyHist[i+1];
323    }
324    st->frameEnergyHist[L_ENERGYHIST-1] = currEnergy;
325 
326    // prepare for voicing decision; tighten the threshold after some
327       time in noise
328    ltpLimit = 13926;             // 0.85  Q14
329    if (sub(st->bgHangover, 8) > 0)
330    {
331       ltpLimit = 15565;          // 0.95  Q14
332    }
333    if (sub(st->bgHangover, 15) > 0)
334    {
335       ltpLimit = 16383;          // 1.00  Q14
336    }
337 
338    // weak sort of voicing indication.
339    prevVoiced = 0;        // false
340 
341    if (sub(gmed_n(&ltpGainHist[4], 5), ltpLimit) > 0)
342    {
343       prevVoiced = 1;     // true
344    }
345    if (sub(st->bgHangover, 20) > 0) {
346       if (sub(gmed_n(ltpGainHist, 9), ltpLimit) > 0)
347       {
348          prevVoiced = 1;  // true
349       }
350       else
351       {
352          prevVoiced = 0;  // false
353       }
354    }
355 
356    if (prevVoiced)
357    {
358       *voicedHangover = 0;
359    }
360    else
361    {
362       temp = add(*voicedHangover, 1);
363       if (sub(temp, 10) > 0)
364       {
365          *voicedHangover = 10;
366       }
367       else
368       {
369          *voicedHangover = temp;
370       }
371    }
372 
373    return inbgNoise;
374 }
375 
376 ------------------------------------------------------------------------------
377  RESOURCES USED [optional]
378 
379  When the code is written for a specific target processor,
380  the resources used should be documented below.
381 
382  HEAP MEMORY USED: x bytes
383 
384  STACK MEMORY USED: x bytes
385 
386  CLOCK CYCLES: (cycle count equation for this function) + (variable
387                 used to represent cycle count for each subroutine
388                 called)
389      where: (cycle count variable) = cycle count for [subroutine
390                                      name]
391 
392 ------------------------------------------------------------------------------
393  CAUTION [optional]
394  [State any special notes, constraints or cautions for users of this function]
395 
396 ------------------------------------------------------------------------------
397 */
398 
/*
 * Bgn_scd - background noise source characteristic detector.
 *
 * Measures the energy of the current synthesis frame, compares it with the
 * energy history held in the state, updates the background-noise hangover
 * counter and the energy history, and finally refreshes the caller's
 * voiced-hangover counter from medians of the LTP gain history.
 *
 * st             (i/o) detector state (frameEnergyHist[] and bgHangover).
 * ltpGainHist    (i)   LTP gain history; handed to gmed_n() as the windows
 *                      starting at index 4 (length 5) and index 0 (length 9).
 * speech         (i)   synthesis speech frame, L_FRAME samples, read only.
 * voicedHangover (i/o) number of frames since the last voiced frame; reset
 *                      to 0 on a voiced frame, otherwise incremented and
 *                      limited to 10.
 * pOverflow      (o)   overflow indicator passed to L_add() while the frame
 *                      energy is accumulated.
 *
 * Returns TRUE when the updated bgHangover is greater than 1, else FALSE.
 */
Word16 Bgn_scd(Bgn_scdState *st,        /* i/o : State variables for bgn SCD */
               Word16 ltpGainHist[],    /* i   : LTP gain history            */
               Word16 speech[],         /* i   : synthesis speech frame      */
               Word16 *voicedHangover,  /* i/o : # of frames after last
                                                 voiced frame                */
               Flag   *pOverflow        /* o   : overflow flag for L_add()   */
              )
{
    Word16  i;
    Word16  prevVoiced, inbgNoise;
    Word16  ltpLimit, frameEnergyMin;
    Word16  currEnergy, noiseFloor, maxEnergy, maxEnergyLastPart;
    Word32  s, L_temp;

    /* Update the inBackgroundNoise flag (valid for use in next frame if BFI). */
    /* It works as an energy detector floating on top; not as good as a VAD.   */

    /* Frame energy: saturating sum of 2*speech[i]^2, as L_mac() would give. */
    s = (Word32) 0;

    for (i = L_FRAME - 1; i >= 0; i--)
    {
        L_temp = ((Word32) speech[i]) * speech[i];
        if (L_temp != (Word32) 0x40000000L)
        {
            L_temp = L_temp << 1;
        }
        else
        {
            /* (-32768)^2 doubled does not fit in 32 bits: saturate. */
            L_temp = MAX_32;
        }
        s = L_add(s, L_temp, pOverflow);
    }

    /* Equivalent to extract_h(L_shl(s, 2)); s is a sum of squares, so only */
    /* positive saturation has to be considered.                            */
    if (s > (Word32) 0x1fffffffL)
    {
        currEnergy = MAX_16;
    }
    else
    {
        currEnergy = (Word16)(s >> 14);
    }

    /* Smallest energy anywhere in the history. */
    frameEnergyMin = 32767;
    for (i = L_ENERGYHIST - 1; i >= 0; i--)
    {
        if (st->frameEnergyHist[i] < frameEnergyMin)
        {
            frameEnergyMin = st->frameEnergyHist[i];
        }
    }

    /* Frame Energy Margin of 16, saturated to 16 bits.  A multiply is used */
    /* instead of a shift because left-shifting a negative value is         */
    /* undefined behaviour in C.                                            */
    L_temp = (Word32) frameEnergyMin * 16;
    if (L_temp > (Word32) MAX_16)
    {
        noiseFloor = MAX_16;
    }
    else if (L_temp < (Word32) MIN_16)
    {
        noiseFloor = MIN_16;
    }
    else
    {
        noiseFloor = (Word16) L_temp;
    }

    /* Largest energy in the history, ignoring the four newest entries. */
    maxEnergy = st->frameEnergyHist[0];
    for (i = L_ENERGYHIST - 5; i >= 1; i--)
    {
        if (maxEnergy < st->frameEnergyHist[i])
        {
            maxEnergy = st->frameEnergyHist[i];
        }
    }

    /* Largest energy in the last third of the history. */
    maxEnergyLastPart = st->frameEnergyHist[2 * L_ENERGYHIST / 3];
    for (i = 2 * L_ENERGYHIST / 3 + 1; i < L_ENERGYHIST; i++)
    {
        if (maxEnergyLastPart < st->frameEnergyHist[i])
        {
            maxEnergyLastPart = st->frameEnergyHist[i];
        }
    }

    /* Do not consider silence as noise */
    /* Do not consider continuous high volume as noise */
    /* Or if the current noise level is very low */
    /* Mark as noise if under current noise limit */
    /* OR if the maximum energy is below the upper limit */

    if ((maxEnergy > LOWERNOISELIMIT) &&
            (currEnergy < FRAMEENERGYLIMIT) &&
            (currEnergy > LOWERNOISELIMIT) &&
            ((currEnergy < noiseFloor) ||
             (maxEnergyLastPart < UPPERNOISELIMIT)))
    {
        /* Count consecutive noise-like frames, capped at 30. */
        if (st->bgHangover < 30)
        {
            st->bgHangover += 1;
        }
        else
        {
            st->bgHangover = 30;
        }
    }
    else
    {
        st->bgHangover = 0;
    }

    /* make final decision about frame state, act somewhat cautiously */
    inbgNoise = (st->bgHangover > 1) ? TRUE : FALSE;

    /* Drop the oldest history entry and append the current frame energy. */
    /* memmove is required because source and destination overlap.        */
    memmove(st->frameEnergyHist,
            st->frameEnergyHist + 1,
            (L_ENERGYHIST - 1) * sizeof(st->frameEnergyHist[0]));
    st->frameEnergyHist[L_ENERGYHIST - 1] = currEnergy;

    /* prepare for voicing decision; tighten the threshold after some
       time in noise */

    if (st->bgHangover > 15)
    {
        ltpLimit = 16383;       /* 1.00  Q14 */
    }
    else if (st->bgHangover > 8)
    {
        ltpLimit = 15565;       /* 0.95  Q14 */
    }
    else
    {
        ltpLimit = 13926;       /* 0.85  Q14 */
    }

    /* weak sort of voicing indication. */
    prevVoiced = FALSE;

    if (gmed_n(&ltpGainHist[4], 5) > ltpLimit)
    {
        prevVoiced = TRUE;
    }

    /* After a long stay in noise the decision is taken from the longer */
    /* 9-entry median instead, overriding the result above.             */
    if (st->bgHangover > 20)
    {
        if (gmed_n(ltpGainHist, 9) > ltpLimit)
        {
            prevVoiced = TRUE;
        }
        else
        {
            prevVoiced = FALSE;
        }
    }

    if (prevVoiced)
    {
        *voicedHangover = 0;
    }
    else
    {
        /* Widen before incrementing so that a counter already at MAX_16 */
        /* saturates to 10 (as the reference add() would) instead of     */
        /* wrapping to a negative value.                                 */
        L_temp = (Word32) * voicedHangover + 1;

        if (L_temp > 10)
        {
            *voicedHangover = 10;
        }
        else
        {
            *voicedHangover = (Word16) L_temp;
        }
    }

    return(inbgNoise);
}
590