1#!/usr/bin/env python3
2#
3#   Copyright 2017 - The Android Open Source Project
4#
5#   Licensed under the Apache License, Version 2.0 (the "License");
6#   you may not use this file except in compliance with the License.
7#   You may obtain a copy of the License at
8#
9#       http://www.apache.org/licenses/LICENSE-2.0
10#
11#   Unless required by applicable law or agreed to in writing, software
12#   distributed under the License is distributed on an "AS IS" BASIS,
13#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14#   See the License for the specific language governing permissions and
15#   limitations under the License.
16"""This module provides utilities to detect some artifacts and measure the
17    quality of audio."""
18
19import logging
20import math
21import numpy
22
23import acts_contrib.test_utils.audio_analysis_lib.audio_analysis as audio_analysis
24
25# The input signal should be one sine wave with fixed frequency which
26# can have silence before and/or after sine wave.
27# For example:
28#   silence      sine wave      silence
29#  -----------|VVVVVVVVVVVVV|-----------
30#     (a)           (b)           (c)
31# This module detects these artifacts:
32#   1. Detect noise in (a) and (c).
33#   2. Detect delay in (b).
34#   3. Detect burst in (b).
35# Assume the transitions between (a)(b) and (b)(c) are smooth and
36# amplitude increases/decreases linearly.
37# This module will detect artifacts in the sine wave.
38# This module also estimates the equivalent noise level by teager operator.
39# This module also detects volume changes in the sine wave. However, volume
40# changes may be affected by delay or burst.
41# Some artifacts may cause each other.
42
43# In this module, amplitude and frequency are derived from Hilbert transform.
44# Both amplitude and frequency are a function of time.
45
46# To detect each artifact, each point will be compared with
47# average amplitude of its block. The block size will be 1.5 ms.
48# Using average amplitude can mitigate the error caused by
49# Hilbert transform and noise.
50# In some case, for more accuracy, the block size may be modified
51# to other values.
52DEFAULT_BLOCK_SIZE_SECS = 0.0015
53
54# If the difference between average frequency of this block and
55# dominant frequency of full signal is less than 0.5 times of
56# dominant frequency, this block is considered to be within the
57# sine wave. In most cases, if there is no sine wave(only noise),
58# average frequency will be much greater than 5 times of
59# dominant frequency.
60# Also, for delay during playback, the frequency will be about 0
61# in perfect situation or much greater than 5 times of dominant
62# frequency if it's noised.
63DEFAULT_FREQUENCY_ERROR = 0.5
64
65# If the amplitude of some sample is less than 0.6 times of the
66# average amplitude of its left/right block, it will be considered
67# as a delay during playing.
68DEFAULT_DELAY_AMPLITUDE_THRESHOLD = 0.6
69
70# If the average amplitude of the block before or after playing
71# is more than 0.5 times to the average amplitude of the wave,
72# it will be considered as a noise artifact.
73DEFAULT_NOISE_AMPLITUDE_THRESHOLD = 0.5
74
75# In the sine wave, if the amplitude is more than 1.4 times of
76# its left side and its right side, it will be considered as
77# a burst.
78DEFAULT_BURST_AMPLITUDE_THRESHOLD = 1.4
79
80# When detecting burst, if the amplitude is lower than 0.5 times
81# average amplitude, we ignore it.
82DEFAULT_BURST_TOO_SMALL = 0.5
83
84# For a signal which is the combination of sine wave with fixed frequency f and
85# amplitude 1 and standard noise with amplitude k, the average teager value is
86# nearly linear to the noise level k.
87# Given frequency f, we simulate a sine wave with default noise level and
88# calculate its average teager value. Then, we can estimate the equivalent
89# noise level of input signal by the average teager value of input signal.
90DEFAULT_STANDARD_NOISE = 0.005
91
92# For delay, burst, volume increasing/decreasing, if two delay(
93# burst, volume increasing/decreasing) happen within
94# DEFAULT_SAME_EVENT_SECS seconds, we consider they are the
95# same event.
96DEFAULT_SAME_EVENT_SECS = 0.001
97
98# When detecting increasing/decreasing volume of signal, if the amplitude
99# is lower than 0.1 times average amplitude, we ignore it.
100DEFAULT_VOLUME_CHANGE_TOO_SMALL = 0.1
101
102# If average amplitude of right block is less/more than average
103# amplitude of left block times DEFAULT_VOLUME_CHANGE_AMPLITUDE, it will be
104# considered as decreasing/increasing on volume.
105DEFAULT_VOLUME_CHANGE_AMPLITUDE = 0.1
106
107# If the increasing/decreasing volume event is too close to the start or the end
108# of sine wave, we consider its volume change as part of rising/falling phase in
109# the start/end.
110NEAR_START_OR_END_SECS = 0.01
111
112# After applying Hilbert transform, the resulting amplitude and frequency may be
113# extremely large in the start and/or the end part. Thus, we will append zeros
114# before and after the whole wave for 0.1 secs.
115APPEND_ZEROS_SECS = 0.1
116
117# If the noise event is too close to the start or the end of the data, we
118# consider its noise as part of artifacts caused by edge effect of Hilbert
119# transform.
120# For example, originally, the data duration is 10 seconds.
121# We append 0.1 seconds of zeros in the beginning and the end of the data, so
122# the data becomes 10.2 seocnds long.
123# Then, we apply Hilbert transform to 10.2 seconds of data.
124# Near 0.1 seconds and 10.1 seconds, there will be edge effect of Hilbert
125# transform. We do not want these be treated as noise.
126# If NEAR_DATA_START_OR_END_SECS is set to 0.01, then the noise happened
127# at [0, 0.11] and [10.09, 10.1] will be ignored.
128NEAR_DATA_START_OR_END_SECS = 0.01
129
130# If the noise event is too close to the start or the end of the sine wave in
131# the data, we consider its noise as part of artifacts caused by edge effect of
132# Hilbert transform.
133# A |-------------|vvvvvvvvvvvvvvvvvvvvvvv|-------------|
134# B |ooooooooo| d |                       | d |ooooooooo|
135#
136# A is full signal. It contains a sine wave and silence before and after sine
137# wave.
138# In B, |oooo| shows the parts that we are going to check for noise before/after
139# sine wave. | d | is determined by NEAR_SINE_START_OR_END_SECS.
140NEAR_SINE_START_OR_END_SECS = 0.01
141
142
143class SineWaveNotFound(Exception):
144    """Error when there's no sine wave found in the signal"""
145
146
147def hilbert(x):
148    """Hilbert transform copied from scipy.
149
150    More information can be found here:
151    http://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.hilbert.html
152
153    Args:
154        x: Real signal data to transform.
155
156    Returns:
157        Analytic signal of x, we can further extract amplitude and
158              frequency from it.
159
160    """
161    x = numpy.asarray(x)
162    if numpy.iscomplexobj(x):
163        raise ValueError("x must be real.")
164    axis = -1
165    N = x.shape[axis]
166    if N <= 0:
167        raise ValueError("N must be positive.")
168
169    Xf = numpy.fft.fft(x, N, axis=axis)
170    h = numpy.zeros(N)
171    if N % 2 == 0:
172        h[0] = h[N // 2] = 1
173        h[1:N // 2] = 2
174    else:
175        h[0] = 1
176        h[1:(N + 1) // 2] = 2
177
178    if len(x.shape) > 1:
179        ind = [newaxis] * x.ndim
180        ind[axis] = slice(None)
181        h = h[ind]
182    x = numpy.fft.ifft(Xf * h, axis=axis)
183    return x
184
185
186def noised_sine_wave(frequency, rate, noise_level):
187    """Generates a sine wave of 2 second with specified noise level.
188
189    Args:
190        frequency: Frequency of sine wave.
191        rate: Sampling rate in samples per second. Example inputs: 44100,
192        48000
193        noise_level: Required noise level.
194
195    Returns:
196        A sine wave with specified noise level.
197
198    """
199    wave = []
200    for index in range(0, rate * 2):
201        sample = 2.0 * math.pi * frequency * float(index) / float(rate)
202        sine_wave = math.sin(sample)
203        noise = noise_level * numpy.random.standard_normal()
204        wave.append(sine_wave + noise)
205    return wave
206
207
208def average_teager_value(wave, amplitude):
209    """Computes the normalized average teager value.
210
211    After averaging the teager value, we will normalize the value by
212    dividing square of amplitude.
213
214    Args:
215        wave: Wave to apply teager operator.
216        amplitude: Average amplitude of given wave.
217
218    Returns:
219        Average teager value.
220
221    """
222    teager_value, length = 0, len(wave)
223    for i in range(1, length - 1):
224        ith_teager_value = abs(wave[i] * wave[i] - wave[i - 1] * wave[i + 1])
225        ith_teager_value *= max(1, abs(wave[i]))
226        teager_value += ith_teager_value
227    teager_value = (float(teager_value) / length) / (amplitude**2)
228    return teager_value
229
230
231def noise_level(amplitude, frequency, rate, teager_value_of_input):
232    """Computes the noise level compared with standard_noise.
233
234    For a signal which is the combination of sine wave with fixed frequency f
235    and amplitude 1 and standard noise with amplitude k, the average teager
236    value is nearly linear to the noise level k.
237    Thus, we can compute the average teager value of a sine wave with
238    standard_noise. Then, we can estimate the noise level of given input.
239
240    Args:
241        amplitude: Amplitude of input audio.
242        frequency: Dominant frequency of input audio.
243        rate: Sampling rate in samples per second. Example inputs: 44100,
244        48000
245        teager_value_of_input: Average teager value of input audio.
246
247    Returns:
248        A float value denotes the audio is equivalent to have how many times of
249            noise compared with its amplitude.For example, 0.02 denotes that the
250            wave has a noise which has standard distribution with standard
251            deviation being 0.02 times the amplitude of the wave.
252
253    """
254    standard_noise = DEFAULT_STANDARD_NOISE
255
256    # Generates the standard sine wave with stdandard_noise level of noise.
257    standard_wave = noised_sine_wave(frequency, rate, standard_noise)
258
259    # Calculates the average teager value.
260    teager_value_of_std_wave = average_teager_value(standard_wave, amplitude)
261
262    return (teager_value_of_input / teager_value_of_std_wave) * standard_noise
263
264
265def error(f1, f2):
266    """Calculates the relative error between f1 and f2.
267
268    Args:
269        f1: Exact value.
270        f2: Test value.
271
272    Returns:
273        Relative error between f1 and f2.
274
275    """
276    return abs(float(f1) - float(f2)) / float(f1)
277
278
279def hilbert_analysis(signal, rate, block_size):
280    """Finds amplitude and frequency of each time of signal by Hilbert transform.
281
282    Args:
283        signal: The wave to analyze.
284        rate: Sampling rate in samples per second. Example inputs: 44100,
285        48000
286        block_size: The size of block to transform.
287
288    Returns:
289        A tuple of list: (amplitude, frequency) composed of amplitude and
290            frequency of each time.
291
292    """
293    # To apply Hilbert transform, the wave will be transformed
294    # segment by segment. For each segment, its size will be
295    # block_size and we will only take middle part of it.
296    # Thus, each segment looks like: |-----|=====|=====|-----|.
297    # "=...=" part will be taken while "-...-" part will be ignored.
298    #
299    # The whole size of taken part will be half of block_size
300    # which will be hilbert_block.
301    # The size of each ignored part will be half of hilbert_block
302    # which will be half_hilbert_block.
303    hilbert_block = block_size // 2
304    half_hilbert_block = hilbert_block // 2
305    # As mentioned above, for each block, we will only take middle
306    # part of it. Thus, the whole transformation will be completed as:
307    # |=====|=====|-----|           |-----|=====|=====|-----|
308    #       |-----|=====|=====|-----|           |-----|=====|=====|
309    #                   |-----|=====|=====|-----|
310    # Specially, beginning and ending part may not have ignored part.
311    length = len(signal)
312    result = []
313    for left_border in range(0, length, hilbert_block):
314        right_border = min(length, left_border + hilbert_block)
315        temp_left_border = max(0, left_border - half_hilbert_block)
316        temp_right_border = min(length, right_border + half_hilbert_block)
317        temp = hilbert(signal[temp_left_border:temp_right_border])
318        for index in range(left_border, right_border):
319            result.append(temp[index - temp_left_border])
320    result = numpy.asarray(result)
321    amplitude = numpy.abs(result)
322    phase = numpy.unwrap(numpy.angle(result))
323    frequency = numpy.diff(phase) / (2.0 * numpy.pi) * rate
324    #frequency.append(frequency[len(frequency)-1])
325    frequecny = numpy.append(frequency, frequency[len(frequency) - 1])
326    return (amplitude, frequency)
327
328
329def find_block_average_value(arr, side_block_size, block_size):
330    """For each index, finds average value of its block, left block, right block.
331
332    It will find average value for each index in the range.
333
334    For each index, the range of its block is
335        [max(0, index - block_size / 2), min(length - 1, index + block_size / 2)]
336    For each index, the range of its left block is
337        [max(0, index - size_block_size), index]
338    For each index, the range of its right block is
339        [index, min(length - 1, index + side_block_size)]
340
341    Args:
342        arr: The array to be computed.
343        side_block_size: the size of the left_block and right_block.
344        block_size: the size of the block.
345
346    Returns:
347        A tuple of lists: (left_block_average_array,
348                                 right_block_average_array,
349                                 block_average_array)
350    """
351    length = len(arr)
352    left_border, right_border = 0, 1
353    left_block_sum = arr[0]
354    right_block_sum = arr[0]
355    left_average_array = numpy.zeros(length)
356    right_average_array = numpy.zeros(length)
357    block_average_array = numpy.zeros(length)
358    for index in range(0, length):
359        while left_border < index - side_block_size:
360            left_block_sum -= arr[left_border]
361            left_border += 1
362        while right_border < min(length, index + side_block_size):
363            right_block_sum += arr[right_border]
364            right_border += 1
365
366        left_average_value = float(left_block_sum) / (index - left_border + 1)
367        right_average_value = float(right_block_sum) / (right_border - index)
368        left_average_array[index] = left_average_value
369        right_average_array[index] = right_average_value
370
371        if index + 1 < length:
372            left_block_sum += arr[index + 1]
373        right_block_sum -= arr[index]
374    left_border, right_border = 0, 1
375    block_sum = 0
376    for index in range(0, length):
377        while left_border < index - block_size / 2:
378            block_sum -= arr[left_border]
379            left_border += 1
380        while right_border < min(length, index + block_size / 2):
381            block_sum += arr[right_border]
382            right_border += 1
383
384        average_value = float(block_sum) / (right_border - left_border)
385        block_average_array[index] = average_value
386    return (left_average_array, right_average_array, block_average_array)
387
388
389def find_start_end_index(dominant_frequency, block_frequency_delta, block_size,
390                         frequency_error_threshold):
391    """Finds start and end index of sine wave.
392
393    For each block with size of block_size, we check that whether its frequency
394    is close enough to the dominant_frequency. If yes, we will consider this
395    block to be within the sine wave.
396    Then, it will return the start and end index of sine wave indicating that
397    sine wave is between [start_index, end_index)
398    It's okay if the whole signal only contains sine wave.
399
400    Args:
401        dominant_frequency: Dominant frequency of signal.
402        block_frequency_delta: Average absolute difference between dominant
403                                  frequency and frequency of each block. For
404                                  each index, its block is
405                                  [max(0, index - block_size / 2),
406                                   min(length - 1, index + block_size / 2)]
407        block_size: Block size in samples.
408
409    Returns:
410        A tuple composed of (start_index, end_index)
411
412    """
413    length = len(block_frequency_delta)
414
415    # Finds the start/end time index of playing based on dominant frequency
416    start_index, end_index = length - 1, 0
417    for index in range(0, length):
418        left_border = max(0, index - block_size / 2)
419        right_border = min(length - 1, index + block_size / 2)
420        frequency_error = block_frequency_delta[index] / dominant_frequency
421        if frequency_error < frequency_error_threshold:
422            start_index = min(start_index, left_border)
423            end_index = max(end_index, right_border + 1)
424    return (start_index, end_index)
425
426
427def noise_detection(start_index, end_index, block_amplitude, average_amplitude,
428                    rate, noise_amplitude_threshold):
429    """Detects noise before/after sine wave.
430
431    If average amplitude of some sample's block before start of wave or after
432    end of wave is more than average_amplitude times noise_amplitude_threshold,
433    it will be considered as a noise.
434
435    Args:
436        start_index: Start index of sine wave.
437        end_index: End index of sine wave.
438        block_amplitude: An array for average amplitude of each block, where
439                            amplitude is computed from Hilbert transform.
440        average_amplitude: Average amplitude of sine wave.
441        rate: Sampling rate in samples per second. Example inputs: 44100,
442        48000
443        noise_amplitude_threshold: If the average amplitude of a block is
444                        higher than average amplitude of the wave times
445                        noise_amplitude_threshold, it will be considered as
446                        noise before/after playback.
447
448    Returns:
449        A tuple of lists indicating the time that noise happens:
450            (noise_before_playing, noise_after_playing).
451
452    """
453    length = len(block_amplitude)
454    amplitude_threshold = average_amplitude * noise_amplitude_threshold
455    same_event_samples = rate * DEFAULT_SAME_EVENT_SECS
456
457    # Detects noise before playing.
458    noise_time_point = []
459    last_noise_end_time_point = []
460    previous_noise_index = None
461    times = 0
462    for index in range(0, length):
463        # Ignore noise too close to the beginning or the end of sine wave.
464        # Check the docstring of NEAR_SINE_START_OR_END_SECS.
465        if ((start_index - rate * NEAR_SINE_START_OR_END_SECS) <= index
466                and (index < end_index + rate * NEAR_SINE_START_OR_END_SECS)):
467            continue
468
469        # Ignore noise too close to the beginning or the end of original data.
470        # Check the docstring of NEAR_DATA_START_OR_END_SECS.
471        if (float(index) / rate <=
472                NEAR_DATA_START_OR_END_SECS + APPEND_ZEROS_SECS):
473            continue
474        if (float(length - index) / rate <=
475                NEAR_DATA_START_OR_END_SECS + APPEND_ZEROS_SECS):
476            continue
477        if block_amplitude[index] > amplitude_threshold:
478            same_event = False
479            if previous_noise_index:
480                same_event = (index -
481                              previous_noise_index) < same_event_samples
482            if not same_event:
483                index_start_sec = float(index) / rate - APPEND_ZEROS_SECS
484                index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS
485                noise_time_point.append(index_start_sec)
486                last_noise_end_time_point.append(index_end_sec)
487                times += 1
488            index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS
489            last_noise_end_time_point[times - 1] = index_end_sec
490            previous_noise_index = index
491
492    noise_before_playing, noise_after_playing = [], []
493    for i in range(times):
494        duration = last_noise_end_time_point[i] - noise_time_point[i]
495        if noise_time_point[i] < float(start_index) / rate - APPEND_ZEROS_SECS:
496            noise_before_playing.append((noise_time_point[i], duration))
497        else:
498            noise_after_playing.append((noise_time_point[i], duration))
499
500    return (noise_before_playing, noise_after_playing)
501
502
503def delay_detection(start_index, end_index, block_amplitude, average_amplitude,
504                    dominant_frequency, rate, left_block_amplitude,
505                    right_block_amplitude, block_frequency_delta,
506                    delay_amplitude_threshold, frequency_error_threshold):
507    """Detects delay during playing.
508
509    For each sample, we will check whether the average amplitude of its block
510    is less than average amplitude of its left block and its right block times
511    delay_amplitude_threshold. Also, we will check whether the frequency of
512    its block is far from the dominant frequency.
513    If at least one constraint fulfilled, it will be considered as a delay.
514
515    Args:
516        start_index: Start index of sine wave.
517        end_index: End index of sine wave.
518        block_amplitude: An array for average amplitude of each block, where
519                            amplitude is computed from Hilbert transform.
520        average_amplitude: Average amplitude of sine wave.
521        dominant_frequency: Dominant frequency of signal.
522        rate: Sampling rate in samples per second. Example inputs: 44100,
523        48000
524        left_block_amplitude: Average amplitude of left block of each index.
525                                Ref to find_block_average_value function.
526        right_block_amplitude: Average amplitude of right block of each index.
527                                Ref to find_block_average_value function.
528        block_frequency_delta: Average absolute difference frequency to
529                                dominant frequency of block of each index.
530                                Ref to find_block_average_value function.
531        delay_amplitude_threshold: If the average amplitude of a block is
532                        lower than average amplitude of the wave times
533                        delay_amplitude_threshold, it will be considered
534                        as delay.
535        frequency_error_threshold: Ref to DEFAULT_FREQUENCY_ERROR
536
537    Returns:
538        List of delay occurrence:
539                [(time_1, duration_1), (time_2, duration_2), ...],
540              where time and duration are in seconds.
541
542    """
543    delay_time_points = []
544    last_delay_end_time_points = []
545    previous_delay_index = None
546    times = 0
547    same_event_samples = rate * DEFAULT_SAME_EVENT_SECS
548    start_time = float(start_index) / rate - APPEND_ZEROS_SECS
549    end_time = float(end_index) / rate - APPEND_ZEROS_SECS
550    for index in range(int(start_index), int(end_index)):
551        if block_amplitude[
552                index] > average_amplitude * delay_amplitude_threshold:
553            continue
554        now_time = float(index) / rate - APPEND_ZEROS_SECS
555        if abs(now_time - start_time) < NEAR_START_OR_END_SECS:
556            continue
557        if abs(now_time - end_time) < NEAR_START_OR_END_SECS:
558            continue
559        # If amplitude less than its left/right side and small enough,
560        # it will be considered as a delay.
561        amp_threshold = average_amplitude * delay_amplitude_threshold
562        left_threshold = delay_amplitude_threshold * left_block_amplitude[index]
563        amp_threshold = min(amp_threshold, left_threshold)
564        right_threshold = delay_amplitude_threshold * right_block_amplitude[
565            index]
566        amp_threshold = min(amp_threshold, right_threshold)
567
568        frequency_error = block_frequency_delta[index] / dominant_frequency
569
570        amplitude_too_small = block_amplitude[index] < amp_threshold
571        frequency_not_match = frequency_error > frequency_error_threshold
572
573        if amplitude_too_small or frequency_not_match:
574            same_event = False
575            if previous_delay_index:
576                same_event = (index -
577                              previous_delay_index) < same_event_samples
578            if not same_event:
579                index_start_sec = float(index) / rate - APPEND_ZEROS_SECS
580                index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS
581                delay_time_points.append(index_start_sec)
582                last_delay_end_time_points.append(index_end_sec)
583                times += 1
584            previous_delay_index = index
585            index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS
586            last_delay_end_time_points[times - 1] = index_end_sec
587
588    delay_list = []
589    for i in range(len(delay_time_points)):
590        duration = last_delay_end_time_points[i] - delay_time_points[i]
591        delay_list.append((delay_time_points[i], duration))
592    return delay_list
593
594
595def burst_detection(start_index, end_index, block_amplitude, average_amplitude,
596                    dominant_frequency, rate, left_block_amplitude,
597                    right_block_amplitude, block_frequency_delta,
598                    burst_amplitude_threshold, frequency_error_threshold):
599    """Detects burst during playing.
600
601    For each sample, we will check whether the average amplitude of its block is
602    more than average amplitude of its left block and its right block times
603    burst_amplitude_threshold. Also, we will check whether the frequency of
604    its block is not compatible to the dominant frequency.
605    If at least one constraint fulfilled, it will be considered as a burst.
606
607    Args:
608        start_index: Start index of sine wave.
609        end_index: End index of sine wave.
610        block_amplitude: An array for average amplitude of each block, where
611                            amplitude is computed from Hilbert transform.
612        average_amplitude: Average amplitude of sine wave.
613        dominant_frequency: Dominant frequency of signal.
614        rate: Sampling rate in samples per second. Example inputs: 44100,
615        48000
616        left_block_amplitude: Average amplitude of left block of each index.
617                                Ref to find_block_average_value function.
618        right_block_amplitude: Average amplitude of right block of each index.
619                                Ref to find_block_average_value function.
620        block_frequency_delta: Average absolute difference frequency to
621                                dominant frequency of block of each index.
622        burst_amplitude_threshold: If the amplitude is higher than average
623                            amplitude of its left block and its right block
624                            times burst_amplitude_threshold. It will be
625                            considered as a burst.
626        frequency_error_threshold: Ref to DEFAULT_FREQUENCY_ERROR
627
628    Returns:
629        List of burst occurence: [time_1, time_2, ...],
630              where time is in seconds.
631
632    """
633    burst_time_points = []
634    previous_burst_index = None
635    same_event_samples = rate * DEFAULT_SAME_EVENT_SECS
636    for index in range(int(start_index), int(end_index)):
637        # If amplitude higher than its left/right side and large enough,
638        # it will be considered as a burst.
639        if block_amplitude[
640                index] <= average_amplitude * DEFAULT_BURST_TOO_SMALL:
641            continue
642        if abs(index - start_index) < rate * NEAR_START_OR_END_SECS:
643            continue
644        if abs(index - end_index) < rate * NEAR_START_OR_END_SECS:
645            continue
646        amp_threshold = average_amplitude * DEFAULT_BURST_TOO_SMALL
647        left_threshold = burst_amplitude_threshold * left_block_amplitude[index]
648        amp_threshold = max(amp_threshold, left_threshold)
649        right_threshold = burst_amplitude_threshold * right_block_amplitude[
650            index]
651        amp_threshold = max(amp_threshold, right_threshold)
652
653        frequency_error = block_frequency_delta[index] / dominant_frequency
654
655        amplitude_too_large = block_amplitude[index] > amp_threshold
656        frequency_not_match = frequency_error > frequency_error_threshold
657
658        if amplitude_too_large or frequency_not_match:
659            same_event = False
660            if previous_burst_index:
661                same_event = index - previous_burst_index < same_event_samples
662            if not same_event:
663                burst_time_points.append(
664                    float(index) / rate - APPEND_ZEROS_SECS)
665            previous_burst_index = index
666
667    return burst_time_points
668
669
670def changing_volume_detection(start_index, end_index, average_amplitude, rate,
671                              left_block_amplitude, right_block_amplitude,
672                              volume_changing_amplitude_threshold):
673    """Finds volume changing during playback.
674
675    For each index, we will compare average amplitude of its left block and its
676    right block. If average amplitude of right block is more than average
677    amplitude of left block times (1 + DEFAULT_VOLUME_CHANGE_AMPLITUDE), it will
678    be considered as an increasing volume. If the one of right block is less
679    than that of left block times (1 - DEFAULT_VOLUME_CHANGE_AMPLITUDE), it will
680    be considered as a decreasing volume.
681
682    Args:
683        start_index: Start index of sine wave.
684        end_index: End index of sine wave.
685        average_amplitude: Average amplitude of sine wave.
686        rate: Sampling rate in samples per second. Example inputs: 44100,
687        48000
688        left_block_amplitude: Average amplitude of left block of each index.
689                                Ref to find_block_average_value function.
690        right_block_amplitude: Average amplitude of right block of each index.
691                                Ref to find_block_average_value function.
692        volume_changing_amplitude_threshold: If the average amplitude of right
693                                                block is higher or lower than
694                                                that of left one times this
695                                                value, it will be considered as
696                                                a volume change.
697                                                Also refer to
698                                                DEFAULT_VOLUME_CHANGE_AMPLITUDE
699
700    Returns:
701        List of volume changing composed of 1 for increasing and -1 for
702            decreasing.
703
704    """
705    length = len(left_block_amplitude)
706
707    # Detects rising and/or falling volume.
708    previous_rising_index, previous_falling_index = None, None
709    changing_time = []
710    changing_events = []
711    amplitude_threshold = average_amplitude * DEFAULT_VOLUME_CHANGE_TOO_SMALL
712    same_event_samples = rate * DEFAULT_SAME_EVENT_SECS
713    for index in range(int(start_index), int(end_index)):
714        # Skips if amplitude is too small.
715        if left_block_amplitude[index] < amplitude_threshold:
716            continue
717        if right_block_amplitude[index] < amplitude_threshold:
718            continue
719        # Skips if changing is from start or end time
720        if float(abs(start_index - index)) / rate < NEAR_START_OR_END_SECS:
721            continue
722        if float(abs(end_index - index)) / rate < NEAR_START_OR_END_SECS:
723            continue
724
725        delta_margin = volume_changing_amplitude_threshold
726        if left_block_amplitude[index] > 0:
727            delta_margin *= left_block_amplitude[index]
728
729        increasing_threshold = left_block_amplitude[index] + delta_margin
730        decreasing_threshold = left_block_amplitude[index] - delta_margin
731
732        if right_block_amplitude[index] > increasing_threshold:
733            same_event = False
734            if previous_rising_index:
735                same_event = index - previous_rising_index < same_event_samples
736            if not same_event:
737                changing_time.append(float(index) / rate - APPEND_ZEROS_SECS)
738                changing_events.append(+1)
739            previous_rising_index = index
740        if right_block_amplitude[index] < decreasing_threshold:
741            same_event = False
742            if previous_falling_index:
743                same_event = index - previous_falling_index < same_event_samples
744            if not same_event:
745                changing_time.append(float(index) / rate - APPEND_ZEROS_SECS)
746                changing_events.append(-1)
747            previous_falling_index = index
748
749    # Combines consecutive increasing/decreasing event.
750    combined_changing_events, prev = [], 0
751    for i in range(len(changing_events)):
752        if changing_events[i] == prev:
753            continue
754        combined_changing_events.append((changing_time[i], changing_events[i]))
755        prev = changing_events[i]
756    return combined_changing_events
757
758
759def quality_measurement(
760        signal,
761        rate,
762        dominant_frequency=None,
763        block_size_secs=DEFAULT_BLOCK_SIZE_SECS,
764        frequency_error_threshold=DEFAULT_FREQUENCY_ERROR,
765        delay_amplitude_threshold=DEFAULT_DELAY_AMPLITUDE_THRESHOLD,
766        noise_amplitude_threshold=DEFAULT_NOISE_AMPLITUDE_THRESHOLD,
767        burst_amplitude_threshold=DEFAULT_BURST_AMPLITUDE_THRESHOLD,
768        volume_changing_amplitude_threshold=DEFAULT_VOLUME_CHANGE_AMPLITUDE):
769    """Detects several artifacts and estimates the noise level.
770
771    This method detects artifact before playing, after playing, and delay
772    during playing. Also, it estimates the noise level of the signal.
773    To avoid the influence of noise, it calculates amplitude and frequency
774    block by block.
775
776    Args:
777        signal: A list of numbers for one-channel PCM data. The data should
778                   be normalized to [-1,1].
779        rate: Sampling rate in samples per second. Example inputs: 44100,
780        48000
781        dominant_frequency: Dominant frequency of signal. Set None to
782                               recalculate the frequency in this function.
783        block_size_secs: Block size in seconds. The measurement will be done
784                            block-by-block using average amplitude and frequency
785                            in each block to avoid noise.
786        frequency_error_threshold: Ref to DEFAULT_FREQUENCY_ERROR.
787        delay_amplitude_threshold: If the average amplitude of a block is
788                                      lower than average amplitude of the wave
789                                      times delay_amplitude_threshold, it will
790                                      be considered as delay.
791                                      Also refer to delay_detection and
792                                      DEFAULT_DELAY_AMPLITUDE_THRESHOLD.
793        noise_amplitude_threshold: If the average amplitude of a block is
794                                      higher than average amplitude of the wave
795                                      times noise_amplitude_threshold, it will
796                                      be considered as noise before/after
797                                      playback.
798                                      Also refer to noise_detection and
799                                      DEFAULT_NOISE_AMPLITUDE_THRESHOLD.
800        burst_amplitude_threshold: If the average amplitude of a block is
801                                      higher than average amplitude of its left
802                                      block and its right block times
803                                      burst_amplitude_threshold. It will be
804                                      considered as a burst.
805                                      Also refer to burst_detection and
806                                      DEFAULT_BURST_AMPLITUDE_THRESHOLD.
807        volume_changing_amplitude_threshold: If the average amplitude of right
808                                                block is higher or lower than
809                                                that of left one times this
810                                                value, it will be considered as
811                                                a volume change.
812                                                Also refer to
813                                                changing_volume_detection and
814                                                DEFAULT_VOLUME_CHANGE_AMPLITUDE
815
816    Returns:
817        A dictoinary of detection/estimation:
818              {'artifacts':
819                {'noise_before_playback':
820                    [(time_1, duration_1), (time_2, duration_2), ...],
821                 'noise_after_playback':
822                    [(time_1, duration_1), (time_2, duration_2), ...],
823                 'delay_during_playback':
824                    [(time_1, duration_1), (time_2, duration_2), ...],
825                 'burst_during_playback':
826                    [time_1, time_2, ...]
827                },
828               'volume_changes':
829                 [(time_1, flag_1), (time_2, flag_2), ...],
830               'equivalent_noise_level': level
831              }
832              where durations and time points are in seconds. And,
833              equivalence_noise_level is the quotient of noise and wave which
834              refers to DEFAULT_STANDARD_NOISE. volume_changes is a list of
835              tuples containing time stamps and decreasing/increasing flags for
836              volume change events.
837
838    """
839    # Calculates the block size, from seconds to samples.
840    block_size = int(block_size_secs * rate)
841
842    signal = numpy.concatenate(
843        (numpy.zeros(int(rate * APPEND_ZEROS_SECS)), signal,
844         numpy.zeros(int(rate * APPEND_ZEROS_SECS))))
845    signal = numpy.array(signal, dtype=float)
846    length = len(signal)
847
848    # Calculates the amplitude and frequency.
849    amplitude, frequency = hilbert_analysis(signal, rate, block_size)
850
851    # Finds the dominant frequency.
852    if not dominant_frequency:
853        dominant_frequency = audio_analysis.spectral_analysis(signal,
854                                                              rate)[0][0]
855
856    # Finds the array which contains absolute difference between dominant
857    # frequency and frequency at each time point.
858    frequency_delta = abs(frequency - dominant_frequency)
859
860    # Computes average amplitude of each type of block
861    res = find_block_average_value(amplitude, block_size * 2, block_size)
862    left_block_amplitude, right_block_amplitude, block_amplitude = res
863
864    # Computes average absolute difference of frequency and dominant frequency
865    # of the block of each index
866    _, _, block_frequency_delta = find_block_average_value(
867        frequency_delta, block_size * 2, block_size)
868
869    # Finds start and end index of sine wave.
870    start_index, end_index = find_start_end_index(dominant_frequency,
871                                                  block_frequency_delta,
872                                                  block_size,
873                                                  frequency_error_threshold)
874
875    if start_index > end_index:
876        raise SineWaveNotFound('No sine wave found in signal')
877
878    logging.debug('Found sine wave: start: %s, end: %s',
879                  float(start_index) / rate - APPEND_ZEROS_SECS,
880                  float(end_index) / rate - APPEND_ZEROS_SECS)
881
882    sum_of_amplitude = float(sum(amplitude[int(start_index):int(end_index)]))
883    # Finds average amplitude of sine wave.
884    average_amplitude = sum_of_amplitude / (end_index - start_index)
885
886    # Finds noise before and/or after playback.
887    noise_before_playing, noise_after_playing = noise_detection(
888        start_index, end_index, block_amplitude, average_amplitude, rate,
889        noise_amplitude_threshold)
890
891    # Finds delay during playback.
892    delays = delay_detection(start_index, end_index, block_amplitude,
893                             average_amplitude, dominant_frequency, rate,
894                             left_block_amplitude, right_block_amplitude,
895                             block_frequency_delta, delay_amplitude_threshold,
896                             frequency_error_threshold)
897
898    # Finds burst during playback.
899    burst_time_points = burst_detection(
900        start_index, end_index, block_amplitude, average_amplitude,
901        dominant_frequency, rate, left_block_amplitude, right_block_amplitude,
902        block_frequency_delta, burst_amplitude_threshold,
903        frequency_error_threshold)
904
905    # Finds volume changing during playback.
906    volume_changes = changing_volume_detection(
907        start_index, end_index, average_amplitude, rate, left_block_amplitude,
908        right_block_amplitude, volume_changing_amplitude_threshold)
909
910    # Calculates the average teager value.
911    teager_value = average_teager_value(
912        signal[int(start_index):int(end_index)], average_amplitude)
913
914    # Finds out the noise level.
915    noise = noise_level(average_amplitude, dominant_frequency, rate,
916                        teager_value)
917
918    return {
919        'artifacts': {
920            'noise_before_playback': noise_before_playing,
921            'noise_after_playback': noise_after_playing,
922            'delay_during_playback': delays,
923            'burst_during_playback': burst_time_points
924        },
925        'volume_changes': volume_changes,
926        'equivalent_noise_level': noise
927    }
928