1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <stdio.h>
18 #include <stdint.h>
19 #include <string.h>
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <sys/epoll.h>
23 #include <sys/types.h>
24 #include <sys/socket.h>
25 #include <sys/stat.h>
26 #include <sys/time.h>
27 #include <time.h>
28 #include <arpa/inet.h>
29 #include <netinet/in.h>
30 
31 // #define LOG_NDEBUG 0
32 #define LOG_TAG "AudioGroup"
33 #include <cutils/atomic.h>
34 #include <cutils/properties.h>
35 #include <utils/Log.h>
36 #include <utils/Errors.h>
37 #include <utils/RefBase.h>
38 #include <utils/threads.h>
39 #include <utils/SystemClock.h>
40 #include <media/AudioRecord.h>
41 #include <media/AudioTrack.h>
42 #include <media/AudioEffect.h>
43 #include <system/audio_effects/effect_aec.h>
44 #include <system/audio.h>
45 
46 #include <nativehelper/ScopedUtfChars.h>
47 #include <android/content/AttributionSourceState.h>
48 #include <android_os_Parcel.h>
49 
50 #include "jni.h"
51 #include <nativehelper/JNIHelp.h>
52 
53 #include "AudioCodec.h"
54 #include "EchoSuppressor.h"
55 
// Defined in another translation unit. Fills *ss from a Java address string
// and a port; callers in this file treat a negative return value as "failed,
// Java exception already thrown".
extern int parse(JNIEnv *env, jstring jAddress, int port, sockaddr_storage *ss);
57 
58 namespace {
59 
using namespace android;

using android::content::AttributionSourceState;

// File descriptor that AudioStream::set() read()s to seed the sequence number,
// timestamp and SSRC of a stream. It starts out as -1 and is assigned outside
// the part of the file shown here (presumably a random device -- TODO confirm).
int gRandom = -1;
65 
66 // We use a circular array to implement jitter buffer. The simplest way is doing
67 // a modulo operation on the index while accessing the array. However modulo can
68 // be expensive on some platforms, such as ARM. Thus we round up the size of the
69 // array to the nearest power of 2 and then use bitwise-and instead of modulo.
70 // Currently we make it 2048ms long and assume packet interval is 50ms or less.
71 // The first 100ms is the place where samples get mixed. The rest is the real
72 // jitter buffer. For a stream at 8000Hz it takes 32 kilobytes. These numbers
73 // are chosen by experiments and each of them can be adjusted as needed.
74 
75 // Originally a stream does not send packets when it is receive-only or there is
76 // nothing to mix. However, this causes some problems with certain firewalls and
77 // proxies. A firewall might remove a port mapping when there is no outgoing
// packet for a period of time, and a proxy might wait for incoming packets from
79 // both sides before start forwarding. To solve these problems, we send out a
80 // silence packet on the stream for every second. It should be good enough to
81 // keep the stream alive with relatively low resources.
82 
83 // Other notes:
84 // + We use elapsedRealtime() to get the time. Since we use 32bit variables
85 //   instead of 64bit ones, comparison must be done by subtraction.
86 // + Sampling rate must be multiple of 1000Hz, and packet length must be in
87 //   milliseconds. No floating points.
88 // + If we cannot get enough CPU, we drop samples and simulate packet loss.
89 // + Resampling is not done yet, so streams in one group must use the same rate.
90 //   For the first release only 8000Hz is supported.
91 
// All values below are in milliseconds.

// Length of the circular jitter buffer of every stream.
#define BUFFER_SIZE     2048
// How far behind the current tick samples are kept so they can still be mixed.
#define HISTORY_SIZE    100
// Latency that is considered normal; only the excess above it is scored.
#define MEASURE_BASE    100
// How long the excess latency has to persist before the buffer is shortened.
#define MEASURE_PERIOD  5000
// Duration of one outgoing DTMF event.
#define DTMF_PERIOD     200
97 
98 class AudioStream
99 {
100 public:
101     AudioStream();
102     ~AudioStream();
103     bool set(int mode, int socket, sockaddr_storage *remote,
104         AudioCodec *codec, int sampleRate, int sampleCount,
105         int codecType, int dtmfType);
106 
107     void sendDtmf(int event);
108     bool mix(int32_t *output, int head, int tail, int sampleRate);
109     void encode(int tick, AudioStream *chain);
110     void decode(int tick);
111 
112 private:
113     enum {
114         NORMAL = 0,
115         SEND_ONLY = 1,
116         RECEIVE_ONLY = 2,
117         LAST_MODE = 2,
118     };
119 
120     int mMode;
121     int mSocket;
122     sockaddr_storage mRemote;
123     AudioCodec *mCodec;
124     uint32_t mCodecMagic;
125     uint32_t mDtmfMagic;
126     bool mFixRemote;
127 
128     int mTick;
129     int mSampleRate;
130     int mSampleCount;
131     int mInterval;
132     int mKeepAlive;
133 
134     int16_t *mBuffer;
135     int mBufferMask;
136     int mBufferHead;
137     int mBufferTail;
138     int mLatencyTimer;
139     int mLatencyScore;
140 
141     uint16_t mSequence;
142     uint32_t mTimestamp;
143     uint32_t mSsrc;
144 
145     int mDtmfEvent;
146     int mDtmfStart;
147 
148     AudioStream *mNext;
149 
150     friend class AudioGroup;
151 };
152 
AudioStream()153 AudioStream::AudioStream()
154 {
155     mSocket = -1;
156     mCodec = NULL;
157     mBuffer = NULL;
158     mNext = NULL;
159 }
160 
~AudioStream()161 AudioStream::~AudioStream()
162 {
163     close(mSocket);
164     delete mCodec;
165     delete [] mBuffer;
166     ALOGD("stream[%d] is dead", mSocket);
167 }
168 
set(int mode,int socket,sockaddr_storage * remote,AudioCodec * codec,int sampleRate,int sampleCount,int codecType,int dtmfType)169 bool AudioStream::set(int mode, int socket, sockaddr_storage *remote,
170     AudioCodec *codec, int sampleRate, int sampleCount,
171     int codecType, int dtmfType)
172 {
173     if (mode < 0 || mode > LAST_MODE) {
174         return false;
175     }
176     mMode = mode;
177 
178     mCodecMagic = (0x8000 | codecType) << 16;
179     mDtmfMagic = (dtmfType == -1) ? 0 : (0x8000 | dtmfType) << 16;
180 
181     mTick = elapsedRealtime();
182     mSampleRate = sampleRate / 1000;
183     mSampleCount = sampleCount;
184     mInterval = mSampleCount / mSampleRate;
185 
186     // Allocate jitter buffer.
187     for (mBufferMask = 8; mBufferMask < mSampleRate; mBufferMask <<= 1);
188     mBufferMask *= BUFFER_SIZE;
189     mBuffer = new int16_t[mBufferMask];
190     --mBufferMask;
191     mBufferHead = 0;
192     mBufferTail = 0;
193     mLatencyTimer = 0;
194     mLatencyScore = 0;
195 
196     // Initialize random bits.
197     read(gRandom, &mSequence, sizeof(mSequence));
198     read(gRandom, &mTimestamp, sizeof(mTimestamp));
199     read(gRandom, &mSsrc, sizeof(mSsrc));
200 
201     mDtmfEvent = -1;
202     mDtmfStart = 0;
203 
204     // Only take over these things when succeeded.
205     mSocket = socket;
206     if (codec) {
207         mRemote = *remote;
208         mCodec = codec;
209 
210         // Here we should never get an private address, but some buggy proxy
211         // servers do give us one. To solve this, we replace the address when
212         // the first time we successfully decode an incoming packet.
213         mFixRemote = false;
214         if (remote->ss_family == AF_INET) {
215             unsigned char *address =
216                 (unsigned char *)&((sockaddr_in *)remote)->sin_addr;
217             if (address[0] == 10 ||
218                 (address[0] == 172 && (address[1] >> 4) == 1) ||
219                 (address[0] == 192 && address[1] == 168)) {
220                 mFixRemote = true;
221             }
222         }
223     }
224 
225     ALOGD("stream[%d] is configured as %s %dkHz %dms mode %d", mSocket,
226         (codec ? codec->name : "RAW"), mSampleRate, mInterval, mMode);
227     return true;
228 }
229 
sendDtmf(int event)230 void AudioStream::sendDtmf(int event)
231 {
232     if (mDtmfMagic != 0) {
233         mDtmfEvent = event << 24;
234         mDtmfStart = mTimestamp + mSampleCount;
235     }
236 }
237 
mix(int32_t * output,int head,int tail,int sampleRate)238 bool AudioStream::mix(int32_t *output, int head, int tail, int sampleRate)
239 {
240     if (mMode == SEND_ONLY) {
241         return false;
242     }
243 
244     if (head - mBufferHead < 0) {
245         head = mBufferHead;
246     }
247     if (tail - mBufferTail > 0) {
248         tail = mBufferTail;
249     }
250     if (tail - head <= 0) {
251         return false;
252     }
253 
254     head *= mSampleRate;
255     tail *= mSampleRate;
256 
257     if (sampleRate == mSampleRate) {
258         for (int i = head; i - tail < 0; ++i) {
259             output[i - head] += mBuffer[i & mBufferMask];
260         }
261     } else {
262         // TODO: implement resampling.
263         return false;
264     }
265     return true;
266 }
267 
encode(int tick,AudioStream * chain)268 void AudioStream::encode(int tick, AudioStream *chain)
269 {
270     if (tick - mTick >= mInterval) {
271         // We just missed the train. Pretend that packets in between are lost.
272         int skipped = (tick - mTick) / mInterval;
273         mTick += skipped * mInterval;
274         mSequence += skipped;
275         mTimestamp += skipped * mSampleCount;
276         ALOGV("stream[%d] skips %d packets", mSocket, skipped);
277     }
278 
279     tick = mTick;
280     mTick += mInterval;
281     ++mSequence;
282     mTimestamp += mSampleCount;
283 
284     // If there is an ongoing DTMF event, send it now.
285     if (mMode != RECEIVE_ONLY && mDtmfEvent != -1) {
286         int duration = mTimestamp - mDtmfStart;
287         // Make sure duration is reasonable.
288         if (duration >= 0 && duration < mSampleRate * DTMF_PERIOD) {
289             duration += mSampleCount;
290             int32_t buffer[4] = {
291                 static_cast<int32_t>(htonl(mDtmfMagic | mSequence)),
292                 static_cast<int32_t>(htonl(mDtmfStart)),
293                 static_cast<int32_t>(mSsrc),
294                 static_cast<int32_t>(htonl(mDtmfEvent | duration)),
295             };
296             if (duration >= mSampleRate * DTMF_PERIOD) {
297                 buffer[3] |= htonl(1 << 23);
298                 mDtmfEvent = -1;
299             }
300             sendto(mSocket, buffer, sizeof(buffer), MSG_DONTWAIT,
301                 (sockaddr *)&mRemote, sizeof(mRemote));
302             return;
303         }
304         mDtmfEvent = -1;
305     }
306 
307     int32_t buffer[mSampleCount + 3];
308     bool data = false;
309     if (mMode != RECEIVE_ONLY) {
310         // Mix all other streams.
311         memset(buffer, 0, sizeof(buffer));
312         while (chain) {
313             if (chain != this) {
314                 data |= chain->mix(buffer, tick - mInterval, tick, mSampleRate);
315             }
316             chain = chain->mNext;
317         }
318     }
319 
320     int16_t samples[mSampleCount];
321     if (data) {
322         // Saturate into 16 bits.
323         for (int i = 0; i < mSampleCount; ++i) {
324             int32_t sample = buffer[i];
325             if (sample < -32768) {
326                 sample = -32768;
327             }
328             if (sample > 32767) {
329                 sample = 32767;
330             }
331             samples[i] = sample;
332         }
333     } else {
334         if ((mTick ^ mKeepAlive) >> 10 == 0) {
335             return;
336         }
337         mKeepAlive = mTick;
338         memset(samples, 0, sizeof(samples));
339 
340         if (mMode != RECEIVE_ONLY) {
341             ALOGV("stream[%d] no data", mSocket);
342         }
343     }
344 
345     if (!mCodec) {
346         // Special case for device stream.
347         send(mSocket, samples, sizeof(samples), MSG_DONTWAIT);
348         return;
349     }
350 
351     // Cook the packet and send it out.
352     buffer[0] = htonl(mCodecMagic | mSequence);
353     buffer[1] = htonl(mTimestamp);
354     buffer[2] = mSsrc;
355     int length = mCodec->encode(&buffer[3], samples);
356     if (length <= 0) {
357         ALOGV("stream[%d] encoder error", mSocket);
358         return;
359     }
360     sendto(mSocket, buffer, length + 12, MSG_DONTWAIT, (sockaddr *)&mRemote,
361         sizeof(mRemote));
362 }
363 
decode(int tick)364 void AudioStream::decode(int tick)
365 {
366     char c;
367     if (mMode == SEND_ONLY) {
368         recv(mSocket, &c, 1, MSG_DONTWAIT);
369         return;
370     }
371 
372     // Make sure mBufferHead and mBufferTail are reasonable.
373     if ((unsigned int)(tick + BUFFER_SIZE - mBufferHead) > BUFFER_SIZE * 2) {
374         mBufferHead = tick - HISTORY_SIZE;
375         mBufferTail = mBufferHead;
376     }
377 
378     if (tick - mBufferHead > HISTORY_SIZE) {
379         // Throw away outdated samples.
380         mBufferHead = tick - HISTORY_SIZE;
381         if (mBufferTail - mBufferHead < 0) {
382             mBufferTail = mBufferHead;
383         }
384     }
385 
386     // Adjust the jitter buffer if the latency keeps larger than the threshold
387     // in the measurement period.
388     int score = mBufferTail - tick - MEASURE_BASE;
389     if (mLatencyScore > score || mLatencyScore <= 0) {
390         mLatencyScore = score;
391         mLatencyTimer = tick;
392     } else if (tick - mLatencyTimer >= MEASURE_PERIOD) {
393         ALOGV("stream[%d] reduces latency of %dms", mSocket, mLatencyScore);
394         mBufferTail -= mLatencyScore;
395         mLatencyScore = -1;
396     }
397 
398     int count = (BUFFER_SIZE - (mBufferTail - mBufferHead)) * mSampleRate;
399     if (count < mSampleCount) {
400         // Buffer overflow. Drop the packet.
401         ALOGV("stream[%d] buffer overflow", mSocket);
402         recv(mSocket, &c, 1, MSG_DONTWAIT);
403         return;
404     }
405 
406     // Receive the packet and decode it.
407     int16_t samples[count];
408     if (!mCodec) {
409         // Special case for device stream.
410         count = recv(mSocket, samples, sizeof(samples),
411             MSG_TRUNC | MSG_DONTWAIT) >> 1;
412     } else {
413         __attribute__((aligned(4))) uint8_t buffer[2048];
414         sockaddr_storage remote;
415         socklen_t addrlen = sizeof(remote);
416 
417         int bufferSize = sizeof(buffer);
418         int length = recvfrom(mSocket, buffer, bufferSize,
419             MSG_TRUNC | MSG_DONTWAIT, (sockaddr *)&remote, &addrlen);
420 
421         // Do we need to check SSRC, sequence, and timestamp? They are not
422         // reliable but at least they can be used to identify duplicates?
423         if (length < 12 || length > bufferSize ||
424             (ntohl(*(uint32_t *)buffer) & 0xC07F0000) != mCodecMagic) {
425             ALOGV("stream[%d] malformed packet", mSocket);
426             return;
427         }
428         int offset = 12 + ((buffer[0] & 0x0F) << 2);
429         // length is guaranteed to be <= buffersize, so it is safe with respect
430         // buffer overflow testing as well as offset into uninitialized buffer
431         if (offset + 2 + (int)sizeof(uint16_t) > length) {
432             ALOGV("invalid buffer offset: %d", offset+2);
433             return;
434         }
435         if ((buffer[0] & 0x10) != 0) {
436             offset += 4 + (ntohs(*(uint16_t *)&buffer[offset + 2]) << 2);
437         }
438         if ((buffer[0] & 0x20) != 0) {
439             length -= buffer[length - 1];
440         }
441         length -= offset;
442         if (length >= 0) {
443             length = mCodec->decode(samples, count, &buffer[offset], length);
444         }
445         if (length > 0 && mFixRemote) {
446             mRemote = remote;
447             mFixRemote = false;
448         }
449         count = length;
450     }
451     if (count <= 0) {
452         ALOGV("stream[%d] decoder error", mSocket);
453         return;
454     }
455 
456     if (tick - mBufferTail > 0) {
457         // Buffer underrun. Reset the jitter buffer.
458         ALOGV("stream[%d] buffer underrun", mSocket);
459         if (mBufferTail - mBufferHead <= 0) {
460             mBufferHead = tick + mInterval;
461             mBufferTail = mBufferHead;
462         } else {
463             int tail = (tick + mInterval) * mSampleRate;
464             for (int i = mBufferTail * mSampleRate; i - tail < 0; ++i) {
465                 mBuffer[i & mBufferMask] = 0;
466             }
467             mBufferTail = tick + mInterval;
468         }
469     }
470 
471     // Append to the jitter buffer.
472     int tail = mBufferTail * mSampleRate;
473     for (int i = 0; i < count; ++i) {
474         mBuffer[tail & mBufferMask] = samples[i];
475         ++tail;
476     }
477     mBufferTail += mInterval;
478 }
479 
480 //------------------------------------------------------------------------------
481 
482 class AudioGroup
483 {
484 public:
485     explicit AudioGroup(const AttributionSourceState &attributionSource);
486     ~AudioGroup();
487     bool set(int sampleRate, int sampleCount);
488 
489     bool setMode(int mode);
490     bool sendDtmf(int event);
491     bool add(AudioStream *stream);
492     bool remove(AudioStream *stream);
platformHasAec()493     bool platformHasAec() { return mPlatformHasAec; }
494 
495 private:
496     enum {
497         ON_HOLD = 0,
498         MUTED = 1,
499         NORMAL = 2,
500         ECHO_SUPPRESSION = 3,
501         LAST_MODE = 3,
502     };
503 
504     bool checkPlatformAec();
505 
506     AudioStream *mChain;
507     int mEventQueue;
508     volatile int mDtmfEvent;
509 
510     const AttributionSourceState mAttributionSource;
511 
512     int mMode;
513     int mSampleRate;
514     size_t mSampleCount;
515     int mDeviceSocket;
516     bool mPlatformHasAec;
517 
518     class NetworkThread : public Thread
519     {
520     public:
NetworkThread(AudioGroup * group)521         explicit NetworkThread(AudioGroup *group) : Thread(false), mGroup(group) {}
522 
start()523         bool start()
524         {
525             if (run("Network", ANDROID_PRIORITY_AUDIO) != NO_ERROR) {
526                 ALOGE("cannot start network thread");
527                 return false;
528             }
529             return true;
530         }
531 
532     private:
533         AudioGroup *mGroup;
534         bool threadLoop();
535     };
536     sp<NetworkThread> mNetworkThread;
537 
538     class DeviceThread : public Thread
539     {
540     public:
DeviceThread(AudioGroup * group)541         explicit DeviceThread(AudioGroup *group) : Thread(false), mGroup(group) {}
542 
start()543         bool start()
544         {
545             if (run("Device", ANDROID_PRIORITY_AUDIO) != NO_ERROR) {
546                 ALOGE("cannot start device thread");
547                 return false;
548             }
549             return true;
550         }
551 
552     private:
553         AudioGroup *mGroup;
554         bool threadLoop();
555     };
556     sp<DeviceThread> mDeviceThread;
557 };
558 
AudioGroup(const AttributionSourceState & attributionSource)559 AudioGroup::AudioGroup(const AttributionSourceState &attributionSource)
560         : mAttributionSource(attributionSource)
561 {
562     mMode = ON_HOLD;
563     mChain = NULL;
564     mEventQueue = -1;
565     mDtmfEvent = -1;
566     mDeviceSocket = -1;
567     mNetworkThread = new NetworkThread(this);
568     mDeviceThread = new DeviceThread(this);
569     mPlatformHasAec = checkPlatformAec();
570 }
571 
~AudioGroup()572 AudioGroup::~AudioGroup()
573 {
574     mNetworkThread->requestExitAndWait();
575     mDeviceThread->requestExitAndWait();
576     close(mEventQueue);
577     close(mDeviceSocket);
578     while (mChain) {
579         AudioStream *next = mChain->mNext;
580         delete mChain;
581         mChain = next;
582     }
583     ALOGD("group[%d] is dead", mDeviceSocket);
584 }
585 
set(int sampleRate,int sampleCount)586 bool AudioGroup::set(int sampleRate, int sampleCount)
587 {
588     mEventQueue = epoll_create1(EPOLL_CLOEXEC);
589     if (mEventQueue == -1) {
590         ALOGE("epoll_create1: %s", strerror(errno));
591         return false;
592     }
593 
594     mSampleRate = sampleRate;
595     mSampleCount = sampleCount;
596 
597     // Create device socket.
598     int pair[2];
599     if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair)) {
600         ALOGE("socketpair: %s", strerror(errno));
601         return false;
602     }
603     mDeviceSocket = pair[0];
604 
605     // Create device stream.
606     mChain = new AudioStream;
607     if (!mChain->set(AudioStream::NORMAL, pair[1], NULL, NULL,
608         sampleRate, sampleCount, -1, -1)) {
609         close(pair[1]);
610         ALOGE("cannot initialize device stream");
611         return false;
612     }
613 
614     // Give device socket a reasonable timeout.
615     timeval tv;
616     tv.tv_sec = 0;
617     tv.tv_usec = 1000 * sampleCount / sampleRate * 500;
618     if (setsockopt(pair[0], SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) {
619         ALOGE("setsockopt: %s", strerror(errno));
620         return false;
621     }
622 
623     // Add device stream into event queue.
624     epoll_event event;
625     event.events = EPOLLIN;
626     event.data.ptr = mChain;
627     if (epoll_ctl(mEventQueue, EPOLL_CTL_ADD, pair[1], &event)) {
628         ALOGE("epoll_ctl: %s", strerror(errno));
629         return false;
630     }
631 
632     // Anything else?
633     ALOGD("stream[%d] joins group[%d]", pair[1], pair[0]);
634     return true;
635 }
636 
setMode(int mode)637 bool AudioGroup::setMode(int mode)
638 {
639     if (mode < 0 || mode > LAST_MODE) {
640         return false;
641     }
642     // FIXME: temporary code to overcome echo and mic gain issues on herring and tuna boards.
643     // Must be modified/removed when the root cause of the issue is fixed in the hardware or
644     // driver
645     char value[PROPERTY_VALUE_MAX];
646     property_get("ro.product.board", value, "");
647     if (mode == NORMAL &&
648             (!strcmp(value, "herring") || !strcmp(value, "tuna"))) {
649         mode = ECHO_SUPPRESSION;
650     }
651     if (mMode == mode) {
652         return true;
653     }
654 
655     mDeviceThread->requestExitAndWait();
656     ALOGD("group[%d] switches from mode %d to %d", mDeviceSocket, mMode, mode);
657     mMode = mode;
658     return (mode == ON_HOLD) || mDeviceThread->start();
659 }
660 
sendDtmf(int event)661 bool AudioGroup::sendDtmf(int event)
662 {
663     if (event < 0 || event > 15) {
664         return false;
665     }
666 
667     // DTMF is rarely used, so we try to make it as lightweight as possible.
668     // Using volatile might be dodgy, but using a pipe or pthread primitives
669     // or stop-set-restart threads seems too heavy. Will investigate later.
670     timespec ts;
671     ts.tv_sec = 0;
672     ts.tv_nsec = 100000000;
673     for (int i = 0; mDtmfEvent != -1 && i < 20; ++i) {
674         nanosleep(&ts, NULL);
675     }
676     if (mDtmfEvent != -1) {
677         return false;
678     }
679     mDtmfEvent = event;
680     nanosleep(&ts, NULL);
681     return true;
682 }
683 
add(AudioStream * stream)684 bool AudioGroup::add(AudioStream *stream)
685 {
686     mNetworkThread->requestExitAndWait();
687 
688     epoll_event event;
689     event.events = EPOLLIN;
690     event.data.ptr = stream;
691     if (epoll_ctl(mEventQueue, EPOLL_CTL_ADD, stream->mSocket, &event)) {
692         ALOGE("epoll_ctl: %s", strerror(errno));
693         return false;
694     }
695 
696     stream->mNext = mChain->mNext;
697     mChain->mNext = stream;
698     if (!mNetworkThread->start()) {
699         // Only take over the stream when succeeded.
700         mChain->mNext = stream->mNext;
701         return false;
702     }
703 
704     ALOGD("stream[%d] joins group[%d]", stream->mSocket, mDeviceSocket);
705     return true;
706 }
707 
remove(AudioStream * stream)708 bool AudioGroup::remove(AudioStream *stream)
709 {
710     mNetworkThread->requestExitAndWait();
711 
712     for (AudioStream *chain = mChain; chain->mNext; chain = chain->mNext) {
713         if (chain->mNext == stream) {
714             if (epoll_ctl(mEventQueue, EPOLL_CTL_DEL, stream->mSocket, NULL)) {
715                 ALOGE("epoll_ctl: %s", strerror(errno));
716                 return false;
717             }
718             chain->mNext = stream->mNext;
719             ALOGD("stream[%d] leaves group[%d]", stream->mSocket, mDeviceSocket);
720             delete stream;
721             break;
722         }
723     }
724 
725     // Do not start network thread if there is only one stream.
726     if (!mChain->mNext || !mNetworkThread->start()) {
727         return false;
728     }
729     return true;
730 }
731 
threadLoop()732 bool AudioGroup::NetworkThread::threadLoop()
733 {
734     AudioStream *chain = mGroup->mChain;
735     int tick = elapsedRealtime();
736     int deadline = tick + 10;
737     int count = 0;
738 
739     for (AudioStream *stream = chain; stream; stream = stream->mNext) {
740         if (tick - stream->mTick >= 0) {
741             stream->encode(tick, chain);
742         }
743         if (deadline - stream->mTick > 0) {
744             deadline = stream->mTick;
745         }
746         ++count;
747     }
748 
749     int event = mGroup->mDtmfEvent;
750     if (event != -1) {
751         for (AudioStream *stream = chain; stream; stream = stream->mNext) {
752             stream->sendDtmf(event);
753         }
754         mGroup->mDtmfEvent = -1;
755     }
756 
757     deadline -= tick;
758     if (deadline < 1) {
759         deadline = 1;
760     }
761 
762     epoll_event events[count];
763     count = epoll_wait(mGroup->mEventQueue, events, count, deadline);
764     if (count == -1) {
765         ALOGE("epoll_wait: %s", strerror(errno));
766         return false;
767     }
768     for (int i = 0; i < count; ++i) {
769         ((AudioStream *)events[i].data.ptr)->decode(tick);
770     }
771 
772     return true;
773 }
774 
checkPlatformAec()775 bool AudioGroup::checkPlatformAec()
776 {
777     effect_descriptor_t fxDesc;
778     uint32_t numFx;
779 
780     if (AudioEffect::queryNumberEffects(&numFx) != NO_ERROR) {
781         return false;
782     }
783     for (uint32_t i = 0; i < numFx; i++) {
784         if (AudioEffect::queryEffect(i, &fxDesc) != NO_ERROR) {
785             continue;
786         }
787         if (memcmp(&fxDesc.type, FX_IID_AEC, sizeof(effect_uuid_t)) == 0) {
788             return true;
789         }
790     }
791     return false;
792 }
793 
threadLoop()794 bool AudioGroup::DeviceThread::threadLoop()
795 {
796     int mode = mGroup->mMode;
797     int sampleRate = mGroup->mSampleRate;
798     size_t sampleCount = mGroup->mSampleCount;
799     int deviceSocket = mGroup->mDeviceSocket;
800 
801     // Find out the frame count for AudioTrack and AudioRecord.
802     size_t output = 0;
803     size_t input = 0;
804     if (AudioTrack::getMinFrameCount(&output, AUDIO_STREAM_VOICE_CALL,
805         sampleRate) != NO_ERROR || output <= 0 ||
806         AudioRecord::getMinFrameCount(&input, sampleRate,
807         AUDIO_FORMAT_PCM_16_BIT, AUDIO_CHANNEL_IN_MONO) != NO_ERROR || input <= 0) {
808         ALOGE("cannot compute frame count");
809         return false;
810     }
811     ALOGD("reported frame count: output %zu, input %zu", output, input);
812 
813     if (output < sampleCount * 2) {
814         output = sampleCount * 2;
815     }
816     if (input < sampleCount * 2) {
817         input = sampleCount * 2;
818     }
819     ALOGD("adjusted frame count: output %zu, input %zu", output, input);
820 
821     // Initialize AudioTrack and AudioRecord.
822     sp<AudioTrack> track = new AudioTrack();
823     sp<AudioRecord> record = new AudioRecord(mGroup->mAttributionSource);
824     // Set caller name so it can be logged in destructor.
825     // MediaMetricsConstants.h: AMEDIAMETRICS_PROP_CALLERNAME_VALUE_RTP
826     track->setCallerName("rtp");
827     record->setCallerName("rtp");
828     if (track->set(AUDIO_STREAM_VOICE_CALL, sampleRate, AUDIO_FORMAT_PCM_16_BIT,
829                 AUDIO_CHANNEL_OUT_MONO, output, AUDIO_OUTPUT_FLAG_NONE, nullptr /*callback*/,
830                 0 /*notificationFrames*/, 0 /*sharedBuffer*/,
831                 false /*threadCanCallJava*/, AUDIO_SESSION_ALLOCATE,
832                 AudioTrack::TRANSFER_OBTAIN) != NO_ERROR ||
833             record->set(AUDIO_SOURCE_VOICE_COMMUNICATION, sampleRate, AUDIO_FORMAT_PCM_16_BIT,
834                 AUDIO_CHANNEL_IN_MONO, input, nullptr /*callback*/, 0 /*notificationFrames*/,
835                 false /*threadCanCallJava*/, AUDIO_SESSION_ALLOCATE,
836                 AudioRecord::TRANSFER_OBTAIN) != NO_ERROR) {
837         ALOGE("cannot initialize audio device");
838         return false;
839     }
840     ALOGD("latency: output %d, input %d", track->latency(), record->latency());
841 
842     // Give device socket a reasonable buffer size.
843     setsockopt(deviceSocket, SOL_SOCKET, SO_RCVBUF, &output, sizeof(output));
844     setsockopt(deviceSocket, SOL_SOCKET, SO_SNDBUF, &output, sizeof(output));
845 
846     // Drain device socket.
847     char c;
848     while (recv(deviceSocket, &c, 1, MSG_DONTWAIT) == 1);
849 
850     // check if platform supports echo cancellation and do not active local echo suppression in
851     // this case
852     EchoSuppressor *echo = NULL;
853     sp<AudioEffect> aec;
854     if (mode == ECHO_SUPPRESSION) {
855         if (mGroup->platformHasAec()) {
856             aec = new AudioEffect(mGroup->mAttributionSource);
857             aec->set(FX_IID_AEC,
858                      nullptr, // uid
859                      0, // priority
860                      nullptr, // callback
861                      record->getSessionId(),
862                      AUDIO_IO_HANDLE_NONE); // record sessionId is sufficient.
863             status_t status = aec->initCheck();
864             if (status == NO_ERROR || status == ALREADY_EXISTS) {
865                 aec->setEnabled(true);
866             } else {
867                 aec.clear();
868             }
869         }
870         // Create local echo suppressor if platform AEC cannot be used.
871         if (aec == 0) {
872              echo = new EchoSuppressor(sampleCount,
873                                        (track->latency() + record->latency()) * sampleRate / 1000);
874         }
875     }
876     // Start AudioRecord before AudioTrack. This prevents AudioTrack from being
877     // disabled due to buffer underrun while waiting for AudioRecord.
878     if (mode != MUTED) {
879         record->start();
880         int16_t one;
881         // FIXME this may not work any more
882         record->read(&one, sizeof(one));
883     }
884     track->start();
885 
886     while (!exitPending()) {
887         int16_t output[sampleCount];
888         if (recv(deviceSocket, output, sizeof(output), 0) <= 0) {
889             memset(output, 0, sizeof(output));
890         }
891 
892         int16_t input[sampleCount];
893         int toWrite = sampleCount;
894         int toRead = (mode == MUTED) ? 0 : sampleCount;
895         int chances = 100;
896 
897         while (--chances > 0 && (toWrite > 0 || toRead > 0)) {
898             if (toWrite > 0) {
899                 AudioTrack::Buffer buffer;
900                 buffer.frameCount = toWrite;
901 
902                 status_t status = track->obtainBuffer(&buffer, 1);
903                 if (status == NO_ERROR) {
904                     int offset = sampleCount - toWrite;
905                     memcpy(buffer.data(), &output[offset], buffer.size());
906                     toWrite -= buffer.getFrameCount();
907                     track->releaseBuffer(&buffer);
908                 } else if (status != TIMED_OUT && status != WOULD_BLOCK) {
909                     ALOGE("cannot write to AudioTrack");
910                     goto exit;
911                 }
912             }
913 
914             if (toRead > 0) {
915                 AudioRecord::Buffer buffer;
916                 buffer.frameCount = toRead;
917 
918                 status_t status = record->obtainBuffer(&buffer, 1);
919                 if (status == NO_ERROR) {
920                     int offset = sampleCount - toRead;
921                     memcpy(&input[offset], buffer.data(), buffer.size());
922                     toRead -= buffer.getFrameCount();
923                     record->releaseBuffer(&buffer);
924                 } else if (status != TIMED_OUT && status != WOULD_BLOCK) {
925                     ALOGE("cannot read from AudioRecord");
926                     goto exit;
927                 }
928             }
929         }
930 
931         if (chances <= 0) {
932             ALOGW("device loop timeout");
933             while (recv(deviceSocket, &c, 1, MSG_DONTWAIT) == 1);
934         }
935 
936         if (mode != MUTED) {
937             if (echo != NULL) {
938                 ALOGV("echo->run()");
939                 echo->run(output, input);
940             }
941             send(deviceSocket, input, sizeof(input), MSG_DONTWAIT);
942         }
943     }
944 
945 exit:
946     delete echo;
947     return true;
948 }
949 
950 //------------------------------------------------------------------------------
951 
// Cached field IDs of android.net.rtp.AudioGroup, resolved once in
// registerAudioGroup().
// gNative: the long field "mNative", which stores the native AudioGroup
// pointer (0 when no native group is attached).
static jfieldID gNative;
// gMode: the int field "mMode", read when a new native group is created.
static jfieldID gMode;
954 
add(JNIEnv * env,jobject thiz,jint mode,jint socket,jstring jRemoteAddress,jint remotePort,jstring jCodecSpec,jint dtmfType,jobject jAttributionSource)955 jlong add(JNIEnv *env, jobject thiz, jint mode,
956     jint socket, jstring jRemoteAddress, jint remotePort,
957     jstring jCodecSpec, jint dtmfType, jobject jAttributionSource)
958 {
959     AudioCodec *codec = NULL;
960     AudioStream *stream = NULL;
961     AudioGroup *group = NULL;
962 
963     // Sanity check.
964     sockaddr_storage remote;
965     if (parse(env, jRemoteAddress, remotePort, &remote) < 0) {
966         // Exception already thrown.
967         return 0;
968     }
969     if (!jCodecSpec) {
970         jniThrowNullPointerException(env, "codecSpec");
971         return 0;
972     }
973     const char *codecSpec = env->GetStringUTFChars(jCodecSpec, NULL);
974     if (!codecSpec) {
975         // Exception already thrown.
976         return 0;
977     }
978     socket = dup(socket);
979     if (socket == -1) {
980         jniThrowException(env, "java/lang/IllegalStateException",
981             "cannot get stream socket");
982         return 0;
983     }
984 
985     Parcel* parcel = parcelForJavaObject(env, jAttributionSource);
986     AttributionSourceState attributionSource;
987     attributionSource.readFromParcel(parcel);
988 
989     // Create audio codec.
990     int codecType = -1;
991     char codecName[16];
992     int sampleRate = -1;
993     sscanf(codecSpec, "%d %15[^/]%*c%d", &codecType, codecName, &sampleRate);
994     codec = newAudioCodec(codecName);
995     int sampleCount = (codec ? codec->set(sampleRate, codecSpec) : -1);
996     env->ReleaseStringUTFChars(jCodecSpec, codecSpec);
997     if (sampleCount <= 0) {
998         jniThrowException(env, "java/lang/IllegalStateException",
999             "cannot initialize audio codec");
1000         goto error;
1001     }
1002 
1003     // Create audio stream.
1004     stream = new AudioStream;
1005     if (!stream->set(mode, socket, &remote, codec, sampleRate, sampleCount,
1006         codecType, dtmfType)) {
1007         jniThrowException(env, "java/lang/IllegalStateException",
1008             "cannot initialize audio stream");
1009         goto error;
1010     }
1011     socket = -1;
1012     codec = NULL;
1013 
1014     // Create audio group.
1015     group = (AudioGroup *)env->GetLongField(thiz, gNative);
1016     if (!group) {
1017         int mode = env->GetIntField(thiz, gMode);
1018         group = new AudioGroup(attributionSource);
1019         if (!group->set(8000, 256) || !group->setMode(mode)) {
1020             jniThrowException(env, "java/lang/IllegalStateException",
1021                 "cannot initialize audio group");
1022             goto error;
1023         }
1024     }
1025 
1026     // Add audio stream into audio group.
1027     if (!group->add(stream)) {
1028         jniThrowException(env, "java/lang/IllegalStateException",
1029             "cannot add audio stream");
1030         goto error;
1031     }
1032 
1033     // Succeed.
1034     env->SetLongField(thiz, gNative, (jlong)group);
1035     return (jlong)stream;
1036 
1037 error:
1038     delete group;
1039     delete stream;
1040     delete codec;
1041     close(socket);
1042     env->SetLongField(thiz, gNative, 0);
1043     return 0;
1044 }
1045 
remove(JNIEnv * env,jobject thiz,jlong stream)1046 void remove(JNIEnv *env, jobject thiz, jlong stream)
1047 {
1048     AudioGroup *group = (AudioGroup *)env->GetLongField(thiz, gNative);
1049     if (group) {
1050         if (!stream || !group->remove((AudioStream *)stream)) {
1051             delete group;
1052             env->SetLongField(thiz, gNative, 0);
1053         }
1054     }
1055 }
1056 
setMode(JNIEnv * env,jobject thiz,jint mode)1057 void setMode(JNIEnv *env, jobject thiz, jint mode)
1058 {
1059     AudioGroup *group = (AudioGroup *)env->GetLongField(thiz, gNative);
1060     if (group && !group->setMode(mode)) {
1061         jniThrowException(env, "java/lang/IllegalArgumentException", NULL);
1062     }
1063 }
1064 
sendDtmf(JNIEnv * env,jobject thiz,jint event)1065 void sendDtmf(JNIEnv *env, jobject thiz, jint event)
1066 {
1067     AudioGroup *group = (AudioGroup *)env->GetLongField(thiz, gNative);
1068     if (group && !group->sendDtmf(event)) {
1069         jniThrowException(env, "java/lang/IllegalArgumentException", NULL);
1070     }
1071 }
1072 
// Native method table registered on android.net.rtp.AudioGroup by
// registerAudioGroup(). Each entry is {Java method name, JNI signature,
// native function}; a signature must stay in sync with the parameter list of
// the function it points to.
JNINativeMethod gMethods[] = {
    // (int, int, String, int, String, int, android.os.Parcel) -> long
    {"nativeAdd", "(IILjava/lang/String;ILjava/lang/String;ILandroid/os/Parcel;)J", (void *)add},
    // (long) -> void
    {"nativeRemove", "(J)V", (void *)remove},
    // (int) -> void
    {"nativeSetMode", "(I)V", (void *)setMode},
    // (int) -> void
    {"nativeSendDtmf", "(I)V", (void *)sendDtmf},
};
1079 
1080 } // namespace
1081 
registerAudioGroup(JNIEnv * env)1082 int registerAudioGroup(JNIEnv *env)
1083 {
1084     gRandom = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
1085     if (gRandom == -1) {
1086         ALOGE("urandom: %s", strerror(errno));
1087         return -1;
1088     }
1089 
1090     jclass clazz;
1091     if ((clazz = env->FindClass("android/net/rtp/AudioGroup")) == NULL ||
1092         (gNative = env->GetFieldID(clazz, "mNative", "J")) == NULL ||
1093         (gMode = env->GetFieldID(clazz, "mMode", "I")) == NULL ||
1094         env->RegisterNatives(clazz, gMethods, NELEM(gMethods)) < 0) {
1095         ALOGE("JNI registration failed");
1096         return -1;
1097     }
1098 
1099     return 0;
1100 }
1101