1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.service.voice;
18 
19 import android.annotation.DurationMillisLong;
20 import android.annotation.NonNull;
21 import android.annotation.Nullable;
22 import android.annotation.SdkConstant;
23 import android.annotation.SuppressLint;
24 import android.annotation.SystemApi;
25 import android.app.Service;
26 import android.content.ContentCaptureOptions;
27 import android.content.Context;
28 import android.content.Intent;
29 import android.hardware.soundtrigger.SoundTrigger;
30 import android.media.AudioFormat;
31 import android.media.AudioSystem;
32 import android.os.IBinder;
33 import android.os.IRemoteCallback;
34 import android.os.ParcelFileDescriptor;
35 import android.os.PersistableBundle;
36 import android.os.RemoteException;
37 import android.os.SharedMemory;
38 import android.speech.IRecognitionServiceManager;
39 import android.util.Log;
40 import android.view.contentcapture.ContentCaptureManager;
41 import android.view.contentcapture.IContentCaptureManager;
42 
43 import com.android.internal.infra.AndroidFuture;
44 
45 import java.io.FileInputStream;
46 import java.io.FileNotFoundException;
47 import java.util.Objects;
48 import java.util.concurrent.ExecutionException;
49 import java.util.function.IntConsumer;
50 
51 /**
52  * Implemented by an application that wants to offer query detection with visual signals.
53  *
54  * This service leverages visual signals such as camera frames to detect and stream queries from the
55  * device microphone to the {@link VoiceInteractionService}, without the support of hotword. The
56  * system will bind an application's {@link VoiceInteractionService} first. When
57  * {@link VoiceInteractionService#createVisualQueryDetector(PersistableBundle, SharedMemory,
58  * Executor, VisualQueryDetector.Callback)} is called, the system will bind the application's
59  * {@link VisualQueryDetectionService}. When requested from {@link VoiceInteractionService}, the
60  * system calls into the {@link VisualQueryDetectionService#onStartDetection()} to enable
61  * detection. This method MUST be implemented to support visual query detection service.
62  *
63  * Note: Methods in this class may be called concurrently.
64  *
65  * @hide
66  */
67 @SystemApi
68 public abstract class VisualQueryDetectionService extends Service
69         implements SandboxedDetectionInitializer {
70 
71     private static final String TAG = VisualQueryDetectionService.class.getSimpleName();
72 
73     private static final long UPDATE_TIMEOUT_MILLIS = 20000;
74 
75     /**
76      * The {@link Intent} that must be declared as handled by the service.
77      * To be supported, the service must also require the
78      * {@link android.Manifest.permission#BIND_VISUAL_QUERY_DETECTION_SERVICE} permission
79      * so that other applications can not abuse it.
80      */
81     @SdkConstant(SdkConstant.SdkConstantType.SERVICE_ACTION)
82     public static final String SERVICE_INTERFACE =
83             "android.service.voice.VisualQueryDetectionService";
84 
85 
86     /** @hide */
87     public static final String KEY_INITIALIZATION_STATUS = "initialization_status";
88 
89     private IDetectorSessionVisualQueryDetectionCallback mRemoteCallback = null;
90     @Nullable
91     private ContentCaptureManager mContentCaptureManager;
92     @Nullable
93     private IRecognitionServiceManager mIRecognitionServiceManager;
94     @Nullable
95     private IDetectorSessionStorageService mDetectorSessionStorageService;
96 
97 
98     private final ISandboxedDetectionService mInterface = new ISandboxedDetectionService.Stub() {
99 
100         @Override
101         public void detectWithVisualSignals(
102                 IDetectorSessionVisualQueryDetectionCallback callback) {
103             Log.v(TAG, "#detectWithVisualSignals");
104             mRemoteCallback = callback;
105             VisualQueryDetectionService.this.onStartDetection();
106         }
107 
108         @Override
109         public void stopDetection() {
110             Log.v(TAG, "#stopDetection");
111             VisualQueryDetectionService.this.onStopDetection();
112         }
113 
114         @Override
115         public void updateState(PersistableBundle options, SharedMemory sharedMemory,
116                 IRemoteCallback callback) throws RemoteException {
117             Log.v(TAG, "#updateState" + (callback != null ? " with callback" : ""));
118             VisualQueryDetectionService.this.onUpdateStateInternal(
119                     options,
120                     sharedMemory,
121                     callback);
122         }
123 
124         @Override
125         public void ping(IRemoteCallback callback) throws RemoteException {
126             callback.sendResult(null);
127         }
128 
129         @Override
130         public void detectFromDspSource(
131                 SoundTrigger.KeyphraseRecognitionEvent event,
132                 AudioFormat audioFormat,
133                 long timeoutMillis,
134                 IDspHotwordDetectionCallback callback) {
135             throw new UnsupportedOperationException("Not supported by VisualQueryDetectionService");
136         }
137 
138         @Override
139         public void detectFromMicrophoneSource(
140                 ParcelFileDescriptor audioStream,
141                 @HotwordDetectionService.AudioSource int audioSource,
142                 AudioFormat audioFormat,
143                 PersistableBundle options,
144                 IDspHotwordDetectionCallback callback) {
145             throw new UnsupportedOperationException("Not supported by VisualQueryDetectionService");
146         }
147 
148         @Override
149         public void updateAudioFlinger(IBinder audioFlinger) {
150             AudioSystem.setAudioFlingerBinder(audioFlinger);
151         }
152 
153         @Override
154         public void updateContentCaptureManager(IContentCaptureManager manager,
155                 ContentCaptureOptions options) {
156             mContentCaptureManager = new ContentCaptureManager(
157                     VisualQueryDetectionService.this, manager, options);
158         }
159 
160         @Override
161         public void updateRecognitionServiceManager(IRecognitionServiceManager manager) {
162             mIRecognitionServiceManager = manager;
163         }
164 
165         @Override
166         public void registerRemoteStorageService(IDetectorSessionStorageService
167                 detectorSessionStorageService) {
168             mDetectorSessionStorageService = detectorSessionStorageService;
169         }
170     };
171 
172     @Override
173     @SuppressLint("OnNameExpected")
getSystemService(@erviceName @onNull String name)174     public @Nullable Object getSystemService(@ServiceName @NonNull String name) {
175         if (Context.CONTENT_CAPTURE_MANAGER_SERVICE.equals(name)) {
176             return mContentCaptureManager;
177         } else if (Context.SPEECH_RECOGNITION_SERVICE.equals(name)
178                 && mIRecognitionServiceManager != null) {
179             return mIRecognitionServiceManager.asBinder();
180         } else {
181             return super.getSystemService(name);
182         }
183     }
184 
185     /**
186      * {@inheritDoc}
187      * @hide
188      */
189     @Override
190     @SystemApi
onUpdateState( @ullable PersistableBundle options, @Nullable SharedMemory sharedMemory, @DurationMillisLong long callbackTimeoutMillis, @Nullable IntConsumer statusCallback)191     public void onUpdateState(
192             @Nullable PersistableBundle options,
193             @Nullable SharedMemory sharedMemory,
194             @DurationMillisLong long callbackTimeoutMillis,
195             @Nullable IntConsumer statusCallback) {
196     }
197 
198     @Override
199     @Nullable
onBind(@onNull Intent intent)200     public IBinder onBind(@NonNull Intent intent) {
201         if (SERVICE_INTERFACE.equals(intent.getAction())) {
202             return mInterface.asBinder();
203         }
204         Log.w(TAG, "Tried to bind to wrong intent (should be " + SERVICE_INTERFACE + ": "
205                 + intent);
206         return null;
207     }
208 
onUpdateStateInternal(@ullable PersistableBundle options, @Nullable SharedMemory sharedMemory, IRemoteCallback callback)209     private void onUpdateStateInternal(@Nullable PersistableBundle options,
210             @Nullable SharedMemory sharedMemory, IRemoteCallback callback) {
211         IntConsumer intConsumer =
212                 SandboxedDetectionInitializer.createInitializationStatusConsumer(callback);
213         onUpdateState(options, sharedMemory, UPDATE_TIMEOUT_MILLIS, intConsumer);
214     }
215 
216     /**
217      * This is called after the service is set up and the client should open the camera and the
218      * microphone to start recognition. When the {@link VoiceInteractionService} requests that this
219      * service {@link HotwordDetector#startRecognition()} start recognition on audio coming directly
220      * from the device microphone.
221      * <p>
222      * Signal senders that return attention and query results are also expected to be called in this
223      * method according to the detection outcomes.
224      * <p>
225      * On successful user attention, developers should call
226      * {@link VisualQueryDetectionService#gainedAttention()} to enable the streaming of the query.
227      * <p>
228      * On user attention is lost, developers should call
229      * {@link VisualQueryDetectionService#lostAttention()} to disable the streaming of the query.
230      * <p>
231      * On query is detected and ready to stream, developers should call
232      * {@link VisualQueryDetectionService#streamQuery(String)} to return detected query to the
233      * {@link VisualQueryDetector}.
234      * <p>
235      * On streamed query should be rejected, clients should call
236      * {@link VisualQueryDetectionService#rejectQuery()} to abandon query streamed to the
237      * {@link VisualQueryDetector}.
238      * <p>
239      * On streamed query is finished, clients should call
240      * {@link VisualQueryDetectionService#finishQuery()} to complete query streamed to
241      * {@link VisualQueryDetector}.
242      * <p>
243      * Before a call for {@link VisualQueryDetectionService#streamQuery(String)} is triggered,
244      * {@link VisualQueryDetectionService#gainedAttention()} MUST be called to enable the streaming
245      * of query. A query streaming is also expected to be finished by calling either
246      * {@link VisualQueryDetectionService#finishQuery()} or
247      * {@link VisualQueryDetectionService#rejectQuery()} before a new query should start streaming.
248      * When the service enters the state where query streaming should be disabled,
249      * {@link VisualQueryDetectionService#lostAttention()} MUST be called to block unnecessary
250      * streaming.
251      */
onStartDetection()252     public void onStartDetection() {
253         throw new UnsupportedOperationException();
254     }
255 
256     /**
257      * Called when the {@link VoiceInteractionService}
258      * {@link HotwordDetector#stopRecognition()} requests that recognition be stopped.
259      */
onStopDetection()260     public void onStopDetection() {
261     }
262 
263     /**
264      * Informs the system that the attention is gained for the interaction intention
265      * {@link VisualQueryAttentionResult#INTERACTION_INTENTION_AUDIO_VISUAL} with
266      * engagement level equals to the maximum value possible so queries can be streamed.
267      *
268      * Usage of this method is not recommended, please use
269      * {@link VisualQueryDetectionService#gainedAttention(VisualQueryAttentionResult)} instead.
270      *
271      */
gainedAttention()272     public final void gainedAttention() {
273         try {
274             mRemoteCallback.onAttentionGained(null);
275         } catch (RemoteException e) {
276             throw e.rethrowFromSystemServer();
277         }
278     }
279 
280     /**
281      * Puts the device into an attention state that will listen to certain interaction intention
282      * based on the {@link VisualQueryAttentionResult} provided.
283      *
284      * Different type and levels of engagement will lead to corresponding UI icons showing. See
285      * {@link VisualQueryAttentionResult#setInteractionIntention(int)} for details.
286      *
287      * Exactly one {@link VisualQueryAttentionResult} can be set at a time with this method at
288      * the moment. Multiple attention results will be supported to set the device into with this
289      * API before {@link android.os.Build.VERSION_CODES#VANILLA_ICE_CREAM} is finalized.
290      *
291      * Latest call will override the {@link VisualQueryAttentionResult} of previous calls. Queries
292      * streamed are independent of the attention interactionIntention.
293      *
294      * @param attentionResult Attention result of type {@link VisualQueryAttentionResult}.
295      */
296     @SuppressLint("UnflaggedApi") // b/325678077 flags not supported in isolated process
gainedAttention(@onNull VisualQueryAttentionResult attentionResult)297     public final void gainedAttention(@NonNull VisualQueryAttentionResult attentionResult) {
298         try {
299             mRemoteCallback.onAttentionGained(attentionResult);
300         } catch (RemoteException e) {
301             throw e.rethrowFromSystemServer();
302         }
303     }
304 
305     /**
306      * Informs the system that all attention has lost to stop streaming.
307      */
lostAttention()308     public final void lostAttention() {
309         try {
310             mRemoteCallback.onAttentionLost(0); // placeholder
311         } catch (RemoteException e) {
312             throw e.rethrowFromSystemServer();
313         }
314     }
315 
316     /**
317      * This will cancel the corresponding attention if the provided interaction intention is the
318      * same as which of the object called with
319      * {@link VisualQueryDetectionService#gainedAttention(VisualQueryAttentionResult)}.
320      *
321      * @param interactionIntention Interaction intention, one of
322      *        {@link VisualQueryAttentionResult#InteractionIntention}.
323      */
324     @SuppressLint("UnflaggedApi") // b/325678077 flags not supported in isolated process
lostAttention( @isualQueryAttentionResult.InteractionIntention int interactionIntention)325     public final void lostAttention(
326             @VisualQueryAttentionResult.InteractionIntention int interactionIntention) {
327         try {
328             mRemoteCallback.onAttentionLost(interactionIntention);
329         } catch (RemoteException e) {
330             throw e.rethrowFromSystemServer();
331         }
332     }
333 
334     /**
335      * Informs the {@link VisualQueryDetector} with the text content being captured about the
336      * query from the audio source. {@code partialQuery} is provided to the
337      * {@link VisualQueryDetector}. This method is expected to be only triggered if
338      * {@link VisualQueryDetectionService#gainedAttention()} is called to put the service into the
339      * attention gained state.
340      *
341      * Usage of this method is not recommended, please use
342      * {@link VisualQueryDetectionService#streamQuery(VisualQueryDetectedResult)} instead.
343      *
344      * @param partialQuery Partially detected query in string.
345      * @throws IllegalStateException if method called without attention gained.
346      */
streamQuery(@onNull String partialQuery)347     public final void streamQuery(@NonNull String partialQuery) throws IllegalStateException {
348         Objects.requireNonNull(partialQuery);
349         try {
350             mRemoteCallback.onQueryDetected(partialQuery);
351         } catch (RemoteException e) {
352             throw new IllegalStateException("#streamQuery must be only be triggered after "
353                     + "calling #gainedAttention to be in the attention gained state.");
354         }
355     }
356 
357     /**
358      * Informs the {@link VisualQueryDetector} with the text content being captured about the
359      * query from the audio source. {@code partialResult} is provided to the
360      * {@link VisualQueryDetector}. This method is expected to be only triggered if
361      * {@link VisualQueryDetectionService#gainedAttention()} is called to put the service into
362      * the attention gained state.
363      *
364      * @param partialResult Partially detected result in the format of
365      * {@link VisualQueryDetectedResult}.
366      */
367     @SuppressLint("UnflaggedApi") // b/325678077 flags not supported in isolated process
streamQuery(@onNull VisualQueryDetectedResult partialResult)368     public final void streamQuery(@NonNull VisualQueryDetectedResult partialResult) {
369         Objects.requireNonNull(partialResult);
370         try {
371             mRemoteCallback.onResultDetected(partialResult);
372         } catch (RemoteException e) {
373             throw new IllegalStateException("#streamQuery must be only be triggered after "
374                     + "calling #gainedAttention to be in the attention gained state.");
375         }
376     }
377 
378     /**
379      * Informs the {@link VisualQueryDetector} to abandon the streamed partial query that has
380      * been sent to {@link VisualQueryDetector}.This method is expected to be only triggered if
381      * {@link VisualQueryDetectionService#streamQuery(String)} is called to put the service into
382      * the query streaming state.
383      *
384      * @throws IllegalStateException if method called without query streamed.
385      */
rejectQuery()386     public final void rejectQuery() throws IllegalStateException {
387         try {
388             mRemoteCallback.onQueryRejected();
389         } catch (RemoteException e) {
390             throw new IllegalStateException("#rejectQuery must be only be triggered after "
391                     + "calling #streamQuery to be in the query streaming state.");
392         }
393     }
394 
395     /**
396      * Informs {@link VisualQueryDetector} with the metadata to complete the streamed partial
397      * query that has been sent to {@link VisualQueryDetector}. This method is expected to be
398      * only triggered if {@link VisualQueryDetectionService#streamQuery(String)} is called to put
399      * the service into the query streaming state.
400      *
401      * @throws IllegalStateException if method called without query streamed.
402      */
finishQuery()403     public final void finishQuery() throws IllegalStateException {
404         try {
405             mRemoteCallback.onQueryFinished();
406         } catch (RemoteException e) {
407             throw new IllegalStateException("#finishQuery must be only be triggered after "
408                     + "calling #streamQuery to be in the query streaming state.");
409         }
410     }
411 
412     /**
413      * Overrides {@link Context#openFileInput} to read files with the given file names under the
414      * internal app storage of the {@link VoiceInteractionService}, i.e., the input file path would
415      * be added with {@link Context#getFilesDir()} as prefix.
416      *
417      * @param filename Relative path of a file under {@link Context#getFilesDir()}.
418      * @throws FileNotFoundException if the file does not exist or cannot be open.
419      */
420     @Override
openFileInput(@onNull String filename)421     public @NonNull FileInputStream openFileInput(@NonNull String filename) throws
422             FileNotFoundException {
423         try {
424             AndroidFuture<ParcelFileDescriptor> future = new AndroidFuture<>();
425             assert mDetectorSessionStorageService != null;
426             mDetectorSessionStorageService.openFile(filename, future);
427             ParcelFileDescriptor pfd = future.get();
428             if (pfd == null) {
429                 throw new FileNotFoundException(
430                         "File does not exist. Unable to open " + filename + ".");
431             }
432             return new FileInputStream(pfd.getFileDescriptor());
433         } catch (RemoteException | ExecutionException | InterruptedException e) {
434             Log.w(TAG, "Cannot open file due to remote service failure");
435             throw new FileNotFoundException(e.getMessage());
436         }
437     }
438 
439 }
440