// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

module chromeos.assistant.mojom;

import "ui/accessibility/mojom/ax_assistant_structure.mojom";
import "ui/gfx/geometry/mojo/geometry.mojom";
import "url/mojom/url.mojom";
import "mojo/public/mojom/base/string16.mojom";
import "mojo/public/mojom/base/time.mojom";

// Interface to communicate with assistant backend.
interface Assistant {
  // Starts a cached screen context interaction. Results related to the screen
  // context will be returned through the |AssistantInteractionSubscriber|
  // interface to registered subscribers. It is illegal to call this method
  // without having first cached screen context (see CacheScreenContext()).
  StartCachedScreenContextInteraction();

  // Starts a metalayer interaction for the selected screen |region|. Results
  // related to the selected region will be returned through the
  // |AssistantInteractionSubscriber| interface to registered subscribers.
  StartMetalayerInteraction(gfx.mojom.Rect region);

  // Starts a new Assistant voice interaction.
  StartVoiceInteraction();

  // Stops the active Assistant interaction. If there is no active interaction,
  // this method is a no-op.
  StopActiveInteraction();

  // Sends the text |query| to assistant. The result will be returned through
  // registered |AssistantInteractionSubscriber|s.
  SendTextQuery(string query);

  // Registers assistant interaction event subscriber. Subscribers'
  // implementation is responsible for selecting events of interest.
  AddAssistantInteractionSubscriber(AssistantInteractionSubscriber subscriber);

  // Registers assistant notification event subscriber. Subscribers'
  // implementation is responsible for selecting events of interest.
  AddAssistantNotificationSubscriber(
      AssistantNotificationSubscriber subscriber);

  // Retrieves a notification. A voiceless interaction will be sent to server to
  // retrieve the notification payload, which can trigger other assistant's
  // events such as OnTextResponse to show the result in the UI. The retrieved
  // notification will be removed from the UI.
  // NOTE(review): |action_index| presumably selects which action of the
  // notification was invoked; its valid range is not documented here - confirm
  // with the implementation.
  RetrieveNotification(AssistantNotification notification, int32 action_index);

  // Dismisses a notification.
  DismissNotification(AssistantNotification notification);

  // Caches screen context, made up of view hierarchy and screenshot data.
  // Screen context is used to provide additional context alongside text and
  // voice queries, and may also be used in standalone screen context
  // interactions (see StartCachedScreenContextInteraction()).
  // The reply carries no data; presumably it is sent once caching has
  // completed - confirm with the implementation.
  CacheScreenContext() => ();
};

// Subscribes to assistant's interaction event. These events are server driven
// in response to the user's direct interaction with the assistant. Responses
// from the assistant may contain untrusted third-party content. Subscriber
// implementations must be sure to handle the response data appropriately.
interface AssistantInteractionSubscriber {
  // Assistant interaction has started. In the event of a voice interaction,
  // |is_voice_interaction| will be true. This implies that the mic is open.
  OnInteractionStarted(bool is_voice_interaction);

  // Assistant interaction has ended with the specified |resolution|.
  OnInteractionFinished(AssistantInteractionResolution resolution);

  // Assistant got HTML response from server.
  OnHtmlResponse(string response);

  // Assistant got suggestions response from server.
  OnSuggestionsResponse(array<AssistantSuggestion> response);

  // Assistant got text response from server.
  OnTextResponse(string response);

  // Assistant got open URL response from server.
  OnOpenUrlResponse(url.mojom.Url url);

  // Assistant speech recognition has started.
  OnSpeechRecognitionStarted();

  // Assistant speech recognition intermediate result is available.
  OnSpeechRecognitionIntermediateResult(string high_confidence_text,
                                        string low_confidence_text);

  // Assistant speech recognition detected end of utterance.
  OnSpeechRecognitionEndOfUtterance();

  // Assistant speech recognition final result is available.
  OnSpeechRecognitionFinalResult(string final_result);

  // Assistant got an instantaneous speech level update in dB.
  OnSpeechLevelUpdated(float speech_level);

  // Assistant has started speaking. When TTS is started due to an error that
  // occurred during the interaction, |due_to_error| is true.
  OnTtsStarted(bool due_to_error);
};

// Subscribes to assistant's notification event. These events are server
// driven and pertain to push notifications.
interface AssistantNotificationSubscriber {
  // Assistant got notification from server.
  OnShowNotification(AssistantNotification notification);

  // Notifies UI to remove the notification with the |grouping_key|.
  // If |grouping_key| is empty, remove all notifications.
  OnRemoveNotification(string grouping_key);
};

// Interface for browser to bind and start assistant.
interface AssistantPlatform {
  // Initiates assistant and provides interfaces for assistant to call into the
  // browser.
  Init(Client assistant_client_interface,
       DeviceActions device_actions_interface);
};

// Interface for assistant to call into client.
interface Client {
  // Notifies assistant client that assistant running status has changed.
  OnAssistantStatusChanged(bool running);

  // Requests context of current window from browser. Both |extra| and
  // |structure| are nullable, so callers must handle either being absent.
  RequestAssistantStructure() => (
      ax.mojom.AssistantExtra? extra,
      ax.mojom.AssistantTree? structure);
};

// Interface for assistant to call into browser for audio input related
// functions.
interface AudioInput {
  // Registers audio input observer. We assume dual-channel, 16kHz and signed
  // 32-bit int interleaved format. Audio stream starts recording as soon as
  // the first observer is added, and will stop recording once the last
  // observer is closed.
  AddObserver(AudioInputObserver observer);
};

// Observer for assistant to receive audio input data.
interface AudioInputObserver {
  // Notifies that audio frames are available. |buffer| holds the samples in
  // the format described on AudioInput.AddObserver() (signed 32-bit int,
  // interleaved), |frame_count| is the number of frames delivered, and
  // |timestamp| is the time associated with this batch of frames.
  // TODO(muyuanli): Use data pipe.
  OnAudioInputFramesAvailable(array<int32> buffer,
                              uint32 frame_count,
                              mojo_base.mojom.TimeTicks timestamp);

  // Notifies that audio input stream is closed.
  OnAudioInputClosed();
};

// Interface for assistant to call into browser to perform device actions.
interface DeviceActions {
  // Enables or disables WiFi.
  SetWifiEnabled(bool enabled);

  // Enables or disables Bluetooth.
  SetBluetoothEnabled(bool enabled);
};

// Enumeration of possible completions for an Assistant interaction.
enum AssistantInteractionResolution {
  // Assistant interaction completed normally.
  kNormal,
  // Assistant interaction completed due to barge in or cancellation.
  kInterruption,
  // Assistant interaction completed due to error.
  kError,
  // Assistant interaction completed due to mic timeout.
  kMicTimeout,
  // Assistant interaction completed due to multi-device hotword loss.
  kMultiDeviceHotwordLoss,
};

// Models an Assistant suggestion.
struct AssistantSuggestion {
  // Display text. e.g. "Cancel".
  string text;

  // Optional URL for icon. e.g. "https://www.gstatic.com/icon.png".
  // NOTE(review): the field is not declared nullable, so "optional" is
  // presumably expressed as an empty URL - confirm with consumers.
  url.mojom.Url icon_url;

  // Optional URL for action. e.g.
  // "https://www.google.com/search?query=action".
  // NOTE(review): not declared nullable; see the note on |icon_url|.
  url.mojom.Url action_url;
};

// Models an Assistant notification.
struct AssistantNotification {
  // Title of the notification.
  string title;

  // Body text of the notification.
  string message;

  // URL to open when the tap action is invoked on the notification.
  url.mojom.Url action_url;

  // An id that uniquely identifies a notification.
  string notification_id;

  // Used to fetch notification contents.
  string consistency_token;
  string opaque_token;

  // Key that can be used to group multiple notifications together.
  string grouping_key;

  // Obfuscated Gaia id of the user who is the intended recipient.
  string obfuscated_gaia_id;
};