naiveproxy/chromeos/services/assistant/public/mojom/assistant.mojom
2018-12-09 21:59:24 -05:00

215 lines
7.9 KiB
Plaintext

// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
module chromeos.assistant.mojom;
import "ui/accessibility/mojom/ax_assistant_structure.mojom";
import "ui/gfx/geometry/mojo/geometry.mojom";
import "url/mojom/url.mojom";
import "mojo/public/mojom/base/string16.mojom";
import "mojo/public/mojom/base/time.mojom";
// Primary interface used to communicate with the assistant backend.
interface Assistant {
  // Begins an interaction based on previously cached screen context. Results
  // related to the screen context are delivered to registered subscribers
  // through the |AssistantInteractionSubscriber| interface. Screen context
  // must already have been cached (see CacheScreenContext()); calling this
  // method without doing so is illegal.
  StartCachedScreenContextInteraction();

  // Begins a metalayer interaction for the selected screen |region|. Results
  // related to the selected region are delivered to registered subscribers
  // through the |AssistantInteractionSubscriber| interface.
  StartMetalayerInteraction(gfx.mojom.Rect region);

  // Begins a new Assistant voice interaction.
  StartVoiceInteraction();

  // Stops the currently active Assistant interaction. This is a no-op when
  // no interaction is active.
  StopActiveInteraction();

  // Sends the text |query| to the assistant. The result is delivered through
  // the registered |AssistantInteractionSubscriber|.
  SendTextQuery(string query);

  // Registers a subscriber for assistant interaction events. The subscriber's
  // implementation is responsible for selecting the events of interest.
  AddAssistantInteractionSubscriber(AssistantInteractionSubscriber subscriber);

  // Registers a subscriber for assistant notification events. The
  // subscriber's implementation is responsible for selecting the events of
  // interest.
  AddAssistantNotificationSubscriber(
      AssistantNotificationSubscriber subscriber);

  // Retrieves |notification|. A voiceless interaction is sent to the server
  // to fetch the notification payload, which can trigger other assistant
  // events (such as OnTextResponse) to show the result in the UI. The
  // retrieved notification is removed from the UI.
  // NOTE(review): |action_index| is undocumented here; presumably it selects
  // which action of the notification was invoked -- confirm with the
  // implementation.
  RetrieveNotification(AssistantNotification notification, int32 action_index);

  // Dismisses |notification|.
  DismissNotification(AssistantNotification notification);

  // Caches screen context, which consists of view hierarchy and screenshot
  // data. Screen context provides additional context alongside text and voice
  // queries, and may also be used in standalone screen context interactions
  // (see StartCachedScreenContextInteraction()). The response carries no
  // data.
  CacheScreenContext() => ();
};
// Receives the assistant's interaction events. These events are driven by the
// server in response to the user's direct interaction with the assistant.
// Assistant responses may contain untrusted third-party content, so
// implementations must be sure to handle the response data appropriately.
interface AssistantInteractionSubscriber {
  // An Assistant interaction has started. |is_voice_interaction| is true for
  // a voice interaction, which implies that the mic is open.
  OnInteractionStarted(bool is_voice_interaction);

  // The Assistant interaction has ended with the given |resolution|.
  OnInteractionFinished(AssistantInteractionResolution resolution);

  // The assistant received an Html response from the server.
  OnHtmlResponse(string response);

  // The assistant received a suggestions response from the server.
  OnSuggestionsResponse(array<AssistantSuggestion> response);

  // The assistant received a text response from the server.
  OnTextResponse(string response);

  // The assistant received an open URL response from the server.
  OnOpenUrlResponse(url.mojom.Url url);

  // Assistant speech recognition has started.
  OnSpeechRecognitionStarted();

  // An intermediate speech recognition result is available.
  OnSpeechRecognitionIntermediateResult(string high_confidence_text,
                                        string low_confidence_text);

  // Speech recognition detected the end of the utterance.
  OnSpeechRecognitionEndOfUtterance();

  // The final speech recognition result is available.
  OnSpeechRecognitionFinalResult(string final_result);

  // The assistant received an instantaneous speech level update, in dB.
  OnSpeechLevelUpdated(float speech_level);

  // The assistant has started speaking. |due_to_error| is true when TTS was
  // started because of an error that occurred during the interaction.
  OnTtsStarted(bool due_to_error);
};
// Receives the assistant's notification events. These events are driven by
// the server and pertain to push notifications.
interface AssistantNotificationSubscriber {
  // The assistant received |notification| from the server.
  OnShowNotification(AssistantNotification notification);

  // Tells the UI to remove the notification identified by |grouping_key|.
  // An empty |grouping_key| means all notifications should be removed.
  OnRemoveNotification(string grouping_key);
};
// Interface the browser binds in order to start the assistant.
interface AssistantPlatform {
  // Initiates the assistant and hands it the interfaces it uses to call back
  // into the browser.
  Init(Client assistant_client_interface,
       DeviceActions device_actions_interface);
};
// Interface the assistant uses to call into its client.
interface Client {
  // Informs the assistant client that the assistant's running status has
  // changed; |running| carries the new status.
  OnAssistantStatusChanged(bool running);

  // Requests the context of the current window from the browser. Both
  // response values are nullable.
  RequestAssistantStructure() => (
      ax.mojom.AssistantExtra? extra,
      ax.mojom.AssistantTree? structure);
};
// Interface the assistant uses to call into the browser for audio input
// related functions.
interface AudioInput {
  // Registers an audio input observer. The audio is assumed to be
  // dual-channel, 16kHz, in signed 32-bit int interleaved format. Recording
  // starts as soon as the first observer is added and stops once the last
  // observer is closed.
  AddObserver(AudioInputObserver observer);
};
// Observer through which the assistant receives audio input data.
interface AudioInputObserver {
  // Called when audio frames are available.
  // TODO(muyuanli): Use data pipe.
  OnAudioInputFramesAvailable(array<int32> buffer,
                              uint32 frame_count,
                              mojo_base.mojom.TimeTicks timestamp);

  // Called when the audio input stream has been closed.
  OnAudioInputClosed();
};
// Interface the assistant uses to ask the browser to perform device actions.
interface DeviceActions {
  // Turns WiFi on (|enabled| is true) or off (|enabled| is false).
  SetWifiEnabled(bool enabled);

  // Turns Bluetooth on (|enabled| is true) or off (|enabled| is false).
  SetBluetoothEnabled(bool enabled);
};
// The possible ways in which an Assistant interaction can complete.
enum AssistantInteractionResolution {
  // The interaction completed normally.
  kNormal,

  // The interaction completed because of barge in or cancellation.
  kInterruption,

  // The interaction completed because of an error.
  kError,

  // The interaction completed because of a mic timeout.
  kMicTimeout,

  // The interaction completed because of multi-device hotword loss.
  kMultiDeviceHotwordLoss,
};
// Represents a single Assistant suggestion.
struct AssistantSuggestion {
  // Text to display, e.g. "Cancel".
  string text;

  // Optional icon URL, e.g. "https://www.gstatic.com/icon.png".
  url.mojom.Url icon_url;

  // Optional action URL, e.g.
  // "https://www.google.com/search?query=action".
  url.mojom.Url action_url;
};
// Represents a single Assistant notification.
struct AssistantNotification {
  // Notification title.
  string title;

  // Notification body text.
  string message;

  // URL opened when the tap action is invoked on the notification.
  url.mojom.Url action_url;

  // Id that uniquely identifies the notification.
  string notification_id;

  // Tokens used to fetch the notification contents.
  string consistency_token;
  string opaque_token;

  // Key that can be used to group multiple notifications together.
  string grouping_key;

  // Obfuscated Gaia id of the intended recipient.
  // NOTE(review): the earlier wording read "intended recipient of the user";
  // presumably this is the user the notification is meant for -- confirm.
  string obfuscated_gaia_id;
};