#ifndef AUDIO_SERVICE_H #define AUDIO_SERVICE_H #include #include #include #include #include #include #include #include #include #include #include #include #include "audio_codec.h" #include "audio_processor.h" #include "processors/audio_debugger.h" #include "wake_word.h" #include "protocol.h" /* * There are two types of audio data flow: * 1. (MIC) -> [Processors] -> {Encode Queue} -> [Opus Encoder] -> {Send Queue} -> (Server) * 2. (Server) -> {Decode Queue} -> [Opus Decoder] -> {Playback Queue} -> (Speaker) * * We use one task for MIC / Speaker / Processors, and one task for Opus Encoder / Opus Decoder. * * Decode Queue and Send Queue are the main queues, because Opus packets are quite smaller than PCM packets. * */ #define OPUS_FRAME_DURATION_MS 60 #define MAX_ENCODE_TASKS_IN_QUEUE 2 #define MAX_PLAYBACK_TASKS_IN_QUEUE 2 #define MAX_DECODE_PACKETS_IN_QUEUE (2400 / OPUS_FRAME_DURATION_MS) #define MAX_SEND_PACKETS_IN_QUEUE (2400 / OPUS_FRAME_DURATION_MS) #define AUDIO_TESTING_MAX_DURATION_MS 10000 #define MAX_TIMESTAMPS_IN_QUEUE 3 #define AUDIO_POWER_TIMEOUT_MS 15000 #define AUDIO_POWER_CHECK_INTERVAL_MS 1000 #define AS_EVENT_AUDIO_TESTING_RUNNING (1 << 0) #define AS_EVENT_WAKE_WORD_RUNNING (1 << 1) #define AS_EVENT_AUDIO_PROCESSOR_RUNNING (1 << 2) #define AS_EVENT_PLAYBACK_NOT_EMPTY (1 << 3) struct AudioServiceCallbacks { std::function on_send_queue_available; std::function on_wake_word_detected; std::function on_vad_change; std::function on_audio_testing_queue_full; }; enum AudioTaskType { kAudioTaskTypeEncodeToSendQueue, kAudioTaskTypeEncodeToTestingQueue, kAudioTaskTypeDecodeToPlaybackQueue, }; struct AudioTask { AudioTaskType type; std::vector pcm; uint32_t timestamp; }; struct DebugStatistics { uint32_t input_count = 0; uint32_t decode_count = 0; uint32_t encode_count = 0; uint32_t playback_count = 0; }; class AudioService { public: AudioService(); ~AudioService(); void Initialize(AudioCodec* codec); void Start(); void Stop(); void EncodeWakeWord(); std::unique_ptr PopWakeWordPacket(); const std::string& GetLastWakeWord() const; bool IsVoiceDetected() const { return voice_detected_; } bool IsIdle(); bool IsWakeWordRunning() const { return xEventGroupGetBits(event_group_) & AS_EVENT_WAKE_WORD_RUNNING; } bool IsAudioProcessorRunning() const { return xEventGroupGetBits(event_group_) & AS_EVENT_AUDIO_PROCESSOR_RUNNING; } void EnableWakeWordDetection(bool enable); void EnableVoiceProcessing(bool enable); void EnableAudioTesting(bool enable); void EnableDeviceAec(bool enable); void SetCallbacks(AudioServiceCallbacks& callbacks); bool PushPacketToDecodeQueue(std::unique_ptr packet, bool wait = false); std::unique_ptr PopPacketFromSendQueue(); void PlaySound(const std::string_view& sound); bool ReadAudioData(std::vector& data, int sample_rate, int samples); void ResetDecoder(); private: AudioCodec* codec_ = nullptr; AudioServiceCallbacks callbacks_; std::unique_ptr audio_processor_; std::unique_ptr wake_word_; std::unique_ptr audio_debugger_; std::unique_ptr opus_encoder_; std::unique_ptr opus_decoder_; OpusResampler input_resampler_; OpusResampler reference_resampler_; OpusResampler output_resampler_; DebugStatistics debug_statistics_; EventGroupHandle_t event_group_; // Audio encode / decode TaskHandle_t audio_input_task_handle_ = nullptr; TaskHandle_t audio_output_task_handle_ = nullptr; TaskHandle_t opus_codec_task_handle_ = nullptr; std::mutex audio_queue_mutex_; std::condition_variable audio_queue_cv_; std::deque> audio_decode_queue_; std::deque> audio_send_queue_; std::deque> audio_testing_queue_; std::deque> audio_encode_queue_; std::deque> audio_playback_queue_; // For server AEC std::deque timestamp_queue_; std::mutex timestamp_mutex_; bool wake_word_initialized_ = false; bool audio_processor_initialized_ = false; bool voice_detected_ = false; bool service_stopped_ = true; bool audio_input_need_warmup_ = false; esp_timer_handle_t audio_power_timer_ = nullptr; std::chrono::steady_clock::time_point last_input_time_; std::chrono::steady_clock::time_point last_output_time_; void AudioInputTask(); void AudioOutputTask(); void OpusCodecTask(); void PushTaskToEncodeQueue(AudioTaskType type, std::vector&& pcm); void SetDecodeSampleRate(int sample_rate, int frame_duration); void CheckAndUpdateAudioPowerState(); }; #endif