audio_service.h 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. #ifndef AUDIO_SERVICE_H
  2. #define AUDIO_SERVICE_H
  #include <chrono>
  #include <condition_variable>
  #include <cstdint>
  #include <deque>
  #include <functional>
  #include <memory>
  #include <mutex>
  #include <string>
  #include <string_view>
  #include <vector>

  #include <freertos/FreeRTOS.h>
  #include <freertos/task.h>
  #include <freertos/event_groups.h>
  #include <esp_timer.h>
  #include <opus_encoder.h>
  #include <opus_decoder.h>
  #include <opus_resampler.h>

  #include "audio_codec.h"
  #include "audio_processor.h"
  #include "processors/audio_debugger.h"
  #include "wake_word.h"
  #include "protocol.h"
  /*
   * There are two types of audio data flow:
   * 1. (MIC) -> [Processors] -> {Encode Queue} -> [Opus Encoder] -> {Send Queue} -> (Server)
   * 2. (Server) -> {Decode Queue} -> [Opus Decoder] -> {Playback Queue} -> (Speaker)
   *
   * We use one task for MIC / Speaker / Processors, and one task for Opus Encoder / Opus Decoder.
   *
   * The Decode Queue and the Send Queue are the main queues, because Opus packets are much
   * smaller than PCM packets.
   */
  // ---- Opus framing -------------------------------------------------------
  // Duration of a single Opus frame, in milliseconds.
  #define OPUS_FRAME_DURATION_MS 60

  // ---- Queue capacity limits ----------------------------------------------
  // PCM-side queues (encode / playback) are capped at a couple of tasks.
  #define MAX_ENCODE_TASKS_IN_QUEUE 2
  #define MAX_PLAYBACK_TASKS_IN_QUEUE 2
  // Opus-side queues hold 2400 ms worth of frames (2400 / 60 = 40 packets).
  #define MAX_DECODE_PACKETS_IN_QUEUE (2400 / OPUS_FRAME_DURATION_MS)
  #define MAX_SEND_PACKETS_IN_QUEUE (2400 / OPUS_FRAME_DURATION_MS)
  // Upper bound for the timestamp queue (see AudioService::timestamp_queue_).
  #define MAX_TIMESTAMPS_IN_QUEUE 3

  // ---- Audio testing ------------------------------------------------------
  // Presumably the longest recording kept in audio-testing mode, in ms -- confirm in the .cpp.
  #define AUDIO_TESTING_MAX_DURATION_MS 10000

  // ---- Audio power management ---------------------------------------------
  // Presumably: idle time before the audio hardware is powered down, and how
  // often that condition is polled, both in ms -- confirm in the .cpp.
  #define AUDIO_POWER_TIMEOUT_MS 15000
  #define AUDIO_POWER_CHECK_INTERVAL_MS 1000

  // ---- Event-group bits (AS_EVENT_*) --------------------------------------
  // One bit per flag; AudioService tests these against
  // xEventGroupGetBits(event_group_).
  #define AS_EVENT_AUDIO_TESTING_RUNNING (1 << 0)
  #define AS_EVENT_WAKE_WORD_RUNNING (1 << 1)
  #define AS_EVENT_AUDIO_PROCESSOR_RUNNING (1 << 2)
  #define AS_EVENT_PLAYBACK_NOT_EMPTY (1 << 3)
  /**
   * Bundle of optional notification hooks that a client hands to
   * AudioService::SetCallbacks(). Each hook is a std::function and is empty
   * until the client assigns it. When each hook fires is decided by the
   * implementation file; the descriptions below follow the member names.
   */
  struct AudioServiceCallbacks {
      /// Presumably signalled when the send queue has a packet ready to pop.
      std::function<void()> on_send_queue_available;

      /// Receives a string when a wake word is detected (presumably the wake word text).
      std::function<void(const std::string&)> on_wake_word_detected;

      /// Receives a bool on a voice-activity change (presumably the new "speaking" state).
      std::function<void(bool)> on_vad_change;

      /// Presumably signalled when the audio-testing queue reaches its limit.
      std::function<void()> on_audio_testing_queue_full;
  };
  /**
   * Kind of work carried by an AudioTask. The enumerator names say which
   * operation is performed (encode / decode) and which queue receives the
   * result.
   */
  enum AudioTaskType {
      kAudioTaskTypeEncodeToSendQueue,
      kAudioTaskTypeEncodeToTestingQueue,
      kAudioTaskTypeDecodeToPlaybackQueue,
  };

  /**
   * One unit of PCM work passed through the encode / playback queues.
   *
   * Every scalar member has a default initializer so that a default-initialized
   * AudioTask (e.g. `AudioTask t;` or `new AudioTask`) never exposes an
   * indeterminate `type` or `timestamp`. The struct remains an aggregate, so
   * `AudioTask{type, pcm, timestamp}` keeps working.
   */
  struct AudioTask {
      /// What to do with `pcm`; defaults to the first enumerator (value 0).
      AudioTaskType type = kAudioTaskTypeEncodeToSendQueue;

      /// 16-bit PCM samples for this task.
      std::vector<int16_t> pcm;

      /// Timestamp associated with the samples; 0 when none was assigned.
      uint32_t timestamp = 0;
  };
  /**
   * Four event counters, one per pipeline stage named in the member, all
   * starting at zero. Where they are incremented is decided by the
   * implementation file.
   */
  struct DebugStatistics {
      uint32_t input_count{0};     ///< Counter for the input stage.
      uint32_t decode_count{0};    ///< Counter for the decode stage.
      uint32_t encode_count{0};    ///< Counter for the encode stage.
      uint32_t playback_count{0};  ///< Counter for the playback stage.
  };
  65. class AudioService {
  66. public:
  67. AudioService();
  68. ~AudioService();
  69. void Initialize(AudioCodec* codec);
  70. void Start();
  71. void Stop();
  72. void EncodeWakeWord();
  73. std::unique_ptr<AudioStreamPacket> PopWakeWordPacket();
  74. const std::string& GetLastWakeWord() const;
  75. bool IsVoiceDetected() const { return voice_detected_; }
  76. bool IsIdle();
  77. bool IsWakeWordRunning() const { return xEventGroupGetBits(event_group_) & AS_EVENT_WAKE_WORD_RUNNING; }
  78. bool IsAudioProcessorRunning() const { return xEventGroupGetBits(event_group_) & AS_EVENT_AUDIO_PROCESSOR_RUNNING; }
  79. void EnableWakeWordDetection(bool enable);
  80. void EnableVoiceProcessing(bool enable);
  81. void EnableAudioTesting(bool enable);
  82. void EnableDeviceAec(bool enable);
  83. void SetCallbacks(AudioServiceCallbacks& callbacks);
  84. bool PushPacketToDecodeQueue(std::unique_ptr<AudioStreamPacket> packet, bool wait = false);
  85. std::unique_ptr<AudioStreamPacket> PopPacketFromSendQueue();
  86. void PlaySound(const std::string_view& sound);
  87. bool ReadAudioData(std::vector<int16_t>& data, int sample_rate, int samples);
  88. void ResetDecoder();
  89. private:
  90. AudioCodec* codec_ = nullptr;
  91. AudioServiceCallbacks callbacks_;
  92. std::unique_ptr<AudioProcessor> audio_processor_;
  93. std::unique_ptr<WakeWord> wake_word_;
  94. std::unique_ptr<AudioDebugger> audio_debugger_;
  95. std::unique_ptr<OpusEncoderWrapper> opus_encoder_;
  96. std::unique_ptr<OpusDecoderWrapper> opus_decoder_;
  97. OpusResampler input_resampler_;
  98. OpusResampler reference_resampler_;
  99. OpusResampler output_resampler_;
  100. DebugStatistics debug_statistics_;
  101. EventGroupHandle_t event_group_;
  102. // Audio encode / decode
  103. TaskHandle_t audio_input_task_handle_ = nullptr;
  104. TaskHandle_t audio_output_task_handle_ = nullptr;
  105. TaskHandle_t opus_codec_task_handle_ = nullptr;
  106. std::mutex audio_queue_mutex_;
  107. std::condition_variable audio_queue_cv_;
  108. std::deque<std::unique_ptr<AudioStreamPacket>> audio_decode_queue_;
  109. std::deque<std::unique_ptr<AudioStreamPacket>> audio_send_queue_;
  110. std::deque<std::unique_ptr<AudioStreamPacket>> audio_testing_queue_;
  111. std::deque<std::unique_ptr<AudioTask>> audio_encode_queue_;
  112. std::deque<std::unique_ptr<AudioTask>> audio_playback_queue_;
  113. // For server AEC
  114. std::deque<uint32_t> timestamp_queue_;
  115. std::mutex timestamp_mutex_;
  116. bool wake_word_initialized_ = false;
  117. bool audio_processor_initialized_ = false;
  118. bool voice_detected_ = false;
  119. bool service_stopped_ = true;
  120. bool audio_input_need_warmup_ = false;
  121. esp_timer_handle_t audio_power_timer_ = nullptr;
  122. std::chrono::steady_clock::time_point last_input_time_;
  123. std::chrono::steady_clock::time_point last_output_time_;
  124. void AudioInputTask();
  125. void AudioOutputTask();
  126. void OpusCodecTask();
  127. void PushTaskToEncodeQueue(AudioTaskType type, std::vector<int16_t>&& pcm);
  128. void SetDecodeSampleRate(int sample_rate, int frame_duration);
  129. void CheckAndUpdateAudioPowerState();
  130. };
  131. #endif