application.cc 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781
  1. #include "application.h"
  2. #include "board.h"
  3. #include "display.h"
  4. #include "system_info.h"
  5. #include "audio_codec.h"
  6. #include "mqtt_protocol.h"
  7. #include "websocket_protocol.h"
  8. #include "font_awesome_symbols.h"
  9. #include "assets/lang_config.h"
  10. #include "mcp_server.h"
  11. #include <cstring>
  12. #include <esp_log.h>
  13. #include <cJSON.h>
  14. #include <driver/gpio.h>
  15. #include <arpa/inet.h>
  16. #define TAG "Application"
  // Printable device-state names. SetDeviceState() indexes this table with the
  // current DeviceState value when it logs "STATE: ...", so the order of the
  // entries must follow the order of the enum values.
  static constexpr const char* STATE_STRINGS[] = {
      "unknown",        // 0
      "starting",       // 1
      "configuring",    // 2
      "idle",           // 3
      "connecting",     // 4
      "listening",      // 5
      "speaking",       // 6
      "upgrading",      // 7
      "activating",     // 8
      "audio_testing",  // 9
      "fatal_error",    // 10
      "invalid_state",  // 11
  };
  31. Application::Application() {
  32. event_group_ = xEventGroupCreate();
  33. #if CONFIG_USE_DEVICE_AEC && CONFIG_USE_SERVER_AEC
  34. #error "CONFIG_USE_DEVICE_AEC and CONFIG_USE_SERVER_AEC cannot be enabled at the same time"
  35. #elif CONFIG_USE_DEVICE_AEC
  36. aec_mode_ = kAecOnDeviceSide;
  37. #elif CONFIG_USE_SERVER_AEC
  38. aec_mode_ = kAecOnServerSide;
  39. #else
  40. aec_mode_ = kAecOff;
  41. #endif
  42. esp_timer_create_args_t clock_timer_args = {
  43. .callback = [](void* arg) {
  44. Application* app = (Application*)arg;
  45. app->OnClockTimer();
  46. },
  47. .arg = this,
  48. .dispatch_method = ESP_TIMER_TASK,
  49. .name = "clock_timer",
  50. .skip_unhandled_events = true
  51. };
  52. esp_timer_create(&clock_timer_args, &clock_timer_handle_);
  53. }
  54. Application::~Application() {
  55. if (clock_timer_handle_ != nullptr) {
  56. esp_timer_stop(clock_timer_handle_);
  57. esp_timer_delete(clock_timer_handle_);
  58. }
  59. vEventGroupDelete(event_group_);
  60. }
  61. void Application::CheckNewVersion(Ota& ota) {
  62. const int MAX_RETRY = 10;
  63. int retry_count = 0;
  64. int retry_delay = 10; // 初始重试延迟为10秒
  65. auto& board = Board::GetInstance();
  66. while (true) {
  67. SetDeviceState(kDeviceStateActivating);
  68. auto display = board.GetDisplay();
  69. display->SetStatus(Lang::Strings::CHECKING_NEW_VERSION);
  70. if (!ota.CheckVersion()) {
  71. retry_count++;
  72. if (retry_count >= MAX_RETRY) {
  73. ESP_LOGE(TAG, "Too many retries, exit version check");
  74. return;
  75. }
  76. char buffer[128];
  77. snprintf(buffer, sizeof(buffer), Lang::Strings::CHECK_NEW_VERSION_FAILED, retry_delay, ota.GetCheckVersionUrl().c_str());
  78. Alert(Lang::Strings::ERROR, buffer, "sad", Lang::Sounds::P3_EXCLAMATION);
  79. ESP_LOGW(TAG, "Check new version failed, retry in %d seconds (%d/%d)", retry_delay, retry_count, MAX_RETRY);
  80. for (int i = 0; i < retry_delay; i++) {
  81. vTaskDelay(pdMS_TO_TICKS(1000));
  82. if (device_state_ == kDeviceStateIdle) {
  83. break;
  84. }
  85. }
  86. retry_delay *= 2; // 每次重试后延迟时间翻倍
  87. continue;
  88. }
  89. retry_count = 0;
  90. retry_delay = 10; // 重置重试延迟时间
  91. if (ota.HasNewVersion()) {
  92. Alert(Lang::Strings::OTA_UPGRADE, Lang::Strings::UPGRADING, "happy", Lang::Sounds::P3_UPGRADE);
  93. vTaskDelay(pdMS_TO_TICKS(3000));
  94. SetDeviceState(kDeviceStateUpgrading);
  95. display->SetIcon(FONT_AWESOME_DOWNLOAD);
  96. std::string message = std::string(Lang::Strings::NEW_VERSION) + ota.GetFirmwareVersion();
  97. display->SetChatMessage("system", message.c_str());
  98. board.SetPowerSaveMode(false);
  99. audio_service_.Stop();
  100. vTaskDelay(pdMS_TO_TICKS(1000));
  101. bool upgrade_success = ota.StartUpgrade([display](int progress, size_t speed) {
  102. char buffer[64];
  103. snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024);
  104. display->SetChatMessage("system", buffer);
  105. });
  106. if (!upgrade_success) {
  107. // Upgrade failed, restart audio service and continue running
  108. ESP_LOGE(TAG, "Firmware upgrade failed, restarting audio service and continuing operation...");
  109. audio_service_.Start(); // Restart audio service
  110. board.SetPowerSaveMode(true); // Restore power save mode
  111. Alert(Lang::Strings::ERROR, Lang::Strings::UPGRADE_FAILED, "sad", Lang::Sounds::P3_EXCLAMATION);
  112. vTaskDelay(pdMS_TO_TICKS(3000));
  113. // Continue to normal operation (don't break, just fall through)
  114. } else {
  115. // Upgrade success, reboot immediately
  116. ESP_LOGI(TAG, "Firmware upgrade successful, rebooting...");
  117. display->SetChatMessage("system", "Upgrade successful, rebooting...");
  118. vTaskDelay(pdMS_TO_TICKS(1000)); // Brief pause to show message
  119. Reboot();
  120. return; // This line will never be reached after reboot
  121. }
  122. }
  123. // No new version, mark the current version as valid
  124. ota.MarkCurrentVersionValid();
  125. if (!ota.HasActivationCode() && !ota.HasActivationChallenge()) {
  126. xEventGroupSetBits(event_group_, MAIN_EVENT_CHECK_NEW_VERSION_DONE);
  127. // Exit the loop if done checking new version
  128. break;
  129. }
  130. display->SetStatus(Lang::Strings::ACTIVATION);
  131. // Activation code is shown to the user and waiting for the user to input
  132. if (ota.HasActivationCode()) {
  133. ShowActivationCode(ota.GetActivationCode(), ota.GetActivationMessage());
  134. }
  135. // This will block the loop until the activation is done or timeout
  136. for (int i = 0; i < 10; ++i) {
  137. ESP_LOGI(TAG, "Activating... %d/%d", i + 1, 10);
  138. esp_err_t err = ota.Activate();
  139. if (err == ESP_OK) {
  140. xEventGroupSetBits(event_group_, MAIN_EVENT_CHECK_NEW_VERSION_DONE);
  141. break;
  142. } else if (err == ESP_ERR_TIMEOUT) {
  143. vTaskDelay(pdMS_TO_TICKS(3000));
  144. } else {
  145. vTaskDelay(pdMS_TO_TICKS(10000));
  146. }
  147. if (device_state_ == kDeviceStateIdle) {
  148. break;
  149. }
  150. }
  151. }
  152. }
  153. void Application::ShowActivationCode(const std::string& code, const std::string& message) {
  154. #if 0
  155. struct digit_sound {
  156. char digit;
  157. const std::string_view& sound;
  158. };
  159. static const std::array<digit_sound, 10> digit_sounds{{
  160. digit_sound{'0', Lang::Sounds::P3_0},
  161. digit_sound{'1', Lang::Sounds::P3_1},
  162. digit_sound{'2', Lang::Sounds::P3_2},
  163. digit_sound{'3', Lang::Sounds::P3_3},
  164. digit_sound{'4', Lang::Sounds::P3_4},
  165. digit_sound{'5', Lang::Sounds::P3_5},
  166. digit_sound{'6', Lang::Sounds::P3_6},
  167. digit_sound{'7', Lang::Sounds::P3_7},
  168. digit_sound{'8', Lang::Sounds::P3_8},
  169. digit_sound{'9', Lang::Sounds::P3_9}
  170. }};
  171. #endif
  172. // This sentence uses 9KB of SRAM, so we need to wait for it to finish
  173. Alert(Lang::Strings::ACTIVATION, message.c_str(), "happy", Lang::Sounds::P3_ACTIVATION);
  174. #if 0
  175. for (const auto& digit : code) {
  176. auto it = std::find_if(digit_sounds.begin(), digit_sounds.end(),
  177. [digit](const digit_sound& ds) { return ds.digit == digit; });
  178. if (it != digit_sounds.end()) {
  179. audio_service_.PlaySound(it->sound);
  180. }
  181. }
  182. #endif
  183. }
  184. void Application::Alert(const char* status, const char* message, const char* emotion, const std::string_view& sound) {
  185. ESP_LOGW(TAG, "Alert %s: %s [%s]", status, message, emotion);
  186. auto display = Board::GetInstance().GetDisplay();
  187. display->SetStatus(status);
  188. display->SetEmotion(emotion);
  189. display->SetChatMessage("system", message);
  190. if (!sound.empty()) {
  191. audio_service_.PlaySound(sound);
  192. }
  193. }
  194. void Application::DismissAlert() {
  195. if (device_state_ == kDeviceStateIdle) {
  196. auto display = Board::GetInstance().GetDisplay();
  197. display->SetStatus(Lang::Strings::STANDBY);
  198. display->SetEmotion("neutral");
  199. display->SetChatMessage("system", "");
  200. }
  201. }
  202. void Application::ToggleChatState() {
  203. if (device_state_ == kDeviceStateActivating) {
  204. SetDeviceState(kDeviceStateIdle);
  205. return;
  206. } else if (device_state_ == kDeviceStateWifiConfiguring) {
  207. audio_service_.EnableAudioTesting(true);
  208. SetDeviceState(kDeviceStateAudioTesting);
  209. return;
  210. } else if (device_state_ == kDeviceStateAudioTesting) {
  211. audio_service_.EnableAudioTesting(false);
  212. SetDeviceState(kDeviceStateWifiConfiguring);
  213. return;
  214. }
  215. if (!protocol_) {
  216. ESP_LOGE(TAG, "Protocol not initialized");
  217. return;
  218. }
  219. if (device_state_ == kDeviceStateIdle) {
  220. Schedule([this]() {
  221. if (!protocol_->IsAudioChannelOpened()) {
  222. SetDeviceState(kDeviceStateConnecting);
  223. if (!protocol_->OpenAudioChannel()) {
  224. return;
  225. }
  226. }
  227. SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
  228. });
  229. } else if (device_state_ == kDeviceStateSpeaking) {
  230. Schedule([this]() {
  231. AbortSpeaking(kAbortReasonNone);
  232. });
  233. } else if (device_state_ == kDeviceStateListening) {
  234. Schedule([this]() {
  235. protocol_->CloseAudioChannel();
  236. });
  237. }
  238. }
  239. void Application::StartListening() {
  240. if (device_state_ == kDeviceStateActivating) {
  241. SetDeviceState(kDeviceStateIdle);
  242. return;
  243. } else if (device_state_ == kDeviceStateWifiConfiguring) {
  244. audio_service_.EnableAudioTesting(true);
  245. SetDeviceState(kDeviceStateAudioTesting);
  246. return;
  247. }
  248. if (!protocol_) {
  249. ESP_LOGE(TAG, "Protocol not initialized");
  250. return;
  251. }
  252. if (device_state_ == kDeviceStateIdle) {
  253. Schedule([this]() {
  254. if (!protocol_->IsAudioChannelOpened()) {
  255. SetDeviceState(kDeviceStateConnecting);
  256. if (!protocol_->OpenAudioChannel()) {
  257. return;
  258. }
  259. }
  260. SetListeningMode(kListeningModeManualStop);
  261. });
  262. } else if (device_state_ == kDeviceStateSpeaking) {
  263. Schedule([this]() {
  264. AbortSpeaking(kAbortReasonNone);
  265. SetListeningMode(kListeningModeManualStop);
  266. });
  267. }
  268. }
  269. void Application::StopListening() {
  270. if (device_state_ == kDeviceStateAudioTesting) {
  271. audio_service_.EnableAudioTesting(false);
  272. SetDeviceState(kDeviceStateWifiConfiguring);
  273. return;
  274. }
  275. const std::array<int, 3> valid_states = {
  276. kDeviceStateListening,
  277. kDeviceStateSpeaking,
  278. kDeviceStateIdle,
  279. };
  280. // If not valid, do nothing
  281. if (std::find(valid_states.begin(), valid_states.end(), device_state_) == valid_states.end()) {
  282. return;
  283. }
  284. Schedule([this]() {
  285. if (device_state_ == kDeviceStateListening) {
  286. protocol_->SendStopListening();
  287. SetDeviceState(kDeviceStateIdle);
  288. }
  289. });
  290. }
  291. void Application::Start() {
  292. auto& board = Board::GetInstance();
  293. SetDeviceState(kDeviceStateStarting);
  294. /* Setup the display */
  295. auto display = board.GetDisplay();
  296. /* Setup the audio service */
  297. auto codec = board.GetAudioCodec();
  298. audio_service_.Initialize(codec);
  299. audio_service_.Start();
  300. AudioServiceCallbacks callbacks;
  301. callbacks.on_send_queue_available = [this]() {
  302. xEventGroupSetBits(event_group_, MAIN_EVENT_SEND_AUDIO);
  303. };
  304. callbacks.on_wake_word_detected = [this](const std::string& wake_word) {
  305. xEventGroupSetBits(event_group_, MAIN_EVENT_WAKE_WORD_DETECTED);
  306. };
  307. callbacks.on_vad_change = [this](bool speaking) {
  308. xEventGroupSetBits(event_group_, MAIN_EVENT_VAD_CHANGE);
  309. };
  310. audio_service_.SetCallbacks(callbacks);
  311. /* Start the clock timer to update the status bar */
  312. esp_timer_start_periodic(clock_timer_handle_, 1000000);
  313. /* Wait for the network to be ready */
  314. board.StartNetwork();
  315. // Update the status bar immediately to show the network state
  316. display->UpdateStatusBar(true);
  317. // Check for new firmware version or get the MQTT broker address
  318. Ota ota;
  319. CheckNewVersion(ota);
  320. // Initialize the protocol
  321. display->SetStatus(Lang::Strings::LOADING_PROTOCOL);
  322. // Add MCP common tools before initializing the protocol
  323. McpServer::GetInstance().AddCommonTools();
  324. if (ota.HasMqttConfig()) {
  325. protocol_ = std::make_unique<MqttProtocol>();
  326. } else if (ota.HasWebsocketConfig()) {
  327. protocol_ = std::make_unique<WebsocketProtocol>();
  328. } else {
  329. ESP_LOGW(TAG, "No protocol specified in the OTA config, using MQTT");
  330. protocol_ = std::make_unique<MqttProtocol>();
  331. }
  332. protocol_->OnNetworkError([this](const std::string& message) {
  333. last_error_message_ = message;
  334. xEventGroupSetBits(event_group_, MAIN_EVENT_ERROR);
  335. });
  336. protocol_->OnIncomingAudio([this](std::unique_ptr<AudioStreamPacket> packet) {
  337. if (device_state_ == kDeviceStateSpeaking) {
  338. audio_service_.PushPacketToDecodeQueue(std::move(packet));
  339. }
  340. });
  341. protocol_->OnAudioChannelOpened([this, codec, &board]() {
  342. board.SetPowerSaveMode(false);
  343. if (protocol_->server_sample_rate() != codec->output_sample_rate()) {
  344. ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion",
  345. protocol_->server_sample_rate(), codec->output_sample_rate());
  346. }
  347. });
  348. protocol_->OnAudioChannelClosed([this, &board]() {
  349. board.SetPowerSaveMode(true);
  350. Schedule([this]() {
  351. auto display = Board::GetInstance().GetDisplay();
  352. display->SetChatMessage("system", "");
  353. SetDeviceState(kDeviceStateIdle);
  354. });
  355. });
  356. protocol_->OnIncomingJson([this, display](const cJSON* root) {
  357. // Parse JSON data
  358. auto type = cJSON_GetObjectItem(root, "type");
  359. if (strcmp(type->valuestring, "tts") == 0) {
  360. auto state = cJSON_GetObjectItem(root, "state");
  361. if (strcmp(state->valuestring, "start") == 0) {
  362. Schedule([this]() {
  363. aborted_ = false;
  364. if (device_state_ == kDeviceStateIdle || device_state_ == kDeviceStateListening) {
  365. SetDeviceState(kDeviceStateSpeaking);
  366. }
  367. });
  368. } else if (strcmp(state->valuestring, "stop") == 0) {
  369. Schedule([this]() {
  370. if (device_state_ == kDeviceStateSpeaking) {
  371. if (listening_mode_ == kListeningModeManualStop) {
  372. SetDeviceState(kDeviceStateIdle);
  373. } else {
  374. SetDeviceState(kDeviceStateListening);
  375. }
  376. }
  377. });
  378. } else if (strcmp(state->valuestring, "sentence_start") == 0) {
  379. auto text = cJSON_GetObjectItem(root, "text");
  380. if (cJSON_IsString(text)) {
  381. ESP_LOGI(TAG, "<< %s", text->valuestring);
  382. Schedule([this, display, message = std::string(text->valuestring)]() {
  383. display->SetChatMessage("assistant", message.c_str());
  384. });
  385. }
  386. }
  387. } else if (strcmp(type->valuestring, "stt") == 0) {
  388. auto text = cJSON_GetObjectItem(root, "text");
  389. if (cJSON_IsString(text)) {
  390. ESP_LOGI(TAG, ">> %s", text->valuestring);
  391. Schedule([this, display, message = std::string(text->valuestring)]() {
  392. display->SetChatMessage("user", message.c_str());
  393. });
  394. }
  395. } else if (strcmp(type->valuestring, "llm") == 0) {
  396. auto emotion = cJSON_GetObjectItem(root, "emotion");
  397. if (cJSON_IsString(emotion)) {
  398. Schedule([this, display, emotion_str = std::string(emotion->valuestring)]() {
  399. display->SetEmotion(emotion_str.c_str());
  400. });
  401. }
  402. } else if (strcmp(type->valuestring, "mcp") == 0) {
  403. auto payload = cJSON_GetObjectItem(root, "payload");
  404. if (cJSON_IsObject(payload)) {
  405. McpServer::GetInstance().ParseMessage(payload);
  406. }
  407. } else if (strcmp(type->valuestring, "system") == 0) {
  408. auto command = cJSON_GetObjectItem(root, "command");
  409. if (cJSON_IsString(command)) {
  410. ESP_LOGI(TAG, "System command: %s", command->valuestring);
  411. if (strcmp(command->valuestring, "reboot") == 0) {
  412. // Do a reboot if user requests a OTA update
  413. Schedule([this]() {
  414. Reboot();
  415. });
  416. } else {
  417. ESP_LOGW(TAG, "Unknown system command: %s", command->valuestring);
  418. }
  419. }
  420. } else if (strcmp(type->valuestring, "alert") == 0) {
  421. auto status = cJSON_GetObjectItem(root, "status");
  422. auto message = cJSON_GetObjectItem(root, "message");
  423. auto emotion = cJSON_GetObjectItem(root, "emotion");
  424. if (cJSON_IsString(status) && cJSON_IsString(message) && cJSON_IsString(emotion)) {
  425. Alert(status->valuestring, message->valuestring, emotion->valuestring, Lang::Sounds::P3_VIBRATION);
  426. } else {
  427. ESP_LOGW(TAG, "Alert command requires status, message and emotion");
  428. }
  429. #if CONFIG_RECEIVE_CUSTOM_MESSAGE
  430. } else if (strcmp(type->valuestring, "custom") == 0) {
  431. auto payload = cJSON_GetObjectItem(root, "payload");
  432. ESP_LOGI(TAG, "Received custom message: %s", cJSON_PrintUnformatted(root));
  433. if (cJSON_IsObject(payload)) {
  434. Schedule([this, display, payload_str = std::string(cJSON_PrintUnformatted(payload))]() {
  435. display->SetChatMessage("system", payload_str.c_str());
  436. });
  437. } else {
  438. ESP_LOGW(TAG, "Invalid custom message format: missing payload");
  439. }
  440. #endif
  441. } else {
  442. ESP_LOGW(TAG, "Unknown message type: %s", type->valuestring);
  443. }
  444. });
  445. bool protocol_started = protocol_->Start();
  446. SetDeviceState(kDeviceStateIdle);
  447. has_server_time_ = ota.HasServerTime();
  448. if (protocol_started) {
  449. std::string message = std::string(Lang::Strings::VERSION) + ota.GetCurrentVersion();
  450. display->ShowNotification(message.c_str());
  451. display->SetChatMessage("system", "");
  452. // Play the success sound to indicate the device is ready
  453. audio_service_.PlaySound(Lang::Sounds::P3_SUCCESS);
  454. }
  455. // Print heap stats
  456. SystemInfo::PrintHeapStats();
  457. // Enter the main event loop
  458. MainEventLoop();
  459. }
  460. void Application::OnClockTimer() {
  461. clock_ticks_++;
  462. auto display = Board::GetInstance().GetDisplay();
  463. display->UpdateStatusBar();
  464. // Print the debug info every 10 seconds
  465. if (clock_ticks_ % 10 == 0) {
  466. // SystemInfo::PrintTaskCpuUsage(pdMS_TO_TICKS(1000));
  467. // SystemInfo::PrintTaskList();
  468. SystemInfo::PrintHeapStats();
  469. if (Board::GetInstance().internetConnet==1 && clock_ticks_%50==0)
  470. {
  471. Board::GetInstance().postAlive();
  472. }
  473. }
  474. }
  475. // Add a async task to MainLoop
  476. void Application::Schedule(std::function<void()> callback) {
  477. {
  478. std::lock_guard<std::mutex> lock(mutex_);
  479. main_tasks_.push_back(std::move(callback));
  480. }
  481. xEventGroupSetBits(event_group_, MAIN_EVENT_SCHEDULE);
  482. }
  483. // The Main Event Loop controls the chat state and websocket connection
  484. // If other tasks need to access the websocket or chat state,
  485. // they should use Schedule to call this function
  486. void Application::MainEventLoop() {
  487. // Raise the priority of the main event loop to avoid being interrupted by background tasks (which has priority 2)
  488. vTaskPrioritySet(NULL, 3);
  489. while (true) {
  490. auto bits = xEventGroupWaitBits(event_group_, MAIN_EVENT_SCHEDULE |
  491. MAIN_EVENT_SEND_AUDIO |
  492. MAIN_EVENT_WAKE_WORD_DETECTED |
  493. MAIN_EVENT_VAD_CHANGE |
  494. MAIN_EVENT_ERROR, pdTRUE, pdFALSE, portMAX_DELAY);
  495. if (bits & MAIN_EVENT_ERROR) {
  496. SetDeviceState(kDeviceStateIdle);
  497. Alert(Lang::Strings::ERROR, last_error_message_.c_str(), "sad", Lang::Sounds::P3_EXCLAMATION);
  498. }
  499. if (bits & MAIN_EVENT_SEND_AUDIO) {
  500. while (auto packet = audio_service_.PopPacketFromSendQueue()) {
  501. if (!protocol_->SendAudio(std::move(packet))) {
  502. break;
  503. }
  504. }
  505. }
  506. if (bits & MAIN_EVENT_WAKE_WORD_DETECTED) {
  507. OnWakeWordDetected();
  508. }
  509. if (bits & MAIN_EVENT_VAD_CHANGE) {
  510. if (device_state_ == kDeviceStateListening) {
  511. auto led = Board::GetInstance().GetLed();
  512. led->OnStateChanged();
  513. }
  514. }
  515. if (bits & MAIN_EVENT_SCHEDULE) {
  516. std::unique_lock<std::mutex> lock(mutex_);
  517. auto tasks = std::move(main_tasks_);
  518. lock.unlock();
  519. for (auto& task : tasks) {
  520. task();
  521. }
  522. }
  523. }
  524. }
  525. void Application::OnWakeWordDetected() {
  526. if (!protocol_) {
  527. return;
  528. }
  529. if (device_state_ == kDeviceStateIdle) {
  530. audio_service_.EncodeWakeWord();
  531. if (!protocol_->IsAudioChannelOpened()) {
  532. SetDeviceState(kDeviceStateConnecting);
  533. if (!protocol_->OpenAudioChannel()) {
  534. audio_service_.EnableWakeWordDetection(true);
  535. return;
  536. }
  537. }
  538. auto wake_word = audio_service_.GetLastWakeWord();
  539. ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
  540. #if CONFIG_USE_AFE_WAKE_WORD || CONFIG_USE_CUSTOM_WAKE_WORD
  541. // Encode and send the wake word data to the server
  542. while (auto packet = audio_service_.PopWakeWordPacket()) {
  543. protocol_->SendAudio(std::move(packet));
  544. }
  545. // Set the chat state to wake word detected
  546. protocol_->SendWakeWordDetected(wake_word);
  547. SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
  548. #else
  549. SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
  550. // Play the pop up sound to indicate the wake word is detected
  551. audio_service_.PlaySound(Lang::Sounds::P3_POPUP);
  552. #endif
  553. } else if (device_state_ == kDeviceStateSpeaking) {
  554. AbortSpeaking(kAbortReasonWakeWordDetected);
  555. } else if (device_state_ == kDeviceStateActivating) {
  556. SetDeviceState(kDeviceStateIdle);
  557. }
  558. }
  559. void Application::AbortSpeaking(AbortReason reason) {
  560. ESP_LOGI(TAG, "Abort speaking");
  561. aborted_ = true;
  562. protocol_->SendAbortSpeaking(reason);
  563. }
  564. void Application::SetListeningMode(ListeningMode mode) {
  565. listening_mode_ = mode;
  566. SetDeviceState(kDeviceStateListening);
  567. }
  568. void Application::SetDeviceState(DeviceState state) {
  569. if (device_state_ == state) {
  570. return;
  571. }
  572. clock_ticks_ = 0;
  573. auto previous_state = device_state_;
  574. device_state_ = state;
  575. ESP_LOGI(TAG, "STATE: %s", STATE_STRINGS[device_state_]);
  576. // Send the state change event
  577. DeviceStateEventManager::GetInstance().PostStateChangeEvent(previous_state, state);
  578. auto& board = Board::GetInstance();
  579. auto display = board.GetDisplay();
  580. auto led = board.GetLed();
  581. led->OnStateChanged();
  582. switch (state) {
  583. case kDeviceStateUnknown:
  584. case kDeviceStateIdle:
  585. display->SetStatus(Lang::Strings::STANDBY);
  586. display->SetEmotion("neutral");
  587. audio_service_.EnableVoiceProcessing(false);
  588. audio_service_.EnableWakeWordDetection(true);
  589. break;
  590. case kDeviceStateConnecting:
  591. display->SetStatus(Lang::Strings::CONNECTING);
  592. display->SetEmotion("neutral");
  593. display->SetChatMessage("system", "");
  594. break;
  595. case kDeviceStateListening:
  596. display->SetStatus(Lang::Strings::LISTENING);
  597. display->SetEmotion("neutral");
  598. // Make sure the audio processor is running
  599. if (!audio_service_.IsAudioProcessorRunning()) {
  600. // Send the start listening command
  601. protocol_->SendStartListening(listening_mode_);
  602. audio_service_.EnableVoiceProcessing(true);
  603. audio_service_.EnableWakeWordDetection(false);
  604. }
  605. break;
  606. case kDeviceStateSpeaking:
  607. display->SetStatus(Lang::Strings::SPEAKING);
  608. if (listening_mode_ != kListeningModeRealtime) {
  609. audio_service_.EnableVoiceProcessing(false);
  610. // Only AFE wake word can be detected in speaking mode
  611. #if CONFIG_USE_AFE_WAKE_WORD
  612. audio_service_.EnableWakeWordDetection(true);
  613. #else
  614. audio_service_.EnableWakeWordDetection(false);
  615. #endif
  616. }
  617. audio_service_.ResetDecoder();
  618. break;
  619. default:
  620. // Do nothing
  621. break;
  622. }
  623. }
  624. void Application::Reboot() {
  625. ESP_LOGI(TAG, "Rebooting...");
  626. esp_restart();
  627. }
  628. void Application::WakeWordInvoke(const std::string& wake_word) {
  629. if (device_state_ == kDeviceStateIdle) {
  630. ToggleChatState();
  631. Schedule([this, wake_word]() {
  632. if (protocol_) {
  633. protocol_->SendWakeWordDetected(wake_word);
  634. }
  635. });
  636. } else if (device_state_ == kDeviceStateSpeaking) {
  637. Schedule([this]() {
  638. AbortSpeaking(kAbortReasonNone);
  639. });
  640. } else if (device_state_ == kDeviceStateListening) {
  641. Schedule([this]() {
  642. if (protocol_) {
  643. protocol_->CloseAudioChannel();
  644. }
  645. });
  646. }
  647. }
  648. bool Application::CanEnterSleepMode() {
  649. if (device_state_ != kDeviceStateIdle) {
  650. return false;
  651. }
  652. if (protocol_ && protocol_->IsAudioChannelOpened()) {
  653. return false;
  654. }
  655. if (!audio_service_.IsIdle()) {
  656. return false;
  657. }
  658. // Now it is safe to enter sleep mode
  659. return true;
  660. }
  661. void Application::SendMcpMessage(const std::string& payload) {
  662. Schedule([this, payload]() {
  663. if (protocol_) {
  664. protocol_->SendMcpMessage(payload);
  665. }
  666. });
  667. }
  668. void Application::SetAecMode(AecMode mode) {
  669. aec_mode_ = mode;
  670. Schedule([this]() {
  671. auto& board = Board::GetInstance();
  672. auto display = board.GetDisplay();
  673. switch (aec_mode_) {
  674. case kAecOff:
  675. audio_service_.EnableDeviceAec(false);
  676. display->ShowNotification(Lang::Strings::RTC_MODE_OFF);
  677. break;
  678. case kAecOnServerSide:
  679. audio_service_.EnableDeviceAec(false);
  680. display->ShowNotification(Lang::Strings::RTC_MODE_ON);
  681. break;
  682. case kAecOnDeviceSide:
  683. audio_service_.EnableDeviceAec(true);
  684. display->ShowNotification(Lang::Strings::RTC_MODE_ON);
  685. break;
  686. }
  687. // If the AEC mode is changed, close the audio channel
  688. if (protocol_ && protocol_->IsAudioChannelOpened()) {
  689. protocol_->CloseAudioChannel();
  690. }
  691. });
  692. }
  693. void Application::PlaySound(const std::string_view& sound) {
  694. audio_service_.PlaySound(sound);
  695. }