mcp_server.cc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. /*
  2. * MCP Server Implementation
  3. * Reference: https://modelcontextprotocol.io/specification/2024-11-05
  4. */
  5. #include "mcp_server.h"
  6. #include <esp_log.h>
  7. #include <esp_app_desc.h>
  8. #include <algorithm>
  9. #include <cstring>
  10. #include <esp_pthread.h>
  11. #include "application.h"
  12. #include "display.h"
  13. #include "board.h"
  14. #define TAG "MCP"
  15. #define DEFAULT_TOOLCALL_STACK_SIZE 6144
  16. McpServer::McpServer() {
  17. }
  18. McpServer::~McpServer() {
  19. for (auto tool : tools_) {
  20. delete tool;
  21. }
  22. tools_.clear();
  23. }
  24. void McpServer::AddCommonTools() {
  25. // To speed up the response time, we add the common tools to the beginning of
  26. // the tools list to utilize the prompt cache.
  27. // Backup the original tools list and restore it after adding the common tools.
  28. auto original_tools = std::move(tools_);
  29. auto& board = Board::GetInstance();
  30. AddTool("self.get_device_status",
  31. "Provides the real-time information of the device, including the current status of the audio speaker, screen, battery, network, etc.\n"
  32. "Use this tool for: \n"
  33. "1. Answering questions about current condition (e.g. what is the current volume of the audio speaker?)\n"
  34. "2. As the first step to control the device (e.g. turn up / down the volume of the audio speaker, etc.)",
  35. PropertyList(),
  36. [&board](const PropertyList& properties) -> ReturnValue {
  37. return board.GetDeviceStatusJson();
  38. });
  39. AddTool("self.audio_speaker.set_volume",
  40. "Set the volume of the audio speaker. If the current volume is unknown, you must call `self.get_device_status` tool first and then call this tool.",
  41. PropertyList({
  42. Property("volume", kPropertyTypeInteger, 0, 100)
  43. }),
  44. [&board](const PropertyList& properties) -> ReturnValue {
  45. auto codec = board.GetAudioCodec();
  46. codec->SetOutputVolume(properties["volume"].value<int>());
  47. return true;
  48. });
  49. auto backlight = board.GetBacklight();
  50. if (backlight) {
  51. AddTool("self.screen.set_brightness",
  52. "Set the brightness of the screen.",
  53. PropertyList({
  54. Property("brightness", kPropertyTypeInteger, 0, 100)
  55. }),
  56. [backlight](const PropertyList& properties) -> ReturnValue {
  57. uint8_t brightness = static_cast<uint8_t>(properties["brightness"].value<int>());
  58. backlight->SetBrightness(brightness, true);
  59. return true;
  60. });
  61. }
  62. auto display = board.GetDisplay();
  63. if (display && !display->GetTheme().empty()) {
  64. AddTool("self.screen.set_theme",
  65. "Set the theme of the screen. The theme can be `light` or `dark`.",
  66. PropertyList({
  67. Property("theme", kPropertyTypeString)
  68. }),
  69. [display](const PropertyList& properties) -> ReturnValue {
  70. display->SetTheme(properties["theme"].value<std::string>().c_str());
  71. return true;
  72. });
  73. }
  74. auto camera = board.GetCamera();
  75. if (camera) {
  76. AddTool("self.camera.take_photo",
  77. "Take a photo and explain it. Use this tool after the user asks you to see something.\n"
  78. "Args:\n"
  79. " `question`: The question that you want to ask about the photo.\n"
  80. "Return:\n"
  81. " A JSON object that provides the photo information.",
  82. PropertyList({
  83. Property("question", kPropertyTypeString)
  84. }),
  85. [camera](const PropertyList& properties) -> ReturnValue {
  86. if (!camera->Capture()) {
  87. return "{\"success\": false, \"message\": \"Failed to capture photo\"}";
  88. }
  89. auto question = properties["question"].value<std::string>();
  90. return camera->Explain(question);
  91. });
  92. }
  93. // Restore the original tools list to the end of the tools list
  94. tools_.insert(tools_.end(), original_tools.begin(), original_tools.end());
  95. }
  96. void McpServer::AddTool(McpTool* tool) {
  97. // Prevent adding duplicate tools
  98. if (std::find_if(tools_.begin(), tools_.end(), [tool](const McpTool* t) { return t->name() == tool->name(); }) != tools_.end()) {
  99. ESP_LOGW(TAG, "Tool %s already added", tool->name().c_str());
  100. return;
  101. }
  102. ESP_LOGI(TAG, "Add tool: %s", tool->name().c_str());
  103. tools_.push_back(tool);
  104. }
  105. void McpServer::AddTool(const std::string& name, const std::string& description, const PropertyList& properties, std::function<ReturnValue(const PropertyList&)> callback) {
  106. AddTool(new McpTool(name, description, properties, callback));
  107. }
  108. void McpServer::ParseMessage(const std::string& message) {
  109. cJSON* json = cJSON_Parse(message.c_str());
  110. if (json == nullptr) {
  111. ESP_LOGE(TAG, "Failed to parse MCP message: %s", message.c_str());
  112. return;
  113. }
  114. ParseMessage(json);
  115. cJSON_Delete(json);
  116. }
  117. void McpServer::ParseCapabilities(const cJSON* capabilities) {
  118. auto vision = cJSON_GetObjectItem(capabilities, "vision");
  119. if (cJSON_IsObject(vision)) {
  120. auto url = cJSON_GetObjectItem(vision, "url");
  121. auto token = cJSON_GetObjectItem(vision, "token");
  122. if (cJSON_IsString(url)) {
  123. auto camera = Board::GetInstance().GetCamera();
  124. if (camera) {
  125. std::string url_str = std::string(url->valuestring);
  126. std::string token_str;
  127. if (cJSON_IsString(token)) {
  128. token_str = std::string(token->valuestring);
  129. }
  130. camera->SetExplainUrl(url_str, token_str);
  131. }
  132. }
  133. }
  134. }
  135. void McpServer::ParseMessage(const cJSON* json) {
  136. // Check JSONRPC version
  137. auto version = cJSON_GetObjectItem(json, "jsonrpc");
  138. if (version == nullptr || !cJSON_IsString(version) || strcmp(version->valuestring, "2.0") != 0) {
  139. ESP_LOGE(TAG, "Invalid JSONRPC version: %s", version ? version->valuestring : "null");
  140. return;
  141. }
  142. // Check method
  143. auto method = cJSON_GetObjectItem(json, "method");
  144. if (method == nullptr || !cJSON_IsString(method)) {
  145. ESP_LOGE(TAG, "Missing method");
  146. return;
  147. }
  148. auto method_str = std::string(method->valuestring);
  149. if (method_str.find("notifications") == 0) {
  150. return;
  151. }
  152. // Check params
  153. auto params = cJSON_GetObjectItem(json, "params");
  154. if (params != nullptr && !cJSON_IsObject(params)) {
  155. ESP_LOGE(TAG, "Invalid params for method: %s", method_str.c_str());
  156. return;
  157. }
  158. auto id = cJSON_GetObjectItem(json, "id");
  159. if (id == nullptr || !cJSON_IsNumber(id)) {
  160. ESP_LOGE(TAG, "Invalid id for method: %s", method_str.c_str());
  161. return;
  162. }
  163. auto id_int = id->valueint;
  164. if (method_str == "initialize") {
  165. if (cJSON_IsObject(params)) {
  166. auto capabilities = cJSON_GetObjectItem(params, "capabilities");
  167. if (cJSON_IsObject(capabilities)) {
  168. ParseCapabilities(capabilities);
  169. }
  170. }
  171. auto app_desc = esp_app_get_description();
  172. std::string message = "{\"protocolVersion\":\"2024-11-05\",\"capabilities\":{\"tools\":{}},\"serverInfo\":{\"name\":\"" BOARD_NAME "\",\"version\":\"";
  173. message += app_desc->version;
  174. message += "\"}}";
  175. ReplyResult(id_int, message);
  176. } else if (method_str == "tools/list") {
  177. std::string cursor_str = "";
  178. if (params != nullptr) {
  179. auto cursor = cJSON_GetObjectItem(params, "cursor");
  180. if (cJSON_IsString(cursor)) {
  181. cursor_str = std::string(cursor->valuestring);
  182. }
  183. }
  184. GetToolsList(id_int, cursor_str);
  185. } else if (method_str == "tools/call") {
  186. if (!cJSON_IsObject(params)) {
  187. ESP_LOGE(TAG, "tools/call: Missing params");
  188. ReplyError(id_int, "Missing params");
  189. return;
  190. }
  191. auto tool_name = cJSON_GetObjectItem(params, "name");
  192. if (!cJSON_IsString(tool_name)) {
  193. ESP_LOGE(TAG, "tools/call: Missing name");
  194. ReplyError(id_int, "Missing name");
  195. return;
  196. }
  197. auto tool_arguments = cJSON_GetObjectItem(params, "arguments");
  198. if (tool_arguments != nullptr && !cJSON_IsObject(tool_arguments)) {
  199. ESP_LOGE(TAG, "tools/call: Invalid arguments");
  200. ReplyError(id_int, "Invalid arguments");
  201. return;
  202. }
  203. auto stack_size = cJSON_GetObjectItem(params, "stackSize");
  204. if (stack_size != nullptr && !cJSON_IsNumber(stack_size)) {
  205. ESP_LOGE(TAG, "tools/call: Invalid stackSize");
  206. ReplyError(id_int, "Invalid stackSize");
  207. return;
  208. }
  209. DoToolCall(id_int, std::string(tool_name->valuestring), tool_arguments, stack_size ? stack_size->valueint : DEFAULT_TOOLCALL_STACK_SIZE);
  210. } else {
  211. ESP_LOGE(TAG, "Method not implemented: %s", method_str.c_str());
  212. ReplyError(id_int, "Method not implemented: " + method_str);
  213. }
  214. }
  215. void McpServer::ReplyResult(int id, const std::string& result) {
  216. std::string payload = "{\"jsonrpc\":\"2.0\",\"id\":";
  217. payload += std::to_string(id) + ",\"result\":";
  218. payload += result;
  219. payload += "}";
  220. Application::GetInstance().SendMcpMessage(payload);
  221. }
  222. void McpServer::ReplyError(int id, const std::string& message) {
  223. std::string payload = "{\"jsonrpc\":\"2.0\",\"id\":";
  224. payload += std::to_string(id);
  225. payload += ",\"error\":{\"message\":\"";
  226. payload += message;
  227. payload += "\"}}";
  228. Application::GetInstance().SendMcpMessage(payload);
  229. }
  230. void McpServer::GetToolsList(int id, const std::string& cursor) {
  231. const int max_payload_size = 8000;
  232. std::string json = "{\"tools\":[";
  233. bool found_cursor = cursor.empty();
  234. auto it = tools_.begin();
  235. std::string next_cursor = "";
  236. while (it != tools_.end()) {
  237. // 如果我们还没有找到起始位置,继续搜索
  238. if (!found_cursor) {
  239. if ((*it)->name() == cursor) {
  240. found_cursor = true;
  241. } else {
  242. ++it;
  243. continue;
  244. }
  245. }
  246. // 添加tool前检查大小
  247. std::string tool_json = (*it)->to_json() + ",";
  248. if (json.length() + tool_json.length() + 30 > max_payload_size) {
  249. // 如果添加这个tool会超出大小限制,设置next_cursor并退出循环
  250. next_cursor = (*it)->name();
  251. break;
  252. }
  253. json += tool_json;
  254. ++it;
  255. }
  256. if (json.back() == ',') {
  257. json.pop_back();
  258. }
  259. if (json.back() == '[' && !tools_.empty()) {
  260. // 如果没有添加任何tool,返回错误
  261. ESP_LOGE(TAG, "tools/list: Failed to add tool %s because of payload size limit", next_cursor.c_str());
  262. ReplyError(id, "Failed to add tool " + next_cursor + " because of payload size limit");
  263. return;
  264. }
  265. if (next_cursor.empty()) {
  266. json += "]}";
  267. } else {
  268. json += "],\"nextCursor\":\"" + next_cursor + "\"}";
  269. }
  270. ReplyResult(id, json);
  271. }
  272. void McpServer::DoToolCall(int id, const std::string& tool_name, const cJSON* tool_arguments, int stack_size) {
  273. auto tool_iter = std::find_if(tools_.begin(), tools_.end(),
  274. [&tool_name](const McpTool* tool) {
  275. return tool->name() == tool_name;
  276. });
  277. if (tool_iter == tools_.end()) {
  278. ESP_LOGE(TAG, "tools/call: Unknown tool: %s", tool_name.c_str());
  279. ReplyError(id, "Unknown tool: " + tool_name);
  280. return;
  281. }
  282. PropertyList arguments = (*tool_iter)->properties();
  283. try {
  284. for (auto& argument : arguments) {
  285. bool found = false;
  286. if (cJSON_IsObject(tool_arguments)) {
  287. auto value = cJSON_GetObjectItem(tool_arguments, argument.name().c_str());
  288. if (argument.type() == kPropertyTypeBoolean && cJSON_IsBool(value)) {
  289. argument.set_value<bool>(value->valueint == 1);
  290. found = true;
  291. } else if (argument.type() == kPropertyTypeInteger && cJSON_IsNumber(value)) {
  292. argument.set_value<int>(value->valueint);
  293. found = true;
  294. } else if (argument.type() == kPropertyTypeString && cJSON_IsString(value)) {
  295. argument.set_value<std::string>(value->valuestring);
  296. found = true;
  297. }
  298. }
  299. if (!argument.has_default_value() && !found) {
  300. ESP_LOGE(TAG, "tools/call: Missing valid argument: %s", argument.name().c_str());
  301. ReplyError(id, "Missing valid argument: " + argument.name());
  302. return;
  303. }
  304. }
  305. } catch (const std::exception& e) {
  306. ESP_LOGE(TAG, "tools/call: %s", e.what());
  307. ReplyError(id, e.what());
  308. return;
  309. }
  310. // Start a task to receive data with stack size
  311. esp_pthread_cfg_t cfg = esp_pthread_get_default_config();
  312. cfg.thread_name = "tool_call";
  313. cfg.stack_size = stack_size;
  314. cfg.prio = 1;
  315. esp_pthread_set_cfg(&cfg);
  316. // Use a thread to call the tool to avoid blocking the main thread
  317. tool_call_thread_ = std::thread([this, id, tool_iter, arguments = std::move(arguments)]() {
  318. try {
  319. ReplyResult(id, (*tool_iter)->Call(arguments));
  320. } catch (const std::exception& e) {
  321. ESP_LOGE(TAG, "tools/call: %s", e.what());
  322. ReplyError(id, e.what());
  323. }
  324. });
  325. tool_call_thread_.detach();
  326. }