/**
 * @description HTML parser that converts an HTML string into a WeChat
 *              mini-program JSON structure.
 * @author ML 1940694428@qq.com
 * @date 2025/03/31
 */

/** A node of the generated JSON tree. */
interface ViewNode {
  type: "view" | "text" | "image";
  /** Text content (only for `type: "text"`). */
  text?: string;
  /** Image source (only for `type: "image"`). */
  src?: string;
  /** Inline styles with camel-cased property names. */
  css?: Record<string, string>;
  /** Child nodes (only for `type: "view"`). */
  views?: ViewNode[];
}

/** An element of the intermediate virtual DOM. */
interface VirtualElement {
  tagName: string;
  attributes: Record<string, string>;
  children: (VirtualElement | VirtualText)[];
}

/** A text node of the intermediate virtual DOM. */
interface VirtualText {
  text: string;
}

/** Elements that never have content, so they must never be left "open". */
const VOID_TAGS: ReadonlySet<string> = new Set([
  "area", "base", "br", "col", "embed", "hr", "img", "input",
  "link", "meta", "param", "source", "track", "wbr",
]);

/** Tags with dedicated handling; every other tag is rendered like a `div`. */
const SUPPORTED_TAGS: ReadonlySet<string> = new Set([
  "p", "div", "span", "strong", "em", "code", "img", "font",
]);

export default class HtmlParser {
  private screenWidth?: number;

  /**
   * @param screenWidth Optional width in pixels; when truthy it is used as the
   *                    `maxWidth` of `<p>` nodes, otherwise `"100%"` is used.
   */
  constructor(screenWidth?: number) {
    this.screenWidth = screenWidth;
  }

  /**
   * Applies a set of global replacements to an HTML string.
   *
   * Each key is compiled with `new RegExp(key, "g")`, so keys are regular
   * expression sources (special characters must be escaped by the caller) and
   * values follow `String.prototype.replace` replacement-pattern rules.
   *
   * @param html    Original HTML string.
   * @param filters Map of pattern source to replacement, e.g. `{ "&nbsp;": "" }`.
   * @returns The filtered string.
   */
  public filterHtmlString(html: string, filters: Record<string, string>): string {
    let filteredHtml = html;
    for (const [pattern, replacement] of Object.entries(filters)) {
      filteredHtml = filteredHtml.replace(new RegExp(pattern, "g"), replacement);
    }
    return filteredHtml;
  }

  /**
   * Parses HTML into the WeChat mini-program JSON structure.
   *
   * @param html Original HTML string.
   * @returns The converted node list.
   */
  public parseHtmlToJson(html: string): ViewNode[] {
    // Normalize non-breaking spaces (the `&nbsp;` entity as well as the raw
    // U+00A0 character) to regular spaces before tokenizing.
    const normalized = this.filterHtmlString(html, { "&nbsp;": " ", "\u00a0": " " });
    return this.parseElement(this.createElement(normalized));
  }

  /**
   * Builds a virtual DOM from an HTML string.
   *
   * The tokenizer is regex based and intentionally lenient:
   * - comments and `<!…>` declarations are skipped;
   * - void elements (`img`, `br`, …) never stay open, with or without `/>`;
   * - a closing tag closes the nearest matching open element (implicitly
   *   closing anything nested inside it); unmatched closing tags are logged
   *   and ignored;
   * - text nodes are trimmed and dropped when empty.
   *
   * @param html HTML string.
   * @returns A synthetic root `div` holding the parsed children.
   */
  private createElement(html: string): VirtualElement {
    const root: VirtualElement = { tagName: "div", attributes: {}, children: [] };
    const stack: VirtualElement[] = [root];
    // 1: opening tag name, 2: raw attributes, 3: closing tag name, 4: text
    const tokenRe =
      /<!--[\s\S]*?-->|<![^>]*>|<([a-zA-Z][a-zA-Z0-9:-]*)([^>]*?)\/?>|<\/([a-zA-Z][a-zA-Z0-9:-]*)[^>]*>|([^<]+)/g;

    let match: RegExpExecArray | null;
    while ((match = tokenRe.exec(html)) !== null) {
      const parent = stack[stack.length - 1] ?? root;
      const openName = match[1];
      const closeName = match[3];
      const rawText = match[4];

      if (openName) {
        // Opening or self-closing tag.
        const tagName = openName.toLowerCase();
        const element: VirtualElement = {
          tagName,
          attributes: this.parseAttributes(match[2] ?? ""),
          children: [],
        };
        parent.children.push(element);
        if (!match[0].endsWith("/>") && !VOID_TAGS.has(tagName)) {
          stack.push(element);
        }
      } else if (closeName) {
        // Closing tag.
        const tagName = closeName.toLowerCase();
        if (VOID_TAGS.has(tagName)) {
          continue; // e.g. `<img ...></img>`: nothing is open for it
        }
        // Index 0 is the synthetic root and can never be closed by markup.
        let index = stack.length - 1;
        while (index > 0 && stack[index]?.tagName !== tagName) {
          index--;
        }
        if (index === 0) {
          console.error(`Unexpected closing tag: ${closeName}`);
          continue;
        }
        stack.length = index;
      } else if (rawText) {
        // Text node.
        const text = rawText.trim();
        if (text) {
          const textNode: VirtualText = { text };
          parent.children.push(textNode);
        }
      }
    }

    if (stack.length > 1) {
      console.error("Unclosed tags detected");
    }
    return root;
  }

  /**
   * Parses the attribute portion of an opening tag.
   *
   * Accepts `name="value"`, `name='value'` and unquoted `name=value`.
   * Attribute names are lower-cased because HTML attribute names are
   * case-insensitive. Attributes without a value are ignored.
   *
   * @param attributeString Raw text between the tag name and the closing `>`.
   * @returns Map of attribute name to value.
   */
  private parseAttributes(attributeString: string): Record<string, string> {
    const attributes: Record<string, string> = {};
    const attrRe = /([a-zA-Z_:][\w:.-]*)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/g;
    let match: RegExpExecArray | null;
    while ((match = attrRe.exec(attributeString)) !== null) {
      const name = match[1];
      if (!name) {
        continue;
      }
      attributes[name.toLowerCase()] = match[2] ?? match[3] ?? match[4] ?? "";
    }
    return attributes;
  }

  /**
   * Recursively converts the children of a virtual DOM element to JSON nodes.
   *
   * Mapping:
   * - text            -> `{ type: "text", text }`
   * - `p`             -> block `view` with word-wrapping defaults
   * - `span/strong/em`-> `text` node carrying all descendant text
   * - `code`          -> `view` with code-block defaults
   * - `img`           -> `image` node with `src`
   * - `font`          -> transparent: its children are emitted in its place
   * - anything else   -> plain `view`
   *
   * Inline styles always override the built-in defaults.
   *
   * @param element Virtual DOM element whose children are converted.
   * @returns JSON nodes for the children of `element`.
   */
  private parseElement(element: VirtualElement): ViewNode[] {
    const result: ViewNode[] = [];

    for (const node of element.children) {
      if ("text" in node) {
        result.push({ type: "text", text: node.text });
        continue;
      }

      const styles = this.parseInlineStyle(node.attributes.style ?? "");
      const children = this.parseElement(node);
      const tagName = SUPPORTED_TAGS.has(node.tagName) ? node.tagName : "div";

      switch (tagName) {
        case "font":
          // Purely presentational wrapper: hoist its children.
          result.push(...children);
          break;
        case "p":
          result.push({
            type: "view",
            css: {
              display: "block",
              wordWrap: "break-word",
              wordBreak: "break-word",
              whiteSpace: "normal",
              maxWidth: this.screenWidth ? `${this.screenWidth}px` : "100%",
              ...styles,
            },
            views: children,
          });
          break;
        case "span":
        case "strong":
        case "em":
          result.push({ type: "text", css: styles, text: this.collectText(node) });
          break;
        case "code":
          result.push({
            type: "view",
            css: {
              display: "block",
              whiteSpace: "pre-wrap",
              wordWrap: "break-word",
              wordBreak: "break-word",
              overflow: "auto",
              color: "#333",
              border: "1px solid #f0f0f0",
              backgroundColor: "#f8f8f8",
              padding: "10px",
              borderRadius: "4px",
              ...styles,
            },
            views: children,
          });
          break;
        case "img":
          result.push({ type: "image", css: styles, src: node.attributes.src ?? "" });
          break;
        default:
          // "div" and every unsupported tag.
          result.push({ type: "view", css: styles, views: children });
          break;
      }
    }

    return result;
  }

  /**
   * Concatenates the text of all descendant text nodes, in document order.
   * Used for inline elements so that nested markup such as
   * `<span>a<strong>b</strong></span>` does not lose its inner text.
   */
  private collectText(element: VirtualElement): string {
    return element.children
      .map((child) => ("text" in child ? child.text : this.collectText(child)))
      .join("");
  }

  /**
   * Parses an inline `style` attribute.
   *
   * Declarations are split on `;` outside parentheses and on the first `:`
   * only, so values such as `url(http://host/a.png)` or
   * `url(data:image/png;base64,...)` stay intact. Declarations with an empty
   * name or value are skipped.
   *
   * @param styleString CSS declaration list.
   * @returns Style object keyed by camel-cased property name.
   */
  private parseInlineStyle(styleString: string): Record<string, string> {
    const styles: Record<string, string> = {};
    for (const declaration of styleString.split(/;(?![^()]*\))/)) {
      const separator = declaration.indexOf(":");
      if (separator === -1) {
        continue;
      }
      const key = declaration.slice(0, separator).trim();
      const value = declaration.slice(separator + 1).trim();
      if (key && value) {
        styles[this.camelCase(key)] = value;
      }
    }
    return styles;
  }

  /**
   * Converts a CSS property name to camelCase (`font-size` -> `fontSize`).
   * Property names are case-insensitive, so the name is lower-cased first.
   * Custom properties (`--foo-bar`) are case-sensitive and returned unchanged.
   */
  private camelCase(input: string): string {
    if (input.startsWith("--")) {
      return input;
    }
    return input
      .toLowerCase()
      .replace(/-([a-z])/g, (_match: string, letter: string) => letter.toUpperCase());
  }
}