sax.js 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597
  1. ;(function (sax) { // wrapper for non-node envs
  // Public entry points. `sax.parser(...)` is a convenience factory that is
  // equivalent to `new sax.SAXParser(...)`.
  sax.parser = function (strict, opt) {
    return new SAXParser(strict, opt)
  }
  sax.SAXParser = SAXParser
  sax.SAXStream = SAXStream
  sax.createStream = createStream

  // Buffer-overrun policy:
  // Once the parser position passes MAX_BUFFER_LENGTH, buffer lengths start
  // being checked. Each check schedules the next one for
  // MAX_BUFFER_LENGTH - (length of the longest buffer), which is the earliest
  // point at which an overrun could happen, so checks are as rare as possible
  // while still never letting a buffer cross the bound.
  // Buffers are tested at most once per write(), so a very large string passed
  // to write() can still produce (at most) one full copy of that string. That
  // is under the caller's control and therefore considered safe.
  // Set to Infinity to have unlimited buffers.
  sax.MAX_BUFFER_LENGTH = 64 * 1024

  // Names of the string accumulators kept on every parser instance.
  var buffers = [
    'comment',
    'sgmlDecl',
    'textNode',
    'tagName',
    'doctype',
    'procInstName',
    'procInstBody',
    'entity',
    'attribName',
    'attribValue',
    'cdata',
    'script'
  ]

  // Every event a parser can emit (handlers are looked up as 'on' + name).
  sax.EVENTS = [
    'text', 'processinginstruction', 'sgmldeclaration', 'doctype', 'comment',
    'opentagstart', 'attribute', 'opentag', 'closetag',
    'opencdata', 'cdata', 'closecdata',
    'error', 'end', 'ready', 'script',
    'opennamespace', 'closenamespace'
  ]
  // Parser constructor. Also used (via SAXParser.call) to reset an existing
  // parser back to its initial state.
  //   strict - truthy to enable strict mode (also forces `noscript`)
  //   opt    - options object; it is stored on the parser and normalised
  //            in place (lowercase, unquotedAttributeValues)
  // Emits 'onready' once all state has been initialised.
  function SAXParser (strict, opt) {
    if (!(this instanceof SAXParser)) {
      return new SAXParser(strict, opt)
    }

    var self = this
    var options = opt || {}

    clearBuffers(self)
    self.c = ''
    self.q = ''
    self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH

    self.opt = options
    options.lowercase = options.lowercase || options.lowercasetags
    self.looseCase = options.lowercase ? 'toLowerCase' : 'toUpperCase'

    self.tags = []
    self.sawRoot = false
    self.closedRoot = false
    self.closed = false
    self.error = null
    self.tag = null
    self.strict = !!strict
    self.noscript = !!(strict || options.noscript)
    self.state = S.BEGIN
    self.strictEntities = options.strictEntities
    // Per-parser entity table that inherits from the shared one.
    self.ENTITIES = self.strictEntities
      ? Object.create(sax.XML_ENTITIES)
      : Object.create(sax.ENTITIES)
    self.attribList = []

    // Namespaces form a prototype chain: the current tag's `ns` object
    // inherits from its parent tag's.
    if (options.xmlns) {
      self.ns = Object.create(rootNS)
    }

    // Unless configured explicitly, unquoted attribute values are only
    // allowed outside strict mode.
    if (options.unquotedAttributeValues === undefined) {
      options.unquotedAttributeValues = !strict
    }

    // Position tracking is mostly just for error reporting.
    self.trackPosition = options.position !== false
    if (self.trackPosition) {
      self.column = 0
      self.line = 0
      self.position = 0
    }

    emit(self, 'onready')
  }
  // Minimal Object.create fallback for pre-ES5 engines (prototype argument only).
  if (!Object.create) {
    Object.create = function (o) {
      var Surrogate = function () {}
      Surrogate.prototype = o
      return new Surrogate()
    }
  }

  // Minimal Object.keys fallback for pre-ES5 engines: own enumerable names.
  if (!Object.keys) {
    Object.keys = function (o) {
      var names = []
      for (var name in o) {
        if (o.hasOwnProperty(name)) {
          names.push(name)
        }
      }
      return names
    }
  }
  // Inspects every parser buffer. Oversized text/cdata/script buffers are
  // emitted early (they can legitimately grow large); any other oversized
  // buffer is reported as an error. Finally the position of the next check is
  // scheduled on parser.bufferCheckPosition.
  function checkBufferLength (parser) {
    var limit = Math.max(sax.MAX_BUFFER_LENGTH, 10)
    var longest = 0

    buffers.forEach(function (name) {
      var size = parser[name].length
      if (size > limit) {
        // Text/cdata nodes are buffered and can get here under normal
        // conditions; flush them now so they stop growing.
        if (name === 'textNode') {
          closeText(parser)
        } else if (name === 'cdata') {
          emitNode(parser, 'oncdata', parser.cdata)
          parser.cdata = ''
        } else if (name === 'script') {
          emitNode(parser, 'onscript', parser.script)
          parser.script = ''
        } else {
          error(parser, 'Max buffer length exceeded: ' + name)
        }
      }
      // Uses the length measured before any flush above.
      longest = Math.max(longest, size)
    })

    // Schedule the next check for the earliest possible buffer overrun.
    var headroom = sax.MAX_BUFFER_LENGTH - longest
    parser.bufferCheckPosition = headroom + parser.position
  }
  // Resets every named buffer on the parser to the empty string.
  function clearBuffers (parser) {
    buffers.forEach(function (name) {
      parser[name] = ''
    })
  }
  // Emits whatever is currently buffered as text, cdata and script (in that
  // order) and empties those buffers. Empty cdata/script buffers emit nothing.
  function flushBuffers (parser) {
    closeText(parser)

    var pending = [['cdata', 'oncdata'], ['script', 'onscript']]
    pending.forEach(function (entry) {
      var bufferName = entry[0]
      if (parser[bufferName] === '') {
        return
      }
      emitNode(parser, entry[1], parser[bufferName])
      parser[bufferName] = ''
    })
  }
  // Public parser methods. The heavy lifting lives in the module-level
  // functions of the same name.
  SAXParser.prototype = {
    // Signal end of input.
    end: function () {
      end(this)
    },
    // Feed a chunk to the parser.
    write: write,
    // Clear a recorded error so writing can continue; chainable.
    resume: function () {
      this.error = null
      return this
    },
    // Shorthand for write(null).
    close: function () {
      return this.write(null)
    },
    // Emit buffered text/cdata/script immediately.
    flush: function () {
      flushBuffers(this)
    }
  }
  // Resolve the Stream base class; fall back to an empty constructor when the
  // `stream` module cannot be loaded or does not export one.
  var Stream = null
  try {
    Stream = require('stream').Stream
  } catch (ex) {
    // `stream` is unavailable here; the fallback below applies.
  }
  Stream = Stream || function () {}
  // Parser events that SAXStream re-emits; 'error' and 'end' are wired up
  // separately by the SAXStream constructor.
  var streamWraps = sax.EVENTS.filter(function (name) {
    return !(name === 'error' || name === 'end')
  })
  // Factory for SAXStream; arguments are forwarded unchanged.
  function createStream (strict, opt) {
    var stream = new SAXStream(strict, opt)
    return stream
  }
  // Stream wrapper around a SAXParser.
  //   strict, opt - forwarded to the SAXParser constructor
  // Parser 'end' and 'error' are re-emitted as stream events. For every name
  // in streamWraps an `on<event>` accessor is defined on the stream: reading
  // returns the parser's handler, assigning a handler subscribes it through
  // .on(), and assigning a falsy value removes all listeners for that event.
  function SAXStream (strict, opt) {
    if (!(this instanceof SAXStream)) {
      return new SAXStream(strict, opt)
    }

    Stream.apply(this)

    var stream = this
    stream._parser = new SAXParser(strict, opt)
    stream.writable = true
    stream.readable = true

    stream._parser.onend = function () {
      stream.emit('end')
    }

    stream._parser.onerror = function (er) {
      stream.emit('error', er)
      // If emit() did not throw, the error was handled: clear it so that
      // writing can continue.
      stream._parser.error = null
    }

    stream._decoder = null

    streamWraps.forEach(function (ev) {
      var handlerKey = 'on' + ev
      Object.defineProperty(stream, handlerKey, {
        enumerable: true,
        configurable: false,
        get: function () {
          return stream._parser[handlerKey]
        },
        set: function (h) {
          if (h) {
            stream.on(ev, h)
            return
          }
          stream.removeAllListeners(ev)
          stream._parser[handlerKey] = h
          return h
        }
      })
    })
  }
  // SAXStream inherits from Stream; `constructor` is restored as a
  // non-enumerable, read-only property.
  var saxStreamProtoProps = { constructor: { value: SAXStream } }
  SAXStream.prototype = Object.create(Stream.prototype, saxStreamProtoProps)
  // Feeds a chunk to the underlying parser and then emits it as 'data'.
  // Buffers are decoded as UTF-8 through a lazily created StringDecoder;
  // anything else is passed to the parser via toString(). Always returns true.
  SAXStream.prototype.write = function (data) {
    var isBinary = typeof Buffer === 'function' &&
      typeof Buffer.isBuffer === 'function' &&
      Buffer.isBuffer(data)

    if (isBinary) {
      if (!this._decoder) {
        var StringDecoder = require('string_decoder').StringDecoder
        this._decoder = new StringDecoder('utf8')
      }
      data = this._decoder.write(data)
    }

    var text = data.toString()
    this._parser.write(text)
    this.emit('data', data)
    return true
  }
  // Ends the stream: writes the optional final chunk, flushes any bytes the
  // UTF-8 decoder is still holding, then ends the underlying parser.
  // Always returns true.
  SAXStream.prototype.end = function (chunk) {
    if (chunk && chunk.length) {
      this.write(chunk)
    }
    // A Buffer chunk may have ended in the middle of a multi-byte sequence.
    // StringDecoder#end() returns whatever is still pending (as replacement
    // characters for an incomplete sequence); without this the trailing
    // input would be dropped silently.
    if (this._decoder) {
      var rest = this._decoder.end()
      if (rest) {
        this._parser.write(rest)
        this.emit('data', rest)
      }
    }
    this._parser.end()
    return true
  }
  // Subscribes `handler` to `ev`. The first time a wrapped parser event is
  // subscribed to (and the parser has no handler for it yet), a forwarding
  // handler is installed on the parser that re-emits the event, with all of
  // its arguments, on the stream. Returns whatever Stream.prototype.on returns.
  SAXStream.prototype.on = function (ev, handler) {
    var stream = this
    var handlerKey = 'on' + ev

    if (!stream._parser[handlerKey] && streamWraps.indexOf(ev) !== -1) {
      stream._parser[handlerKey] = function () {
        var args = Array.prototype.slice.call(arguments)
        args.unshift(ev)
        stream.emit.apply(stream, args)
      }
    }

    return Stream.prototype.on.call(stream, ev, handler)
  }
  // Literal markers and the two namespaces that are bound by definition.
  var CDATA = '[CDATA['
  var DOCTYPE = 'DOCTYPE'
  var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
  var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
  var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }

  // This really needs to be replaced with character classes.
  // XML allows all manner of ridiculous numbers and digits.
  //
  // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
  // This implementation works on strings, a single character at a time;
  // as such, it cannot ever support astral-plane characters (10000-EFFFF)
  // without a significant breaking change to either this parser, or the
  // JavaScript language. Implementation of an emoji-capable xml parser
  // is left as an exercise for the reader.
  var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/

  // The other three classes are derived from nameStart's pattern text:
  //   nameBody    = nameStart plus the extra name characters
  //   entityStart = nameStart plus '#'
  //   entityBody  = nameBody plus '#'
  var nameBody = new RegExp(
    nameStart.source.slice(0, -1) + '\\u00B7\\u0300-\\u036F\\u203F-\\u2040.\\d-]'
  )
  var entityStart = new RegExp('[#' + nameStart.source.slice(1))
  var entityBody = new RegExp('[#' + nameBody.source.slice(1))
  // True when `c` is exactly a space, newline, carriage return or tab.
  function isWhitespace (c) {
    switch (c) {
      case ' ':
      case '\n':
      case '\r':
      case '\t':
        return true
      default:
        return false
    }
  }
  // True when `c` is exactly a single or a double quote character.
  function isQuote (c) {
    if (c === '\'') {
      return true
    }
    return c === '"'
  }
  // True when `c` ends an unquoted attribute value: whitespace or '>'.
  function isAttribEnd (c) {
    if (c === '>') {
      return true
    }
    return isWhitespace(c)
  }
  // True when `c` matches `regex`.
  function isMatch (regex, c) {
    var matched = regex.test(c)
    return matched
  }
  // Negation of isMatch: true when `c` does not match `regex`.
  function notMatch (regex, c) {
    return isMatch(regex, c) === false
  }
  // Parser states, numbered consecutively from 0 in the order listed.
  // `S` is used as the running counter here.
  var S = 0
  sax.STATE = [
    'BEGIN', // leading byte order mark or whitespace
    'BEGIN_WHITESPACE', // leading whitespace
    'TEXT', // general stuff
    'TEXT_ENTITY', // &amp and such.
    'OPEN_WAKA', // <
    'SGML_DECL', // <!BLARG
    'SGML_DECL_QUOTED', // <!BLARG foo "bar
    'DOCTYPE', // <!DOCTYPE
    'DOCTYPE_QUOTED', // <!DOCTYPE "//blah
    'DOCTYPE_DTD', // <!DOCTYPE "//blah" [ ...
    'DOCTYPE_DTD_QUOTED', // <!DOCTYPE "//blah" [ "foo
    'COMMENT_STARTING', // <!-
    'COMMENT', // <!--
    'COMMENT_ENDING', // <!-- blah -
    'COMMENT_ENDED', // <!-- blah --
    'CDATA', // <![CDATA[ something
    'CDATA_ENDING', // ]
    'CDATA_ENDING_2', // ]]
    'PROC_INST', // <?hi
    'PROC_INST_BODY', // <?hi there
    'PROC_INST_ENDING', // <?hi "there" ?
    'OPEN_TAG', // <strong
    'OPEN_TAG_SLASH', // <strong /
    'ATTRIB', // <a
    'ATTRIB_NAME', // <a foo
    'ATTRIB_NAME_SAW_WHITE', // <a foo _
    'ATTRIB_VALUE', // <a foo=
    'ATTRIB_VALUE_QUOTED', // <a foo="bar
    'ATTRIB_VALUE_CLOSED', // <a foo="bar"
    'ATTRIB_VALUE_UNQUOTED', // <a foo=bar
    'ATTRIB_VALUE_ENTITY_Q', // <foo bar="&quot;"
    'ATTRIB_VALUE_ENTITY_U', // <foo bar=&quot
    'CLOSE_TAG', // </a
    'CLOSE_TAG_SAW_WHITE', // </a >
    'SCRIPT', // <script> ...
    'SCRIPT_ENDING' // <script> ... <
  ].reduce(function (states, name) {
    states[name] = S++
    return states
  }, {})
  // The five entities predefined by XML itself; this is the table parsers
  // inherit from when the strictEntities option is set.
  sax.XML_ENTITIES = { 'amp': '&', 'gt': '>', 'lt': '<', 'quot': '"', 'apos': "'" }
  // Named entity table used when strictEntities is not set. The five XML
  // entities are given as strings; every other entry is a numeric character
  // code (converted to a one-character string after this table is defined).
  sax.ENTITIES = {
    // XML predefined entities
    'amp': '&', 'gt': '>', 'lt': '<', 'quot': '"', 'apos': "'",

    // Latin-1 letters, upper case
    'AElig': 198, 'Aacute': 193, 'Acirc': 194, 'Agrave': 192, 'Aring': 197, 'Atilde': 195,
    'Auml': 196, 'Ccedil': 199, 'ETH': 208, 'Eacute': 201, 'Ecirc': 202, 'Egrave': 200,
    'Euml': 203, 'Iacute': 205, 'Icirc': 206, 'Igrave': 204, 'Iuml': 207, 'Ntilde': 209,
    'Oacute': 211, 'Ocirc': 212, 'Ograve': 210, 'Oslash': 216, 'Otilde': 213, 'Ouml': 214,
    'THORN': 222, 'Uacute': 218, 'Ucirc': 219, 'Ugrave': 217, 'Uuml': 220, 'Yacute': 221,

    // Latin-1 letters, lower case
    'aacute': 225, 'acirc': 226, 'aelig': 230, 'agrave': 224, 'aring': 229, 'atilde': 227,
    'auml': 228, 'ccedil': 231, 'eacute': 233, 'ecirc': 234, 'egrave': 232, 'eth': 240,
    'euml': 235, 'iacute': 237, 'icirc': 238, 'igrave': 236, 'iuml': 239, 'ntilde': 241,
    'oacute': 243, 'ocirc': 244, 'ograve': 242, 'oslash': 248, 'otilde': 245, 'ouml': 246,
    'szlig': 223, 'thorn': 254, 'uacute': 250, 'ucirc': 251, 'ugrave': 249, 'uuml': 252,
    'yacute': 253, 'yuml': 255,

    // Latin-1 symbols and punctuation
    'copy': 169, 'reg': 174, 'nbsp': 160, 'iexcl': 161, 'cent': 162, 'pound': 163,
    'curren': 164, 'yen': 165, 'brvbar': 166, 'sect': 167, 'uml': 168, 'ordf': 170,
    'laquo': 171, 'not': 172, 'shy': 173, 'macr': 175, 'deg': 176, 'plusmn': 177,
    'sup1': 185, 'sup2': 178, 'sup3': 179, 'acute': 180, 'micro': 181, 'para': 182,
    'middot': 183, 'cedil': 184, 'ordm': 186, 'raquo': 187, 'frac14': 188, 'frac12': 189,
    'frac34': 190, 'iquest': 191, 'times': 215, 'divide': 247,

    // Latin extended and spacing modifiers
    'OElig': 338, 'oelig': 339, 'Scaron': 352, 'scaron': 353, 'Yuml': 376, 'fnof': 402,
    'circ': 710, 'tilde': 732,

    // Greek, upper case
    'Alpha': 913, 'Beta': 914, 'Gamma': 915, 'Delta': 916, 'Epsilon': 917, 'Zeta': 918,
    'Eta': 919, 'Theta': 920, 'Iota': 921, 'Kappa': 922, 'Lambda': 923, 'Mu': 924,
    'Nu': 925, 'Xi': 926, 'Omicron': 927, 'Pi': 928, 'Rho': 929, 'Sigma': 931,
    'Tau': 932, 'Upsilon': 933, 'Phi': 934, 'Chi': 935, 'Psi': 936, 'Omega': 937,

    // Greek, lower case and variants
    'alpha': 945, 'beta': 946, 'gamma': 947, 'delta': 948, 'epsilon': 949, 'zeta': 950,
    'eta': 951, 'theta': 952, 'iota': 953, 'kappa': 954, 'lambda': 955, 'mu': 956,
    'nu': 957, 'xi': 958, 'omicron': 959, 'pi': 960, 'rho': 961, 'sigmaf': 962,
    'sigma': 963, 'tau': 964, 'upsilon': 965, 'phi': 966, 'chi': 967, 'psi': 968,
    'omega': 969, 'thetasym': 977, 'upsih': 978, 'piv': 982,

    // General punctuation, currency and letter-like symbols
    'ensp': 8194, 'emsp': 8195, 'thinsp': 8201, 'zwnj': 8204, 'zwj': 8205, 'lrm': 8206,
    'rlm': 8207, 'ndash': 8211, 'mdash': 8212, 'lsquo': 8216, 'rsquo': 8217, 'sbquo': 8218,
    'ldquo': 8220, 'rdquo': 8221, 'bdquo': 8222, 'dagger': 8224, 'Dagger': 8225, 'bull': 8226,
    'hellip': 8230, 'permil': 8240, 'prime': 8242, 'Prime': 8243, 'lsaquo': 8249, 'rsaquo': 8250,
    'oline': 8254, 'frasl': 8260, 'euro': 8364, 'image': 8465, 'weierp': 8472, 'real': 8476,
    'trade': 8482, 'alefsym': 8501,

    // Arrows
    'larr': 8592, 'uarr': 8593, 'rarr': 8594, 'darr': 8595, 'harr': 8596, 'crarr': 8629,
    'lArr': 8656, 'uArr': 8657, 'rArr': 8658, 'dArr': 8659, 'hArr': 8660,

    // Mathematical operators and miscellaneous symbols
    'forall': 8704, 'part': 8706, 'exist': 8707, 'empty': 8709, 'nabla': 8711, 'isin': 8712,
    'notin': 8713, 'ni': 8715, 'prod': 8719, 'sum': 8721, 'minus': 8722, 'lowast': 8727,
    'radic': 8730, 'prop': 8733, 'infin': 8734, 'ang': 8736, 'and': 8743, 'or': 8744,
    'cap': 8745, 'cup': 8746, 'int': 8747, 'there4': 8756, 'sim': 8764, 'cong': 8773,
    'asymp': 8776, 'ne': 8800, 'equiv': 8801, 'le': 8804, 'ge': 8805, 'sub': 8834,
    'sup': 8835, 'nsub': 8836, 'sube': 8838, 'supe': 8839, 'oplus': 8853, 'otimes': 8855,
    'perp': 8869, 'sdot': 8901, 'lceil': 8968, 'rceil': 8969, 'lfloor': 8970, 'rfloor': 8971,
    'lang': 9001, 'rang': 9002, 'loz': 9674, 'spades': 9824, 'clubs': 9827, 'hearts': 9829,
    'diams': 9830
  }
  // Turn numeric character codes in the entity table into one-character
  // strings; entries that are already strings are left as they are.
  Object.keys(sax.ENTITIES).forEach(function (name) {
    var value = sax.ENTITIES[name]
    if (typeof value === 'number') {
      value = String.fromCharCode(value)
    }
    sax.ENTITIES[name] = value
  })

  // Add the reverse mapping (state number -> state name) to sax.STATE.
  for (var s in sax.STATE) {
    var stateNumber = sax.STATE[s]
    sax.STATE[stateNumber] = s
  }

  // From here on `S` is shorthand for the state table.
  S = sax.STATE
  // Invokes the handler stored at parser[event] (as a method of the parser)
  // with `data`; does nothing when no handler is set.
  function emit (parser, event, data) {
    if (parser[event]) {
      parser[event](data)
    }
  }
  // Emits a node event, first flushing any buffered text so that the text
  // event is delivered before the node.
  function emitNode (parser, nodeType, data) {
    var hasPendingText = Boolean(parser.textNode)
    if (hasPendingText) {
      closeText(parser)
    }
    emit(parser, nodeType, data)
  }
  // Applies the trim/normalize options to the buffered text, emits 'ontext'
  // if anything is left, and empties the text buffer.
  function closeText (parser) {
    var text = textopts(parser.opt, parser.textNode)
    // The processed text is stored back before emitting, so a handler that
    // reads parser.textNode sees the same value it is given.
    parser.textNode = text
    if (text) {
      emit(parser, 'ontext', parser.textNode)
    }
    parser.textNode = ''
  }
  // Returns `text` after applying the text options: `trim` strips leading and
  // trailing whitespace, `normalize` collapses each whitespace run to a single
  // space (trim is applied first).
  function textopts (opt, text) {
    var trimmed = opt.trim ? text.trim() : text
    return opt.normalize ? trimmed.replace(/\s+/g, ' ') : trimmed
  }
  // Flushes pending text, wraps `er` in an Error (with line, column and
  // current character appended when position tracking is on), stores it on
  // parser.error and emits 'onerror'. Returns the parser.
  function error (parser, er) {
    closeText(parser)

    var message = er
    if (parser.trackPosition) {
      message += '\nLine: ' + parser.line
      message += '\nColumn: ' + parser.column
      message += '\nChar: ' + parser.c
    }

    var failure = new Error(message)
    parser.error = failure
    emit(parser, 'onerror', failure)
    return parser
  }
  // Finishes parsing: reports an unclosed root (strict mode only) and an
  // unexpected end when the parser stops in the middle of a construct, flushes
  // text, emits 'onend', then re-initialises the parser in place with the same
  // strictness and options. Returns the parser.
  function end (parser) {
    if (parser.sawRoot && !parser.closedRoot) {
      strictFail(parser, 'Unclosed root tag')
    }

    // Only these states are valid places for the input to stop.
    var restingStates = [S.BEGIN, S.BEGIN_WHITESPACE, S.TEXT]
    if (restingStates.indexOf(parser.state) === -1) {
      error(parser, 'Unexpected end')
    }

    closeText(parser)
    parser.c = ''
    parser.closed = true
    emit(parser, 'onend')
    SAXParser.call(parser, parser.strict, parser.opt)
    return parser
  }
  // Reports `message` through error() only when the parser is in strict mode.
  // Throws if `parser` is not a SAXParser instance.
  function strictFail (parser, message) {
    var isParser = typeof parser === 'object' && parser instanceof SAXParser
    if (!isParser) {
      throw new Error('bad call to strictFail')
    }
    if (!parser.strict) {
      return
    }
    error(parser, message)
  }
  // Starts a new tag record from the buffered tag name (case-folded outside
  // strict mode), stores it on parser.tag, resets the pending attribute list
  // and emits 'onopentagstart'.
  function newTag (parser) {
    if (!parser.strict) {
      parser.tagName = parser.tagName[parser.looseCase]()
    }

    var enclosing = parser.tags[parser.tags.length - 1] || parser
    var tag = { name: parser.tagName, attributes: {} }
    parser.tag = tag

    // Start with the enclosing scope's namespaces; this will be overridden if
    // the tag contains an xmlns="foo" or xmlns:foo="bar" attribute.
    if (parser.opt.xmlns) {
      tag.ns = enclosing.ns
    }

    parser.attribList.length = 0
    emitNode(parser, 'onopentagstart', tag)
  }
  // Splits a qualified name into { prefix, local }.
  //   name      - tag or attribute name, e.g. 'svg:rect' or 'rect'
  //   attribute - truthy when `name` is an attribute name; a bare 'xmlns'
  //               attribute is then reported as prefix 'xmlns', local ''
  // Only the first two colon-separated parts of the name are used.
  function qname (name, attribute) {
    // <x "xmlns"="http://foo">
    if (attribute && name === 'xmlns') {
      return { prefix: 'xmlns', local: '' }
    }

    var parts = name.split(':')
    if (parts.length === 1) {
      return { prefix: '', local: name }
    }
    return { prefix: parts[0], local: parts[1] }
  }
  // Commits the attribute currently buffered in parser.attribName /
  // parser.attribValue to the tag being opened, then clears both buffers.
  //  - Outside strict mode the name is case-folded first.
  //  - A name that was already seen on this tag is dropped (first one wins).
  //  - In xmlns mode, xmlns / xmlns:* attributes update the tag's namespace
  //    scope, and the attribute is queued on parser.attribList so that
  //    'onattribute' can be emitted once all bindings are known.
  //  - Otherwise the attribute is stored and 'onattribute' is emitted at once.
  function attrib (parser) {
    if (!parser.strict) {
      parser.attribName = parser.attribName[parser.looseCase]()
    }

    var attribName = parser.attribName

    // Duplicate detection. attribList holds [name, value] pairs, so the names
    // have to be compared explicitly. hasOwnProperty is taken from
    // Object.prototype because the document may itself contain an attribute
    // called "hasOwnProperty", which would shadow the method on the
    // attributes object.
    var alreadySeen =
      Object.prototype.hasOwnProperty.call(parser.tag.attributes, attribName) ||
      parser.attribList.some(function (pair) {
        return pair[0] === attribName
      })
    if (alreadySeen) {
      parser.attribName = parser.attribValue = ''
      return
    }

    if (parser.opt.xmlns) {
      var qn = qname(parser.attribName, true)
      var prefix = qn.prefix
      var local = qn.local

      if (prefix === 'xmlns') {
        // namespace binding attribute. push the binding into scope
        if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
          strictFail(parser,
            'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
            'Actual: ' + parser.attribValue)
        } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
          strictFail(parser,
            'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
            'Actual: ' + parser.attribValue)
        } else {
          var tag = parser.tag
          var parent = parser.tags[parser.tags.length - 1] || parser
          // Give the tag its own scope object the first time it binds
          // something, so the parent's scope is not modified.
          if (tag.ns === parent.ns) {
            tag.ns = Object.create(parent.ns)
          }
          tag.ns[local] = parser.attribValue
        }
      }

      // defer onattribute events until all attributes have been seen
      // so any new bindings can take effect. preserve attribute order
      // so deferred events can be emitted in document order
      parser.attribList.push([parser.attribName, parser.attribValue])
    } else {
      // in non-xmlns mode, we can emit the event right away
      parser.tag.attributes[parser.attribName] = parser.attribValue
      emitNode(parser, 'onattribute', {
        name: parser.attribName,
        value: parser.attribValue
      })
    }

    parser.attribName = parser.attribValue = ''
  }
  // Completes the tag currently being opened.
  //   selfClosing - truthy for <tag/>; the tag then stays on parser.tag and
  //                 the parser state is left untouched
  // In xmlns mode this first resolves the tag's prefix/local/uri, emits
  // 'onopennamespace' for bindings introduced by the tag and then the deferred
  // 'onattribute' events. The tag is pushed on parser.tags and 'onopentag' is
  // emitted.
  function openTag (parser, selfClosing) {
    if (parser.opt.xmlns) {
      var tag = parser.tag

      // add namespace info to tag
      var tagQName = qname(parser.tagName)
      tag.prefix = tagQName.prefix
      tag.local = tagQName.local
      tag.uri = tag.ns[tagQName.prefix] || ''

      if (tag.prefix && !tag.uri) {
        strictFail(parser, 'Unbound namespace prefix: ' +
          JSON.stringify(parser.tagName))
        tag.uri = tagQName.prefix
      }

      // emit namespace binding events
      var enclosing = parser.tags[parser.tags.length - 1] || parser
      if (tag.ns && enclosing.ns !== tag.ns) {
        Object.keys(tag.ns).forEach(function (boundPrefix) {
          emitNode(parser, 'onopennamespace', {
            prefix: boundPrefix,
            uri: tag.ns[boundPrefix]
          })
        })
      }

      // handle deferred onattribute events
      // Note: do not apply default ns to attributes:
      // http://www.w3.org/TR/REC-xml-names/#defaulting
      parser.attribList.forEach(function (pair) {
        var name = pair[0]
        var attribQName = qname(name, true)
        var prefix = attribQName.prefix
        var attribute = {
          name: name,
          value: pair[1],
          prefix: prefix,
          local: attribQName.local,
          uri: prefix === '' ? '' : (tag.ns[prefix] || '')
        }

        // if there's any attributes with an undefined namespace,
        // then fail on them now.
        if (prefix && prefix !== 'xmlns' && !attribute.uri) {
          strictFail(parser, 'Unbound namespace prefix: ' +
            JSON.stringify(prefix))
          attribute.uri = prefix
        }

        parser.tag.attributes[name] = attribute
        emitNode(parser, 'onattribute', attribute)
      })
      parser.attribList.length = 0
    }

    parser.tag.isSelfClosing = !!selfClosing

    // process the tag
    parser.sawRoot = true
    parser.tags.push(parser.tag)
    emitNode(parser, 'onopentag', parser.tag)

    if (!selfClosing) {
      // special case for <script> in non-strict mode.
      var entersScript = !parser.noscript &&
        parser.tagName.toLowerCase() === 'script'
      parser.state = entersScript ? S.SCRIPT : S.TEXT
      parser.tag = null
      parser.tagName = ''
    }

    parser.attribName = parser.attribValue = ''
    parser.attribList.length = 0
  }
  // Handles a complete close tag whose name is buffered in parser.tagName.
  //  - An empty name is (strict mode: reported and) turned into literal text.
  //  - While script content is buffered, any close tag other than 'script' is
  //    appended to the script text; the matching one emits 'onscript'.
  //  - Otherwise the open-tag stack is searched from the top for the name
  //    (case-folded outside strict mode). If it is found, that tag and every
  //    tag opened after it are closed in turn, emitting 'onclosetag' and, in
  //    xmlns mode, 'onclosenamespace' for each binding the tag introduced.
  //    If it is not found, the close tag is turned into literal text.
  function closeTag (parser) {
    if (!parser.tagName) {
      strictFail(parser, 'Weird empty close tag.')
      parser.textNode += '</>'
      parser.state = S.TEXT
      return
    }

    if (parser.script) {
      if (parser.tagName !== 'script') {
        parser.script += '</' + parser.tagName + '>'
        parser.tagName = ''
        parser.state = S.SCRIPT
        return
      }
      emitNode(parser, 'onscript', parser.script)
      parser.script = ''
    }

    // first make sure that the closing tag actually exists.
    // <a><b></c></b></a> will close everything, otherwise.
    var t = parser.tags.length
    var tagName = parser.tagName
    if (!parser.strict) {
      tagName = tagName[parser.looseCase]()
    }
    var closeTo = tagName
    while (t--) {
      var close = parser.tags[t]
      if (close.name !== closeTo) {
        // fail the first time in strict mode
        strictFail(parser, 'Unexpected close tag')
      } else {
        break
      }
    }

    // didn't find it. we already failed for strict, so just abort.
    if (t < 0) {
      strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
      parser.textNode += '</' + parser.tagName + '>'
      parser.state = S.TEXT
      return
    }
    parser.tagName = tagName

    // Pop and close every tag from the top of the stack down to index t.
    var s = parser.tags.length
    while (s-- > t) {
      var tag = parser.tag = parser.tags.pop()
      parser.tagName = parser.tag.name
      emitNode(parser, 'onclosetag', parser.tagName)

      var parent = parser.tags[parser.tags.length - 1] || parser
      if (parser.opt.xmlns && tag.ns !== parent.ns) {
        // remove namespace bindings introduced by tag
        Object.keys(tag.ns).forEach(function (p) {
          var n = tag.ns[p]
          emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
        })
      }
    }

    if (t === 0) parser.closedRoot = true
    parser.tagName = parser.attribValue = parser.attribName = ''
    parser.attribList.length = 0
    parser.state = S.TEXT
  }
  // Resolves the entity name buffered in parser.entity (without '&' and ';')
  // to its replacement text.
  //  - Named entities are looked up in parser.ENTITIES, first as written and
  //    then lower-cased.
  //  - '#123' and '#x7B' are decoded as decimal / hexadecimal code points.
  //  - Anything else is reported through strictFail and returned unchanged as
  //    '&name;'.
  function parseEntity (parser) {
    var entity = parser.entity
    var entityLC = entity.toLowerCase()
    var num
    var numStr = ''

    // parser.ENTITIES inherits (through the shared entity table) from
    // Object.prototype, so names such as "constructor" or "__proto__" resolve
    // to functions/objects. Those are not entity values and must not be
    // returned as replacement text.
    function isEntityValue (value) {
      return !!value && typeof value !== 'function' && typeof value !== 'object'
    }

    if (isEntityValue(parser.ENTITIES[entity])) {
      return parser.ENTITIES[entity]
    }
    if (isEntityValue(parser.ENTITIES[entityLC])) {
      return parser.ENTITIES[entityLC]
    }

    entity = entityLC
    if (entity.charAt(0) === '#') {
      if (entity.charAt(1) === 'x') {
        entity = entity.slice(2)
        num = parseInt(entity, 16)
        numStr = num.toString(16)
      } else {
        entity = entity.slice(1)
        num = parseInt(entity, 10)
        numStr = num.toString(10)
      }
    }

    // The reference is valid only if, leading zeros aside, it round-trips
    // through parseInt unchanged (this rejects things like '#12abc') and
    // names a code point String.fromCodePoint accepts. Without the range
    // check '&#x110000;' or '&#-5;' would throw a RangeError.
    entity = entity.replace(/^0+/, '')
    if (isNaN(num) ||
        numStr.toLowerCase() !== entity ||
        num < 0 ||
        num > 0x10FFFF) {
      strictFail(parser, 'Invalid character entity')
      return '&' + parser.entity + ';'
    }

    return String.fromCodePoint(num)
  }
  // Handles one character seen before the first tag of the document.
  // '<' starts a tag, whitespace is skipped, and anything else is reported
  // through strictFail and then becomes the start of a text node.
  function beginWhiteSpace (parser, c) {
    if (c === '<') {
      parser.state = S.OPEN_WAKA
      parser.startTagPosition = parser.position
      return
    }

    if (isWhitespace(c)) {
      // leading whitespace: nothing to record, state is left untouched.
      return
    }

    // have to process this as a text node.
    // weird, but happens.
    strictFail(parser, 'Non-whitespace before first tag.')
    parser.textNode = c
    parser.state = S.TEXT
  }
  // Bounds-checked character access: yields the character of `chunk` at
  // index `i`, or '' once `i` is not below `chunk.length`.
  function charAt (chunk, i) {
    return i < chunk.length ? chunk.charAt(i) : ''
  }
  /**
   * Feed one chunk of input to the parser (invoked with the parser as `this`).
   *
   * The chunk is consumed one character at a time by a state machine keyed on
   * `parser.state`; events are reported through `emitNode` and recoverable
   * problems through `strictFail`.
   *
   * @param {string|Object|null} chunk - text to parse. Objects are converted
   *   with `toString()`; `null` hands the parser to `end()`.
   * @returns {Object} the parser, or the result of `error()` / `end()` on the
   *   early-exit paths.
   * @throws the previously stored `this.error`, if any; an `Error` when
   *   `parser.state` is not a known state.
   */
  function write (chunk) {
    var parser = this
    if (this.error) {
      throw this.error
    }
    if (parser.closed) {
      return error(parser,
        'Cannot write after close. Assign an onready handler.')
    }
    if (chunk === null) {
      return end(parser)
    }
    if (typeof chunk === 'object') {
      chunk = chunk.toString()
    }
    var i = 0
    var c = ''
    while (true) {
      c = charAt(chunk, i++)
      parser.c = c

      // charAt yields '' past the end of the chunk.
      if (!c) {
        break
      }

      if (parser.trackPosition) {
        parser.position++
        if (c === '\n') {
          parser.line++
          parser.column = 0
        } else {
          parser.column++
        }
      }

      switch (parser.state) {
        case S.BEGIN:
          parser.state = S.BEGIN_WHITESPACE
          // a leading byte-order mark is skipped.
          if (c === '\uFEFF') {
            continue
          }
          beginWhiteSpace(parser, c)
          continue

        case S.BEGIN_WHITESPACE:
          beginWhiteSpace(parser, c)
          continue

        case S.TEXT:
          if (parser.sawRoot && !parser.closedRoot) {
            // Inside the root element: scan ahead to the next '<' or '&'
            // (or the end of the chunk) and append the whole run at once.
            var starti = i - 1
            while (c && c !== '<' && c !== '&') {
              c = charAt(chunk, i++)
              if (c && parser.trackPosition) {
                parser.position++
                if (c === '\n') {
                  parser.line++
                  parser.column = 0
                } else {
                  parser.column++
                }
              }
            }
            parser.textNode += chunk.substring(starti, i - 1)
          }
          if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
            parser.state = S.OPEN_WAKA
            parser.startTagPosition = parser.position
          } else {
            if (!isWhitespace(c) && (!parser.sawRoot || parser.closedRoot)) {
              strictFail(parser, 'Text data outside of root node.')
            }
            if (c === '&') {
              parser.state = S.TEXT_ENTITY
            } else {
              parser.textNode += c
            }
          }
          continue

        case S.SCRIPT:
          // only non-strict
          if (c === '<') {
            parser.state = S.SCRIPT_ENDING
          } else {
            parser.script += c
          }
          continue

        case S.SCRIPT_ENDING:
          if (c === '/') {
            parser.state = S.CLOSE_TAG
          } else {
            parser.script += '<' + c
            parser.state = S.SCRIPT
          }
          continue

        case S.OPEN_WAKA:
          // either a /, ?, !, or text is coming next.
          if (c === '!') {
            parser.state = S.SGML_DECL
            parser.sgmlDecl = ''
          } else if (isWhitespace(c)) {
            // wait for it...
          } else if (isMatch(nameStart, c)) {
            parser.state = S.OPEN_TAG
            parser.tagName = c
          } else if (c === '/') {
            parser.state = S.CLOSE_TAG
            parser.tagName = ''
          } else if (c === '?') {
            parser.state = S.PROC_INST
            parser.procInstName = parser.procInstBody = ''
          } else {
            strictFail(parser, 'Unencoded <')
            // if there was some whitespace, then add that in.
            if (parser.startTagPosition + 1 < parser.position) {
              var pad = parser.position - parser.startTagPosition
              c = new Array(pad).join(' ') + c
            }
            parser.textNode += '<' + c
            parser.state = S.TEXT
          }
          continue

        case S.SGML_DECL:
          if (parser.sgmlDecl + c === '--') {
            parser.state = S.COMMENT
            parser.comment = ''
            parser.sgmlDecl = ''
            continue
          }

          if (parser.doctype && parser.doctype !== true && parser.sgmlDecl) {
            // a doctype is still being collected: fold the declaration back
            // into its text.
            parser.state = S.DOCTYPE_DTD
            parser.doctype += '<!' + parser.sgmlDecl + c
            parser.sgmlDecl = ''
          } else if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
            emitNode(parser, 'onopencdata')
            parser.state = S.CDATA
            parser.sgmlDecl = ''
            parser.cdata = ''
          } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
            parser.state = S.DOCTYPE
            if (parser.doctype || parser.sawRoot) {
              strictFail(parser,
                'Inappropriately located doctype declaration')
            }
            parser.doctype = ''
            parser.sgmlDecl = ''
          } else if (c === '>') {
            emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
            parser.sgmlDecl = ''
            parser.state = S.TEXT
          } else if (isQuote(c)) {
            parser.state = S.SGML_DECL_QUOTED
            // remember which quote opened the literal so SGML_DECL_QUOTED
            // can find its end (same as the DOCTYPE quoted states).
            parser.q = c
            parser.sgmlDecl += c
          } else {
            parser.sgmlDecl += c
          }
          continue

        case S.SGML_DECL_QUOTED:
          if (c === parser.q) {
            parser.state = S.SGML_DECL
            parser.q = ''
          }
          parser.sgmlDecl += c
          continue

        case S.DOCTYPE:
          if (c === '>') {
            parser.state = S.TEXT
            emitNode(parser, 'ondoctype', parser.doctype)
            parser.doctype = true // just remember that we saw it.
          } else {
            parser.doctype += c
            if (c === '[') {
              parser.state = S.DOCTYPE_DTD
            } else if (isQuote(c)) {
              parser.state = S.DOCTYPE_QUOTED
              parser.q = c
            }
          }
          continue

        case S.DOCTYPE_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.q = ''
            parser.state = S.DOCTYPE
          }
          continue

        case S.DOCTYPE_DTD:
          if (c === ']') {
            parser.doctype += c
            parser.state = S.DOCTYPE
          } else if (c === '<') {
            parser.state = S.OPEN_WAKA
            parser.startTagPosition = parser.position
          } else if (isQuote(c)) {
            parser.doctype += c
            parser.state = S.DOCTYPE_DTD_QUOTED
            parser.q = c
          } else {
            parser.doctype += c
          }
          continue

        case S.DOCTYPE_DTD_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.state = S.DOCTYPE_DTD
            parser.q = ''
          }
          continue

        case S.COMMENT:
          if (c === '-') {
            parser.state = S.COMMENT_ENDING
          } else {
            parser.comment += c
          }
          continue

        case S.COMMENT_ENDING:
          if (c === '-') {
            parser.state = S.COMMENT_ENDED
            parser.comment = textopts(parser.opt, parser.comment)
            if (parser.comment) {
              emitNode(parser, 'oncomment', parser.comment)
            }
            parser.comment = ''
          } else {
            parser.comment += '-' + c
            parser.state = S.COMMENT
          }
          continue

        case S.COMMENT_ENDED:
          if (c !== '>') {
            strictFail(parser, 'Malformed comment')
            // allow <!-- blah -- bloo --> in non-strict mode,
            // which is a comment of " blah -- bloo "
            parser.comment += '--' + c
            parser.state = S.COMMENT
          } else if (parser.doctype && parser.doctype !== true) {
            parser.state = S.DOCTYPE_DTD
          } else {
            parser.state = S.TEXT
          }
          continue

        case S.CDATA:
          if (c === ']') {
            parser.state = S.CDATA_ENDING
          } else {
            parser.cdata += c
          }
          continue

        case S.CDATA_ENDING:
          if (c === ']') {
            parser.state = S.CDATA_ENDING_2
          } else {
            parser.cdata += ']' + c
            parser.state = S.CDATA
          }
          continue

        case S.CDATA_ENDING_2:
          if (c === '>') {
            if (parser.cdata) {
              emitNode(parser, 'oncdata', parser.cdata)
            }
            emitNode(parser, 'onclosecdata')
            parser.cdata = ''
            parser.state = S.TEXT
          } else if (c === ']') {
            // "]]]": the oldest bracket is data, the last two may still
            // start the terminator.
            parser.cdata += ']'
          } else {
            parser.cdata += ']]' + c
            parser.state = S.CDATA
          }
          continue

        case S.PROC_INST:
          if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else if (isWhitespace(c)) {
            parser.state = S.PROC_INST_BODY
          } else {
            parser.procInstName += c
          }
          continue

        case S.PROC_INST_BODY:
          if (!parser.procInstBody && isWhitespace(c)) {
            // skip whitespace between the name and the body.
            continue
          } else if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else {
            parser.procInstBody += c
          }
          continue

        case S.PROC_INST_ENDING:
          if (c === '>') {
            emitNode(parser, 'onprocessinginstruction', {
              name: parser.procInstName,
              body: parser.procInstBody
            })
            parser.procInstName = parser.procInstBody = ''
            parser.state = S.TEXT
          } else {
            parser.procInstBody += '?' + c
            parser.state = S.PROC_INST_BODY
          }
          continue

        case S.OPEN_TAG:
          if (isMatch(nameBody, c)) {
            parser.tagName += c
          } else {
            newTag(parser)
            if (c === '>') {
              openTag(parser)
            } else if (c === '/') {
              parser.state = S.OPEN_TAG_SLASH
            } else {
              if (!isWhitespace(c)) {
                strictFail(parser, 'Invalid character in tag name')
              }
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.OPEN_TAG_SLASH:
          if (c === '>') {
            openTag(parser, true)
            closeTag(parser)
          } else {
            strictFail(parser, 'Forward-slash in opening tag not followed by >')
            parser.state = S.ATTRIB
          }
          continue

        case S.ATTRIB:
          // haven't read the attribute name yet.
          if (isWhitespace(c)) {
            continue
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (isMatch(nameStart, c)) {
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (c === '>') {
            strictFail(parser, 'Attribute without value')
            // a value-less attribute takes its own name as value.
            parser.attribValue = parser.attribName
            attrib(parser)
            openTag(parser)
          } else if (isWhitespace(c)) {
            parser.state = S.ATTRIB_NAME_SAW_WHITE
          } else if (isMatch(nameBody, c)) {
            parser.attribName += c
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME_SAW_WHITE:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (isWhitespace(c)) {
            continue
          } else {
            strictFail(parser, 'Attribute without value')
            parser.tag.attributes[parser.attribName] = ''
            parser.attribValue = ''
            emitNode(parser, 'onattribute', {
              name: parser.attribName,
              value: ''
            })
            parser.attribName = ''
            if (c === '>') {
              openTag(parser)
            } else if (isMatch(nameStart, c)) {
              parser.attribName = c
              parser.state = S.ATTRIB_NAME
            } else {
              strictFail(parser, 'Invalid attribute name')
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.ATTRIB_VALUE:
          if (isWhitespace(c)) {
            continue
          } else if (isQuote(c)) {
            parser.q = c
            parser.state = S.ATTRIB_VALUE_QUOTED
          } else {
            if (!parser.opt.unquotedAttributeValues) {
              error(parser, 'Unquoted attribute value')
            }
            parser.state = S.ATTRIB_VALUE_UNQUOTED
            parser.attribValue = c
          }
          continue

        case S.ATTRIB_VALUE_QUOTED:
          if (c !== parser.q) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_Q
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          parser.q = ''
          parser.state = S.ATTRIB_VALUE_CLOSED
          continue

        case S.ATTRIB_VALUE_CLOSED:
          if (isWhitespace(c)) {
            parser.state = S.ATTRIB
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (isMatch(nameStart, c)) {
            strictFail(parser, 'No whitespace between attributes')
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_VALUE_UNQUOTED:
          if (!isAttribEnd(c)) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_U
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          if (c === '>') {
            openTag(parser)
          } else {
            parser.state = S.ATTRIB
          }
          continue

        case S.CLOSE_TAG:
          if (!parser.tagName) {
            if (isWhitespace(c)) {
              continue
            } else if (notMatch(nameStart, c)) {
              if (parser.script) {
                // not a real close tag: put the consumed "</" back into
                // the script text.
                parser.script += '</' + c
                parser.state = S.SCRIPT
              } else {
                strictFail(parser, 'Invalid tagname in closing tag.')
              }
            } else {
              parser.tagName = c
            }
          } else if (c === '>') {
            closeTag(parser)
          } else if (isMatch(nameBody, c)) {
            parser.tagName += c
          } else if (parser.script) {
            parser.script += '</' + parser.tagName
            parser.tagName = ''
            parser.state = S.SCRIPT
          } else {
            if (!isWhitespace(c)) {
              strictFail(parser, 'Invalid tagname in closing tag')
            }
            parser.state = S.CLOSE_TAG_SAW_WHITE
          }
          continue

        case S.CLOSE_TAG_SAW_WHITE:
          if (isWhitespace(c)) {
            continue
          }
          if (c === '>') {
            closeTag(parser)
          } else {
            strictFail(parser, 'Invalid characters in closing tag')
          }
          continue

        case S.TEXT_ENTITY:
        case S.ATTRIB_VALUE_ENTITY_Q:
        case S.ATTRIB_VALUE_ENTITY_U:
          // The three entity states share one implementation; they differ
          // only in the state to resume and the buffer receiving the text.
          var returnState
          var buffer
          switch (parser.state) {
            case S.TEXT_ENTITY:
              returnState = S.TEXT
              buffer = 'textNode'
              break

            case S.ATTRIB_VALUE_ENTITY_Q:
              returnState = S.ATTRIB_VALUE_QUOTED
              buffer = 'attribValue'
              break

            case S.ATTRIB_VALUE_ENTITY_U:
              returnState = S.ATTRIB_VALUE_UNQUOTED
              buffer = 'attribValue'
              break
          }

          if (c === ';') {
            var parsedEntity = parseEntity(parser)
            if (parser.opt.unparsedEntities && !Object.values(sax.XML_ENTITIES).includes(parsedEntity)) {
              // the replacement text is itself run through the parser
              // instead of being appended verbatim.
              parser.entity = ''
              parser.state = returnState
              parser.write(parsedEntity)
            } else {
              parser[buffer] += parsedEntity
              parser.entity = ''
              parser.state = returnState
            }
          } else if (isMatch(parser.entity.length ? entityBody : entityStart, c)) {
            parser.entity += c
          } else {
            strictFail(parser, 'Invalid character in entity name')
            parser[buffer] += '&' + parser.entity + c
            parser.entity = ''
            parser.state = returnState
          }

          continue

        default: /* istanbul ignore next */ {
          // Error takes the message as its first argument.
          throw new Error('Unknown state: ' + parser.state)
        }
      }
    } // while

    if (parser.position >= parser.bufferCheckPosition) {
      checkBufferLength(parser)
    }
    return parser
  }
  /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
  /* istanbul ignore next */
  if (!String.fromCodePoint) {
    (function () {
      // Installed only when the runtime has no String.fromCodePoint.
      var charsFromUnits = String.fromCharCode
      var roundDown = Math.floor

      // Converts every argument to a number, validates it as a Unicode code
      // point and returns the string made of the corresponding UTF-16 units.
      var fromCodePoint = function () {
        // flush threshold for the buffer handed to fromCharCode.apply
        var FLUSH_LIMIT = 0x4000
        var count = arguments.length
        var units = []
        var output = ''
        var lead
        var trail

        for (var pos = 0; pos < count; pos++) {
          var cp = Number(arguments[pos])
          var isCodePoint =
            isFinite(cp) && // rejects `NaN`, `+Infinity`, `-Infinity`
            cp >= 0 &&
            cp <= 0x10FFFF &&
            roundDown(cp) === cp // must be an integer
          if (!isCodePoint) {
            throw RangeError('Invalid code point: ' + cp)
          }

          if (cp > 0xFFFF) {
            // Astral code point; split in surrogate halves
            // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
            cp -= 0x10000
            lead = (cp >> 10) + 0xD800
            trail = (cp % 0x400) + 0xDC00
            units.push(lead, trail)
          } else {
            // BMP code point
            units.push(cp)
          }

          var isLast = pos + 1 === count
          if (isLast || units.length > FLUSH_LIMIT) {
            output += charsFromUnits.apply(null, units)
            units.length = 0
          }
        }

        return output
      }

      /* istanbul ignore next */
      if (Object.defineProperty) {
        Object.defineProperty(String, 'fromCodePoint', {
          value: fromCodePoint,
          configurable: true,
          writable: true
        })
      } else {
        String.fromCodePoint = fromCodePoint
      }
    }())
  }
  1466. })(typeof exports === 'undefined' ? this.sax = {} : exports)