diff --git a/.travis.yml b/.travis.yml index 1d022d5..6dd6c69 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: node_js node_js: - - 0.12 + - node # latest stable Node.js release sudo: false diff --git a/README.md b/README.md index 61c6579..d1ab5da 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,11 @@ Citation.find("that term in section 5362(5) of title 31, United States Code.", { }] ``` +### `Citation.fromId(id, options)` + +Returns the same information as `Citation.find`, but instead of passing a textual citation, an identifier +in the format of the `id` given in the output of `Citation.find` (see above) is given. + ## HTTP API Start the API on a given port (defaults to 3000): diff --git a/browser/citation-with-walverine.js b/browser/citation-with-walverine.js index 08f531b..7004f6a 100644 --- a/browser/citation-with-walverine.js +++ b/browser/citation-with-walverine.js @@ -195,6 +195,7 @@ Citation = { // match-level info Citation._.extend(result, matchInfo); + result.type_name = Citation.types[type].name; // handle _submatch, which lets the user-level citator override the // match and index with a sub-part of the whole matched regex @@ -395,8 +396,37 @@ Citation = { return impl(array, []); } - } + }, + fromId: function(id, options) { + // Offer the id to each Citator class that has a fromId + // function. If it returns an object, then it has parsed + // the id into the reverse of the data structure given to + // a Citator 'id' method. + var type; + var citator; + var citeobj; + for (type in Citation.types) { + citator = Citation.types[type]; + if (!citator.fromId) continue; + citeobj = citator.fromId(id); + if (citeobj) break; + } + if (!citeobj) + return; // no parse found + + // Construct the resulting citation object. + var cite = { + type: type, + type_name: citator.name, + citation: citator.canonical ? citator.canonical(citeobj) : null, + }; + cite[type] = citeobj; + cite[type].id = citator.id(citeobj); + if (options && options.links) + cite[type].links = Citation.getLinksForCitation(type, cite[type]); + return cite; + } }; @@ -426,7 +456,6 @@ if (typeof(require) !== "undefined") { Citation.links.govtrack = require("./links/govtrack"); Citation.links.gpo = require("./links/gpo"); Citation.links.house = require("./links/house"); - Citation.links.legislink = require("./links/legislink"); Citation.links.libraryofcongress = require("./links/libraryofcongress"); Citation.links.nara = require("./links/nara"); Citation.links.vadecoded = require("./links/vadecoded"); @@ -440,10 +469,12 @@ return Citation; })(); -},{"./citations/cfr":2,"./citations/dc_code":3,"./citations/dc_law":4,"./citations/dc_register":5,"./citations/dc_stat":6,"./citations/fedreg":7,"./citations/law":9,"./citations/reporter":10,"./citations/stat":11,"./citations/usc":12,"./citations/usconst":13,"./citations/va_code":14,"./filters/lines":16,"./filters/xpath_html":17,"./filters/xpath_xml":18,"./links/cornell_lii":19,"./links/courtlistener":20,"./links/dc_council":21,"./links/govtrack":22,"./links/gpo":23,"./links/house":24,"./links/legislink":25,"./links/libraryofcongress":26,"./links/nara":27,"./links/vadecoded":28}],2:[function(require,module,exports){ +},{"./citations/cfr":2,"./citations/dc_code":3,"./citations/dc_law":4,"./citations/dc_register":5,"./citations/dc_stat":6,"./citations/fedreg":7,"./citations/law":9,"./citations/reporter":10,"./citations/stat":11,"./citations/usc":12,"./citations/usconst":13,"./citations/va_code":14,"./filters/lines":16,"./filters/xpath_html":17,"./filters/xpath_xml":18,"./links/cornell_lii":19,"./links/courtlistener":20,"./links/dc_council":21,"./links/govtrack":22,"./links/gpo":23,"./links/house":24,"./links/libraryofcongress":25,"./links/nara":26,"./links/vadecoded":27}],2:[function(require,module,exports){ module.exports = { type: "regex", + name: "U.S. Code of Federal Regulations", + id: function(data) { return ["cfr", data.title, (data.section || data.part)] .concat(data.subsections || []) @@ -543,6 +574,8 @@ var base_regex = module.exports = { type: "regex", + name: "Code of the District of Columbia", + // normalize all cites to an ID, with and without subsections id: function(cite) { return ["dc-code", cite.title, cite.section] @@ -640,6 +673,8 @@ function split_subsections(match) { module.exports = { type: "regex", + name: "District of Columbia Law", + id: function(cite) { return ["dc-law", cite.period, cite.number].join("/"); }, @@ -675,6 +710,8 @@ module.exports = { module.exports = { type: "regex", + name: "District of Columbia Register", + id: function(cite) { return ["dc-register", cite.volume, cite.page].join("/"); }, @@ -701,6 +738,8 @@ module.exports = { module.exports = { type: "regex", + name: "D.C. Statutes at Large", + // normalize all cites to an ID id: function(cite) { return ["dcstat", cite.volume, cite.page].join("/") @@ -728,6 +767,8 @@ module.exports = { module.exports = { type: "regex", + name: "Federal Register", + // normalize all cites to an ID id: function(cite) { return ["fedreg", cite.volume, cite.page].join("/") @@ -772,16 +813,30 @@ module.exports = { }); } }; -},{"walverine":70}],9:[function(require,module,exports){ +},{"walverine":69}],9:[function(require,module,exports){ + module.exports = { type: "regex", + name: "U.S. Law", + id: function(cite) { return ["us-law", cite.type, cite.congress, cite.number] .concat(cite.sections || []) .join("/"); }, + fromId: function(id) { + var parts = id.split("/"); + if (parts[0] != "us-law") return; + return { + type: parts[1], + congress: parts[2], + number: parts[3], + sections: parts.slice(4) || undefined + }; + }, + canonical: function(cite) { if (!cite.sections || cite.sections.length == 0) // this style matches GPO at http://www.gpo.gov/fdsys/browse/collection.action?collectionCode=PLAW&browsePath=112&isCollapsed=false&leafLevelBrowse=false&ycord=0 @@ -853,6 +908,8 @@ module.exports = { module.exports = { type: "regex", + name: "Case Law", + // normalize all cites to an ID id: function(cite) { return ["reporter", cite.volume, cite.reporter, cite.page].join("/") @@ -884,11 +941,22 @@ module.exports = { module.exports = { type: "regex", + name: "U.S. Statutes at Large", + // normalize all cites to an ID id: function(cite) { return ["stat", cite.volume, cite.page].join("/") }, + fromId: function(id) { + var parts = id.split("/"); + if (parts[0] != "stat") return; + return { + volume: parts[1], + page: parts[2] + }; + }, + canonical: function(cite) { return cite.volume + " Stat. " + cite.page; }, @@ -916,6 +984,8 @@ module.exports = { module.exports = { type: "regex", + name: "United States Code", + id: function(cite) { return ["usc", cite.title, cite.section] .concat(cite.subsections || []) @@ -1081,6 +1151,8 @@ var part_types = { module.exports = { type: "regex", + name: "United States Constitution", + // normalize all cites to an ID id: function(cite) { return ["usconst"].concat((cite.part || []).map(function(part) { @@ -1140,10 +1212,12 @@ function process_part(part) { } -},{"nomar":35}],14:[function(require,module,exports){ +},{"nomar":34}],14:[function(require,module,exports){ module.exports = { type: "regex", + name: "Code of Virginia", + id: function(data) { return ["va-code", data.title, data.section].join("/"); }, @@ -1283,7 +1357,7 @@ module.exports = { }; -},{"parse5":41}],18:[function(require,module,exports){ +},{"parse5":40}],18:[function(require,module,exports){ var DOMParser = require("xmldom").DOMParser; function recurse(node, partialXpath, extract) { @@ -1350,7 +1424,7 @@ module.exports = { }; -},{"xmldom":71}],19:[function(require,module,exports){ +},{"xmldom":70}],19:[function(require,module,exports){ module.exports = { id: "cornell_lii", @@ -1369,7 +1443,7 @@ module.exports = { var subsections = (cite.subsections.slice() || []); // clone if (subsections.length && subsections[subsections.length-1] == "et-seq") subsections.pop(); // don't include eq-seq in a link return { - landing: "https://www.law.cornell.edu/uscode/text/" + (title + (is_appendix ? "a" : "")) + html: "https://www.law.cornell.edu/uscode/text/" + (title + (is_appendix ? "a" : "")) + "/" + cite.section + (subsections.length ? ("#" + subsections.join("_")) : ""), note: "Link is to most current version of the US Code, as available at law.cornell.edu." @@ -1424,12 +1498,12 @@ module.exports = { citations: { dc_law: function(cite) { return { - landing: "https://beta.code.dccouncil.us/dc/council/laws/" + cite.period + "-" + cite.number + ".html" + landing: "https://code.dccouncil.us/dc/council/laws/" + cite.period + "-" + cite.number + ".html" }; }, dc_code: function(cite) { return { - landing: "https://beta.code.dccouncil.us/dc/council/code/sections/" + cite.title + "-" + cite.section + ".html" + landing: "https://code.dccouncil.us/dc/council/code/sections/" + cite.title + "-" + cite.section + ".html" }; } } @@ -1461,85 +1535,69 @@ module.exports = { name: "U.S. Government Publishing Office", abbreviation: "US GPO", - link: "https://www.gpo.gov", + link: "https://govinfo.gov", authoritative: true, citations: { cfr: function(cite) { - var gpo_url = "http://api.fdsys.gov/link?collection=cfr&year=mostrecent" - + "&titlenum=" + cite.title + "&partnum=" + cite.part; + var usgpo_url = "https://www.govinfo.gov/link/cfr/" + cite.title + "/" + cite.part; if (cite.section) // section, if present, is of the form PART.SECTION, and for the GPO url only include the (inner) section - gpo_url += "§ionnum=" + cite.section.substring(cite.part.length+1) + ""; + usgpo_url += "?sectionnum=" + cite.section.substring(cite.part.length+1) + ""; + else + usgpo_url += "?"; return { - pdf: gpo_url + landing: usgpo_url + "&link-type=details", + pdf: usgpo_url + "&link-type=pdf", + mods: usgpo_url + "&link-type=mods" }; }, fedreg: function(cite) { + var usgpo_url = "https://www.govinfo.gov/link/fr/" + cite.volume + "/" + cite.page; return { - pdf: "http://api.fdsys.gov/link?collection=fr&volume=" + cite.volume + "&page=" + cite.page + landing: usgpo_url + "?link-type=details", + pdf: usgpo_url + "?link-type=pdf", + mods: usgpo_url + "?link-type=mods" }; }, law: function(cite) { if (cite.congress < 104) return null; + var usgpo_url = "https://www.govinfo.gov/link/plaw/" + cite.congress + "/" + cite.type + "/" + cite.number; return { - pdf: "http://api.fdsys.gov/link?collection=plaw&congress=" + cite.congress + "&lawtype=" + cite.type + "&lawnum=" + cite.number, - mods: "http://api.fdsys.gov/link?collection=plaw&congress=" + cite.congress + "&lawtype=" + cite.type + "&lawnum=" + cite.number + "&link-type=mods" + landing: usgpo_url + "?link-type=details", + pdf: usgpo_url + "?link-type=pdf", + mods: usgpo_url + "?link-type=mods" }; }, stat: function(cite) { if (cite.volume < 65 || cite.volume > 125) return null; - var usgpo_url = "http://api.fdsys.gov/link?collection=statute&volume=" + cite.volume + "&page=" + cite.page; + var usgpo_url = "https://www.govinfo.gov/link/statute/" + cite.volume + "/" + cite.page; return { - pdf: usgpo_url, - mods: usgpo_url + "&link-type=mods" + landing: usgpo_url + "?link-type=details", + pdf: usgpo_url + "?link-type=pdf", + mods: usgpo_url + "?link-type=mods" }; }, usc: function(cite) { var title = cite.title.replace(/-app$/, ''); var is_appendix = cite.title.indexOf("-app") != -1; - - var edition; - for (var i = 0; i < us_code_editions.length; i++) { - if (us_code_editions[i].titles == null || us_code_editions[i].titles.indexOf(title) >= 0) { - // This edition contains the title. - edition = us_code_editions[i] - break; - } - } - - if (!edition) return null; - - var url = "http://api.fdsys.gov/link?collection=uscode&year=" - + edition.edition + "&title=" + title - + "§ion=" + cite.section - + "&type=" + (!is_appendix ? "usc" : "uscappendix"); - + var usgpo_url = "https://www.govinfo.gov/link/uscode/" + title + "/" + cite.section + + "?type=" + (!is_appendix ? "usc" : "uscappendix"); return { - pdf: url, - html: url + "&link-type=html", - landing: url + "&link-type=contentdetail", - note: edition.edition + " edition." + ((cite.subsections && cite.subsections.length) ? " Sub-section citation is not reflected in the link." : "") + landing: usgpo_url + "&link-type=details", + pdf: usgpo_url + "&link-type=pdf", + mods: usgpo_url + "&link-type=mods" }; } } } -// Map published editions of the US Code to the titles they contain. Not all -// published editions have the full US Code. Some are updates. This is per -// http://www.gpo.gov/fdsys/browse/collectionUScode.action?collectionCode=USCODE. -// Most recent first. -var us_code_editions = [ - { edition: '2014', titles: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15'] }, - { edition: '2013', titles: null }, // all titles available in this edition -]; - },{}],24:[function(require,module,exports){ module.exports = { id: "house", @@ -1556,52 +1614,13 @@ module.exports = { var is_appendix = cite.title.indexOf("-app") != -1; return { note: "Link is to most current version of the US Code.", - html: "http://uscode.house.gov/view.xhtml?req=(" + encodeURIComponent("title:" + (title + (is_appendix ? "a" : "")) + " section:" + cite.section + " edition:prelim") + ")" + html: "https://uscode.house.gov/view.xhtml?req=(" + encodeURIComponent("title:" + (title + (is_appendix ? "a" : "")) + " section:" + cite.section + " edition:prelim") + ")" } } } } },{}],25:[function(require,module,exports){ -module.exports = { - id: "legislink", - - name: "Legislink", - abbreviation: "Legislink", - link: "http://legislink.org/us", - - authoritative: false, - - citations: { - stat: function(cite) { - var legislink_url = "http://legislink.org/us/stat-" + cite.volume + "-" + cite.page; - - // the format differs depending on the volume, and where it is a simple - // redirect to US GPO (and not hosted content) then we can note that. - if (cite.volume >= 125) { - // hosted content is a mirror of US GPO Public and Private Laws in text format - return { - text: legislink_url - }; - - } else if (cite.volume >= 65) { - // redirect to US GPO (so same content as the usgpo link) - return { - pdf: legislink_url, - note: "Link redirects to US GPO Statutes at Large." - }; - - } else { - // original content - return { - pdf: legislink_url - }; - } - } - } -} - -},{}],26:[function(require,module,exports){ module.exports = { id: "libraryofcongress", @@ -1612,6 +1631,7 @@ module.exports = { authoritative: true, citations: { + /* stat: function(cite) { // LoC organizes the volumes by Congress and, for some Congresses, by chapter // number. This is well and good but awful for direct linking of citations @@ -1624,6 +1644,7 @@ module.exports = { note: "Link is to LoC's general Statutes at Large landing page." }; }, + */ usconst: function(cite) { return { @@ -1683,7 +1704,7 @@ function get_conan_link(cite) { } return null; } -},{}],27:[function(require,module,exports){ +},{}],26:[function(require,module,exports){ module.exports = { id: "nara", @@ -1696,13 +1717,13 @@ module.exports = { citations: { usconst: function(cite) { return { - landing: "http://www.archives.gov/exhibits/charters/constitution_transcript.html" + html: "https://www.archives.gov/founding-docs/constitution-transcript" } } } } -},{}],28:[function(require,module,exports){ +},{}],27:[function(require,module,exports){ module.exports = { id: "vadecoded", @@ -1721,7 +1742,7 @@ module.exports = { } }; -},{}],29:[function(require,module,exports){ +},{}],28:[function(require,module,exports){ var lookup = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'; ;(function (exports) { @@ -1847,7 +1868,7 @@ var lookup = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'; exports.fromByteArray = uint8ToBase64 }(typeof exports === 'undefined' ? (this.base64js = {}) : exports)) -},{}],30:[function(require,module,exports){ +},{}],29:[function(require,module,exports){ // Copyright Joyent, Inc. and other Node contributors. // // Permission is hereby granted, free of charge, to any person obtaining a @@ -2040,7 +2061,7 @@ function base64DetectIncompleteChar(buffer) { return incomplete; } -},{"buffer":31}],31:[function(require,module,exports){ +},{"buffer":30}],30:[function(require,module,exports){ /*! * The buffer module from node.js, for the browser. * @@ -3151,7 +3172,7 @@ function assert (test, message) { if (!test) throw new Error(message || 'Failed assertion') } -},{"base64-js":29,"ieee754":33}],32:[function(require,module,exports){ +},{"base64-js":28,"ieee754":32}],31:[function(require,module,exports){ // Copyright Joyent, Inc. and other Node contributors. // // Permission is hereby granted, free of charge, to any person obtaining a @@ -3454,10 +3475,10 @@ function isUndefined(arg) { return arg === void 0; } -},{}],33:[function(require,module,exports){ +},{}],32:[function(require,module,exports){ exports.read = function (buffer, offset, isLE, mLen, nBytes) { var e, m - var eLen = nBytes * 8 - mLen - 1 + var eLen = (nBytes * 8) - mLen - 1 var eMax = (1 << eLen) - 1 var eBias = eMax >> 1 var nBits = -7 @@ -3470,12 +3491,12 @@ exports.read = function (buffer, offset, isLE, mLen, nBytes) { e = s & ((1 << (-nBits)) - 1) s >>= (-nBits) nBits += eLen - for (; nBits > 0; e = e * 256 + buffer[offset + i], i += d, nBits -= 8) {} + for (; nBits > 0; e = (e * 256) + buffer[offset + i], i += d, nBits -= 8) {} m = e & ((1 << (-nBits)) - 1) e >>= (-nBits) nBits += mLen - for (; nBits > 0; m = m * 256 + buffer[offset + i], i += d, nBits -= 8) {} + for (; nBits > 0; m = (m * 256) + buffer[offset + i], i += d, nBits -= 8) {} if (e === 0) { e = 1 - eBias @@ -3490,7 +3511,7 @@ exports.read = function (buffer, offset, isLE, mLen, nBytes) { exports.write = function (buffer, value, offset, isLE, mLen, nBytes) { var e, m, c - var eLen = nBytes * 8 - mLen - 1 + var eLen = (nBytes * 8) - mLen - 1 var eMax = (1 << eLen) - 1 var eBias = eMax >> 1 var rt = (mLen === 23 ? Math.pow(2, -24) - Math.pow(2, -77) : 0) @@ -3523,7 +3544,7 @@ exports.write = function (buffer, value, offset, isLE, mLen, nBytes) { m = 0 e = eMax } else if (e + eBias >= 1) { - m = (value * c - 1) * Math.pow(2, mLen) + m = ((value * c) - 1) * Math.pow(2, mLen) e = e + eBias } else { m = value * Math.pow(2, eBias - 1) * Math.pow(2, mLen) @@ -3540,32 +3561,36 @@ exports.write = function (buffer, value, offset, isLE, mLen, nBytes) { buffer[offset + i - d] |= s * 128 } -},{}],34:[function(require,module,exports){ +},{}],33:[function(require,module,exports){ if (typeof Object.create === 'function') { // implementation from standard node.js 'util' module module.exports = function inherits(ctor, superCtor) { - ctor.super_ = superCtor - ctor.prototype = Object.create(superCtor.prototype, { - constructor: { - value: ctor, - enumerable: false, - writable: true, - configurable: true - } - }); + if (superCtor) { + ctor.super_ = superCtor + ctor.prototype = Object.create(superCtor.prototype, { + constructor: { + value: ctor, + enumerable: false, + writable: true, + configurable: true + } + }) + } }; } else { // old school shim for old browsers module.exports = function inherits(ctor, superCtor) { - ctor.super_ = superCtor - var TempCtor = function () {} - TempCtor.prototype = superCtor.prototype - ctor.prototype = new TempCtor() - ctor.prototype.constructor = ctor + if (superCtor) { + ctor.super_ = superCtor + var TempCtor = function () {} + TempCtor.prototype = superCtor.prototype + ctor.prototype = new TempCtor() + ctor.prototype.constructor = ctor + } } } -},{}],35:[function(require,module,exports){ +},{}],34:[function(require,module,exports){ 'use strict'; var SYMBOLS = { @@ -3681,7 +3706,7 @@ module.exports = function (input) { return convert(input); }; -},{}],36:[function(require,module,exports){ +},{}],35:[function(require,module,exports){ 'use strict'; //Const @@ -3805,7 +3830,7 @@ exports.isQuirks = function (name, publicId, systemId) { exports.serializeContent = function (name, publicId, systemId) { var str = '!DOCTYPE '; - if (name) + if(name) str += name; if (publicId !== null) @@ -3820,269 +3845,259 @@ exports.serializeContent = function (name, publicId, systemId) { return str; }; -},{}],37:[function(require,module,exports){ -'use strict'; - -var Tokenizer = require('../tokenizer'), - HTML = require('./html'); - -//Aliases -var $ = HTML.TAG_NAMES, - NS = HTML.NAMESPACES, - ATTRS = HTML.ATTRS; - - -//MIME types -var MIME_TYPES = { - TEXT_HTML: 'text/html', - APPLICATION_XML: 'application/xhtml+xml' -}; - -//Attributes -var DEFINITION_URL_ATTR = 'definitionurl', - ADJUSTED_DEFINITION_URL_ATTR = 'definitionURL', - SVG_ATTRS_ADJUSTMENT_MAP = { - 'attributename': 'attributeName', - 'attributetype': 'attributeType', - 'basefrequency': 'baseFrequency', - 'baseprofile': 'baseProfile', - 'calcmode': 'calcMode', - 'clippathunits': 'clipPathUnits', - 'diffuseconstant': 'diffuseConstant', - 'edgemode': 'edgeMode', - 'filterunits': 'filterUnits', - 'glyphref': 'glyphRef', - 'gradienttransform': 'gradientTransform', - 'gradientunits': 'gradientUnits', - 'kernelmatrix': 'kernelMatrix', - 'kernelunitlength': 'kernelUnitLength', - 'keypoints': 'keyPoints', - 'keysplines': 'keySplines', - 'keytimes': 'keyTimes', - 'lengthadjust': 'lengthAdjust', - 'limitingconeangle': 'limitingConeAngle', - 'markerheight': 'markerHeight', - 'markerunits': 'markerUnits', - 'markerwidth': 'markerWidth', - 'maskcontentunits': 'maskContentUnits', - 'maskunits': 'maskUnits', - 'numoctaves': 'numOctaves', - 'pathlength': 'pathLength', - 'patterncontentunits': 'patternContentUnits', - 'patterntransform': 'patternTransform', - 'patternunits': 'patternUnits', - 'pointsatx': 'pointsAtX', - 'pointsaty': 'pointsAtY', - 'pointsatz': 'pointsAtZ', - 'preservealpha': 'preserveAlpha', - 'preserveaspectratio': 'preserveAspectRatio', - 'primitiveunits': 'primitiveUnits', - 'refx': 'refX', - 'refy': 'refY', - 'repeatcount': 'repeatCount', - 'repeatdur': 'repeatDur', - 'requiredextensions': 'requiredExtensions', - 'requiredfeatures': 'requiredFeatures', - 'specularconstant': 'specularConstant', - 'specularexponent': 'specularExponent', - 'spreadmethod': 'spreadMethod', - 'startoffset': 'startOffset', - 'stddeviation': 'stdDeviation', - 'stitchtiles': 'stitchTiles', - 'surfacescale': 'surfaceScale', - 'systemlanguage': 'systemLanguage', - 'tablevalues': 'tableValues', - 'targetx': 'targetX', - 'targety': 'targetY', - 'textlength': 'textLength', - 'viewbox': 'viewBox', - 'viewtarget': 'viewTarget', - 'xchannelselector': 'xChannelSelector', - 'ychannelselector': 'yChannelSelector', - 'zoomandpan': 'zoomAndPan' - }, - XML_ATTRS_ADJUSTMENT_MAP = { - 'xlink:actuate': {prefix: 'xlink', name: 'actuate', namespace: NS.XLINK}, - 'xlink:arcrole': {prefix: 'xlink', name: 'arcrole', namespace: NS.XLINK}, - 'xlink:href': {prefix: 'xlink', name: 'href', namespace: NS.XLINK}, - 'xlink:role': {prefix: 'xlink', name: 'role', namespace: NS.XLINK}, - 'xlink:show': {prefix: 'xlink', name: 'show', namespace: NS.XLINK}, - 'xlink:title': {prefix: 'xlink', name: 'title', namespace: NS.XLINK}, - 'xlink:type': {prefix: 'xlink', name: 'type', namespace: NS.XLINK}, - 'xml:base': {prefix: 'xml', name: 'base', namespace: NS.XML}, - 'xml:lang': {prefix: 'xml', name: 'lang', namespace: NS.XML}, - 'xml:space': {prefix: 'xml', name: 'space', namespace: NS.XML}, - 'xmlns': {prefix: '', name: 'xmlns', namespace: NS.XMLNS}, - 'xmlns:xlink': {prefix: 'xmlns', name: 'xlink', namespace: NS.XMLNS} - - }; - -//SVG tag names adjustment map -var SVG_TAG_NAMES_ADJUSTMENT_MAP = exports.SVG_TAG_NAMES_ADJUSTMENT_MAP = { - 'altglyph': 'altGlyph', - 'altglyphdef': 'altGlyphDef', - 'altglyphitem': 'altGlyphItem', - 'animatecolor': 'animateColor', - 'animatemotion': 'animateMotion', - 'animatetransform': 'animateTransform', - 'clippath': 'clipPath', - 'feblend': 'feBlend', - 'fecolormatrix': 'feColorMatrix', - 'fecomponenttransfer': 'feComponentTransfer', - 'fecomposite': 'feComposite', - 'feconvolvematrix': 'feConvolveMatrix', - 'fediffuselighting': 'feDiffuseLighting', - 'fedisplacementmap': 'feDisplacementMap', - 'fedistantlight': 'feDistantLight', - 'feflood': 'feFlood', - 'fefunca': 'feFuncA', - 'fefuncb': 'feFuncB', - 'fefuncg': 'feFuncG', - 'fefuncr': 'feFuncR', - 'fegaussianblur': 'feGaussianBlur', - 'feimage': 'feImage', - 'femerge': 'feMerge', - 'femergenode': 'feMergeNode', - 'femorphology': 'feMorphology', - 'feoffset': 'feOffset', - 'fepointlight': 'fePointLight', - 'fespecularlighting': 'feSpecularLighting', - 'fespotlight': 'feSpotLight', - 'fetile': 'feTile', - 'feturbulence': 'feTurbulence', - 'foreignobject': 'foreignObject', - 'glyphref': 'glyphRef', - 'lineargradient': 'linearGradient', - 'radialgradient': 'radialGradient', - 'textpath': 'textPath' -}; - -//Tags that causes exit from foreign content -var EXITS_FOREIGN_CONTENT = {}; - -EXITS_FOREIGN_CONTENT[$.B] = true; -EXITS_FOREIGN_CONTENT[$.BIG] = true; -EXITS_FOREIGN_CONTENT[$.BLOCKQUOTE] = true; -EXITS_FOREIGN_CONTENT[$.BODY] = true; -EXITS_FOREIGN_CONTENT[$.BR] = true; -EXITS_FOREIGN_CONTENT[$.CENTER] = true; -EXITS_FOREIGN_CONTENT[$.CODE] = true; -EXITS_FOREIGN_CONTENT[$.DD] = true; -EXITS_FOREIGN_CONTENT[$.DIV] = true; -EXITS_FOREIGN_CONTENT[$.DL] = true; -EXITS_FOREIGN_CONTENT[$.DT] = true; -EXITS_FOREIGN_CONTENT[$.EM] = true; -EXITS_FOREIGN_CONTENT[$.EMBED] = true; -EXITS_FOREIGN_CONTENT[$.H1] = true; -EXITS_FOREIGN_CONTENT[$.H2] = true; -EXITS_FOREIGN_CONTENT[$.H3] = true; -EXITS_FOREIGN_CONTENT[$.H4] = true; -EXITS_FOREIGN_CONTENT[$.H5] = true; -EXITS_FOREIGN_CONTENT[$.H6] = true; -EXITS_FOREIGN_CONTENT[$.HEAD] = true; -EXITS_FOREIGN_CONTENT[$.HR] = true; -EXITS_FOREIGN_CONTENT[$.I] = true; -EXITS_FOREIGN_CONTENT[$.IMG] = true; -EXITS_FOREIGN_CONTENT[$.LI] = true; -EXITS_FOREIGN_CONTENT[$.LISTING] = true; -EXITS_FOREIGN_CONTENT[$.MENU] = true; -EXITS_FOREIGN_CONTENT[$.META] = true; -EXITS_FOREIGN_CONTENT[$.NOBR] = true; -EXITS_FOREIGN_CONTENT[$.OL] = true; -EXITS_FOREIGN_CONTENT[$.P] = true; -EXITS_FOREIGN_CONTENT[$.PRE] = true; -EXITS_FOREIGN_CONTENT[$.RUBY] = true; -EXITS_FOREIGN_CONTENT[$.S] = true; -EXITS_FOREIGN_CONTENT[$.SMALL] = true; -EXITS_FOREIGN_CONTENT[$.SPAN] = true; -EXITS_FOREIGN_CONTENT[$.STRONG] = true; -EXITS_FOREIGN_CONTENT[$.STRIKE] = true; -EXITS_FOREIGN_CONTENT[$.SUB] = true; -EXITS_FOREIGN_CONTENT[$.SUP] = true; -EXITS_FOREIGN_CONTENT[$.TABLE] = true; -EXITS_FOREIGN_CONTENT[$.TT] = true; -EXITS_FOREIGN_CONTENT[$.U] = true; -EXITS_FOREIGN_CONTENT[$.UL] = true; -EXITS_FOREIGN_CONTENT[$.VAR] = true; - -//Check exit from foreign content -exports.causesExit = function (startTagToken) { - var tn = startTagToken.tagName; - var isFontWithAttrs = tn === $.FONT && (Tokenizer.getTokenAttr(startTagToken, ATTRS.COLOR) !== null || - Tokenizer.getTokenAttr(startTagToken, ATTRS.SIZE) !== null || - Tokenizer.getTokenAttr(startTagToken, ATTRS.FACE) !== null); - - return isFontWithAttrs ? true : EXITS_FOREIGN_CONTENT[tn]; -}; - -//Token adjustments -exports.adjustTokenMathMLAttrs = function (token) { - for (var i = 0; i < token.attrs.length; i++) { - if (token.attrs[i].name === DEFINITION_URL_ATTR) { - token.attrs[i].name = ADJUSTED_DEFINITION_URL_ATTR; - break; - } - } -}; - -exports.adjustTokenSVGAttrs = function (token) { - for (var i = 0; i < token.attrs.length; i++) { - var adjustedAttrName = SVG_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name]; - - if (adjustedAttrName) - token.attrs[i].name = adjustedAttrName; - } -}; - -exports.adjustTokenXMLAttrs = function (token) { - for (var i = 0; i < token.attrs.length; i++) { - var adjustedAttrEntry = XML_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name]; - - if (adjustedAttrEntry) { - token.attrs[i].prefix = adjustedAttrEntry.prefix; - token.attrs[i].name = adjustedAttrEntry.name; - token.attrs[i].namespace = adjustedAttrEntry.namespace; - } - } -}; - -exports.adjustTokenSVGTagName = function (token) { - var adjustedTagName = SVG_TAG_NAMES_ADJUSTMENT_MAP[token.tagName]; - - if (adjustedTagName) - token.tagName = adjustedTagName; -}; - -//Integration points -function isMathMLTextIntegrationPoint(tn, ns) { - return ns === NS.MATHML && (tn === $.MI || tn === $.MO || tn === $.MN || tn === $.MS || tn === $.MTEXT); -} - -function isHtmlIntegrationPoint(tn, ns, attrs) { - if (ns === NS.MATHML && tn === $.ANNOTATION_XML) { - for (var i = 0; i < attrs.length; i++) { - if (attrs[i].name === ATTRS.ENCODING) { - var value = attrs[i].value.toLowerCase(); - - return value === MIME_TYPES.TEXT_HTML || value === MIME_TYPES.APPLICATION_XML; - } - } - } - - return ns === NS.SVG && (tn === $.FOREIGN_OBJECT || tn === $.DESC || tn === $.TITLE); -} - -exports.isIntegrationPoint = function (tn, ns, attrs, foreignNS) { - if ((!foreignNS || foreignNS === NS.HTML) && isHtmlIntegrationPoint(tn, ns, attrs)) - return true; - - if ((!foreignNS || foreignNS === NS.MATHML) && isMathMLTextIntegrationPoint(tn, ns)) - return true; - - return false; -}; +},{}],36:[function(require,module,exports){ +'use strict'; + +var Tokenizer = require('../tokenizer'), + HTML = require('./html'); + +//Aliases +var $ = HTML.TAG_NAMES, + NS = HTML.NAMESPACES, + ATTRS = HTML.ATTRS; + + +//MIME types +var MIME_TYPES = { + TEXT_HTML: 'text/html', + APPLICATION_XML: 'application/xhtml+xml' +}; + +//Attributes +var DEFINITION_URL_ATTR = 'definitionurl', + ADJUSTED_DEFINITION_URL_ATTR = 'definitionURL', + SVG_ATTRS_ADJUSTMENT_MAP = { + 'attributename': 'attributeName', + 'attributetype': 'attributeType', + 'basefrequency': 'baseFrequency', + 'baseprofile': 'baseProfile', + 'calcmode': 'calcMode', + 'clippathunits': 'clipPathUnits', + 'diffuseconstant': 'diffuseConstant', + 'edgemode': 'edgeMode', + 'filterunits': 'filterUnits', + 'glyphref': 'glyphRef', + 'gradienttransform': 'gradientTransform', + 'gradientunits': 'gradientUnits', + 'kernelmatrix': 'kernelMatrix', + 'kernelunitlength': 'kernelUnitLength', + 'keypoints': 'keyPoints', + 'keysplines': 'keySplines', + 'keytimes': 'keyTimes', + 'lengthadjust': 'lengthAdjust', + 'limitingconeangle': 'limitingConeAngle', + 'markerheight': 'markerHeight', + 'markerunits': 'markerUnits', + 'markerwidth': 'markerWidth', + 'maskcontentunits': 'maskContentUnits', + 'maskunits': 'maskUnits', + 'numoctaves': 'numOctaves', + 'pathlength': 'pathLength', + 'patterncontentunits': 'patternContentUnits', + 'patterntransform': 'patternTransform', + 'patternunits': 'patternUnits', + 'pointsatx': 'pointsAtX', + 'pointsaty': 'pointsAtY', + 'pointsatz': 'pointsAtZ', + 'preservealpha': 'preserveAlpha', + 'preserveaspectratio': 'preserveAspectRatio', + 'primitiveunits': 'primitiveUnits', + 'refx': 'refX', + 'refy': 'refY', + 'repeatcount': 'repeatCount', + 'repeatdur': 'repeatDur', + 'requiredextensions': 'requiredExtensions', + 'requiredfeatures': 'requiredFeatures', + 'specularconstant': 'specularConstant', + 'specularexponent': 'specularExponent', + 'spreadmethod': 'spreadMethod', + 'startoffset': 'startOffset', + 'stddeviation': 'stdDeviation', + 'stitchtiles': 'stitchTiles', + 'surfacescale': 'surfaceScale', + 'systemlanguage': 'systemLanguage', + 'tablevalues': 'tableValues', + 'targetx': 'targetX', + 'targety': 'targetY', + 'textlength': 'textLength', + 'viewbox': 'viewBox', + 'viewtarget': 'viewTarget', + 'xchannelselector': 'xChannelSelector', + 'ychannelselector': 'yChannelSelector', + 'zoomandpan': 'zoomAndPan' + }, + XML_ATTRS_ADJUSTMENT_MAP = { + 'xlink:actuate': {prefix: 'xlink', name: 'actuate', namespace: NS.XLINK}, + 'xlink:arcrole': {prefix: 'xlink', name: 'arcrole', namespace: NS.XLINK}, + 'xlink:href': {prefix: 'xlink', name: 'href', namespace: NS.XLINK}, + 'xlink:role': {prefix: 'xlink', name: 'role', namespace: NS.XLINK}, + 'xlink:show': {prefix: 'xlink', name: 'show', namespace: NS.XLINK}, + 'xlink:title': {prefix: 'xlink', name: 'title', namespace: NS.XLINK}, + 'xlink:type': {prefix: 'xlink', name: 'type', namespace: NS.XLINK}, + 'xml:base': {prefix: 'xml', name: 'base', namespace: NS.XML}, + 'xml:lang': {prefix: 'xml', name: 'lang', namespace: NS.XML}, + 'xml:space': {prefix: 'xml', name: 'space', namespace: NS.XML}, + 'xmlns': {prefix: '', name: 'xmlns', namespace: NS.XMLNS}, + 'xmlns:xlink': {prefix: 'xmlns', name: 'xlink', namespace: NS.XMLNS} + + }; + +//SVG tag names adjustment map +var SVG_TAG_NAMES_ADJUSTMENT_MAP = { + 'altglyph': 'altGlyph', + 'altglyphdef': 'altGlyphDef', + 'altglyphitem': 'altGlyphItem', + 'animatecolor': 'animateColor', + 'animatemotion': 'animateMotion', + 'animatetransform': 'animateTransform', + 'clippath': 'clipPath', + 'feblend': 'feBlend', + 'fecolormatrix': 'feColorMatrix', + 'fecomponenttransfer': 'feComponentTransfer', + 'fecomposite': 'feComposite', + 'feconvolvematrix': 'feConvolveMatrix', + 'fediffuselighting': 'feDiffuseLighting', + 'fedisplacementmap': 'feDisplacementMap', + 'fedistantlight': 'feDistantLight', + 'feflood': 'feFlood', + 'fefunca': 'feFuncA', + 'fefuncb': 'feFuncB', + 'fefuncg': 'feFuncG', + 'fefuncr': 'feFuncR', + 'fegaussianblur': 'feGaussianBlur', + 'feimage': 'feImage', + 'femerge': 'feMerge', + 'femergenode': 'feMergeNode', + 'femorphology': 'feMorphology', + 'feoffset': 'feOffset', + 'fepointlight': 'fePointLight', + 'fespecularlighting': 'feSpecularLighting', + 'fespotlight': 'feSpotLight', + 'fetile': 'feTile', + 'feturbulence': 'feTurbulence', + 'foreignobject': 'foreignObject', + 'glyphref': 'glyphRef', + 'lineargradient': 'linearGradient', + 'radialgradient': 'radialGradient', + 'textpath': 'textPath' +}; + +//Tags that causes exit from foreign content +var EXITS_FOREIGN_CONTENT = {}; + +EXITS_FOREIGN_CONTENT[$.B] = true; +EXITS_FOREIGN_CONTENT[$.BIG] = true; +EXITS_FOREIGN_CONTENT[$.BLOCKQUOTE] = true; +EXITS_FOREIGN_CONTENT[$.BODY] = true; +EXITS_FOREIGN_CONTENT[$.BR] = true; +EXITS_FOREIGN_CONTENT[$.CENTER] = true; +EXITS_FOREIGN_CONTENT[$.CODE] = true; +EXITS_FOREIGN_CONTENT[$.DD] = true; +EXITS_FOREIGN_CONTENT[$.DIV] = true; +EXITS_FOREIGN_CONTENT[$.DL] = true; +EXITS_FOREIGN_CONTENT[$.DT] = true; +EXITS_FOREIGN_CONTENT[$.EM] = true; +EXITS_FOREIGN_CONTENT[$.EMBED] = true; +EXITS_FOREIGN_CONTENT[$.H1] = true; +EXITS_FOREIGN_CONTENT[$.H2] = true; +EXITS_FOREIGN_CONTENT[$.H3] = true; +EXITS_FOREIGN_CONTENT[$.H4] = true; +EXITS_FOREIGN_CONTENT[$.H5] = true; +EXITS_FOREIGN_CONTENT[$.H6] = true; +EXITS_FOREIGN_CONTENT[$.HEAD] = true; +EXITS_FOREIGN_CONTENT[$.HR] = true; +EXITS_FOREIGN_CONTENT[$.I] = true; +EXITS_FOREIGN_CONTENT[$.IMG] = true; +EXITS_FOREIGN_CONTENT[$.LI] = true; +EXITS_FOREIGN_CONTENT[$.LISTING] = true; +EXITS_FOREIGN_CONTENT[$.MENU] = true; +EXITS_FOREIGN_CONTENT[$.META] = true; +EXITS_FOREIGN_CONTENT[$.NOBR] = true; +EXITS_FOREIGN_CONTENT[$.OL] = true; +EXITS_FOREIGN_CONTENT[$.P] = true; +EXITS_FOREIGN_CONTENT[$.PRE] = true; +EXITS_FOREIGN_CONTENT[$.RUBY] = true; +EXITS_FOREIGN_CONTENT[$.S] = true; +EXITS_FOREIGN_CONTENT[$.SMALL] = true; +EXITS_FOREIGN_CONTENT[$.SPAN] = true; +EXITS_FOREIGN_CONTENT[$.STRONG] = true; +EXITS_FOREIGN_CONTENT[$.STRIKE] = true; +EXITS_FOREIGN_CONTENT[$.SUB] = true; +EXITS_FOREIGN_CONTENT[$.SUP] = true; +EXITS_FOREIGN_CONTENT[$.TABLE] = true; +EXITS_FOREIGN_CONTENT[$.TT] = true; +EXITS_FOREIGN_CONTENT[$.U] = true; +EXITS_FOREIGN_CONTENT[$.UL] = true; +EXITS_FOREIGN_CONTENT[$.VAR] = true; + +//Check exit from foreign content +exports.causesExit = function (startTagToken) { + var tn = startTagToken.tagName; + var isFontWithAttrs = tn === $.FONT && (Tokenizer.getTokenAttr(startTagToken, ATTRS.COLOR) !== null || + Tokenizer.getTokenAttr(startTagToken, ATTRS.SIZE) !== null || + Tokenizer.getTokenAttr(startTagToken, ATTRS.FACE) !== null); + + return isFontWithAttrs ? true : EXITS_FOREIGN_CONTENT[tn]; +}; + +//Token adjustments +exports.adjustTokenMathMLAttrs = function (token) { + for (var i = 0; i < token.attrs.length; i++) { + if (token.attrs[i].name === DEFINITION_URL_ATTR) { + token.attrs[i].name = ADJUSTED_DEFINITION_URL_ATTR; + break; + } + } +}; + +exports.adjustTokenSVGAttrs = function (token) { + for (var i = 0; i < token.attrs.length; i++) { + var adjustedAttrName = SVG_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name]; + + if (adjustedAttrName) + token.attrs[i].name = adjustedAttrName; + } +}; + +exports.adjustTokenXMLAttrs = function (token) { + for (var i = 0; i < token.attrs.length; i++) { + var adjustedAttrEntry = XML_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name]; + + if (adjustedAttrEntry) { + token.attrs[i].prefix = adjustedAttrEntry.prefix; + token.attrs[i].name = adjustedAttrEntry.name; + token.attrs[i].namespace = adjustedAttrEntry.namespace; + } + } +}; + +exports.adjustTokenSVGTagName = function (token) { + var adjustedTagName = SVG_TAG_NAMES_ADJUSTMENT_MAP[token.tagName]; + + if (adjustedTagName) + token.tagName = adjustedTagName; +}; + +//Integration points +exports.isMathMLTextIntegrationPoint = function (tn, ns) { + return ns === NS.MATHML && (tn === $.MI || tn === $.MO || tn === $.MN || tn === $.MS || tn === $.MTEXT); +}; + +exports.isHtmlIntegrationPoint = function (tn, ns, attrs) { + if (ns === NS.MATHML && tn === $.ANNOTATION_XML) { + for (var i = 0; i < attrs.length; i++) { + if (attrs[i].name === ATTRS.ENCODING) { + var value = attrs[i].value.toLowerCase(); + + return value === MIME_TYPES.TEXT_HTML || value === MIME_TYPES.APPLICATION_XML; + } + } + } + + return ns === NS.SVG && (tn === $.FOREIGN_OBJECT || tn === $.DESC || tn === $.TITLE); +}; -},{"../tokenizer":53,"./html":38}],38:[function(require,module,exports){ +},{"../tokenizer":52,"./html":37}],37:[function(require,module,exports){ 'use strict'; var NS = exports.NAMESPACES = { @@ -4169,6 +4184,7 @@ var $ = exports.TAG_NAMES = { IMAGE: 'image', INPUT: 'input', IFRAME: 'iframe', + ISINDEX: 'isindex', KEYGEN: 'keygen', @@ -4299,12 +4315,14 @@ SPECIAL_ELEMENTS[NS.HTML][$.HTML] = true; SPECIAL_ELEMENTS[NS.HTML][$.IFRAME] = true; SPECIAL_ELEMENTS[NS.HTML][$.IMG] = true; SPECIAL_ELEMENTS[NS.HTML][$.INPUT] = true; +SPECIAL_ELEMENTS[NS.HTML][$.ISINDEX] = true; SPECIAL_ELEMENTS[NS.HTML][$.LI] = true; SPECIAL_ELEMENTS[NS.HTML][$.LINK] = true; SPECIAL_ELEMENTS[NS.HTML][$.LISTING] = true; SPECIAL_ELEMENTS[NS.HTML][$.MAIN] = true; SPECIAL_ELEMENTS[NS.HTML][$.MARQUEE] = true; SPECIAL_ELEMENTS[NS.HTML][$.MENU] = true; +SPECIAL_ELEMENTS[NS.HTML][$.MENUITEM] = true; SPECIAL_ELEMENTS[NS.HTML][$.META] = true; SPECIAL_ELEMENTS[NS.HTML][$.NAV] = true; SPECIAL_ELEMENTS[NS.HTML][$.NOEMBED] = true; @@ -4350,7 +4368,7 @@ SPECIAL_ELEMENTS[NS.SVG][$.TITLE] = true; SPECIAL_ELEMENTS[NS.SVG][$.FOREIGN_OBJECT] = true; SPECIAL_ELEMENTS[NS.SVG][$.DESC] = true; -},{}],39:[function(require,module,exports){ +},{}],38:[function(require,module,exports){ 'use strict'; module.exports = function mergeOptions(defaults, options) { @@ -4365,7 +4383,7 @@ module.exports = function mergeOptions(defaults, options) { }, {}); }; -},{}],40:[function(require,module,exports){ +},{}],39:[function(require,module,exports){ 'use strict'; exports.REPLACEMENT_CHARACTER = '\uFFFD'; @@ -4414,126 +4432,126 @@ exports.CODE_POINT_SEQUENCES = { SYSTEM_STRING: [0x53, 0x59, 0x53, 0x54, 0x45, 0x4D] //SYSTEM }; -},{}],41:[function(require,module,exports){ -'use strict'; - -var Parser = require('./parser'), - Serializer = require('./serializer'); - -/** @namespace parse5 */ - -/** - * Parses an HTML string. - * @function parse - * @memberof parse5 - * @instance - * @param {string} html - Input HTML string. - * @param {ParserOptions} [options] - Parsing options. - * @returns {ASTNode} document - * @example - * var parse5 = require('parse5'); - * - * var document = parse5.parse('Hi there!'); - */ -exports.parse = function parse(html, options) { - var parser = new Parser(options); - - return parser.parse(html); -}; - -/** - * Parses an HTML fragment. - * @function parseFragment - * @memberof parse5 - * @instance - * @param {ASTNode} [fragmentContext] - Parsing context element. If specified, given fragment - * will be parsed as if it was set to the context element's `innerHTML` property. - * @param {string} html - Input HTML fragment string. - * @param {ParserOptions} [options] - Parsing options. - * @returns {ASTNode} documentFragment - * @example - * var parse5 = require('parse5'); - * - * var documentFragment = parse5.parseFragment('
'); - * - * // Parses the html fragment in the context of the parsed element. - * var trFragment = parser.parseFragment(documentFragment.childNodes[0], ''); - */ -exports.parseFragment = function parseFragment(fragmentContext, html, options) { - if (typeof fragmentContext === 'string') { - options = html; - html = fragmentContext; - fragmentContext = null; - } - - var parser = new Parser(options); - - return parser.parseFragment(html, fragmentContext); -}; - -/** - * Serializes an AST node to an HTML string. - * @function serialize - * @memberof parse5 - * @instance - * @param {ASTNode} node - Node to serialize. - * @param {SerializerOptions} [options] - Serialization options. - * @returns {String} html - * @example - * var parse5 = require('parse5'); - * - * var document = parse5.parse('Hi there!'); - * - * // Serializes a document. - * var html = parse5.serialize(document); - * - * // Serializes the element content. - * var bodyInnerHtml = parse5.serialize(document.childNodes[0].childNodes[1]); - */ -exports.serialize = function (node, options) { - var serializer = new Serializer(node, options); - - return serializer.serialize(); -}; - -/** - * Provides built-in tree adapters that can be used for parsing and serialization. - * @var treeAdapters - * @memberof parse5 - * @instance - * @property {TreeAdapter} default - Default tree format for parse5. - * @property {TreeAdapter} htmlparser2 - Quite popular [htmlparser2](https://github.com/fb55/htmlparser2) tree format - * (e.g. used by [cheerio](https://github.com/MatthewMueller/cheerio) and [jsdom](https://github.com/tmpvar/jsdom)). - * @example - * var parse5 = require('parse5'); - * - * // Uses the default tree adapter for parsing. - * var document = parse5.parse('
', { treeAdapter: parse5.treeAdapters.default }); - * - * // Uses the htmlparser2 tree adapter with the SerializerStream. - * var serializer = new parse5.SerializerStream(node, { treeAdapter: parse5.treeAdapters.htmlparser2 }); - */ -exports.treeAdapters = { - default: require('./tree_adapters/default'), - htmlparser2: require('./tree_adapters/htmlparser2') -}; - - -// Streaming -exports.ParserStream = require('./parser/stream'); -exports.SerializerStream = require('./serializer/stream'); -exports.SAXParser = require('./sax'); - -},{"./parser":45,"./parser/stream":47,"./sax":49,"./serializer":51,"./serializer/stream":52,"./tree_adapters/default":56,"./tree_adapters/htmlparser2":57}],42:[function(require,module,exports){ +},{}],40:[function(require,module,exports){ 'use strict'; -var OpenElementStack = require('../parser/open_element_stack'), - Tokenizer = require('../tokenizer'), - HTML = require('../common/html'); +var Parser = require('./parser'), + Serializer = require('./serializer'); +/** @namespace parse5 */ -//Aliases -var $ = HTML.TAG_NAMES; +/** + * Parses an HTML string. + * @function parse + * @memberof parse5 + * @instance + * @param {string} html - Input HTML string. + * @param {ParserOptions} [options] - Parsing options. + * @returns {ASTNode} document + * @example + * var parse5 = require('parse5'); + * + * var document = parse5.parse('Hi there!'); + */ +exports.parse = function parse(html, options) { + var parser = new Parser(options); + + return parser.parse(html); +}; + +/** + * Parses an HTML fragment. + * @function parseFragment + * @memberof parse5 + * @instance + * @param {ASTNode} [fragmentContext] - Parsing context element. If specified, given fragment + * will be parsed as if it was set to the context element's `innerHTML` property. + * @param {string} html - Input HTML fragment string. + * @param {ParserOptions} [options] - Parsing options. + * @returns {ASTNode} documentFragment + * @example + * var parse5 = require('parse5'); + * + * var documentFragment = parse5.parseFragment('
Shake it, baby
'); + * + * // Parses the html fragment in the context of the parsed element. + * var trFragment = parser.parseFragment(documentFragment.childNodes[0], ''); + */ +exports.parseFragment = function parseFragment(fragmentContext, html, options) { + if (typeof fragmentContext === 'string') { + options = html; + html = fragmentContext; + fragmentContext = null; + } + + var parser = new Parser(options); + + return parser.parseFragment(html, fragmentContext); +}; + +/** + * Serializes an AST node to an HTML string. + * @function serialize + * @memberof parse5 + * @instance + * @param {ASTNode} node - Node to serialize. + * @param {SerializerOptions} [options] - Serialization options. + * @returns {String} html + * @example + * var parse5 = require('parse5'); + * + * var document = parse5.parse('Hi there!'); + * + * // Serializes a document. + * var html = parse5.serialize(document); + * + * // Serializes the element content. + * var bodyInnerHtml = parse5.serialize(document.childNodes[0].childNodes[1]); + */ +exports.serialize = function (node, options) { + var serializer = new Serializer(node, options); + + return serializer.serialize(); +}; + +/** + * Provides built-in tree adapters that can be used for parsing and serialization. + * @var treeAdapters + * @memberof parse5 + * @instance + * @property {TreeAdapter} default - Default tree format for parse5. + * @property {TreeAdapter} htmlparser2 - Quite popular [htmlparser2](https://github.com/fb55/htmlparser2) tree format + * (e.g. used by [cheerio](https://github.com/MatthewMueller/cheerio) and [jsdom](https://github.com/tmpvar/jsdom)). + * @example + * var parse5 = require('parse5'); + * + * // Uses the default tree adapter for parsing. + * var document = parse5.parse('
', { treeAdapter: parse5.treeAdapters.default }); + * + * // Uses the htmlparser2 tree adapter with the SerializerStream. + * var serializer = new parse5.SerializerStream(node, { treeAdapter: parse5.treeAdapters.htmlparser2 }); + */ +exports.treeAdapters = { + default: require('./tree_adapters/default'), + htmlparser2: require('./tree_adapters/htmlparser2') +}; + + +// Streaming +exports.ParserStream = require('./parser/stream'); +exports.SerializerStream = require('./serializer/stream'); +exports.SAXParser = require('./sax'); + +},{"./parser":44,"./parser/stream":46,"./sax":48,"./serializer":50,"./serializer/stream":51,"./tree_adapters/default":55,"./tree_adapters/htmlparser2":56}],41:[function(require,module,exports){ +'use strict'; + +var OpenElementStack = require('../parser/open_element_stack'), + Tokenizer = require('../tokenizer'), + HTML = require('../common/html'); + + +//Aliases +var $ = HTML.TAG_NAMES; function setEndLocation(element, closingToken, treeAdapter) { @@ -4577,10 +4595,7 @@ function setEndLocation(element, closingToken, treeAdapter) { }; } - if (isClosingEndTag) - loc.endOffset = ctLocation.endOffset; - else - loc.endOffset = ctLocation.startOffset; + loc.endOffset = ctLocation.endOffset; } } @@ -4743,178 +4758,177 @@ exports.assign = function (parser) { }; -},{"../common/html":38,"../parser/open_element_stack":46,"../tokenizer":53}],43:[function(require,module,exports){ -'use strict'; - -var UNICODE = require('../common/unicode'); - -//Aliases -var $ = UNICODE.CODE_POINTS; - - -exports.assign = function (tokenizer) { - //NOTE: obtain Tokenizer proto this way to avoid module circular references - var tokenizerProto = Object.getPrototypeOf(tokenizer), - tokenStartOffset = -1, - tokenCol = -1, - tokenLine = 1, - isEol = false, - lineStartPosStack = [0], - lineStartPos = 0, - col = -1, - line = 1; - - function attachLocationInfo(token) { - /** - * @typedef {Object} LocationInfo - * - * @property {Number} line - One-based line index - * @property {Number} col - One-based column index - * @property {Number} startOffset - Zero-based first character index - * @property {Number} endOffset - Zero-based last character index - */ - token.location = { - line: tokenLine, - col: tokenCol, - startOffset: tokenStartOffset, - endOffset: -1 - }; - } - - //NOTE: patch consumption method to track line/col information - tokenizer._consume = function () { - var cp = tokenizerProto._consume.call(this); - - //NOTE: LF should be in the last column of the line - if (isEol) { - isEol = false; - line++; - lineStartPosStack.push(this.preprocessor.sourcePos); - lineStartPos = this.preprocessor.sourcePos; - } - - if (cp === $.LINE_FEED) - isEol = true; - - col = this.preprocessor.sourcePos - lineStartPos + 1; - - return cp; - }; - - tokenizer._unconsume = function () { - tokenizerProto._unconsume.call(this); - isEol = false; - - while (lineStartPos > this.preprocessor.sourcePos && lineStartPosStack.length > 1) { - lineStartPos = lineStartPosStack.pop(); - line--; - } - - col = this.preprocessor.sourcePos - lineStartPos + 1; - }; - - //NOTE: patch token creation methods and attach location objects - tokenizer._createStartTagToken = function () { - tokenizerProto._createStartTagToken.call(this); - attachLocationInfo(this.currentToken); - }; - - tokenizer._createEndTagToken = function () { - tokenizerProto._createEndTagToken.call(this); - attachLocationInfo(this.currentToken); - }; - - tokenizer._createCommentToken = function () { - tokenizerProto._createCommentToken.call(this); - attachLocationInfo(this.currentToken); - }; - - tokenizer._createDoctypeToken = function (initialName) { - tokenizerProto._createDoctypeToken.call(this, initialName); - attachLocationInfo(this.currentToken); - }; - - tokenizer._createCharacterToken = function (type, ch) { - tokenizerProto._createCharacterToken.call(this, type, ch); - attachLocationInfo(this.currentCharacterToken); - }; - - tokenizer._createAttr = function (attrNameFirstCh) { - tokenizerProto._createAttr.call(this, attrNameFirstCh); - this.currentAttrLocation = { - line: line, - col: col, - startOffset: this.preprocessor.sourcePos, - endOffset: -1 - }; - }; - - tokenizer._leaveAttrName = function (toState) { - tokenizerProto._leaveAttrName.call(this, toState); - this._attachCurrentAttrLocationInfo(); - }; - - tokenizer._leaveAttrValue = function (toState) { - tokenizerProto._leaveAttrValue.call(this, toState); - this._attachCurrentAttrLocationInfo(); - }; - - tokenizer._attachCurrentAttrLocationInfo = function () { - this.currentAttrLocation.endOffset = this.preprocessor.sourcePos; - - if (!this.currentToken.location.attrs) - this.currentToken.location.attrs = {}; - - /** - * @typedef {Object} StartTagLocationInfo - * @extends LocationInfo - * - * @property {Dictionary} attrs - Start tag attributes' location info. - */ - this.currentToken.location.attrs[this.currentAttr.name] = this.currentAttrLocation; - }; - - //NOTE: patch token emission methods to determine end location - tokenizer._emitCurrentToken = function () { - //NOTE: if we have pending character token make it's end location equal to the - //current token's start location. - if (this.currentCharacterToken) - this.currentCharacterToken.location.endOffset = this.currentToken.location.startOffset; - - this.currentToken.location.endOffset = this.preprocessor.sourcePos + 1; - tokenizerProto._emitCurrentToken.call(this); - }; - - tokenizer._emitCurrentCharacterToken = function () { - //NOTE: if we have character token and it's location wasn't set in the _emitCurrentToken(), - //then set it's location at the current preprocessor position. - //We don't need to increment preprocessor position, since character token - //emission is always forced by the start of the next character token here. - //So, we already have advanced position. - if (this.currentCharacterToken && this.currentCharacterToken.location.endOffset === -1) - this.currentCharacterToken.location.endOffset = this.preprocessor.sourcePos; - - tokenizerProto._emitCurrentCharacterToken.call(this); - }; - - //NOTE: patch initial states for each mode to obtain token start position - Object.keys(tokenizerProto.MODE) - - .map(function (modeName) { - return tokenizerProto.MODE[modeName]; - }) - - .forEach(function (state) { - tokenizer[state] = function (cp) { - tokenStartOffset = this.preprocessor.sourcePos; - tokenLine = line; - tokenCol = col; - tokenizerProto[state].call(this, cp); - }; - }); -}; +},{"../common/html":37,"../parser/open_element_stack":45,"../tokenizer":52}],42:[function(require,module,exports){ +'use strict'; + +var UNICODE = require('../common/unicode'); + +//Aliases +var $ = UNICODE.CODE_POINTS; + + +exports.assign = function (tokenizer) { + //NOTE: obtain Tokenizer proto this way to avoid module circular references + var tokenizerProto = Object.getPrototypeOf(tokenizer), + tokenStartOffset = -1, + tokenCol = -1, + tokenLine = 1, + isEol = false, + lineStartPosStack = [0], + lineStartPos = 0, + col = -1, + line = 1; + + function attachLocationInfo(token) { + /** + * @typedef {Object} LocationInfo + * + * @property {Number} line - One-based line index + * @property {Number} col - One-based column index + * @property {Number} startOffset - Zero-based first character index + * @property {Number} endOffset - Zero-based last character index + */ + token.location = { + line: tokenLine, + col: tokenCol, + startOffset: tokenStartOffset, + endOffset: -1 + }; + } + + //NOTE: patch consumption method to track line/col information + tokenizer._consume = function () { + var cp = tokenizerProto._consume.call(this); + + //NOTE: LF should be in the last column of the line + if (isEol) { + isEol = false; + line++; + lineStartPosStack.push(this.preprocessor.pos); + lineStartPos = this.preprocessor.pos; + } + + if (cp === $.LINE_FEED) + isEol = true; + + col = this.preprocessor.pos - lineStartPos + 1; + + return cp; + }; + + tokenizer._unconsume = function () { + tokenizerProto._unconsume.call(this); + + while (lineStartPos > this.preprocessor.pos && lineStartPosStack.length > 1) { + lineStartPos = lineStartPosStack.pop(); + line--; + } + + col = this.preprocessor.pos - lineStartPos + 1; + }; + + //NOTE: patch token creation methods and attach location objects + tokenizer._createStartTagToken = function (tagNameFirstCh) { + tokenizerProto._createStartTagToken.call(this, tagNameFirstCh); + attachLocationInfo(this.currentToken); + }; + + tokenizer._createEndTagToken = function (tagNameFirstCh) { + tokenizerProto._createEndTagToken.call(this, tagNameFirstCh); + attachLocationInfo(this.currentToken); + }; + + tokenizer._createCommentToken = function () { + tokenizerProto._createCommentToken.call(this); + attachLocationInfo(this.currentToken); + }; + + tokenizer._createDoctypeToken = function (doctypeNameFirstCh) { + tokenizerProto._createDoctypeToken.call(this, doctypeNameFirstCh); + attachLocationInfo(this.currentToken); + }; + + tokenizer._createCharacterToken = function (type, ch) { + tokenizerProto._createCharacterToken.call(this, type, ch); + attachLocationInfo(this.currentCharacterToken); + }; + + tokenizer._createAttr = function (attrNameFirstCh) { + tokenizerProto._createAttr.call(this, attrNameFirstCh); + this.currentAttrLocation = { + line: line, + col: col, + startOffset: this.preprocessor.pos, + endOffset: -1 + }; + }; + + tokenizer._leaveAttrName = function (toState) { + tokenizerProto._leaveAttrName.call(this, toState); + this._attachCurrentAttrLocationInfo(); + }; + + tokenizer._leaveAttrValue = function (toState) { + tokenizerProto._leaveAttrValue.call(this, toState); + this._attachCurrentAttrLocationInfo(); + }; + + tokenizer._attachCurrentAttrLocationInfo = function () { + this.currentAttrLocation.endOffset = this.preprocessor.pos; + + if (!this.currentToken.location.attrs) + this.currentToken.location.attrs = {}; + + /** + * @typedef {Object} StartTagLocationInfo + * @extends LocationInfo + * + * @property {Dictionary} attrs - Start tag attributes' location info. + */ + this.currentToken.location.attrs[this.currentAttr.name] = this.currentAttrLocation; + }; + + //NOTE: patch token emission methods to determine end location + tokenizer._emitCurrentToken = function () { + //NOTE: if we have pending character token make it's end location equal to the + //current token's start location. + if (this.currentCharacterToken) + this.currentCharacterToken.location.endOffset = this.currentToken.location.startOffset; + + this.currentToken.location.endOffset = this.preprocessor.pos + 1; + tokenizerProto._emitCurrentToken.call(this); + }; + + tokenizer._emitCurrentCharacterToken = function () { + //NOTE: if we have character token and it's location wasn't set in the _emitCurrentToken(), + //then set it's location at the current preprocessor position. + //We don't need to increment preprocessor position, since character token + //emission is always forced by the start of the next character token here. + //So, we already have advanced position. + if (this.currentCharacterToken && this.currentCharacterToken.location.endOffset === -1) + this.currentCharacterToken.location.endOffset = this.preprocessor.pos; + + tokenizerProto._emitCurrentCharacterToken.call(this); + }; + + //NOTE: patch initial states for each mode to obtain token start position + Object.keys(tokenizerProto.MODE) + + .map(function (modeName) { + return tokenizerProto.MODE[modeName]; + }) + + .forEach(function (state) { + tokenizer[state] = function (cp) { + tokenStartOffset = this.preprocessor.pos; + tokenLine = line; + tokenCol = col; + tokenizerProto[state].call(this, cp); + }; + }); +}; -},{"../common/unicode":40}],44:[function(require,module,exports){ +},{"../common/unicode":39}],43:[function(require,module,exports){ 'use strict'; //Const @@ -5083,3223 +5097,3257 @@ FormattingElementList.prototype.getElementEntry = function (element) { return null; }; -},{}],45:[function(require,module,exports){ -'use strict'; - -var Tokenizer = require('../tokenizer'), - OpenElementStack = require('./open_element_stack'), - FormattingElementList = require('./formatting_element_list'), - locationInfoMixin = require('../location_info/parser_mixin'), - defaultTreeAdapter = require('../tree_adapters/default'), - doctype = require('../common/doctype'), - foreignContent = require('../common/foreign_content'), - mergeOptions = require('../common/merge_options'), - UNICODE = require('../common/unicode'), - HTML = require('../common/html'); - -//Aliases -var $ = HTML.TAG_NAMES, - NS = HTML.NAMESPACES, - ATTRS = HTML.ATTRS; - -/** - * @typedef {Object} ParserOptions - * - * @property {Boolean} [locationInfo=false] - Enables source code location information for the nodes. - * When enabled, each node (except root node) has the `__location` property. In case the node is not an empty element, - * `__location` will be {@link ElementLocationInfo} object, otherwise it's {@link LocationInfo}. - * If the element was implicitly created by the parser it's `__location` property will be `null`. - * - * @property {TreeAdapter} [treeAdapter=parse5.treeAdapters.default] - Specifies the resulting tree format. - */ -var DEFAULT_OPTIONS = { - locationInfo: false, - treeAdapter: defaultTreeAdapter -}; - -//Misc constants -var HIDDEN_INPUT_TYPE = 'hidden'; - -//Adoption agency loops iteration count -var AA_OUTER_LOOP_ITER = 8, - AA_INNER_LOOP_ITER = 3; - -//Insertion modes -var INITIAL_MODE = 'INITIAL_MODE', - BEFORE_HTML_MODE = 'BEFORE_HTML_MODE', - BEFORE_HEAD_MODE = 'BEFORE_HEAD_MODE', - IN_HEAD_MODE = 'IN_HEAD_MODE', - AFTER_HEAD_MODE = 'AFTER_HEAD_MODE', - IN_BODY_MODE = 'IN_BODY_MODE', - TEXT_MODE = 'TEXT_MODE', - IN_TABLE_MODE = 'IN_TABLE_MODE', - IN_TABLE_TEXT_MODE = 'IN_TABLE_TEXT_MODE', - IN_CAPTION_MODE = 'IN_CAPTION_MODE', - IN_COLUMN_GROUP_MODE = 'IN_COLUMN_GROUP_MODE', - IN_TABLE_BODY_MODE = 'IN_TABLE_BODY_MODE', - IN_ROW_MODE = 'IN_ROW_MODE', - IN_CELL_MODE = 'IN_CELL_MODE', - IN_SELECT_MODE = 'IN_SELECT_MODE', - IN_SELECT_IN_TABLE_MODE = 'IN_SELECT_IN_TABLE_MODE', - IN_TEMPLATE_MODE = 'IN_TEMPLATE_MODE', - AFTER_BODY_MODE = 'AFTER_BODY_MODE', - IN_FRAMESET_MODE = 'IN_FRAMESET_MODE', - AFTER_FRAMESET_MODE = 'AFTER_FRAMESET_MODE', - AFTER_AFTER_BODY_MODE = 'AFTER_AFTER_BODY_MODE', - AFTER_AFTER_FRAMESET_MODE = 'AFTER_AFTER_FRAMESET_MODE'; - -//Insertion mode reset map -var INSERTION_MODE_RESET_MAP = {}; - -INSERTION_MODE_RESET_MAP[$.TR] = IN_ROW_MODE; -INSERTION_MODE_RESET_MAP[$.TBODY] = -INSERTION_MODE_RESET_MAP[$.THEAD] = -INSERTION_MODE_RESET_MAP[$.TFOOT] = IN_TABLE_BODY_MODE; -INSERTION_MODE_RESET_MAP[$.CAPTION] = IN_CAPTION_MODE; -INSERTION_MODE_RESET_MAP[$.COLGROUP] = IN_COLUMN_GROUP_MODE; -INSERTION_MODE_RESET_MAP[$.TABLE] = IN_TABLE_MODE; -INSERTION_MODE_RESET_MAP[$.BODY] = IN_BODY_MODE; -INSERTION_MODE_RESET_MAP[$.FRAMESET] = IN_FRAMESET_MODE; - -//Template insertion mode switch map -var TEMPLATE_INSERTION_MODE_SWITCH_MAP = {}; - -TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.CAPTION] = -TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.COLGROUP] = -TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.TBODY] = -TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.TFOOT] = -TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.THEAD] = IN_TABLE_MODE; -TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.COL] = IN_COLUMN_GROUP_MODE; -TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.TR] = IN_TABLE_BODY_MODE; -TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.TD] = -TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.TH] = IN_ROW_MODE; - -//Token handlers map for insertion modes -var _ = {}; - -_[INITIAL_MODE] = {}; -_[INITIAL_MODE][Tokenizer.CHARACTER_TOKEN] = -_[INITIAL_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenInInitialMode; -_[INITIAL_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = ignoreToken; -_[INITIAL_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[INITIAL_MODE][Tokenizer.DOCTYPE_TOKEN] = doctypeInInitialMode; -_[INITIAL_MODE][Tokenizer.START_TAG_TOKEN] = -_[INITIAL_MODE][Tokenizer.END_TAG_TOKEN] = -_[INITIAL_MODE][Tokenizer.EOF_TOKEN] = tokenInInitialMode; - -_[BEFORE_HTML_MODE] = {}; -_[BEFORE_HTML_MODE][Tokenizer.CHARACTER_TOKEN] = -_[BEFORE_HTML_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenBeforeHtml; -_[BEFORE_HTML_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = ignoreToken; -_[BEFORE_HTML_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[BEFORE_HTML_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[BEFORE_HTML_MODE][Tokenizer.START_TAG_TOKEN] = startTagBeforeHtml; -_[BEFORE_HTML_MODE][Tokenizer.END_TAG_TOKEN] = endTagBeforeHtml; -_[BEFORE_HTML_MODE][Tokenizer.EOF_TOKEN] = tokenBeforeHtml; - -_[BEFORE_HEAD_MODE] = {}; -_[BEFORE_HEAD_MODE][Tokenizer.CHARACTER_TOKEN] = -_[BEFORE_HEAD_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenBeforeHead; -_[BEFORE_HEAD_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = ignoreToken; -_[BEFORE_HEAD_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[BEFORE_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[BEFORE_HEAD_MODE][Tokenizer.START_TAG_TOKEN] = startTagBeforeHead; -_[BEFORE_HEAD_MODE][Tokenizer.END_TAG_TOKEN] = endTagBeforeHead; -_[BEFORE_HEAD_MODE][Tokenizer.EOF_TOKEN] = tokenBeforeHead; - -_[IN_HEAD_MODE] = {}; -_[IN_HEAD_MODE][Tokenizer.CHARACTER_TOKEN] = -_[IN_HEAD_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenInHead; -_[IN_HEAD_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; -_[IN_HEAD_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[IN_HEAD_MODE][Tokenizer.START_TAG_TOKEN] = startTagInHead; -_[IN_HEAD_MODE][Tokenizer.END_TAG_TOKEN] = endTagInHead; -_[IN_HEAD_MODE][Tokenizer.EOF_TOKEN] = tokenInHead; - -_[AFTER_HEAD_MODE] = {}; -_[AFTER_HEAD_MODE][Tokenizer.CHARACTER_TOKEN] = -_[AFTER_HEAD_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenAfterHead; -_[AFTER_HEAD_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; -_[AFTER_HEAD_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[AFTER_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[AFTER_HEAD_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterHead; -_[AFTER_HEAD_MODE][Tokenizer.END_TAG_TOKEN] = endTagAfterHead; -_[AFTER_HEAD_MODE][Tokenizer.EOF_TOKEN] = tokenAfterHead; - -_[IN_BODY_MODE] = {}; -_[IN_BODY_MODE][Tokenizer.CHARACTER_TOKEN] = characterInBody; -_[IN_BODY_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; -_[IN_BODY_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; -_[IN_BODY_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_BODY_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[IN_BODY_MODE][Tokenizer.START_TAG_TOKEN] = startTagInBody; -_[IN_BODY_MODE][Tokenizer.END_TAG_TOKEN] = endTagInBody; -_[IN_BODY_MODE][Tokenizer.EOF_TOKEN] = eofInBody; - -_[TEXT_MODE] = {}; -_[TEXT_MODE][Tokenizer.CHARACTER_TOKEN] = -_[TEXT_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = -_[TEXT_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; -_[TEXT_MODE][Tokenizer.COMMENT_TOKEN] = -_[TEXT_MODE][Tokenizer.DOCTYPE_TOKEN] = -_[TEXT_MODE][Tokenizer.START_TAG_TOKEN] = ignoreToken; -_[TEXT_MODE][Tokenizer.END_TAG_TOKEN] = endTagInText; -_[TEXT_MODE][Tokenizer.EOF_TOKEN] = eofInText; - -_[IN_TABLE_MODE] = {}; -_[IN_TABLE_MODE][Tokenizer.CHARACTER_TOKEN] = -_[IN_TABLE_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = -_[IN_TABLE_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = characterInTable; -_[IN_TABLE_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_TABLE_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[IN_TABLE_MODE][Tokenizer.START_TAG_TOKEN] = startTagInTable; -_[IN_TABLE_MODE][Tokenizer.END_TAG_TOKEN] = endTagInTable; -_[IN_TABLE_MODE][Tokenizer.EOF_TOKEN] = eofInBody; - -_[IN_TABLE_TEXT_MODE] = {}; -_[IN_TABLE_TEXT_MODE][Tokenizer.CHARACTER_TOKEN] = characterInTableText; -_[IN_TABLE_TEXT_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; -_[IN_TABLE_TEXT_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInTableText; -_[IN_TABLE_TEXT_MODE][Tokenizer.COMMENT_TOKEN] = -_[IN_TABLE_TEXT_MODE][Tokenizer.DOCTYPE_TOKEN] = -_[IN_TABLE_TEXT_MODE][Tokenizer.START_TAG_TOKEN] = -_[IN_TABLE_TEXT_MODE][Tokenizer.END_TAG_TOKEN] = -_[IN_TABLE_TEXT_MODE][Tokenizer.EOF_TOKEN] = tokenInTableText; - -_[IN_CAPTION_MODE] = {}; -_[IN_CAPTION_MODE][Tokenizer.CHARACTER_TOKEN] = characterInBody; -_[IN_CAPTION_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; -_[IN_CAPTION_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; -_[IN_CAPTION_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_CAPTION_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[IN_CAPTION_MODE][Tokenizer.START_TAG_TOKEN] = startTagInCaption; -_[IN_CAPTION_MODE][Tokenizer.END_TAG_TOKEN] = endTagInCaption; -_[IN_CAPTION_MODE][Tokenizer.EOF_TOKEN] = eofInBody; - -_[IN_COLUMN_GROUP_MODE] = {}; -_[IN_COLUMN_GROUP_MODE][Tokenizer.CHARACTER_TOKEN] = -_[IN_COLUMN_GROUP_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenInColumnGroup; -_[IN_COLUMN_GROUP_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; -_[IN_COLUMN_GROUP_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_COLUMN_GROUP_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[IN_COLUMN_GROUP_MODE][Tokenizer.START_TAG_TOKEN] = startTagInColumnGroup; -_[IN_COLUMN_GROUP_MODE][Tokenizer.END_TAG_TOKEN] = endTagInColumnGroup; -_[IN_COLUMN_GROUP_MODE][Tokenizer.EOF_TOKEN] = eofInBody; - -_[IN_TABLE_BODY_MODE] = {}; -_[IN_TABLE_BODY_MODE][Tokenizer.CHARACTER_TOKEN] = -_[IN_TABLE_BODY_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = -_[IN_TABLE_BODY_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = characterInTable; -_[IN_TABLE_BODY_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_TABLE_BODY_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[IN_TABLE_BODY_MODE][Tokenizer.START_TAG_TOKEN] = startTagInTableBody; -_[IN_TABLE_BODY_MODE][Tokenizer.END_TAG_TOKEN] = endTagInTableBody; -_[IN_TABLE_BODY_MODE][Tokenizer.EOF_TOKEN] = eofInBody; - -_[IN_ROW_MODE] = {}; -_[IN_ROW_MODE][Tokenizer.CHARACTER_TOKEN] = -_[IN_ROW_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = -_[IN_ROW_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = characterInTable; -_[IN_ROW_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_ROW_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[IN_ROW_MODE][Tokenizer.START_TAG_TOKEN] = startTagInRow; -_[IN_ROW_MODE][Tokenizer.END_TAG_TOKEN] = endTagInRow; -_[IN_ROW_MODE][Tokenizer.EOF_TOKEN] = eofInBody; - -_[IN_CELL_MODE] = {}; -_[IN_CELL_MODE][Tokenizer.CHARACTER_TOKEN] = characterInBody; -_[IN_CELL_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; -_[IN_CELL_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; -_[IN_CELL_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_CELL_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[IN_CELL_MODE][Tokenizer.START_TAG_TOKEN] = startTagInCell; -_[IN_CELL_MODE][Tokenizer.END_TAG_TOKEN] = endTagInCell; -_[IN_CELL_MODE][Tokenizer.EOF_TOKEN] = eofInBody; - -_[IN_SELECT_MODE] = {}; -_[IN_SELECT_MODE][Tokenizer.CHARACTER_TOKEN] = insertCharacters; -_[IN_SELECT_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; -_[IN_SELECT_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; -_[IN_SELECT_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_SELECT_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[IN_SELECT_MODE][Tokenizer.START_TAG_TOKEN] = startTagInSelect; -_[IN_SELECT_MODE][Tokenizer.END_TAG_TOKEN] = endTagInSelect; -_[IN_SELECT_MODE][Tokenizer.EOF_TOKEN] = eofInBody; - -_[IN_SELECT_IN_TABLE_MODE] = {}; -_[IN_SELECT_IN_TABLE_MODE][Tokenizer.CHARACTER_TOKEN] = insertCharacters; -_[IN_SELECT_IN_TABLE_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; -_[IN_SELECT_IN_TABLE_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; -_[IN_SELECT_IN_TABLE_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_SELECT_IN_TABLE_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[IN_SELECT_IN_TABLE_MODE][Tokenizer.START_TAG_TOKEN] = startTagInSelectInTable; -_[IN_SELECT_IN_TABLE_MODE][Tokenizer.END_TAG_TOKEN] = endTagInSelectInTable; -_[IN_SELECT_IN_TABLE_MODE][Tokenizer.EOF_TOKEN] = eofInBody; - -_[IN_TEMPLATE_MODE] = {}; -_[IN_TEMPLATE_MODE][Tokenizer.CHARACTER_TOKEN] = characterInBody; -_[IN_TEMPLATE_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; -_[IN_TEMPLATE_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; -_[IN_TEMPLATE_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_TEMPLATE_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[IN_TEMPLATE_MODE][Tokenizer.START_TAG_TOKEN] = startTagInTemplate; -_[IN_TEMPLATE_MODE][Tokenizer.END_TAG_TOKEN] = endTagInTemplate; -_[IN_TEMPLATE_MODE][Tokenizer.EOF_TOKEN] = eofInTemplate; - -_[AFTER_BODY_MODE] = {}; -_[AFTER_BODY_MODE][Tokenizer.CHARACTER_TOKEN] = -_[AFTER_BODY_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenAfterBody; -_[AFTER_BODY_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; -_[AFTER_BODY_MODE][Tokenizer.COMMENT_TOKEN] = appendCommentToRootHtmlElement; -_[AFTER_BODY_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[AFTER_BODY_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterBody; -_[AFTER_BODY_MODE][Tokenizer.END_TAG_TOKEN] = endTagAfterBody; -_[AFTER_BODY_MODE][Tokenizer.EOF_TOKEN] = stopParsing; - -_[IN_FRAMESET_MODE] = {}; -_[IN_FRAMESET_MODE][Tokenizer.CHARACTER_TOKEN] = -_[IN_FRAMESET_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; -_[IN_FRAMESET_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; -_[IN_FRAMESET_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_FRAMESET_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[IN_FRAMESET_MODE][Tokenizer.START_TAG_TOKEN] = startTagInFrameset; -_[IN_FRAMESET_MODE][Tokenizer.END_TAG_TOKEN] = endTagInFrameset; -_[IN_FRAMESET_MODE][Tokenizer.EOF_TOKEN] = stopParsing; - -_[AFTER_FRAMESET_MODE] = {}; -_[AFTER_FRAMESET_MODE][Tokenizer.CHARACTER_TOKEN] = -_[AFTER_FRAMESET_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; -_[AFTER_FRAMESET_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; -_[AFTER_FRAMESET_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[AFTER_FRAMESET_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[AFTER_FRAMESET_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterFrameset; -_[AFTER_FRAMESET_MODE][Tokenizer.END_TAG_TOKEN] = endTagAfterFrameset; -_[AFTER_FRAMESET_MODE][Tokenizer.EOF_TOKEN] = stopParsing; - -_[AFTER_AFTER_BODY_MODE] = {}; -_[AFTER_AFTER_BODY_MODE][Tokenizer.CHARACTER_TOKEN] = tokenAfterAfterBody; -_[AFTER_AFTER_BODY_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenAfterAfterBody; -_[AFTER_AFTER_BODY_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; -_[AFTER_AFTER_BODY_MODE][Tokenizer.COMMENT_TOKEN] = appendCommentToDocument; -_[AFTER_AFTER_BODY_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[AFTER_AFTER_BODY_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterAfterBody; -_[AFTER_AFTER_BODY_MODE][Tokenizer.END_TAG_TOKEN] = tokenAfterAfterBody; -_[AFTER_AFTER_BODY_MODE][Tokenizer.EOF_TOKEN] = stopParsing; - -_[AFTER_AFTER_FRAMESET_MODE] = {}; -_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.CHARACTER_TOKEN] = -_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; -_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; -_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.COMMENT_TOKEN] = appendCommentToDocument; -_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; -_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterAfterFrameset; -_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.END_TAG_TOKEN] = ignoreToken; -_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.EOF_TOKEN] = stopParsing; - - -//Parser -var Parser = module.exports = function (options) { - this.options = mergeOptions(DEFAULT_OPTIONS, options); - - this.treeAdapter = this.options.treeAdapter; - this.pendingScript = null; - - if (this.options.locationInfo) - locationInfoMixin.assign(this); -}; - -// API -Parser.prototype.parse = function (html) { - var document = this.treeAdapter.createDocument(); - - this._bootstrap(document, null); - this.tokenizer.write(html, true); - this._runParsingLoop(null, null); - - return document; -}; - -Parser.prototype.parseFragment = function (html, fragmentContext) { - //NOTE: use
Shake it, baby