Short and strict HTML parser.
Output format: a tree of objects, starting with a root `Document` node.
`Element` and `Document` nodes can have children, which are any of `Text`, `Comment` or `Element`.
import { parse, pack, unpack } from '@homebots/parse-html';
import { serialize, materialize, normalize } from '@homebots/parse-html-runtime';
const document = parse(`
<html>
<head>
<meta charset="utf-8">
</head>
<body>
<div>text<br>text</div>
</body>
</html>`);
console.log(document);
// remove empty text nodes
normalize(document);
// create HTML from document
console.log(serialize(document));
// create DOM elements from document
console.log(materialize(document, (element, node) => {
// do something with the new element
// return element or something new
}));
// to generate a minimal version of a node tree
const packed = pack(document);
const unpacked = unpack(packed);
{
"type": "document",
"docType": "html",
"children": []
}
{
"type": "comment",
"text": "an html comment"
}
{
"type": "text",
"text": "a text\nnode with line breaks"
}
{
"type": "element",
"tag": "input",
"selfClose": true,
"children": [],
"attributes": [{ "name": "type", "value": "text" }]
}