/**
* While micromark is a lexer/tokenizer, the common case of going from markdown
* to html is currently built in as this module, even though the parts can be
* used separately to build ASTs, CSTs, or many other output formats.
*
* Having an HTML compiler built in is useful because it allows us to check for
* compliance with CommonMark, the de facto standard for markdown, which is
* specified in roughly 600 input/output cases.
*
* This module has an interface that accepts lists of events instead of the
* whole at once. However, because markdown can’t be truly streaming, we buffer
* events before processing and outputting the final result.
*/
/**
* @typedef {import('micromark-util-types').Event} Event
* @typedef {import('micromark-util-types').CompileOptions} CompileOptions
* @typedef {import('micromark-util-types').CompileData} CompileData
* @typedef {import('micromark-util-types').CompileContext} CompileContext
* @typedef {import('micromark-util-types').Compile} Compile
* @typedef {import('micromark-util-types').Handle} Handle
* @typedef {import('micromark-util-types').HtmlExtension} HtmlExtension
* @typedef {import('micromark-util-types').NormalizedHtmlExtension} NormalizedHtmlExtension
*/
/**
* @typedef Media
* @property {boolean} [image]
* @property {string} [labelId]
* @property {string} [label]
* @property {string} [referenceId]
* @property {string} [destination]
* @property {string} [title]
*
* @typedef Definition
* @property {string} [destination]
* @property {string} [title]
*/
import {ok as assert} from 'uvu/assert'
import {decodeNamedCharacterReference} from 'decode-named-character-reference'
import {combineHtmlExtensions} from 'micromark-util-combine-extensions'
import {push} from 'micromark-util-chunked'
import {decodeNumericCharacterReference} from 'micromark-util-decode-numeric-character-reference'
import {encode as _encode} from 'micromark-util-encode'
import {normalizeIdentifier} from 'micromark-util-normalize-identifier'
import {sanitizeUri} from 'micromark-util-sanitize-uri'
import {codes} from 'micromark-util-symbol/codes.js'
import {constants} from 'micromark-util-symbol/constants.js'
import {types} from 'micromark-util-symbol/types.js'
const hasOwnProperty = {}.hasOwnProperty
/**
 * These two are allowlists of safe protocols for full URLs in respectively the
 * `href` (on `<a>`) and `src` (on `<img>`) attributes.
 * They are based on what is allowed on GitHub.
 *
 * NOTE(review): the allowlist constants this comment describes are not present
 * in this part of the file — TODO restore them next to this comment.
 */

/**
 * Leave a paragraph.
 * Inside a tight container (top of `tightStack` is truthy) no closing tag is
 * written; instead `slurpAllLineEndings` is switched on.
 * Everywhere else the paragraph is closed with `</p>`.
 *
 * @type {Handle}
 */
function onexitparagraph() {
  if (tightStack[tightStack.length - 1]) {
    setData('slurpAllLineEndings', true)
  } else {
    tag('</p>')
  }
}

/**
 * Line ending to use when an extra one has to be generated (see
 * `lineEnding`), for example in front of block-level tags.
 * This variable holds the default line ending when given (or `undefined`),
 * and in the latter case will be updated to the first found line ending if
 * there is one.
 */
// Seeded from the caller’s options; when no default was given, `compile`
// fills this in with the first line ending it encounters in the events.
let lineEndingStyle = options.defaultLineEnding
// Return the function that handles a slice of events.
return compile
/**
 * Compile one slice of events to HTML.
 *
 * Works in two passes.
 * The first pass records the line ending style (when not yet known), hands
 * every finished list to `prepareList`, and reorders the events so that all
 * definitions come first.
 * The second pass calls the registered enter/exit handlers for every event.
 *
 * @param {Event[]} events
 *   Events to compile.
 * @returns {string}
 *   Contents of the outermost buffer, joined.
 */
function compile(events) {
  /** @type {number[]} */
  const openLists = []
  // Definitions can follow the references that use them, so they are moved
  // to the front (`head`) and everything else is kept in order (`body`).
  /** @type {Event[]} */
  let head = []
  /** @type {Event[]} */
  let body = []
  // First event that was not yet copied into `head` or `body`.
  let start = 0

  for (let position = 0; position < events.length; position++) {
    const event = events[position]
    const entering = event[0] === 'enter'
    const kind = event[1].type

    // Adopt the first line ending of the document when no style is known.
    if (
      !lineEndingStyle &&
      (kind === types.lineEnding || kind === types.lineEndingBlank)
    ) {
      // @ts-expect-error Hush, it’s a line ending.
      lineEndingStyle = event[2].sliceSerialize(event[1])
    }

    // Once a list closes, inspect it (from its enter event up to, but not
    // including, its exit event) to infer whether it is loose.
    if (kind === types.listOrdered || kind === types.listUnordered) {
      if (entering) {
        openLists.push(position)
      } else {
        prepareList(events.slice(openLists.pop(), position))
      }
    }

    // Everything before a definition goes to `body`, the definition itself
    // (enter through exit) goes to `head`.
    if (kind === types.definition) {
      if (entering) {
        body = push(body, events.slice(start, position))
        start = position
      } else {
        head = push(head, events.slice(start, position + 1))
        start = position + 1
      }
    }
  }

  // Definitions, then the content seen so far, then whatever is left.
  const result = push(push(head, body), events.slice(start))

  // Handle the start of the document, if defined.
  if (handlers.enter.null) {
    handlers.enter.null.call(context)
  }

  // Handle all events; reordering does not change how many there are.
  for (let position = 0; position < events.length; position++) {
    const event = result[position]
    const phaseHandlers = handlers[event[0]]
    const kind = event[1].type

    if (hasOwnProperty.call(phaseHandlers, kind)) {
      // Handlers see the compile context plus the `sliceSerialize` of the
      // tokenizer that produced this event.
      const scope = Object.assign(
        {sliceSerialize: event[2].sliceSerialize},
        context
      )
      phaseHandlers[kind].call(scope, event[1])
    }
  }

  // Handle the end of the document, if defined.
  if (handlers.exit.null) {
    handlers.exit.null.call(context)
  }

  return buffers[0].join('')
}
/**
 * Decide whether a list is loose and record the answer as `_loose` on the
 * token of the list’s enter event.
 *
 * A list becomes loose when a blank line is entered directly inside it (not
 * inside a nested container), unless that blank line comes right after a
 * list item prefix, optionally separated from it by line prefixes.
 *
 * @param {Event[]} slice
 *   Events of one list, starting with the list’s own enter event.
 * @returns {void}
 */
function prepareList(slice) {
  // Number of nested containers currently open inside the list.
  let nestedContainers = 0
  let looseList = false
  // Whether the previous relevant event closed a list item prefix.
  let afterMarker = false

  // Position 0 is the list’s own enter event, so start at 1.
  for (let position = 1; position < slice.length; position++) {
    const [phase, token] = slice[position]

    if (token._container) {
      afterMarker = false
      nestedContainers += phase === 'enter' ? 1 : -1
      continue
    }

    const kind = token.type

    if (kind === types.listItemPrefix) {
      if (phase === 'exit') {
        afterMarker = true
      }

      continue
    }

    // Line prefixes do not affect the outcome.
    if (kind === types.linePrefix) {
      continue
    }

    if (kind === types.lineEndingBlank) {
      if (phase === 'enter' && nestedContainers === 0) {
        if (afterMarker) {
          afterMarker = false
        } else {
          looseList = true
        }
      }

      continue
    }

    // Anything else means we are no longer right behind a marker.
    afterMarker = false
  }

  slice[0][1]._loose = looseList
}
/**
* Store `value` under `key` on the shared `data` object.
* Calling it without `value` stores `undefined`, which is how handlers in this
* file clear a flag (for example `setData('ignoreEncode')`).
*
* @type {CompileContext['setData']}
* @param [value]
*/
function setData(key, value) {
data[key] = value
}
/**
* Read the value stored under `key` on the shared `data` object.
*
* @type {CompileContext['getData']}
* @template {string} K
* @param {K} key
* @returns {CompileData[K]}
*/
function getData(key) {
return data[key]
}
/** @type {CompileContext['buffer']} */
function buffer() {
buffers.push([])
}
/**
 * Stop capturing: remove the innermost buffer and return everything that was
 * written to it as one string.
 * Asserts that there is a buffer to remove.
 *
 * @type {CompileContext['resume']}
 */
function resume() {
  const captured = buffers.pop()
  assert(captured !== undefined, 'Cannot resume w/o buffer')
  return captured.join('')
}
/**
 * Write a piece of markup to the current buffer and flag that the last thing
 * written was a tag.
 * Does nothing at all while `tags` is falsy.
 *
 * @type {CompileContext['tag']}
 */
function tag(value) {
  if (tags) {
    setData('lastWasTag', true)
    const current = buffers[buffers.length - 1]
    current.push(value)
  }
}
/**
 * Write a value that is not a tag to the current buffer and clear the
 * `lastWasTag` flag.
 *
 * @type {CompileContext['raw']}
 */
function raw(value) {
  setData('lastWasTag')
  const current = buffers[buffers.length - 1]
  current.push(value)
}
/**
 * Write one extra line ending: the known line ending style, or a line feed
 * when no style is known.
 *
 * @returns {void}
 */
function lineEnding() {
  const eol = lineEndingStyle || '\n'
  raw(eol)
}
/**
 * Write a line ending unless the current buffer is already at the start of a
 * line.
 * That is the case when the last chunk in the buffer is missing or empty, or
 * when it ends in a line feed or carriage return.
 *
 * @type {CompileContext['lineEndingIfNeeded']}
 */
function lineEndingIfNeeded() {
  const current = buffers[buffers.length - 1]
  const lastChunk = current[current.length - 1]
  const previous = lastChunk
    ? lastChunk.charCodeAt(lastChunk.length - 1)
    : codes.eof
  const atLineStart =
    previous === codes.lf || previous === codes.cr || previous === codes.eof

  if (!atLineStart) {
    lineEnding()
  }
}
/**
 * Pass `value` through `_encode`, unless the `ignoreEncode` flag is set, in
 * which case `value` is returned untouched.
 *
 * @type {CompileContext['encode']}
 */
function encode(value) {
  if (getData('ignoreEncode')) {
    return value
  }

  return _encode(value)
}
//
// Handlers.
//
// NOTE(review): this span does not parse as written. The function body closes
// right before `else`, so that `else` has no matching `if`, and the last
// string literal contains a raw line break. Both `tag` calls also pass an
// empty or whitespace-only string. It looks like several list handlers were
// spliced together and their tag strings lost — TODO restore them from a
// known-good copy.
/** @type {Handle} */
function onenterlistordered(token) {
// A list that was not marked loose is pushed as tight.
tightStack.push(!token._loose)
lineEndingIfNeeded()
tag('')
} else {
onexitlistitem()
}
lineEndingIfNeeded()
tag('
')
}
/**
 * Leave an unordered list.
 * Runs the list item exit logic first, drops this list’s entry from
 * `tightStack`, and writes the closing `</ul>` after a line ending.
 *
 * @type {Handle}
 */
function onexitlistunordered() {
  onexitlistitem()
  tightStack.pop()
  lineEnding()
  tag('</ul>')
}
/**
 * Close a list item with `</li>`.
 * A line ending is put in front of the closing tag only when the last thing
 * written was a tag and line endings are not being slurped.
 * Afterwards the `slurpAllLineEndings` flag is cleared.
 *
 * @type {Handle}
 */
function onexitlistitem() {
  if (getData('lastWasTag') && !getData('slurpAllLineEndings')) {
    lineEndingIfNeeded()
  }

  tag('</li>')
  setData('slurpAllLineEndings')
}
/**
 * Enter a block quote.
 * Content of a block quote is never tight, so `false` is pushed onto
 * `tightStack`; the opening `<blockquote>` is written at the start of a line.
 *
 * @type {Handle}
 */
function onenterblockquote() {
  tightStack.push(false)
  lineEndingIfNeeded()
  tag('<blockquote>')
}
/**
 * Leave a block quote.
 * Drops the block quote’s entry from `tightStack`, writes the closing
 * `</blockquote>` at the start of a line, and clears `slurpAllLineEndings`.
 *
 * @type {Handle}
 */
function onexitblockquote() {
  tightStack.pop()
  lineEndingIfNeeded()
  tag('</blockquote>')
  setData('slurpAllLineEndings')
}
/**
 * Enter a paragraph.
 * Outside a tight container the opening `<p>` is written at the start of a
 * line; inside a tight container no tag is written.
 * In both cases the `slurpAllLineEndings` flag is cleared, mirroring
 * `onexitparagraph`, which sets it when leaving a tight paragraph.
 *
 * @type {Handle}
 */
function onenterparagraph() {
  if (!tightStack[tightStack.length - 1]) {
    lineEndingIfNeeded()
    tag('<p>')
  }

  setData('slurpAllLineEndings')
}
/**
 * Enter indented code: write the opening `<pre><code>` at the start of a
 * line.
 *
 * @type {Handle}
 */
function onentercodeindented() {
  lineEndingIfNeeded()
  tag('<pre><code>')
}
/**
* Start tracking an image: push a media record flagged as `image` onto
* `mediaStack` and switch `tags` off, so that `tag` writes nothing until
* `tags` is set again.
*
* @type {Handle}
*/
function onenterimage() {
mediaStack.push({image: true})
tags = undefined // Disallow tags.
}
/**
* Start tracking a link: push an empty media record onto `mediaStack`.
*
* @type {Handle}
*/
function onenterlink() {
mediaStack.push({})
}
/**
 * Store the serialized source of the label text as `labelId` on the media
 * that is currently being built.
 *
 * @type {Handle}
 */
function onexitlabeltext(token) {
  const media = mediaStack[mediaStack.length - 1]
  media.labelId = this.sliceSerialize(token)
}
/**
 * Store whatever was captured since the last `buffer()` call as `label` on
 * the media that is currently being built.
 *
 * @type {Handle}
 */
function onexitlabel() {
  const media = mediaStack[mediaStack.length - 1]
  media.label = resume()
}
/**
 * Store the serialized source of the reference as `referenceId` on the media
 * that is currently being built.
 *
 * @type {Handle}
 */
function onexitreferencestring(token) {
  const media = mediaStack[mediaStack.length - 1]
  media.referenceId = this.sliceSerialize(token)
}
/**
 * Enter a resource.
 * Starts capturing so that whatever is written inside the resource does not
 * end up in the output, and marks the current media as having a destination
 * by setting it to the empty string.
 *
 * @type {Handle}
 */
function onenterresource() {
  buffer() // We can have line endings in the resource, ignore them.
  const media = mediaStack[mediaStack.length - 1]
  media.destination = ''
}
/**
* Enter the destination of a resource: start capturing into a new buffer and
* set `ignoreEncode`, which makes `encode` return its input unchanged.
*
* @type {Handle}
*/
function onenterresourcedestinationstring() {
buffer()
// Ignore encoding the result, as we’ll first percent encode the url and
// encode manually after.
setData('ignoreEncode', true)
}
/**
 * Leave the destination of a resource: store what was captured as
 * `destination` on the current media and clear `ignoreEncode` again.
 *
 * @type {Handle}
 */
function onexitresourcedestinationstring() {
  const media = mediaStack[mediaStack.length - 1]
  media.destination = resume()
  setData('ignoreEncode')
}
/**
 * Leave the title of a resource: store what was captured as `title` on the
 * current media.
 *
 * @type {Handle}
 */
function onexitresourcetitlestring() {
  const media = mediaStack[mediaStack.length - 1]
  media.title = resume()
}
// NOTE(review): this span does not parse as written. `onexitmedia` is not
// closed before `onexitflowcode` starts, two string literals contain raw line
// breaks, and the `} else {` near the end has no matching `if`. It looks like
// two handlers were spliced together and their tag strings lost — TODO restore
// both from a known-good copy before relying on either.
/** @type {Handle} */
function onexitmedia() {
let index = mediaStack.length - 1 // Skip current.
const media = mediaStack[index]
// A reference, when present, takes precedence over the label as identifier.
const id = media.referenceId || media.labelId
assert(id !== undefined, 'media should have `referenceId` or `labelId`')
assert(media.label !== undefined, 'media should have `label`')
// Without a destination of its own, the media is looked up in `definitions`
// by its normalized identifier.
const context =
media.destination === undefined
? definitions[normalizeIdentifier(id)]
: media
// Turn tags back on, unless one of the media below the current one on the
// stack is an image.
tags = true
while (index--) {
if (mediaStack[index].image) {
tags = undefined
break
}
}
if (media.image) {
tag(
'')
}
/** @type {Handle} */
function onexitflowcode() {
const count = getData('fencesCount')
// One special case is if we are inside a container, and the fenced code was
// not closed (meaning it runs to the end).
// In that case, the following line ending, is considered *outside* the
// fenced code and block quote by micromark, but CM wants to treat that
// ending as part of the code.
if (
count !== undefined &&
count < 2 &&
// @ts-expect-error `tightStack` is always set.
data.tightStack.length > 0 &&
!getData('lastWasTag')
) {
lineEnding()
}
// But in most cases, it’s simpler: when we’ve seen some data, emit an extra
// line ending when needed.
if (getData('flowCodeSeenData')) {
lineEndingIfNeeded()
}
tag('
')
} else {
tag('>')
raw(media.label)
tag('
')
}
/**
* Enter HTML (flow): make sure the output is at the start of a line, then do
* the same as for any other HTML by calling `onenterhtml`.
*
* @type {Handle}
*/
function onenterhtmlflow() {
lineEndingIfNeeded()
onenterhtml()
}
/**
* Leave HTML: clear the `ignoreEncode` flag, so that `encode` passes values
* through `_encode` again.
*
* @type {Handle}
*/
function onexithtml() {
setData('ignoreEncode')
}
/**
 * Enter HTML.
 * Only when the `allowDangerousHtml` option is on is `ignoreEncode` set, which
 * makes `encode` return its input unchanged; otherwise nothing changes.
 *
 * @type {Handle}
 */
function onenterhtml() {
  if (!options.allowDangerousHtml) {
    return
  }

  setData('ignoreEncode', true)
}
/**
 * Enter emphasis: write the opening `<em>`.
 *
 * @type {Handle}
 */
function onenteremphasis() {
  tag('<em>')
}
/**
 * Enter strong emphasis: write the opening `<strong>`.
 *
 * @type {Handle}
 */
function onenterstrong() {
  tag('<strong>')
}
/**
 * Enter inline code: set the `inCodeText` flag and write the opening
 * `<code>`.
 *
 * @type {Handle}
 */
function onentercodetext() {
  setData('inCodeText', true)
  tag('<code>')
}
/**
 * Leave inline code: clear the `inCodeText` flag and write the closing
 * `</code>`.
 *
 * @type {Handle}
 */
function onexitcodetext() {
  setData('inCodeText')
  tag('</code>')
}
/**
 * Leave emphasis: write the closing `</em>`.
 *
 * @type {Handle}
 */
function onexitemphasis() {
  tag('</em>')
}
/**
 * Leave strong emphasis: write the closing `</strong>`.
 *
 * @type {Handle}
 */
function onexitstrong() {
  tag('</strong>')
}
/** @type {Handle} */
function onexitthematicbreak() {
lineEndingIfNeeded()
tag('