/**
 * @typedef {import('micromark-util-types').Construct} Construct
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').Code} Code
 */

import {ok as assert} from 'uvu/assert'
import {factorySpace} from 'micromark-factory-space'
import {
  asciiAlpha,
  asciiAlphanumeric,
  markdownLineEnding,
  markdownLineEndingOrSpace,
  markdownSpace
} from 'micromark-util-character'
import {codes} from 'micromark-util-symbol/codes.js'
import {constants} from 'micromark-util-symbol/constants.js'
import {types} from 'micromark-util-symbol/types.js'

/**
 * Construct named `htmlText`, whose tokenizer is `tokenizeHtmlText`.
 *
 * @type {Construct}
 */
export const htmlText = {name: 'htmlText', tokenize: tokenizeHtmlText}

/**
 * Tokenizer for the `htmlText` construct, written as a state machine.
 *
 * Every inner function is a state: it receives the current character code
 * and either consumes it and returns the next state, delegates to another
 * state without consuming, or finishes through `ok` / `nok`.
 * The whole construct is wrapped in a `types.htmlText` token; its content is
 * emitted as `types.htmlTextData` tokens which are split around line endings
 * (see `atLineEnding`).
 *
 * Parameters (as used below):
 * - `effects`: provides `enter`, `exit`, and `consume`.
 * - `ok`: state returned after the closing `>` has been consumed.
 * - `nok`: state called with the current code when the input does not match.
 *
 * `this` is read once (as `self`) to reach `parser.constructs.disable.null`.
 *
 * @type {Tokenizer}
 */
function tokenizeHtmlText(effects, ok, nok) {
  const self = this
  /**
   * Quote character (`"` or `'`) that opened the current attribute value, or
   * `undefined` while in an unquoted value.
   *
   * @type {NonNullable<Code>|undefined}
   */
  let marker
  /**
   * String that must follow `<![` for a CDATA section
   * (`constants.cdataOpeningString`; presumably `CDATA[` — confirm in
   * `micromark-util-symbol`).
   *
   * @type {string}
   */
  let buffer
  /**
   * Position in `buffer` of the next character expected by `cdataOpen`.
   *
   * @type {number}
   */
  let index
  /**
   * State to resume in once a line ending and the following line prefix have
   * been handled by `atLineEnding` / `afterPrefix`.
   *
   * @type {State}
   */
  let returnState

  return start

  /**
   * Initial state: at `<`.
   *
   * Opens the `htmlText` and first `htmlTextData` tokens and consumes `<`.
   *
   * @type {State}
   */
  function start(code) {
    assert(code === codes.lessThan, 'expected `<`')
    effects.enter(types.htmlText)
    effects.enter(types.htmlTextData)
    effects.consume(code)
    return open
  }

  /**
   * After `<`: decide which kind of HTML this is.
   *
   * - `!` goes to `declarationOpen` (comment, CDATA, or declaration)
   * - `/` goes to `tagCloseStart` (closing tag)
   * - `?` goes to `instruction`
   * - ASCII letter goes to `tagOpen` (opening tag name)
   * - anything else fails the construct
   *
   * @type {State}
   */
  function open(code) {
    if (code === codes.exclamationMark) {
      effects.consume(code)
      return declarationOpen
    }

    if (code === codes.slash) {
      effects.consume(code)
      return tagCloseStart
    }

    if (code === codes.questionMark) {
      effects.consume(code)
      return instruction
    }

    if (asciiAlpha(code)) {
      effects.consume(code)
      return tagOpen
    }

    return nok(code)
  }

  /**
   * After `<!`: `-` starts a comment opener, `[` starts a CDATA opener, and
   * an ASCII letter starts a declaration; anything else fails.
   *
   * @type {State}
   */
  function declarationOpen(code) {
    if (code === codes.dash) {
      effects.consume(code)
      return commentOpen
    }

    if (code === codes.leftSquareBracket) {
      effects.consume(code)
      // Prepare matching of the rest of the CDATA opener, one character per
      // call of `cdataOpen`.
      buffer = constants.cdataOpeningString
      index = 0
      return cdataOpen
    }

    if (asciiAlpha(code)) {
      effects.consume(code)
      return declaration
    }

    return nok(code)
  }

  /**
   * After `<!-`: a second `-` is required.
   *
   * @type {State}
   */
  function commentOpen(code) {
    if (code === codes.dash) {
      effects.consume(code)
      return commentStart
    }

    return nok(code)
  }

  /**
   * After `<!--`: EOF or an immediate `>` fails; a `-` moves to
   * `commentStartDash`; anything else is handled as comment content.
   *
   * @type {State}
   */
  function commentStart(code) {
    if (code === codes.eof || code === codes.greaterThan) {
      return nok(code)
    }

    if (code === codes.dash) {
      effects.consume(code)
      return commentStartDash
    }

    return comment(code)
  }

  /**
   * After `<!---`: EOF or `>` fails; anything else is handled as comment
   * content.
   *
   * @type {State}
   */
  function commentStartDash(code) {
    if (code === codes.eof || code === codes.greaterThan) {
      return nok(code)
    }

    return comment(code)
  }

  /**
   * Inside a comment: EOF fails, `-` may start the closing sequence, line
   * endings are tokenized separately, everything else is consumed as data.
   *
   * @type {State}
   */
  function comment(code) {
    if (code === codes.eof) {
      return nok(code)
    }

    if (code === codes.dash) {
      effects.consume(code)
      return commentClose
    }

    if (markdownLineEnding(code)) {
      returnState = comment
      return atLineEnding(code)
    }

    effects.consume(code)
    return comment
  }

  /**
   * Inside a comment, after one `-`.
   *
   * A second `-` is consumed and control moves to `end`, which only accepts
   * `>`: so `--` inside a comment that is not followed by `>` fails the whole
   * construct. Anything other than `-` continues as comment content.
   *
   * @type {State}
   */
  function commentClose(code) {
    if (code === codes.dash) {
      effects.consume(code)
      return end
    }

    return comment(code)
  }

  /**
   * After `<![`: match the characters of `buffer` one by one.
   *
   * `index` is advanced as part of the comparison; once all of `buffer` has
   * been matched the machine moves into `cdata`. A mismatch fails.
   *
   * @type {State}
   */
  function cdataOpen(code) {
    if (code === buffer.charCodeAt(index++)) {
      effects.consume(code)
      return index === buffer.length ? cdata : cdataOpen
    }

    return nok(code)
  }

  /**
   * Inside a CDATA section: EOF fails, `]` may start the closing sequence,
   * line endings are tokenized separately, everything else is data.
   *
   * @type {State}
   */
  function cdata(code) {
    if (code === codes.eof) {
      return nok(code)
    }

    if (code === codes.rightSquareBracket) {
      effects.consume(code)
      return cdataClose
    }

    if (markdownLineEnding(code)) {
      returnState = cdata
      return atLineEnding(code)
    }

    effects.consume(code)
    return cdata
  }

  /**
   * Inside CDATA, after one `]`: a second `]` moves to `cdataEnd`, anything
   * else is handled as CDATA content again.
   *
   * @type {State}
   */
  function cdataClose(code) {
    if (code === codes.rightSquareBracket) {
      effects.consume(code)
      return cdataEnd
    }

    return cdata(code)
  }

  /**
   * Inside CDATA, after `]]`: `>` finishes the construct; further `]`
   * characters are consumed while staying in this state (so `]]]>` still
   * closes); anything else is handled as CDATA content again.
   *
   * @type {State}
   */
  function cdataEnd(code) {
    if (code === codes.greaterThan) {
      return end(code)
    }

    if (code === codes.rightSquareBracket) {
      effects.consume(code)
      return cdataEnd
    }

    return cdata(code)
  }

  /**
   * Inside a declaration (after `<!` and a letter).
   *
   * Both EOF and `>` are passed to `end`: `end` consumes `>` and fails on
   * anything else, so EOF fails the construct. Line endings are tokenized
   * separately; everything else is consumed as data.
   *
   * @type {State}
   */
  function declaration(code) {
    if (code === codes.eof || code === codes.greaterThan) {
      return end(code)
    }

    if (markdownLineEnding(code)) {
      returnState = declaration
      return atLineEnding(code)
    }

    effects.consume(code)
    return declaration
  }

  /**
   * Inside an instruction (after `<?`): EOF fails, `?` may start the closing
   * sequence, line endings are tokenized separately, everything else is data.
   *
   * @type {State}
   */
  function instruction(code) {
    if (code === codes.eof) {
      return nok(code)
    }

    if (code === codes.questionMark) {
      effects.consume(code)
      return instructionClose
    }

    if (markdownLineEnding(code)) {
      returnState = instruction
      return atLineEnding(code)
    }

    effects.consume(code)
    return instruction
  }

  /**
   * Inside an instruction, after `?`: `>` finishes the construct, anything
   * else is handled as instruction content again.
   *
   * @type {State}
   */
  function instructionClose(code) {
    return code === codes.greaterThan ? end(code) : instruction(code)
  }

  /**
   * After `</`: the closing tag name must start with an ASCII letter.
   *
   * @type {State}
   */
  function tagCloseStart(code) {
    if (asciiAlpha(code)) {
      effects.consume(code)
      return tagClose
    }

    return nok(code)
  }

  /**
   * Inside a closing tag name: `-` and ASCII alphanumerics continue the name;
   * anything else is handed to `tagCloseBetween`.
   *
   * @type {State}
   */
  function tagClose(code) {
    if (code === codes.dash || asciiAlphanumeric(code)) {
      effects.consume(code)
      return tagClose
    }

    return tagCloseBetween(code)
  }

  /**
   * After a closing tag name: line endings and spaces (per `markdownSpace`)
   * are allowed; anything else is handed to `end`, which requires `>`.
   *
   * @type {State}
   */
  function tagCloseBetween(code) {
    if (markdownLineEnding(code)) {
      returnState = tagCloseBetween
      return atLineEnding(code)
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return tagCloseBetween
    }

    return end(code)
  }

  /**
   * Inside an opening tag name: `-` and ASCII alphanumerics continue the
   * name; `/`, `>`, or a code accepted by `markdownLineEndingOrSpace` ends the
   * name and is handed to `tagOpenBetween`; anything else fails.
   *
   * @type {State}
   */
  function tagOpen(code) {
    if (code === codes.dash || asciiAlphanumeric(code)) {
      effects.consume(code)
      return tagOpen
    }

    if (
      code === codes.slash ||
      code === codes.greaterThan ||
      markdownLineEndingOrSpace(code)
    ) {
      return tagOpenBetween(code)
    }

    return nok(code)
  }

  /**
   * Inside an opening tag, between the name, attributes, and the end.
   *
   * - `/` is consumed and must be followed directly by `>` (next state is
   *   `end`)
   * - `:`, `_`, or an ASCII letter starts an attribute name
   * - line endings and spaces are skipped
   * - anything else is handed to `end`, which requires `>`
   *
   * @type {State}
   */
  function tagOpenBetween(code) {
    if (code === codes.slash) {
      effects.consume(code)
      return end
    }

    if (code === codes.colon || code === codes.underscore || asciiAlpha(code)) {
      effects.consume(code)
      return tagOpenAttributeName
    }

    if (markdownLineEnding(code)) {
      returnState = tagOpenBetween
      return atLineEnding(code)
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return tagOpenBetween
    }

    return end(code)
  }

  /**
   * Inside an attribute name: `-`, `.`, `:`, `_`, and ASCII alphanumerics
   * continue the name; anything else is handed to
   * `tagOpenAttributeNameAfter`.
   *
   * @type {State}
   */
  function tagOpenAttributeName(code) {
    if (
      code === codes.dash ||
      code === codes.dot ||
      code === codes.colon ||
      code === codes.underscore ||
      asciiAlphanumeric(code)
    ) {
      effects.consume(code)
      return tagOpenAttributeName
    }

    return tagOpenAttributeNameAfter(code)
  }

  /**
   * After an attribute name: `=` introduces a value; line endings and spaces
   * are skipped; anything else means the attribute has no value and is
   * handed back to `tagOpenBetween`.
   *
   * @type {State}
   */
  function tagOpenAttributeNameAfter(code) {
    if (code === codes.equalsTo) {
      effects.consume(code)
      return tagOpenAttributeValueBefore
    }

    if (markdownLineEnding(code)) {
      returnState = tagOpenAttributeNameAfter
      return atLineEnding(code)
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return tagOpenAttributeNameAfter
    }

    return tagOpenBetween(code)
  }

  /**
   * After `=`, before an attribute value.
   *
   * - EOF, `<`, `=`, `>`, and `` ` `` fail
   * - `"` or `'` opens a quoted value and is remembered in `marker`
   * - line endings and spaces are skipped
   * - anything else is consumed as the first character of an unquoted value
   *   (`marker` is reset to `undefined`)
   *
   * @type {State}
   */
  function tagOpenAttributeValueBefore(code) {
    if (
      code === codes.eof ||
      code === codes.lessThan ||
      code === codes.equalsTo ||
      code === codes.greaterThan ||
      code === codes.graveAccent
    ) {
      return nok(code)
    }

    if (code === codes.quotationMark || code === codes.apostrophe) {
      effects.consume(code)
      marker = code
      return tagOpenAttributeValueQuoted
    }

    if (markdownLineEnding(code)) {
      returnState = tagOpenAttributeValueBefore
      return atLineEnding(code)
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return tagOpenAttributeValueBefore
    }

    effects.consume(code)
    marker = undefined
    return tagOpenAttributeValueUnquoted
  }

  /**
   * Inside a quoted attribute value: the quote stored in `marker` closes the
   * value; EOF fails; line endings are tokenized separately; everything else
   * (including the other quote character) is consumed as data.
   *
   * @type {State}
   */
  function tagOpenAttributeValueQuoted(code) {
    if (code === marker) {
      effects.consume(code)
      return tagOpenAttributeValueQuotedAfter
    }

    if (code === codes.eof) {
      return nok(code)
    }

    if (markdownLineEnding(code)) {
      returnState = tagOpenAttributeValueQuoted
      return atLineEnding(code)
    }

    effects.consume(code)
    return tagOpenAttributeValueQuoted
  }

  /**
   * After the closing quote of an attribute value: only `>`, `/`, or a code
   * accepted by `markdownLineEndingOrSpace` may follow (handled by
   * `tagOpenBetween`); anything else fails.
   *
   * @type {State}
   */
  function tagOpenAttributeValueQuotedAfter(code) {
    if (
      code === codes.greaterThan ||
      code === codes.slash ||
      markdownLineEndingOrSpace(code)
    ) {
      return tagOpenBetween(code)
    }

    return nok(code)
  }

  /**
   * Inside an unquoted attribute value: EOF, `"`, `'`, `<`, `=`, and `` ` ``
   * fail; `>` or a code accepted by `markdownLineEndingOrSpace` ends the
   * value and is handed to `tagOpenBetween`; everything else is consumed.
   *
   * @type {State}
   */
  function tagOpenAttributeValueUnquoted(code) {
    if (
      code === codes.eof ||
      code === codes.quotationMark ||
      code === codes.apostrophe ||
      code === codes.lessThan ||
      code === codes.equalsTo ||
      code === codes.graveAccent
    ) {
      return nok(code)
    }

    if (code === codes.greaterThan || markdownLineEndingOrSpace(code)) {
      return tagOpenBetween(code)
    }

    effects.consume(code)
    return tagOpenAttributeValueUnquoted
  }

  // We can’t have blank lines in content, so no need to worry about empty
  // tokens.
  /**
   * At a line ending inside the construct.
   *
   * Closes the current `htmlTextData` token, emits the line ending as its own
   * `lineEnding` token, then lets `factorySpace` tokenize whitespace at the
   * start of the next line as `linePrefix` before continuing in
   * `afterPrefix`. The caller must have stored the state to resume in
   * `returnState` beforehand.
   *
   * The last argument to `factorySpace` is `undefined` when `'codeIndented'`
   * is listed in `self.parser.constructs.disable.null`, and
   * `constants.tabSize` otherwise (presumably a limit on how much prefix
   * whitespace is taken — confirm against `micromark-factory-space`).
   *
   * @type {State}
   */
  function atLineEnding(code) {
    assert(returnState, 'expected return state')
    assert(markdownLineEnding(code), 'expected eol')
    effects.exit(types.htmlTextData)
    effects.enter(types.lineEnding)
    effects.consume(code)
    effects.exit(types.lineEnding)
    return factorySpace(
      effects,
      afterPrefix,
      types.linePrefix,
      self.parser.constructs.disable.null.includes('codeIndented')
        ? undefined
        : constants.tabSize
    )
  }

  /**
   * After a line ending and its line prefix: reopen an `htmlTextData` token
   * and resume the state that was interrupted, passing it the current code.
   *
   * @type {State}
   */
  function afterPrefix(code) {
    effects.enter(types.htmlTextData)
    return returnState(code)
  }

  /**
   * Final state: only `>` is accepted.
   *
   * Consumes `>`, closes the `htmlTextData` and `htmlText` tokens, and
   * returns `ok`; any other code (including EOF) fails the construct.
   *
   * @type {State}
   */
  function end(code) {
    if (code === codes.greaterThan) {
      effects.consume(code)
      effects.exit(types.htmlTextData)
      effects.exit(types.htmlText)
      return ok
    }

    return nok(code)
  }
}