分词器 Tokenizer

{    options: {},    rules: {        other: {},        block: {},        inline: {}    },    lexer: {}}

规则

具体正则参考规则rules
生成的词法单元（token）参考 Token

块级类型 block

新行

```ts
space(src: string): Tokens.Space | undefined {
    const cap = this.rules.block.newline.exec(src);
    if (cap && cap[0].length > 0) {
        return {
            type: 'space',
            raw: cap[0],
        };
    }
}
```

code

```ts
code(src: string): Tokens.Code | undefined {
    const cap = this.rules.block.code.exec(src);
    if (cap) {
        const text = cap[0].replace(this.rules.other.codeRemoveIndent, '');
        return {
            type: 'code',
            raw: cap[0],
            codeBlockStyle: 'indented',
            text: !this.options.pedantic
                ? rtrim(text, '\n')
                : text,
        };
    }
}
```

fences 代码块

```ts
fences(src: string): Tokens.Code | undefined {
    const cap = this.rules.block.fences.exec(src);
    if (cap) {
        const raw = cap[0];
        const text = indentCodeCompensation(raw, cap[3] || '', this.rules);
        return {
            type: 'code',
            raw,
            lang: cap[2] ? cap[2].trim().replace(this.rules.inline.anyPunctuation, '$1') : cap[2],
            text,
        };
    }
}
```

heading/lheading 标题

```ts
heading(src: string): Tokens.Heading | undefined {
    const cap = this.rules.block.heading.exec(src);
    if (cap) {
        let text = cap[2].trim();
        if (this.rules.other.endingHash.test(text)) {
            const trimmed = rtrim(text, '#');
            if (this.options.pedantic) {
                text = trimmed.trim();
            } else if (!trimmed || this.rules.other.endingSpaceChar.test(trimmed)) {
                text = trimmed.trim();
            }
        }
        return {
            type: 'heading',
            raw: cap[0],
            depth: cap[1].length,
            text,
            tokens: this.lexer.inline(text),
        };
    }
}
```

hr 水平线

```ts
hr(src: string): Tokens.Hr | undefined {
    const cap = this.rules.block.hr.exec(src);
    if (cap) {
        return {
            type: 'hr',
            raw: rtrim(cap[0], '\n'),
        };
    }
}
```

blockquote 块引用

```ts
blockquote(src: string): Tokens.Blockquote | undefined {
    const cap = this.rules.block.blockquote.exec(src);
    if (cap) {
        let lines = rtrim(cap[0], '\n').split('\n');
        let raw = '';
        let text = '';
        const tokens: Token[] = [];
        while (lines.length > 0) {
            let inBlockquote = false;
            const currentLines = [];
            let i;
            for (i = 0; i < lines.length; i++) {
                if (this.rules.other.blockquoteStart.test(lines[i])) {
                    currentLines.push(lines[i]);
                    inBlockquote = true;
                } else if (!inBlockquote) {
                    currentLines.push(lines[i]);
                } else {
                    break;
                }
            }
            lines = lines.slice(i);
            const currentRaw = currentLines.join('\n');
            const currentText = currentRaw
                // precede setext continuation with 4 spaces so it isn't a setext
                .replace(this.rules.other.blockquoteSetextReplace, '\n    $1')
                .replace(this.rules.other.blockquoteSetextReplace2, '');
            raw = raw ? `${raw}\n${currentRaw}` : currentRaw;
            text = text ? `${text}\n${currentText}` : currentText;
            // parse blockquote lines as top level tokens
            // merge paragraphs if this is a continuation
            const top = this.lexer.state.top;
            this.lexer.state.top = true;
            this.lexer.blockTokens(currentText, tokens, true);
            this.lexer.state.top = top;
            // if there is no continuation then we are done
            if (lines.length === 0) {
                break;
            }
            const lastToken = tokens.at(-1);
            if (lastToken?.type === 'code') {
                // blockquote continuation cannot be preceded by a code block
                break;
            } else if (lastToken?.type === 'blockquote') {
                // include continuation in nested blockquote
                const oldToken = lastToken as Tokens.Blockquote;
                const newText = oldToken.raw + '\n' + lines.join('\n');
                const newToken = this.blockquote(newText)!;
                tokens[tokens.length - 1] = newToken;
                raw = raw.substring(0, raw.length - oldToken.raw.length) + newToken.raw;
                text = text.substring(0, text.length - oldToken.text.length) + newToken.text;
                break;
            } else if (lastToken?.type === 'list') {
                // include continuation in nested list
                const oldToken = lastToken as Tokens.List;
                const newText = oldToken.raw + '\n' + lines.join('\n');
                const newToken = this.list(newText)!;
                tokens[tokens.length - 1] = newToken;
                raw = raw.substring(0, raw.length - lastToken.raw.length) + newToken.raw;
                text = text.substring(0, text.length - oldToken.raw.length) + newToken.raw;
                lines = newText.substring(tokens.at(-1)!.raw.length).split('\n');
                continue;
            }
        }
        return {
            type: 'blockquote',
            raw,
            tokens,
            text,
        };
    }
}
```

list 列表

{type: 'list', raw: "", ordered: false, start: number, loose: boolean, items: []}

html

{type: 'html', block: true,  raw: "", pre: boolean, text: string}

def 引用

{type: 'def', tag: string, href: string,  raw: "", title: ''}

table 表格

{ type: 'table', raw: string, header: [], align: [], rows: []};

paragraph 段落

{ type: 'paragraph', raw: string, text: string, tokens: [] }

text 文本

{ type: 'text', raw: string, text: string, tokens: [] }

escape 转义字符

{ type: 'escape', raw: string, text: string }

行内类型 inline

tag 标签

{ type: 'html', raw: "", inLink: boolean, inRawBlock: boolean, block: false, text: "" };

link/reflink 链接

{ type: 'link', raw: string,  href: string, title: string, text: string, tokens: [] }

emStrong 斜体加粗

{ type: 'em' | 'strong', raw: string, text: string,  tokens: []  }

codespan 单行代码

{ type: 'codespan', raw: "", text: string};

br 换行

{ type: 'br', raw: ""};

del 删除

{ type: 'del', raw: "", text: string, tokens: []};

autolink

{ type: 'link', raw: string, href: string, title: string, text: string, tokens: [{ type: 'text', raw: string, text: string }] }

inlineText 行内文本

{ type: 'text', raw: string, text: string, escaped: boolean }

其他类型 other

表格相关

tableAlignRight
tableAlignLeft
tableAlignCenter
tableRowBlankLine
tableDelimiter

code相关

indentCodeCompensation
codeRemoveIndent

文本相关

beginningSpace
endingHash
endingSpaceChar
blockquoteStart
blockquoteSetextReplace/blockquoteSetextReplace2
blankLine
doubleBlankLine
anyLine

标签相关

startATag
endATag
startPreScriptTag
endPreScriptTag