Lexer: lexical analysis
Initialization
- Initialize the base data and the Tokenizer
- Initialize the parsing rules for the markdown document
```ts
constructor(options?: MarkedOptions) {
  this.tokens = [] as unknown as TokensList;
  this.tokens.links = Object.create(null);
  this.options = options || _defaults;
  this.options.tokenizer = this.options.tokenizer || new _Tokenizer();
  this.tokenizer = this.options.tokenizer;
  this.tokenizer.options = this.options;
  this.tokenizer.lexer = this;
  this.inlineQueue = [];
  this.state = {
    inLink: false,
    inRawBlock: false,
    top: true,
  };

  const rules = {
    other,
    block: block.normal,
    inline: inline.normal,
  };

  // pick the rule set based on the options
  if (this.options.pedantic) {
    rules.block = block.pedantic;
    rules.inline = inline.pedantic;
  } else if (this.options.gfm) {
    rules.block = block.gfm;
    if (this.options.breaks) {
      rules.inline = inline.breaks;
    } else {
      rules.inline = inline.gfm;
    }
  }
  this.tokenizer.rules = rules;
}
```
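Which rule set the tokenizer ends up with depends entirely on the options passed to the constructor. A minimal usage sketch (the `pedantic`, `gfm`, and `breaks` options are standard MarkedOptions; everything else is just illustration):

```ts
import { Lexer } from 'marked';

// Default lexer: GFM rules are enabled by marked's defaults.
const gfmLexer = new Lexer();

// Pedantic mode switches both block and inline rules to the
// original markdown.pl-style behaviour.
const pedanticLexer = new Lexer({ pedantic: true });

// GFM plus `breaks` swaps in the inline rules that treat single
// newlines as line breaks.
const breaksLexer = new Lexer({ gfm: true, breaks: true });
```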
lex: tokenizing the source
- Normalize carriage returns in the document to \n
- Tokenize the block-level tokens
- Then tokenize the inline tokens queued during the block pass
- Return the resulting token list (the AST)
```ts
lex(src: string) {
  // normalize carriage returns to \n
  src = src.replace(other.carriageReturn, '\n');

  // block-level pass
  this.blockTokens(src, this.tokens);

  // inline pass over everything queued during the block pass
  for (let i = 0; i < this.inlineQueue.length; i++) {
    const next = this.inlineQueue[i];
    this.inlineTokens(next.src, next.tokens);
  }
  this.inlineQueue = [];

  return this.tokens;
}
```
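Calling `lex` directly on a `Lexer` instance (or via `marked.lexer`) shows the shape of the returned token list. A small sketch; the token fields in the comments are indicative only:

```ts
import { Lexer } from 'marked';

const src = '# Title\r\n\r\nSome *inline* text.';
const tokens = new Lexer().lex(src);

// Roughly:
// [
//   { type: 'heading', raw: '# Title\n\n', depth: 1, text: 'Title', tokens: [...] },
//   { type: 'paragraph', raw: 'Some *inline* text.', text: 'Some *inline* text.', tokens: [...] },
// ]
// tokens.links holds any link definitions collected while lexing.
console.log(tokens);
```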
blockTokens: block-level tokens
- Run the block-level extension tokenizers first (see the sketch after the code below)
- Run the whitespace tokenizer
- Run the tokenizer for code lines indented with spaces or tabs
- Run the fenced code block tokenizer
- Run the heading tokenizer
- Run the horizontal rule (hr) tokenizer
- Run the blockquote tokenizer
```ts
blockTokens(src: string, tokens?: Token[], lastParagraphClipped?: boolean): Token[];
blockTokens(src: string, tokens?: TokensList, lastParagraphClipped?: boolean): TokensList;
blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) {
  if (this.options.pedantic) {
    src = src.replace(other.tabCharGlobal, '    ').replace(other.spaceLine, '');
  }

  while (src) {
    let token: Tokens.Generic | undefined;

    // block-level extension tokenizers run before the built-in ones
    if (this.options.extensions?.block?.some((extTokenizer) => {
      if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        return true;
      }
      return false;
    })) {
      continue;
    }

    // newline
    if (token = this.tokenizer.space(src)) {
      src = src.substring(token.raw.length);
      const lastToken = tokens.at(-1);
      if (token.raw.length === 1 && lastToken !== undefined) {
        // a single \n spacer just terminates the previous line
        lastToken.raw += '\n';
      } else {
        tokens.push(token);
      }
      continue;
    }

    // indented code
    if (token = this.tokenizer.code(src)) {
      src = src.substring(token.raw.length);
      const lastToken = tokens.at(-1);
      // an indented code block cannot interrupt a paragraph
      if (lastToken?.type === 'paragraph' || lastToken?.type === 'text') {
        lastToken.raw += '\n' + token.raw;
        lastToken.text += '\n' + token.text;
        this.inlineQueue.at(-1)!.src = lastToken.text;
      } else {
        tokens.push(token);
      }
      continue;
    }

    // fences
    if (token = this.tokenizer.fences(src)) {
      src = src.substring(token.raw.length);
      tokens.push(token);
      continue;
    }

    // heading
    if (token = this.tokenizer.heading(src)) {
      src = src.substring(token.raw.length);
      tokens.push(token);
      continue;
    }

    // hr
    if (token = this.tokenizer.hr(src)) {
      src = src.substring(token.raw.length);
      tokens.push(token);
      continue;
    }

    // blockquote
    if (token = this.tokenizer.blockquote(src)) {
      src = src.substring(token.raw.length);
      tokens.push(token);
      continue;
    }
  }
}
```
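The first branch in the loop above gives registered block-level extensions a chance to claim the input before any built-in tokenizer runs. The sketch below uses marked's real `marked.use({ extensions: [...] })` API, but the `:::note` syntax and the `note` token type are invented purely for illustration:

```ts
import { marked } from 'marked';

// Hypothetical ":::note ... :::" container block, only to show how a
// block-level extension tokenizer plugs into the loop above.
marked.use({
  extensions: [{
    name: 'note',
    level: 'block',
    start(src) {
      // hint: index at which this extension might start matching
      return src.match(/^:::note/m)?.index;
    },
    tokenizer(src) {
      const match = /^:::note\n([\s\S]*?)\n:::(?:\n+|$)/.exec(src);
      if (match) {
        const token = {
          type: 'note',
          raw: match[0],
          text: match[1],
          tokens: [],
        };
        // `this.lexer` is the Lexer instance, bound via extTokenizer.call({ lexer: this }, ...)
        this.lexer.inline(token.text, token.tokens);
        return token;
      }
    },
    renderer(token) {
      return `<div class="note">${this.parser.parseInline(token.tokens)}</div>\n`;
    },
  }],
});
```

Because the extension check comes first and `continue`s on success, an extension can override how a given piece of source is tokenized before the built-in space/code/fences/heading/hr/blockquote tokenizers ever see it.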
inlineTokens: inline tokens