//
// Copyright (c) 2007, Brian Frank and Andy Frank
// Licensed under the Academic Free License version 3.0
//
// History:
//   17 Feb 07  Brian Frank  Creation
//

**
** FandocParser translates fandoc text into an in-memory
** representation of the document.
**
** See [pod doc]`pod-doc#api` for usage.
**
@Js
class FandocParser
{

//////////////////////////////////////////////////////////////////////////
// Parser
//////////////////////////////////////////////////////////////////////////

  **
  ** Parse the document from the specified in stream into an in-memory
  ** tree structure.  If close is true, the stream is guaranteed to be closed.
  **
  ** Any error raised while parsing blocks is logged to `errs` and the
  ** document falls back to a single preformatted block holding the raw text.
  **
  Doc parse(Str filename, InStream in, Bool close := true)
  {
    this.filename = filename
    this.errs = FandocErr[,]
    readLines(in, close)

    doc := Doc.make
    try
    {
      header(doc)
      while (curt !== LineType.eof)
        doc.add(topBlock)
    }
    catch (Err e)
    {
      // give up on structure: report the error and emit the raw source as <pre>
      err("Invalid line $curLine", curLine, e)
      doc.removeAll.add(Pre.make.add(DocText(lines.join("\n"))))
    }

    lines = null
    return doc
  }

  **
  ** Parse a string into its in-memory document tree structure.
  **
  Doc parseStr(Str plaintext)
  {
    return parse("str", plaintext.in, true)
  }

//////////////////////////////////////////////////////////////////////////
// Header
//////////////////////////////////////////////////////////////////////////

  **
  ** Parse the leading "**" lines into 'doc.meta' key/value pairs.
  ** Lines of the form "** key: value" are stored; lines starting
  ** with "****" are skipped; any other "**" line ends the header.
  ** Does nothing beyond skipping blank lines if `parseHeader` is false.
  **
  private Void header(Doc doc)
  {
    skipBlankLines
    if (!parseHeader) return

    while (curt !== LineType.eof && cur.startsWith("**"))
    {
      colon := cur.index(":")
      if (colon != null)
      {
        key := cur[2..<colon].trim
        val := cur[colon+1..-1].trim
        doc.meta[key] = val
      }
      else
      {
        // separator lines such as "*****" are allowed inside the header
        if (!cur.startsWith("****")) break
      }
      consume
    }
    skipBlankLines
  }

//////////////////////////////////////////////////////////////////////////
// Block
//////////////////////////////////////////////////////////////////////////

  **
  ** Parse one top level block.  A heading is recognized by the type
  ** of the *next* line (the underline), so we switch on peekt here.
  **
  private DocElem topBlock()
  {
    switch (peekt)
    {
      case LineType.h1:
      case LineType.h2:
      case LineType.h3:
      case LineType.h4:
        return heading
    }
    return block(0)
  }

  **
  ** Parse a heading: cur is the title text, peek is the underline.
  **
  private DocElem heading()
  {
    level := peekt.headingLevel
    h := Heading(level)
    curStart = 0
    formattedText(h)

    // formattedText consumed the title; now consume the underline
    consume
    skipBlankLines

    title := h.children.first as DocText
    if (title != null) title.str = title.str.trim
    return h
  }

  **
  ** Parse a block element based on the current line type.  The indent
  ** is that of the enclosing block: a normal line indented at least two
  ** more spaces is treated as preformatted text.
  **
  private DocElem block(Int indent)
  {
    switch (curt)
    {
      case LineType.ol:
        return ol
      case LineType.ul:
        return ul
      case LineType.blockquote:
        return blockquote
      case LineType.preStart:
        return preExplicit
      case LineType.hr:
        return hr
      case LineType.normal:
        if (curIndent >= indent+2)
          return pre
        else
          return para
      default:
        throw Err(curt.toStr)
    }
  }

  **
  ** Parse a paragraph, detecting a leading admonition such as "NOTE:".
  **
  private DocElem para()
  {
    para := Para.make

    // if the first word is all capitals followed
    // by a colon then it is a admonition such as NOTE:
    first := cur.trim.split.first

    // require at least one char before the colon so that a
    // bare ":" does not produce an empty admonition
    if (first.size > 1 && first[-1] == ':')
    {
      first = first[0..-2]
      if (first.all |Int ch->Bool| { return ch.isUpper })
      {
        para.admonition = first
        curStart = cur.index(":") + 1
      }
    }

    return formattedText(para)
  }

  **
  ** Parse a block quote, which wraps a single paragraph.
  **
  private DocElem blockquote()
  {
    // block quote wraps paragraph
    return BlockQuote.make.add(formattedText(Para.make))
  }

  **
  ** Parse an explicit preformatted section delimited by
  ** "pre>" and "<pre" lines.
  **
  private DocElem preExplicit()
  {
    // skip pre>
    consume

    // skip any blank lines
    while (curt === LineType.blank) consume

    // read preformatted lines, keep track of left most indentation
    lines := Str[,]
    indent := Int.maxVal
    while (curt !== LineType.preEnd && curt !== LineType.eof)
    {
      // use local indent logic since curIndent has special behavior
      if (curt != LineType.blank)
      {
        i := 0
        while (cur[i] == ' ') i++
        indent = indent.min(i)
      }
      lines.add(cur)
      consume
    }

    // skip <pre (or no-op at eof) and any trailing blank lines
    consume
    while (curt === LineType.blank) consume

    // align against left most indentation
    buf := StrBuf()
    lines.each |line|
    {
      if (line.size > indent) buf.add(line[indent..-1])
      buf.addChar('\n')
    }

    pre := Pre.make
    pre.add(DocText(buf.toStr))
    return pre
  }

  **
  ** Parse an implicit preformatted section (indented text).
  **
  private DocElem pre()
  {
    // first line defines left margin
    indent := curIndent
    buf := StrBuf(256)
    buf.add(cur[indent..-1])
    consume

    while (true)
    {
      // read in preformatted lines of code
      while (curt === LineType.normal && curIndent >= indent)
      {
        buf.add("\n").add(cur[indent..-1])
        consume
      }

      // skip blanks but keep track of count
      blanks := 0
      while (curt === LineType.blank) { consume; blanks++ }

      // if more code, then add blank lines and continue
      if (curt === LineType.normal && curIndent >= indent)
        blanks.times { buf.add("\n") }
      else
        break
    }

    pre := Pre.make
    pre.add(DocText(buf.toStr))
    return pre
  }

  **
  ** Parse a horizontal rule.
  **
  private DocElem hr()
  {
    consume
    skipBlankLines
    return Hr.make
  }

  **
  ** Parse an ordered list; style is derived from the first char of the mark.
  **
  private DocElem ol()
  {
    style := OrderedListStyle.fromFirstChar(cur.trim[0])
    return listItems(OrderedList(style), curt, curIndent)
  }

  **
  ** Parse an unordered list.
  **
  private DocElem ul()
  {
    return listItems(UnorderedList.make, curt, curIndent)
  }

  **
  ** Parse the items of a list of the given type and indentation
  ** into the list element and return it.
  **
  private DocElem listItems(DocElem list, LineType listType, Int listIndent)
  {
    while (true)
    {
      // next item in my own list
      if (curt === listType && curIndent == listIndent)
      {
        list.add(formattedText(ListItem.make))
      }

      // otherwise if indent is same or greater, then
      // this is a continuation of the my last node
      else if (curIndent >= listIndent)
      {
        ((DocElem)list.children.last).add(block(listIndent))
      }

      // end of list
      else
      {
        break
      }
    }
    return list
  }

  **
  ** Gather the current line plus its continuation lines, then run the
  ** inline parser over the text to populate elem.  If inline parsing
  ** fails the error is logged, any partially added children are
  ** removed and the raw text is added as a single DocText.
  **
  private DocElem formattedText(DocElem elem)
  {
    startLineNum := curLine
    startIndent := curStart
    isBlockQuote := curt === LineType.blockquote

    buf := StrBuf(256)
    buf.add(cur[curStart..-1].trim)
    consume

    // continuation lines: normal text (or more "> " lines inside a
    // block quote) which does not start further right than the first line
    while (curStart <= startIndent &&
           (curt === LineType.normal || (isBlockQuote && curt == LineType.blockquote)))
    {
      buf.add("\n").add(cur[curStart..-1].trim)
      consume
    }

    skipBlankLines

    oldNumChildren := elem.children.size
    try
    {
      InlineParser(this, buf, startLineNum).parse(elem)
    }
    catch (Err e)
    {
      if (e is FandocErr)
        errReport((FandocErr)e)
      else
        err("Internal error: $e", startLineNum, e)

      // roll back whatever the inline parser managed to add
      elem.children[oldNumChildren..-1].dup.each |badChild| { elem.remove(badChild) }
      elem.add(DocText(buf.toStr.replace("\n", " ")))
    }

    return elem
  }

//////////////////////////////////////////////////////////////////////////
// IO
//////////////////////////////////////////////////////////////////////////

  **
  ** Read all the lines into memory and close stream if required.
  ** Primes the cur/peek line state so that cur is the first line.
  **
  private Void readLines(InStream in, Bool close)
  {
    try
    {
      lines = in.readAllLines
      numLines = lines.size

      // reset cursor state left over from a previous parse; otherwise
      // a parse which aborted mid-document leaves a stale curt/peekt
      // behind which changes how the first line here is classified
      cur = null
      peek = null
      curt = null
      peekt = null
      curIndent = peekIndent = 0
      curStart = peekStart = 0
      lineIndex = curLine = 0

      // first consume loads peek, second moves it into cur
      consume
      consume
      curLine = 1
    }
    finally
    {
      if (close) in.close
    }
  }

//////////////////////////////////////////////////////////////////////////
// Utils
//////////////////////////////////////////////////////////////////////////

  **
  ** Log an error
  **
  private Void err(Str msg, Int line, Err? cause := null)
  {
    errReport(FandocErr(msg, filename, line, cause))
  }

  **
  ** Log an error
  **
  private Void errReport(FandocErr err)
  {
    errs.add(err)
    if (!silent) echo("ERROR: $err")
  }

  **
  ** Skip any blank lines
  **
  private Void skipBlankLines()
  {
    while (curt === LineType.blank) consume
  }

  **
  ** Return if line starting at index i is an ordered
  ** list item:
  **   number* "." sp    (digits)
  **   letter  "." sp    (a-z | A-Z single letter only)
  **   roman*  "." sp    (ivx | IVX combos)
  **
  private static Bool isOrderedListMark(Str line, Int i)
  {
    // check if first char is alpha numeric
    if (!line[i].isAlphaNum) return false

    // find dot space
    dot := line.index(". ", i)
    if (dot == null) return false

    // mark is never empty since line[i] is alpha numeric
    mark := line[i..<dot]

    // numeric mark must be all digits
    if (mark[0].isDigit)
      return mark.all |Int ch->Bool| { return ch.isDigit }

    // a single letter is always a valid mark
    if (mark.size == 1) return true

    // anything longer must be made up of roman numeral chars only;
    // this rejects ordinary words such as "Hi. " being taken as a mark
    return mark.all |Int ch->Bool|
    {
      switch (ch)
      {
        case 'I':
        case 'V':
        case 'X':
        case 'i':
        case 'v':
        case 'x':
          return true
        default:
          return false
      }
    }
  }

  **
  ** Consume the current line and advance to the next line
  **
  private Void consume()
  {
    // advance cur to peek
    cur       = peek
    curt      = peekt
    curIndent = peekIndent
    curStart  = peekStart
    curNotBlank := curt != LineType.blank
    curLine++

    // update peek, peekIndent, and peekType
    peek = (lineIndex < numLines) ? lines[lineIndex++] : null
    peekIndent = peekStart = 0
    if (peek == null)                                             peekt = LineType.eof
    else if (peek.isSpace)                                        peekt = LineType.blank
    else if (peek.startsWith("pre>"))                             peekt = LineType.preStart
    else if (peek.startsWith("<pre"))                             peekt = LineType.preEnd
    else if (peek.startsWith("###") && curNotBlank)               peekt = LineType.h1
    else if (peek.startsWith("***") && curNotBlank)               peekt = LineType.h2
    else if (peek.startsWith("===") && curNotBlank)               peekt = LineType.h3
    else if (peek.startsWith("---") && curNotBlank)               peekt = LineType.h4
    else if (peek.startsWith("---") && curt == LineType.blank)    peekt = LineType.hr
    else
    {
      peekt = LineType.normal
      while (peek[peekIndent].isSpace) peekIndent++
      if (peekIndent+2 < peek.size)
      {
        // the list/quote markers are mutually exclusive: once a line is
        // classified as a bullet its text must not be re-tested as an
        // ordered list mark or block quote
        if (peek[peekIndent] == '-' && peek[peekIndent+1].isSpace)
        {
          peekt = LineType.ul
          peekIndent += 2
          peekStart = peekIndent
        }
        else if (isOrderedListMark(peek, peekIndent))
        {
          peekt = LineType.ol
          peekIndent += 2
          peekStart = peek.index(".") + 2
        }
        else if (peek[peekIndent] == '>' && peek[peekIndent+1].isSpace)
        {
          peekt = LineType.blockquote
          peekIndent += 2
          peekStart = peekIndent
        }
        else
        {
          peekStart = peekIndent
        }
      }
    }
  }

//////////////////////////////////////////////////////////////////////////
// Main
//////////////////////////////////////////////////////////////////////////

  **
  ** Parse the file named by the first argument and dump the document.
  **
  static Void main(Str[] args := Env.cur.args)
  {
    if (args.isEmpty)
    {
      echo("usage: FandocParser <file>")
      return
    }
    doc := make.parse(args[0], File(args[0].toUri).in)
    doc.dump
  }

//////////////////////////////////////////////////////////////////////////
// Fields
//////////////////////////////////////////////////////////////////////////

  ** If not silent, then errors are dumped to stdout
  Bool silent := false

  ** List of errors detected
  FandocErr[] errs := FandocErr[,]

  ** If true, then leading lines starting with '**' are parsed as header
  Bool parseHeader := true

  internal Str filename := ""  // filename for reporting errors
  private Str[]? lines         // lines of document
  private Int numLines         // lines.size
  private Int lineIndex        // current index in lines
  private Str? cur             // current line
  private Str? peek            // next line
  private LineType? curt       // current line type
  private LineType? peekt      // peek line type
  private Int curLine          // one based line number of cur
  private Int curIndent        // how many spaces is cur indented
  private Int peekIndent       // how many spaces is peek indented
  private Int curStart         // starting index of cur text
  private Int peekStart        // starting index of peek text
}

**************************************************************************
** LineType
**************************************************************************

@Js
internal enum class LineType
{
  eof,         // end of file
  blank,       // space*
  ul,          // space* "-" space*
  ol,          // space* (number|letter)* "." space*
  h1,          // ###
  h2,          // ***
  h3,          // ===
  h4,          // ---
  blockquote,  // >
  preStart,    // pre>
  preEnd,      // <pre
  hr,          // --- (with a leading blank line)
  normal       // anything else

  // NOTE(review): only 'ul' counts as a list here, 'ol' does not;
  // callers are not visible in this file - confirm this is intended
  Bool isList() { return this === ul }

  ** Heading level 1-4 for h1-h4; throws Err for any other line type.
  Int headingLevel()
  {
    switch (this)
    {
      case h1: return 1
      case h2: return 2
      case h3: return 3
      case h4: return 4
      default: throw Err(toStr)
    }
  }
}