document.c (101175B)
/* $Id$ */
/*
 * Copyright (c) 2008, Natacha Porté
 * Copyright (c) 2011, Vicent Martí
 * Copyright (c) 2014, Xavier Mendez, Devin Torres and the Hoedown authors
 * Copyright (c) 2016--2021 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include "config.h"

#if HAVE_SYS_QUEUE
# include <sys/queue.h>
#endif

#include <assert.h>
#include <ctype.h>
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "lowdown.h"
#include "extern.h"

/*
 * Make sure these are larger than enum hlist_fl.
 */
#define HLIST_LI_END (1 << 6) /* End of list item. */

/*
 * Mask of all list item types.
 */
#define HLIST_FL_MASK (HLIST_FL_DEF | \
	HLIST_FL_ORDERED | \
	HLIST_FL_UNORDERED)

/*
 * Reference to a link.
 * Entries are kept on a link_refq and looked up by name with
 * find_link_ref(); all four buffers are released by free_link_refs().
 */
struct link_ref {
	struct lowdown_buf	*name; /* id of link (or NULL) */
	struct lowdown_buf	*link; /* link address */
	struct lowdown_buf	*title; /* optional title */
	struct lowdown_buf	*attrs; /* optional attributes */
	TAILQ_ENTRY(link_ref)	 entries;
};

TAILQ_HEAD(link_refq, link_ref);

/*
 * Reference to a footnote.  This keeps track of all footnotes
 * definitions and whether there's both a definition and reference.
*/
struct foot_ref {
	size_t			 num; /* if used, the order */
	struct lowdown_node	*ref; /* if used, the reference */
	struct lowdown_buf	 name; /* identifier */
	struct lowdown_buf	 contents; /* definition */
	TAILQ_ENTRY(foot_ref)	 entries;
};

TAILQ_HEAD(foot_refq, foot_ref);

/*
 * Parser state shared by all of the block and inline parsing functions
 * in this file.
 */
struct lowdown_doc {
	struct link_refq	 refq; /* all internal references */
	struct foot_refq	 footq; /* all footnotes */
	size_t			 foots; /* # of used footnotes */
	/*
	 * Indexed by (unsigned char) input byte: zero means the byte is
	 * plain text, otherwise the value indexes markdown_char_ptrs[].
	 */
	int			 active_char[256]; /* jump table */
	unsigned int		 ext_flags; /* options */
	/* When non-zero, the autolink handlers decline to match. */
	int			 in_link_body; /* parsing link body */
	int			 in_footnote; /* prevent nested */
	/* Also the source of node identifiers: see pushnode(). */
	size_t			 nodes; /* number of nodes */
	struct lowdown_node	*current; /* current node */
	struct lowdown_metaq	*metaq; /* raw metadata key/values */
	/* Incremented by pushnode() and decremented by popnode(). */
	size_t			 depth; /* current parse tree depth */
	/* Zero disables the depth check in pushnode(). */
	size_t			 maxdepth; /* max parse tree depth */
	char			**meta; /* primer metadata */
	size_t			 metasz; /* size of meta */
	char			**metaovr; /* override metadata */
	size_t			 metaovrsz; /* size of metaovr */
};

/*
 * Function pointer to render active chars, where "data" is the pointer
 * of the beginning of the span and "offset" is the number of valid
 * chars before data.
 * Returns the number of chars taken care of, or <0 on failure.
*/
/* Arguments are, in order: document, data, offset, size. */
typedef ssize_t (*char_trigger)(struct lowdown_doc *, char *, size_t, size_t);

static ssize_t char_emphasis(struct lowdown_doc *, char *, size_t, size_t);
static ssize_t char_linebreak(struct lowdown_doc *, char *, size_t, size_t);
static ssize_t char_codespan(struct lowdown_doc *, char *, size_t, size_t);
static ssize_t char_escape(struct lowdown_doc *, char *, size_t, size_t);
static ssize_t char_entity(struct lowdown_doc *, char *, size_t, size_t);
static ssize_t char_langle_tag(struct lowdown_doc *, char *, size_t, size_t);
static ssize_t char_autolink_url(struct lowdown_doc *, char *, size_t, size_t);
static ssize_t char_autolink_email(struct lowdown_doc *, char *, size_t, size_t);
static ssize_t char_autolink_www(struct lowdown_doc *, char *, size_t, size_t);
static ssize_t char_link(struct lowdown_doc *, char *, size_t, size_t);
static ssize_t char_image(struct lowdown_doc *, char *, size_t, size_t);
static ssize_t char_superscript(struct lowdown_doc *, char *, size_t, size_t);
static ssize_t char_math(struct lowdown_doc *, char *, size_t, size_t);

/*
 * Values stored in the document's active_char[] jump table.
 * The order here must match the order of markdown_char_ptrs[], which
 * is indexed by these values.
 */
enum markdown_char_t {
	MD_CHAR_NONE = 0, /* not an active character */
	MD_CHAR_EMPHASIS,
	MD_CHAR_CODESPAN,
	MD_CHAR_LINEBREAK,
	MD_CHAR_LINK,
	MD_CHAR_IMAGE,
	MD_CHAR_LANGLE,
	MD_CHAR_ESCAPE,
	MD_CHAR_ENTITY,
	MD_CHAR_AUTOLINK_URL,
	MD_CHAR_AUTOLINK_EMAIL,
	MD_CHAR_AUTOLINK_WWW,
	MD_CHAR_SUPERSCRIPT,
	MD_CHAR_QUOTE, /* has no handler in markdown_char_ptrs[] */
	MD_CHAR_MATH
};

/*
 * Handlers for active characters, one slot per enum markdown_char_t
 * value.  parse_inline() calls the selected slot without checking it
 * for NULL.
 */
static const char_trigger markdown_char_ptrs[] = {
	NULL,
	&char_emphasis,
	&char_codespan,
	&char_linebreak,
	&char_link,
	&char_image,
	&char_langle_tag,
	&char_escape,
	&char_entity,
	&char_autolink_url,
	&char_autolink_email,
	&char_autolink_www,
	&char_superscript,
	NULL,
	&char_math
};

static int parse_block(struct lowdown_doc *, char *, size_t);

static ssize_t parse_listitem(struct lowdown_buf *, struct lowdown_doc *, char *, size_t, enum hlist_fl *, size_t);

/*
 * Add a node to the parse stack, or retrieve a current node if
 * requesting multiple LOWDOWN_NORMAL_TEXTs in sequence.
Returns the * node, initialised to the given type, after adjusting the parse * position. Returns NULL on memory allocation failure. */ static struct lowdown_node * pushnode(struct lowdown_doc *doc, enum lowdown_rndrt t) { struct lowdown_node *n; /* * Special case: if we're pushing a NORMAL_TEXT node, see if one * already exists and return that. This means that each push * for text nodes should be careful to use hbuf_push() instead * of hbuf_create() when adding text content. */ if (t == LOWDOWN_NORMAL_TEXT && doc->current != NULL) { n = TAILQ_LAST(&doc->current->children, lowdown_nodeq); if (n != NULL && n->type == t) { doc->depth++; doc->current = n; return n; } } /* New node. */ if ((doc->depth++ > doc->maxdepth) && doc->maxdepth) return NULL; if ((n = calloc(1, sizeof(struct lowdown_node))) == NULL) return NULL; n->id = doc->nodes++; n->type = t; n->parent = doc->current; TAILQ_INIT(&n->children); if (n->parent != NULL) TAILQ_INSERT_TAIL(&n->parent->children, n, entries); doc->current = n; return n; } /* * Sets a buffer with the contents of "data" of size "datasz". The * buffer must be empty. Return FALSE on failure, TRUE on success. */ static int hbuf_create(struct lowdown_buf *buf, const char *data, size_t datasz) { assert(buf->size == 0); assert(buf->data == NULL); memset(buf, 0, sizeof(struct lowdown_buf)); if (datasz) { if ((buf->data = malloc(datasz)) == NULL) return 0; buf->unit = 1; buf->size = buf->maxsize = datasz; memcpy(buf->data, data, datasz); } return 1; } /* * See hbuf_create(). */ static int hbuf_createb(struct lowdown_buf *buf, const struct lowdown_buf *nbuf) { return hbuf_create(buf, nbuf->data, nbuf->size); } /* * Pushes data into the buffer, which is initialised if empty. Return * FALSE on failure, TRUE on success. */ static int hbuf_push(struct lowdown_buf *buf, const char *data, size_t datasz) { if (buf->size == 0 || buf->data == NULL) return hbuf_create(buf, data, datasz); return hbuf_put(buf, data, datasz); } /* * See pushnode(). 
* Pops the current node on the stack, replacing it with the parent. */ static void popnode(struct lowdown_doc *doc, const struct lowdown_node *n) { assert(doc->depth > 0); doc->depth--; assert(doc->current == n); doc->current = doc->current->parent; } /* * Remove the backslash from a text sequence. * Return zero on failure (memory), non-zero on success. */ static int unscape_text(struct lowdown_buf *ob, struct lowdown_buf *src) { size_t i, org; for (i = 0; i < src->size; i += 2) { org = i; while (i < src->size && src->data[i] != '\\') i++; if (i > org && !hbuf_put(ob, src->data + org, i - org)) return 0; if (i + 1 >= src->size) break; if (!hbuf_putc(ob, src->data[i + 1])) return 0; } return 1; } static struct link_ref * find_link_ref(struct link_refq *q, char *name, size_t length) { struct link_ref *ref; TAILQ_FOREACH(ref, q, entries) if ((ref->name == NULL && length == 0) || (ref->name != NULL && ref->name->size == length && memcmp(ref->name->data, name, length) == 0)) return ref; return NULL; } static void free_link_refs(struct link_refq *q) { struct link_ref *r; while ((r = TAILQ_FIRST(q)) != NULL) { TAILQ_REMOVE(q, r, entries); hbuf_free(r->link); hbuf_free(r->name); hbuf_free(r->title); hbuf_free(r->attrs); free(r); } } static void free_foot_refq(struct foot_refq *q) { struct foot_ref *ref; while ((ref = TAILQ_FIRST(q)) != NULL) { TAILQ_REMOVE(q, ref, entries); hbuf_free(&ref->contents); hbuf_free(&ref->name); free(ref); } } /* * Check whether a char is a Markdown spacing char. * Right now we only consider spaces the actual space and a newline: * tabs and carriage returns are filtered out during the preprocessing * phase. * If we wanted to actually be UTF-8 compliant, we should instead * extract an Unicode codepoint from this character and check for space * properties. */ static int xisspace(int c) { return c == ' ' || c == '\n'; } /* * Returns the number of leading spaces from data starting from offset * to size. 
* If maxlen is greater than zero, only at most maxlen number of leading * spaces will be counted. * Otherwise, all leading spaces will be counted. */ static size_t countspaces(const char *data, size_t offset, size_t size, size_t maxlen) { size_t i; for (i = offset; i < size; i++) { if (maxlen > 0 && i - offset == maxlen) break; if (data[i] != ' ') break; } return i; } /* * Replace all spacing characters in data with spaces. * As a special case, this collapses a newline with the previous space, * if possible. * Return zero on failure (memory), non-zero on success. */ static int replace_spacing(struct lowdown_buf *ob, const char *data, size_t size) { size_t i, mark; if (!hbuf_grow(ob, size)) return 0; for (i = 0; ; i++) { mark = i; while (i < size && data[i] != '\n') i++; if (!hbuf_put(ob, data + mark, i - mark)) return 0; if (i >= size) break; if (!(i > 0 && data[i - 1] == ' ')) if (!hbuf_putc(ob, ' ')) return 0; } return 1; } /* * Looks for the address part of a mail autolink and '>'. * This is less strict than the original markdown e-mail address * matching. */ static size_t is_mail_autolink(const char *data, size_t size) { size_t i, nb = 0; /* Assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'. */ for (i = 0; i < size; ++i) { if (isalnum((unsigned char)data[i])) continue; switch (data[i]) { case '@': nb++; case '-': case '.': case '_': break; case '>': return (nb == 1) ? i + 1 : 0; default: return 0; } } return 0; } /* * Returns the length of the given tag, or 0 is it's not valid. */ static size_t tag_length(const char *data, size_t size, enum halink_type *ltype) { size_t i, j; /* A valid tag can't be shorter than 3 chars. */ if (size < 3) return 0; if (data[0] != '<') return 0; /* HTML comment, laxist form. */ if (size > 5 && data[1] == '!' 
&& data[2] == '-' && data[3] == '-') { i = 5; while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>')) i++; i++; if (i <= size) return i; } /* * Begins with a '<' optionally followed by '/', followed by letter or * number. */ i = (data[1] == '/') ? 2 : 1; if (!isalnum((unsigned char)data[i])) return 0; /* Scheme test. */ *ltype = HALINK_NONE; /* Try to find the beginning of an URI. */ while (i < size && (isalnum((unsigned char)data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-')) i++; if (i > 1 && data[i] == '@') if ((j = is_mail_autolink(data + i, size - i)) != 0) { *ltype = HALINK_EMAIL; return i + j; } if (i > 2 && data[i] == ':') { *ltype = HALINK_NORMAL; i++; } /* Completing autolink test: no spacing or ' or ". */ if (i >= size) *ltype = HALINK_NONE; else if (*ltype) { j = i; while (i < size) { if (data[i] == '\\') i += 2; else if (data[i] == '>' || data[i] == '\'' || data[i] == '"' || data[i] == ' ' || data[i] == '\n') break; else i++; } if (i >= size) return 0; if (i > j && data[i] == '>') return i + 1; /* One of the forbidden chars has been found. */ *ltype = HALINK_NONE; } /* Looking for something looking like a tag end. */ while (i < size && data[i] != '>') i++; if (i >= size) return 0; return i + 1; } /* * Parses inline markdown elements. * This function is important because it handles raw input that we pass * directly to the output formatter ("normal_text"). * Return zero on failure, non-zero on success. */ static int parse_inline(struct lowdown_doc *doc, char *data, size_t size) { size_t i = 0, end = 0, consumed = 0; ssize_t rc; struct lowdown_buf work; const int *active_char = doc->active_char; struct lowdown_node *n; memset(&work, 0, sizeof(struct lowdown_buf)); while (i < size) { /* Copying non-macro chars into the output. */ while (end < size && active_char[(unsigned char)data[end]] == 0) end++; /* Only allocate if non-empty... 
*/ if (end - i > 0) { n = pushnode(doc, LOWDOWN_NORMAL_TEXT); if (n == NULL) return 0; if (!hbuf_push(&n->rndr_normal_text.text, data + i, end - i)) return 0; popnode(doc, n); } /* End of file? */ if (end >= size) break; i = end; rc = markdown_char_ptrs[ active_char[(unsigned char)data[end]]] (doc, data + i, i - consumed, size - i); if (rc < 0) return 0; end = rc; /* Check if no action from the callback. */ if (end == 0) { end = i + 1; continue; } i += end; end = consumed = i; } return 1; } /* * Returns whether special char at data[loc] is escaped by '\\'. */ static int is_escaped(const char *data, size_t loc) { size_t i = loc; while (i >= 1 && data[i - 1] == '\\') i--; /* Odd numbers of backslashes escapes data[loc]. */ return (loc - i) % 2; } /* * Looks for the next emph char, skipping other constructs. */ static size_t find_emph_char(const char *data, size_t size, char c) { size_t i = 0, span_nb, bt, tmp_i; char cc; while (i < size) { while (i < size && data[i] != c && data[i] != '[' && data[i] != '`') i++; if (i == size) return 0; /* Not counting escaped chars. */ if (is_escaped(data, i)) { i++; continue; } if (data[i] == c) return i; /* Skipping a codespan. */ if (data[i] == '`') { span_nb = 0; tmp_i = 0; /* Counting the number of opening backticks. */ while (i < size && data[i] == '`') { i++; span_nb++; } if (i >= size) return 0; /* Finding the matching closing sequence. */ bt = 0; while (i < size && bt < span_nb) { if (!tmp_i && data[i] == c) tmp_i = i; if (data[i] == '`') bt++; else bt = 0; i++; } /* * Not a well-formed codespan; use found * matching emph char. */ if (bt < span_nb && i >= size) return tmp_i; } else if (data[i] == '[') { tmp_i = 0; /* Skipping a link. 
*/ i++; while (i < size && data[i] != ']') { if (!tmp_i && data[i] == c) tmp_i = i; i++; } i++; while (i < size && xisspace(data[i])) i++; if (i >= size) return tmp_i; switch (data[i]) { case '[': cc = ']'; break; case '(': cc = ')'; break; default: if (tmp_i) return tmp_i; else continue; } i++; while (i < size && data[i] != cc) { if (!tmp_i && data[i] == c) tmp_i = i; i++; } if (i >= size) return tmp_i; i++; } } return 0; } /* * Parsing single emphase. * Closed by a symbol not preceded by spacing and not followed by * symbol. * Return 0 if not an emphasis, <0 on failure, >0 on success. */ static ssize_t parse_emph1(struct lowdown_doc *doc, char *data, size_t size, char c) { size_t i = 0, len; struct lowdown_node *n; /* Skipping one symbol if coming from emph3. */ if (size > 1 && data[0] == c && data[1] == c) i = 1; while (i < size) { len = find_emph_char(data + i, size - i, c); if (!len) return 0; i += len; if (i >= size) return 0; if (data[i] == c && !xisspace(data[i - 1])) { if ((doc->ext_flags & LOWDOWN_NOINTEM) && i + 1 < size && isalnum((unsigned char)data[i + 1])) continue; n = pushnode(doc, LOWDOWN_EMPHASIS); if (n == NULL) return -1; if (!parse_inline(doc, data, i)) return -1; popnode(doc, n); return i + 1; } } return 0; } /* * Parsing single emphase. * Return 0 if not an emphasis, <0 on failure, >0 on success. 
*/ static ssize_t parse_emph2(struct lowdown_doc *doc, char *data, size_t size, char c) { size_t i = 0, len; struct lowdown_node *n; enum lowdown_rndrt t; while (i < size) { len = find_emph_char(data + i, size - i, c); if (len == 0) return 0; i += len; if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !xisspace(data[i - 1])) { if (c == '~') t = LOWDOWN_STRIKETHROUGH; else if (c == '=') t = LOWDOWN_HIGHLIGHT; else t = LOWDOWN_DOUBLE_EMPHASIS; if ((n = pushnode(doc, t)) == NULL) return -1; if (!parse_inline(doc, data, i)) return -1; popnode(doc, n); return i + 2; } i++; } return 0; } /* * Parsing single emphase * Finds the first closing tag, and delegates to the other emph. * Return 0 if not an emphasis, <0 on failure, >0 on success. */ static size_t parse_emph3(struct lowdown_doc *doc, char *data, size_t size, char c) { size_t i = 0, len; ssize_t rc; struct lowdown_node *n; while (i < size) { len = find_emph_char(data + i, size - i, c); if (len == 0) return 0; i += len; /* Skip spacing preceded symbols. */ if (data[i] != c || xisspace(data[i - 1])) continue; /* Case for triple, double, and single asterisk. */ if (i + 2 < size && data[i + 1] == c && data[i + 2] == c) { n = pushnode(doc, LOWDOWN_TRIPLE_EMPHASIS); if (n == NULL) return -1; if (!parse_inline(doc, data, i)) return -1; popnode(doc, n); return i + 3; } else if (i + 1 < size && data[i + 1] == c) { rc = parse_emph1(doc, data - 2, size + 2, c); if (rc < 0) return -1; assert(rc == 0 || rc >= 2); return rc == 0 ? 0 : rc - 2; } else { rc = parse_emph2(doc, data - 1, size + 1, c); if (rc < 0) return -1; return rc == 0 ? 0 : rc - 1; } } return 0; } /* * Parses a math span until the given ending delimiter. * Return 0 if not math, <0 on failure, >0 on success. */ static ssize_t parse_math(struct lowdown_doc *doc, char *data, size_t offset, size_t size, const char *end, size_t delimsz, int blockmode) { size_t i; struct lowdown_node *n; /* * Find ending delimiter. 
* All text within the equation is opaque, so we don't need to * care about embedded macros. */ for (i = delimsz; ; i++) { while (i < size && data[i] != end[0]) i++; if (i >= size) return 0; if (!is_escaped(data, i) && !(i + delimsz > size) && memcmp(data + i, end, delimsz) == 0) break; } i += delimsz; if (!(doc->ext_flags & LOWDOWN_MATH)) { n = pushnode(doc, LOWDOWN_NORMAL_TEXT); if (n == NULL) return -1; if (!hbuf_push(&n->rndr_normal_text.text, data, i)) return -1; popnode(doc, n); return i; } n = pushnode(doc, LOWDOWN_MATH_BLOCK); if (n == NULL) return -1; if (!hbuf_create(&n->rndr_math.text, data + delimsz, i - 2 * delimsz)) return -1; n->rndr_math.blockmode = blockmode; popnode(doc, n); return i; } /* * Single and double emphasis parsing. */ static ssize_t char_emphasis(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { char c = data[0]; ssize_t ret; if (doc->ext_flags & LOWDOWN_NOINTEM) if (offset > 0 && !xisspace(data[-1]) && data[-1] != '>' && data[-1] != '(') return 0; /* * Spacing cannot follow an opening emphasis: strikethrough and * highlight only takes '~~'. * FIXME: don't depend upon the "ret =" as the last part of an * "or" chain---it's hard to read. */ if (size > 2 && data[1] != c) { if (c == '~' || c == '=' || xisspace(data[1]) || (ret = parse_emph1 (doc, data + 1, size - 1, c)) == 0) return 0; return ret > 0 ? ret + 1 : ret; } if (size > 3 && data[1] == c && data[2] != c) { if (xisspace(data[2]) || (ret = parse_emph2 (doc, data + 2, size - 2, c)) == 0) return 0; return ret > 0 ? ret + 2 : ret; } if (size > 4 && data[1] == c && data[2] == c && data[3] != c) { if (c == '~' || c == '=' || xisspace(data[3]) || (ret = parse_emph3 (doc, data + 3, size - 3, c)) == 0) return 0; return ret > 0 ? 
ret + 3 : ret; } return 0; } /* * '\n' preceded by two spaces (assuming linebreak != 0) */ static ssize_t char_linebreak(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { struct lowdown_node *n; size_t w; struct lowdown_buf *b; if (offset < 2 || data[-1] != ' ' || data[-2] != ' ') return 0; /* Removing the last space from nodes. */ assert(doc->current != NULL); n = TAILQ_LAST(&doc->current->children, lowdown_nodeq); assert(n != NULL && LOWDOWN_NORMAL_TEXT == n->type); b = &n->rndr_normal_text.text; while (b->size && b->data[b->size - 1] == ' ') b->size--; /* * Swallow leading white-space of next line. * XXX: is this just CommonMark? */ for (w = 1; w < size; w++) if (data[w] != ' ') break; if ((n = pushnode(doc, LOWDOWN_LINEBREAK)) == NULL) return -1; popnode(doc, n); return w; } /* * '`' parsing a code span (assuming codespan != 0) */ static ssize_t char_codespan(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { struct lowdown_buf work; struct lowdown_node *n; size_t end, nb = 0, i, f_begin, f_end; memset(&work, 0, sizeof(struct lowdown_buf)); /* Counting the number of backticks in the delimiter. */ while (nb < size && data[nb] == '`') nb++; /* Finding the next delimiter. */ i = 0; for (end = nb; end < size && i < nb; end++) { if (data[end] == '`') i++; else i = 0; } if (i < nb && end >= size) return 0; /* no matching delimiter */ /* Trimming outside spaces. */ f_begin = countspaces(data, nb, end, 0); f_end = end - nb; while (f_end > nb && data[f_end-1] == ' ') f_end--; /* Real code span. 
*/ if ((n = pushnode(doc, LOWDOWN_CODESPAN)) == NULL) return -1; if (f_begin < f_end) { work.data = data + f_begin; work.size = f_end - f_begin; if (!hbuf_createb(&n->rndr_codespan.text, &work)) return -1; } popnode(doc, n); return end; } /* * '\\' backslash escape */ static ssize_t char_escape(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\"$"; struct lowdown_buf work; size_t w; ssize_t ret; const char *end; struct lowdown_node *n; memset(&work, 0, sizeof(struct lowdown_buf)); if (size > 1) { if (data[1] == '\\' && (doc->ext_flags & LOWDOWN_MATH) && size > 2 && (data[2] == '(' || data[2] == '[')) { end = (data[2] == '[') ? "\\\\]" : "\\\\)"; ret = parse_math(doc, data, offset, size, end, 3, data[2] == '['); if (ret != 0) return ret; } /* Swallow leading white-space of next line. */ if (LOWDOWN_COMMONMARK & doc->ext_flags && data[1] == '\n') { for (w = 2; w < size; w++) if (data[w] != ' ') break; n = pushnode(doc, LOWDOWN_LINEBREAK); if (n == NULL) return -1; popnode(doc, n); return w; } if (strchr(escape_chars, data[1]) == NULL) return 0; if ((n = pushnode(doc, LOWDOWN_NORMAL_TEXT)) == NULL) return -1; if (!hbuf_push(&n->rndr_normal_text.text, data + 1, 1)) return -1; popnode(doc, n); } else if (size == 1) { if ((n = pushnode(doc, LOWDOWN_NORMAL_TEXT)) == NULL) return -1; if (!hbuf_push(&n->rndr_normal_text.text, data, 1)) return -1; popnode(doc, n); } return 2; } /* * '&': parse entity, or escape if it's not an entity. 
* Valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */ static ssize_t char_entity(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { size_t end = 1; struct lowdown_node *n; if (end < size && data[end] == '#') end++; while (end < size && isalnum((unsigned char)data[end])) end++; if (end < size && data[end] == ';') end++; /* real entity */ else return 0; /* lone '&' */ if ((n = pushnode(doc, LOWDOWN_ENTITY)) == NULL) return -1; if (!hbuf_create(&n->rndr_entity.text, data, end)) return -1; popnode(doc, n); return end; } /* * '<': parse link when tags or autolinks are allowed. */ static ssize_t char_langle_tag(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { struct lowdown_buf work; struct lowdown_buf *u_link = NULL; enum halink_type altype = HALINK_NONE; size_t end = tag_length(data, size, &altype); int ret = 0; struct lowdown_node *n; memset(&work, 0, sizeof(struct lowdown_buf)); work.data = data; work.size = end; if (end > 2) { if (altype != HALINK_NONE) { if ((u_link = hbuf_new(64)) == NULL) goto err; work.data = data + 1; work.size = end - 2; if (!unscape_text(u_link, &work)) goto err; n = pushnode(doc, LOWDOWN_LINK_AUTO); if (n == NULL) goto err; n->rndr_autolink.type = altype; if (!hbuf_createb(&n->rndr_autolink.link, u_link)) goto err; popnode(doc, n); } else { n = pushnode(doc, LOWDOWN_RAW_HTML); if (n == NULL) goto err; if (!hbuf_create (&n->rndr_raw_html.text, data, end)) goto err; popnode(doc, n); } ret = 1; } hbuf_free(u_link); return !ret ? 0 : end; err: hbuf_free(u_link); return -1; } /* * 'w': parse URL when autolinking is allowed (from "www"). 
*/ static ssize_t char_autolink_www(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { struct lowdown_buf *link = NULL, *link_url = NULL; size_t link_len, rewind; struct lowdown_node *n; ssize_t ret; if (doc->in_link_body) return 0; if ((link = hbuf_new(64)) == NULL) goto err; ret = halink_www(&rewind, link, data, offset, size); if (ret < 0) goto err; link_len = ret; if (link_len > 0) { if ((link_url = hbuf_new(64)) == NULL) goto err; if (!HBUF_PUTSL(link_url, "http://")) goto err; if (!hbuf_put(link_url, link->data, link->size)) goto err; if (doc->current && (n = TAILQ_LAST(&doc->current->children, lowdown_nodeq)) != NULL && n->type == LOWDOWN_NORMAL_TEXT) { if (n->rndr_normal_text.text.size > rewind) n->rndr_normal_text.text.size -= rewind; else n->rndr_normal_text.text.size = 0; } if ((n = pushnode(doc, LOWDOWN_LINK_AUTO)) == NULL) goto err; n->rndr_autolink.type = HALINK_NORMAL; if (!hbuf_createb(&n->rndr_autolink.link, link_url)) goto err; popnode(doc, n); } hbuf_free(link); hbuf_free(link_url); return link_len; err: hbuf_free(link); hbuf_free(link_url); return -1; } /* * '@': parse email when autolinking is allowed (from the at sign). 
*/ static ssize_t char_autolink_email(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { struct lowdown_buf *link = NULL; size_t link_len, rewind; ssize_t ret; struct lowdown_node *n; if (doc->in_link_body) return 0; if ((link = hbuf_new(64)) == NULL) goto err; ret = halink_email(&rewind, link, data, offset, size); if (ret < 0) goto err; link_len = ret; if (link_len > 0) { if (doc->current && (n = TAILQ_LAST(&doc->current->children, lowdown_nodeq)) != NULL && n->type == LOWDOWN_NORMAL_TEXT) { if (n->rndr_normal_text.text.size > rewind) n->rndr_normal_text.text.size -= rewind; else n->rndr_normal_text.text.size = 0; } if ((n = pushnode(doc, LOWDOWN_LINK_AUTO)) == NULL) goto err; n->rndr_autolink.type = HALINK_EMAIL; if (!hbuf_createb(&n->rndr_autolink.link, link)) goto err; popnode(doc, n); } hbuf_free(link); return link_len; err: hbuf_free(link); return -1; } /* * ':': parse URL when autolinking is allowed (from the schema). */ static ssize_t char_autolink_url(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { struct lowdown_buf *link = NULL; size_t link_len, rewind; struct lowdown_node *n; ssize_t ret; if (doc->in_link_body) return 0; if ((link = hbuf_new(64)) == NULL) goto err; ret = halink_url(&rewind, link, data, offset, size); if (ret < 0) goto err; link_len = ret; if (link_len > 0) { if (doc->current && (n = TAILQ_LAST(&doc->current->children, lowdown_nodeq)) != NULL && n->type == LOWDOWN_NORMAL_TEXT) { if (n->rndr_normal_text.text.size > rewind) n->rndr_normal_text.text.size -= rewind; else n->rndr_normal_text.text.size = 0; } if ((n = pushnode(doc, LOWDOWN_LINK_AUTO)) == NULL) goto err; n->rndr_autolink.type = HALINK_NORMAL; if (!hbuf_createb(&n->rndr_autolink.link, link)) goto err; popnode(doc, n); } hbuf_free(link); return link_len; err: hbuf_free(link); return -1; } /* * '!': parse an image. 
*/ static ssize_t char_image(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { ssize_t ret; if (size < 2 || data[1] != '[') return 0; ret = char_link(doc, data + 1, offset + 1, size - 1); return ret <= 0 ? ret : ret + 1; } /* * Parse extended attributes from the buffer "data". The buffer should * not have any enclosing characters, e.g., { foo }. Return 0 on * failure or position of *next* word. */ static size_t parse_ext_attrs(const char *data, size_t size, struct lowdown_buf **attrid, struct lowdown_buf **attrcls, struct lowdown_buf **attrwidth, struct lowdown_buf **attrheight) { size_t word_b, word_e; word_b = 0; while (word_b < size) { while (word_b < size && data[word_b] == ' ') word_b++; word_e = word_b; while (word_e < size && data[word_e] != ' ') word_e++; /* Classes. */ if (attrid != NULL && word_e > word_b + 1 && data[word_b] == '#') { if (*attrid == NULL && (*attrid = hbuf_new(64)) == NULL) return 0; hbuf_truncate(*attrid); if (!hbuf_put(*attrid, data + word_b + 1, word_e - word_b - 1)) return 0; } if (attrwidth != NULL && word_e > word_b + 7 && strncasecmp(&data[word_b], "width=", 6) == 0) { if (*attrwidth == NULL && (*attrwidth = hbuf_new(64)) == NULL) return 0; hbuf_truncate(*attrwidth); if (!hbuf_put(*attrwidth, data + word_b + 6, word_e - word_b - 6)) return 0; } if (attrheight != NULL && word_e > word_b + 8 && strncasecmp(&data[word_b], "height=", 7) == 0) { if (*attrheight == NULL && (*attrheight = hbuf_new(64)) == NULL) return 0; hbuf_truncate(*attrheight); if (!hbuf_put(*attrheight, data + word_b + 7, word_e - word_b - 7)) return 0; } if (attrcls != NULL && word_e > word_b + 1 && data[word_b] == '.') { if (*attrcls != NULL && !hbuf_putc(*attrcls, ' ')) return 0; if (*attrcls == NULL && (*attrcls = hbuf_new(64)) == NULL) return 0; if (!hbuf_put(*attrcls, data + word_b + 1, word_e - word_b - 1)) return 0; } word_b = word_e + 1; } return word_b; } /* * Parse a header's extended attributes. Return FALSE on failure, TRUE * on success. 
*/ static int parse_header_ext_attrs(struct lowdown_node *n) { struct lowdown_node *nn; struct lowdown_buf *b, *attrid = NULL, *attrcls = NULL; size_t i; int rc = 0; /* * The last node on the line must be non-empty normal text and * must end with a '}'. */ nn = TAILQ_LAST(&n->children, lowdown_nodeq); if (nn == NULL || nn->type != LOWDOWN_NORMAL_TEXT || nn->rndr_normal_text.text.size == 0 || nn->rndr_normal_text.text.data [nn->rndr_normal_text.text.size - 1] != '}') return 1; /* Scan from the trailing '}' to the opening '{'. */ b = &nn->rndr_normal_text.text; assert(b->size && b->data[b->size - 1] == '}'); for (i = b->size - 1; i > 0; i--) if (b->data[i] == '{') break; if (b->data[i] != '{') return 1; /* Parse the extended attributes. */ if (!parse_ext_attrs(&b->data[i + 1], b->size - i - 2, &attrid, &attrcls, NULL, NULL)) goto out; if (attrid != NULL && !hbuf_createb(&n->rndr_header.attr_id, attrid)) goto out; if (attrcls != NULL && !hbuf_createb(&n->rndr_header.attr_cls, attrcls)) goto out; b->size = i; while (b->size && b->data[b->size - 1] == ' ') b->size--; /* Is there nothing left? */ if (b->size == 0) { TAILQ_REMOVE(&n->children, nn, entries); lowdown_node_free(nn); } rc = 1; out: hbuf_free(attrid); hbuf_free(attrcls); return rc; } /* * '[': parsing a link, footnote, metadata, or image. */ static ssize_t char_link(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { struct lowdown_buf *content = NULL, *link = NULL, *title = NULL, *u_link = NULL, *dims = NULL, *idp = NULL, *linkp = NULL, *titlep = NULL, *attrcls = NULL, *attrid = NULL, *attrwidth = NULL, *attrheight = NULL; size_t i = 1, j, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0, nb_p, dims_b = 0, dims_e = 0; int ret = 0, in_title = 0, qtype = 0, is_img, is_footnote, is_metadata; struct lowdown_buf id; struct link_ref *lr = NULL; struct foot_ref *fr; struct lowdown_node *n; struct lowdown_meta *m; is_img = offset && data[-1] == '!' 
&& !is_escaped(data - offset, offset - 1); is_footnote = (doc->ext_flags & LOWDOWN_FOOTNOTES) && data[1] == '^'; is_metadata = (doc->ext_flags & LOWDOWN_METADATA) && data[1] == '%'; /* Looking for the matching closing bracket. */ i += find_emph_char(data + i, size - i, ']'); txt_e = i; if (i < size && data[i] == ']') i++; else goto cleanup; /* * If we start as an image then change into metadata or a * footnote, make sure to emit the exclamation mark. */ if (is_img && (is_footnote || is_metadata)) { n = pushnode(doc, LOWDOWN_NORMAL_TEXT); if (n == NULL) goto err; if (!hbuf_push(&n->rndr_normal_text.text, &data[-1], 1)) goto err; popnode(doc, n); } /* * Footnote (in footer): look up footnote by its key in our * queue of footnotes. This queue was created in the first pass * of the compiler. If we've already listed the footnote, don't * render it twice. Don't allow embedded footnotes as well. */ if (is_footnote) { memset(&id, 0, sizeof(struct lowdown_buf)); if (txt_e < 3) goto cleanup; id.data = data + 2; id.size = txt_e - 2; TAILQ_FOREACH(fr, &doc->footq, entries) if (hbuf_eq(&fr->name, &id)) break; /* Override. */ if (doc->in_footnote) fr = NULL; /* * Mark footnote used. If it's NULL, then there was no * footnote found. If it is NULL and the reference is * defined, then we've already registered the footnote. * XXX: Markdown, as is, can only use one footnote * reference per definition. This is stupid. */ if (fr != NULL && fr->ref == NULL) { n = pushnode(doc, LOWDOWN_FOOTNOTE); if (n == NULL) goto err; fr->num = ++doc->foots; fr->ref = n; assert(doc->in_footnote == 0); doc->in_footnote = 1; if (!parse_block(doc, fr->contents.data, fr->contents.size)) goto err; assert(doc->in_footnote); doc->in_footnote = 0; } else { n = pushnode(doc, LOWDOWN_NORMAL_TEXT); if (n == NULL) goto err; if (!hbuf_push(&n->rndr_normal_text.text, data, txt_e + 1)) goto err; } popnode(doc, n); ret = 1; goto cleanup; } /* * Metadata: simply copy the variable (if found) into our * stream. 
It's raw text, so we need to pass it into our * "normal text" formatter. */ if (is_metadata) { memset(&id, 0, sizeof(struct lowdown_buf)); if (txt_e < 3) goto cleanup; id.data = data + 2; id.size = txt_e - 2; /* FIXME: slow O(n). */ TAILQ_FOREACH(m, doc->metaq, entries) { if (!hbuf_streq(&id, m->key)) continue; assert(m->value != NULL); n = pushnode(doc, LOWDOWN_NORMAL_TEXT); if (n == NULL) goto err; if (!hbuf_push(&n->rndr_normal_text.text, m->value, strlen(m->value))) goto err; popnode(doc, n); break; } ret = 1; goto cleanup; } /* * Skip any amount of spacing. (This is much more laxist than * original markdown syntax.) */ while (i < size && xisspace(data[i])) i++; /* Different style of links (regular, reference, shortcut. */ if (i < size && data[i] == '(') { i++; while (i < size && xisspace(data[i])) i++; link_b = i; /* * Looking for link end: ' " ) * Count the number of open parenthesis. */ nb_p = 0; while (i < size) { if (data[i] == '\\') { i += 2; } else if (data[i] == '(' && i != 0) { nb_p++; i++; } else if (data[i] == ')') { if (nb_p == 0) break; else nb_p--; i++; } else if (i >= 1 && xisspace(data[i-1]) && (data[i] == '\'' || data[i] == '=' || data[i] == '"')) break; else i++; } if (i >= size) goto cleanup; link_e = i; /* * We might be at the end of the link, or we might be at * the title of the link. * In the latter case, progress til link-end. */ again: if (data[i] == '\'' || data[i] == '"') { /* * Looking for title end if present. * This is a quoted part after the image. */ qtype = data[i]; in_title = 1; i++; title_b = i; for ( ; i < size; i++) if (data[i] == '\\') i++; else if (data[i] == qtype) in_title = 0; else if ((data[i] == '=') && !in_title) break; else if ((data[i] == ')') && !in_title) break; if (i >= size) goto cleanup; /* Skipping spacing after title. */ title_e = i - 1; while (title_e > title_b && xisspace(data[title_e])) title_e--; /* Checking for closing quote presence. 
*/ if (data[title_e] != '\'' && data[title_e] != '"') { title_b = title_e = 0; link_e = i; } /* * If we're followed by a dimension string, then * jump back into the parsing engine for it. */ if (data[i] == '=') goto again; } else if (data[i] == '=') { dims_b = ++i; for ( ; i < size; i++) if (data[i] == '\\') i++; else if ('\'' == data[i] || '"' == data[i]) break; else if (data[i] == ')') break; if (i >= size) goto cleanup; /* Skipping spacing after dimensions. */ dims_e = i; while (dims_e > dims_b && xisspace(data[dims_e])) dims_e--; /* * If we're followed by a title string, then * jump back into the parsing engine for it. */ if (data[i] == '"' || data[i] == '\'') goto again; } /* Remove spacing at the end of the link. */ while (link_e > link_b && xisspace(data[link_e - 1])) link_e--; /* Remove optional angle brackets around the link. */ if (data[link_b] == '<' && data[link_e - 1] == '>') { link_b++; link_e--; } /* building escaped link and title */ if (link_e > link_b) { link = linkp = hbuf_new(64); if (linkp == NULL) goto err; if (!hbuf_put(link, data + link_b, link_e - link_b)) goto err; } if (title_e > title_b) { title = titlep = hbuf_new(64); if (titlep == NULL) goto err; if (!hbuf_put(title, data + title_b, title_e - title_b)) goto err; } if (dims_e > dims_b) { if ((dims = hbuf_new(64)) == NULL) goto err; if (!hbuf_put(dims, data + dims_b, dims_e - dims_b)) goto err; } i++; } else if (i < size && data[i] == '[') { if ((idp = hbuf_new(64)) == NULL) goto err; /* Looking for the id. */ i++; link_b = i; while (i < size && data[i] != ']') i++; if (i >= size) goto cleanup; link_e = i; /* Finding the link_ref. */ if (link_b == link_e) { if (!replace_spacing (idp, data + 1, txt_e - 1)) goto err; } else if (!hbuf_put(idp, data + link_b, link_e - link_b)) goto err; lr = find_link_ref(&doc->refq, idp->data, idp->size); if (lr == NULL) goto cleanup; /* Keeping link and title from link_ref. 
*/ link = lr->link; title = lr->title; if (lr->attrs != NULL && parse_ext_attrs (lr->attrs->data, lr->attrs->size, &attrid, &attrcls, &attrwidth, &attrheight) == 0) goto err; i++; } else { /* * Shortcut reference style link. */ if ((idp = hbuf_new(64)) == NULL) goto err; /* Crafting the id. */ if (!replace_spacing(idp, data + 1, txt_e - 1)) goto err; /* Finding the link_ref. */ lr = find_link_ref(&doc->refq, idp->data, idp->size); if (lr == NULL) goto cleanup; /* Keeping link and title from link_ref. */ link = lr->link; title = lr->title; if (lr->attrs != NULL && parse_ext_attrs (lr->attrs->data, lr->attrs->size, &attrid, &attrcls, &attrwidth, &attrheight) == 0) goto err; /* Rewinding the spacing. */ i = txt_e + 1; } /* PHP markdown extra attributes (if not ref link). */ if ((doc->ext_flags & LOWDOWN_ATTRS) && lr == NULL && i + 2 < size && data[i] == '{') { i++; /* Find trailing marker. */ for (j = i; j < size && data[j] != '}'; j++) continue; j = parse_ext_attrs(&data[i], j - i, &attrid, &attrcls, &attrwidth, &attrheight); if (j == 0) goto err; i += j; if (i < size && data[i] == '}') i++; } n = pushnode(doc, is_img ? LOWDOWN_IMAGE : LOWDOWN_LINK); if (n == NULL) goto err; /* * Building content: img alt is kept, only link content is * parsed. */ if (txt_e > 1) { if ( ! is_img) { /* * Disable autolinking when parsing inline the * content of a link. */ doc->in_link_body = 1; if (!parse_inline(doc, data + 1, txt_e - 1)) goto err; doc->in_link_body = 0; } else { if ((content = hbuf_new(64)) == NULL) goto err; if (!hbuf_put(content, data + 1, txt_e - 1)) goto err; } } if (link) { if ((u_link = hbuf_new(64)) == NULL) goto err; if (!unscape_text(u_link, link)) goto err; } /* Calling the relevant rendering function. 
*/ if (is_img) { if (u_link != NULL && !hbuf_createb(&n->rndr_image.link, u_link)) goto err; if (title != NULL && !hbuf_createb(&n->rndr_image.title, title)) goto err; if (dims != NULL && !hbuf_createb(&n->rndr_image.dims, dims)) goto err; if (content != NULL && !hbuf_createb(&n->rndr_image.alt, content)) goto err; if (attrcls != NULL && !hbuf_createb(&n->rndr_image.attr_cls, attrcls)) goto err; if (attrid != NULL && !hbuf_createb(&n->rndr_image.attr_id, attrid)) goto err; if (attrwidth != NULL && !hbuf_createb(&n->rndr_image.attr_width, attrwidth)) goto err; if (attrheight != NULL && !hbuf_createb(&n->rndr_image.attr_height, attrheight)) goto err; ret = 1; } else { if (u_link != NULL && !hbuf_createb(&n->rndr_link.link, u_link)) goto err; if (title != NULL && !hbuf_createb(&n->rndr_link.title, title)) goto err; if (attrcls != NULL && !hbuf_createb(&n->rndr_link.attr_cls, attrcls)) goto err; if (attrid != NULL && !hbuf_createb(&n->rndr_link.attr_id, attrid)) goto err; ret = 1; } popnode(doc, n); goto cleanup; err: ret = -1; cleanup: hbuf_free(attrid); hbuf_free(attrcls); hbuf_free(attrheight); hbuf_free(attrwidth); hbuf_free(linkp); hbuf_free(titlep); hbuf_free(dims); hbuf_free(idp); hbuf_free(content); hbuf_free(u_link); return ret > 0 ? (ssize_t)i : ret; } static ssize_t char_superscript(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { size_t sup_start, sup_len; struct lowdown_node *n; if (size < 2) return 0; if (data[1] == '(') { sup_start = 2; sup_len = find_emph_char(data + 2, size - 2, ')') + 2; if (sup_len == size) return 0; } else { sup_start = sup_len = 1; while (sup_len < size && !xisspace(data[sup_len])) sup_len++; } if (sup_len - sup_start == 0) return (sup_start == 2) ? 3 : 0; if ((n = pushnode(doc, LOWDOWN_SUPERSCRIPT)) == NULL) return -1; if (!parse_inline(doc, data + sup_start, sup_len - sup_start)) return -1; popnode(doc, n); return (sup_start == 2) ? 
sup_len + 1 : sup_len; } static ssize_t char_math(struct lowdown_doc *doc, char *data, size_t offset, size_t size) { return size > 1 && data[1] == '$' ? parse_math(doc, data, offset, size, "$$", 2, 1) : parse_math(doc, data, offset, size, "$", 1, 0); } /* * Returns the line length when it is empty, 0 otherwise. */ static size_t is_empty(const char *data, size_t size) { size_t i; for (i = 0; i < size && data[i] != '\n'; i++) if (data[i] != ' ') return 0; return i + 1; } /* * Returns whether a line is a horizontal rule. */ static int is_hrule(const char *data, size_t size) { size_t i = 0, n = 0; char c; /* Skipping initial spaces. */ if (size < 3) return 0; i = countspaces(data, 0, size, 3); /* Looking at the hrule char. */ if (i + 2 >= size || (data[i] != '*' && data[i] != '-' && data[i] != '_')) return 0; c = data[i]; /* The whole line must be the char or space. */ while (i < size && data[i] != '\n') { if (data[i] == c) n++; else if (data[i] != ' ') return 0; i++; } return n >= 3; } /* * Check if a line is a code fence; return the end of the code fence. * If passed, width of the fence rule and character will be returned. */ static size_t is_codefence(const char *data, size_t size, size_t *width, char *chr) { size_t i = 0, n = 1; char c; /* Skipping initial spaces. */ if (size < 3) return 0; i = countspaces(data, 0, size, 3); /* Looking at the hrule char. */ c = data[i]; if (i + 2 >= size || !(c == '~' || c == '`')) return 0; /* The fence must be that same character. */ while (++i < size && data[i] == c) ++n; if (n < 3) return 0; if (width) *width = n; if (chr) *chr = c; return i; } /* * Expects single line, checks if it's a codefence and extracts * language. * Return zero if not a code-fence, >0 offset otherwise. 
*/ static size_t parse_codefence(char *data, size_t size, struct lowdown_buf *lang, size_t *width, char *chr) { size_t i, w, lang_start; i = w = is_codefence(data, size, width, chr); if (i == 0) return 0; while (i < size && xisspace(data[i])) i++; lang_start = i; while (i < size && !xisspace(data[i])) i++; lang->data = data + lang_start; lang->size = i - lang_start; /* Avoid parsing a codespan as a fence */ i = lang_start + 2; while (i < size && !(data[i] == *chr && data[i-1] == *chr && data[i-2] == *chr)) i++; return i < size ? 0 : w; } /* * Returns whether the line is a hash-prefixed header. * Return zero if not an at-header, non-zero otherwise. */ static int is_atxheader(const struct lowdown_doc *doc, const char *data, size_t size) { size_t level; if (data[0] != '#') return 0; /* * CommonMark requires a space. * Classical Markdown does not. */ if (doc->ext_flags & LOWDOWN_COMMONMARK) { level = 0; while (level < size && level < 6 && data[level] == '#') level++; if (level < size && data[level] != ' ') return 0; } return 1; } /* * Tests for level 1 setext-style header ("=") or level 2 ("-"). * Returns zero if it's not, non-zero otherwise. */ static int is_headerline(const char *data, size_t size) { size_t i; char hchr; int level; if ('=' == *data || '-' == *data) { level = '=' == *data ? 1 : 2; hchr = *data; } else return 0; for (i = 1; i < size && data[i] == hchr; i++) continue; i = countspaces(data, i, size, 0); return (i >= size || data[i] == '\n') ? level : 0; } static int is_next_headerline(const char *data, size_t size) { size_t i = 0; while (i < size && data[i] != '\n') i++; if (++i >= size) return 0; return is_headerline(data + i, size - i); } /* * Returns unordered list item prefix. * This does nothing if LOWDOWN_DEFLIST is not set. 
*/ static size_t prefix_dli(const struct lowdown_doc *doc, const char *data, size_t size) { size_t i; if (!(doc->ext_flags & LOWDOWN_DEFLIST)) return 0; i = countspaces(data, 0, size, 3); if (i + 1 >= size || data[i] != ':' || data[i + 1] != ' ') return 0; if (is_next_headerline(data + i, size - i)) return 0; return i + 2; } /* * Returns blockquote prefix length. */ static size_t prefix_quote(const char *data, size_t size) { size_t i; i = countspaces(data, 0, size, 3); if (i < size && data[i] == '>') return countspaces(data, i + 1, size, 1); return 0; } /* * Returns prefix length for block code. */ static size_t prefix_code(const char *data, size_t size) { if (countspaces(data, 0, size, 4) == 4) return 4; return 0; } /* * Returns ordered list item prefix. * On success (return value >0) and if "value" is not NULL *and* we're * also commonmark processing, copy and NUL-terminate the value into it. * If all of those except for commonmark, simply NUL-terminate the * string. */ static size_t prefix_oli(const struct lowdown_doc *doc, const char *data, size_t size, char *value) { size_t i, st, vsize; const char *vdata; i = countspaces(data, 0, size, 3); if (i >= size || !isdigit((unsigned char)data[i])) return 0; st = i; vdata = &data[i]; while (i < size && isdigit((unsigned char)data[i])) i++; /* Commonmark limits us to nine characters. */ vsize = i - st; if ((doc->ext_flags & LOWDOWN_COMMONMARK) && vsize > 9) return 0; /* * Commonmark accepts ')' and '.' following the numeric prefix, * while regular markdown only has '.'. */ if (doc->ext_flags & LOWDOWN_COMMONMARK) { if (i + 1 >= size || (data[i] != '.' && data[i] != ')') || data[i + 1] != ' ') return 0; } else if (i + 1 >= size || data[i] != '.' 
|| data[i + 1] != ' ') return 0; if (is_next_headerline(data + i, size - i)) return 0; if (value != NULL) { if (doc->ext_flags & LOWDOWN_COMMONMARK) { assert(vsize > 0); assert(vsize < 10); memcpy(value, vdata, vsize); value[vsize] = '\0'; } else value[0] = '\0'; } return i + 2; } /* * Returns unordered list item prefix, including a GFM checkbox. The * "checked" pointer, if not NULL, is set to whether the check is set * (>0), unset (=0), or not there (<0). */ static size_t prefix_uli(const struct lowdown_doc *doc, const char *data, size_t size, int *checked) { size_t i; if (checked != NULL) *checked = -1; i = countspaces(data, 0, size, 3); if (i + 1 >= size || (data[i] != '*' && data[i] != '+' && data[i] != '-') || data[i + 1] != ' ') return 0; if (is_next_headerline(data + i, size - i)) return 0; if (!(doc->ext_flags & LOWDOWN_TASKLIST) || i + 5 >= size) return i + 2; if (data[i + 2] == '[' && (data[i + 3] == ' ' || data[i + 3] == 'x' || data[i + 3] == 'X') && data[i + 4] == ']' && data[i + 5] == ' ') { if (checked != NULL) *checked = data[i + 3] != ' '; return i + 6; } return i + 2; } /* * Handles parsing of a blockquote fragment. * Return <0 on failure, otherwise the end offset. */ static ssize_t parse_blockquote(struct lowdown_doc *doc, char *data, size_t size) { size_t beg = 0, end = 0, pre, work_size = 0; char *work_data = NULL; struct lowdown_node *n; while (beg < size) { for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) continue; pre = prefix_quote(data + beg, end - beg); /* Skip prefix or empty line followed by non-quote. 
*/ if (pre) beg += pre; else if (is_empty(data + beg, end - beg) && (end >= size || (prefix_quote(data + end, size - end) == 0 && !is_empty(data + end, size - end)))) break; if (beg < end) { if (!work_data) work_data = data + beg; else if (data + beg != work_data + work_size) memmove(work_data + work_size, data + beg, end - beg); work_size += end - beg; } beg = end; } n = pushnode(doc, LOWDOWN_BLOCKQUOTE); if (n == NULL) return -1; if (!parse_block(doc, work_data, work_size)) return -1; popnode(doc, n); return end; } /* * Handles parsing of a regular paragraph, which terminates at sections * or blank lines. * Returns <0 on failure or the number of characters parsed from the * paragraph input. */ static ssize_t parse_paragraph(struct lowdown_doc *doc, char *data, size_t size) { struct lowdown_buf work; struct lowdown_node *n; size_t i = 0, end = 0, beg, lines = 0; int level = 0, beoln = 0; memset(&work, 0, sizeof(struct lowdown_buf)); work.data = data; while (i < size) { /* Parse ahead to the next newline. */ for (end = i + 1; end < size && data[end - 1] != '\n'; end++) continue; /* * Empty line: end of paragraph. * However, check if we have a dli prefix following * that, which means that we're a block-mode dli. */ if (is_empty(data + i, size - i)) { beoln = 1; break; } /* Header line: end of paragraph. */ if ((level = is_headerline(data + i, size - i)) != 0) break; /* Other ways of ending a paragraph. */ if (is_atxheader(doc, data + i, size - i) || is_hrule(data + i, size - i) || (lines == 1 && prefix_dli(doc, data + i, size - i)) || prefix_quote(data + i, size - i)) { end = i; break; } lines++; i = end; } work.size = i; while (work.size && data[work.size - 1] == '\n') work.size--; /* * The paragraph isn't ending on a header line. * So it's a regular paragraph. 
*/ if (!level) { n = pushnode(doc, LOWDOWN_PARAGRAPH); if (n == NULL) return -1; n->rndr_paragraph.lines = lines; n->rndr_paragraph.beoln = beoln; if (!parse_inline(doc, work.data, work.size)) return -1; popnode(doc, n); return end; } /* Paragraph material prior to header break. */ if (work.size) { i = work.size; work.size -= 1; while (work.size && data[work.size] != '\n') work.size -= 1; beg = work.size + 1; while (work.size && data[work.size - 1] == '\n') work.size -= 1; if (work.size > 0) { n = pushnode(doc, LOWDOWN_PARAGRAPH); if (n == NULL) return -1; n->rndr_paragraph.lines = lines - 1; n->rndr_paragraph.beoln = beoln; if (!parse_inline(doc, work.data, work.size)) return -1; popnode(doc, n); work.data += beg; work.size = i - beg; } else work.size = i; } /* Definition data parts. */ if ((n = pushnode(doc, LOWDOWN_HEADER)) == NULL) return -1; assert(level > 0); n->rndr_header.level = level - 1; if (!parse_inline(doc, work.data, work.size)) return -1; popnode(doc, n); if ((doc->ext_flags & LOWDOWN_ATTRS) && !parse_header_ext_attrs(n)) return -1; return end; } /* * Handles parsing of a block-level code fragment. * Return <0 on failure, 0 if not a fragment, >0 on success. */ static ssize_t parse_fencedcode(struct lowdown_doc *doc, char *data, size_t size) { struct lowdown_buf text, lang; size_t i = 0, text_start, line_start, w, w2, width, width2; char chr, chr2; struct lowdown_node *n; memset(&text, 0, sizeof(struct lowdown_buf)); memset(&lang, 0, sizeof(struct lowdown_buf)); /* Parse codefence line. */ while (i < size && data[i] != '\n') i++; if ((w = parse_codefence(data, i, &lang, &width, &chr)) == 0) return 0; /* Search for end. 
*/ i++; text_start = i; while ((line_start = i) < size) { while (i < size && data[i] != '\n') i++; w2 = is_codefence(data + line_start, i - line_start, &width2, &chr2); if (w == w2 && width == width2 && chr == chr2 && is_empty(data + (line_start+w), i - (line_start+w))) break; i++; } text.data = data + text_start; text.size = line_start - text_start; if ((n = pushnode(doc, LOWDOWN_BLOCKCODE)) == NULL) return -1; if (!hbuf_create(&n->rndr_blockcode.text, data + text_start, line_start - text_start)) return -1; if (!hbuf_createb(&n->rndr_blockcode.lang, &lang)) return -1; popnode(doc, n); return i; } static ssize_t parse_blockcode(struct lowdown_doc *doc, char *data, size_t size) { size_t beg = 0, end, pre; struct lowdown_buf *work = NULL; struct lowdown_node *n; if ((work = hbuf_new(256)) == NULL) goto err; while (beg < size) { for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) continue; pre = prefix_code(data + beg, end - beg); /* * Skip prefix or non-empty non-prefixed line breaking * the pre. */ if (pre) beg += pre; else if (!is_empty(data + beg, end - beg)) break; /* * Verbatim copy to the working buffer, escaping * entities. */ if (beg < end) { if (is_empty(data + beg, end - beg)) { if (!hbuf_putc(work, '\n')) goto err; } else { if (!hbuf_put(work, data + beg, end - beg)) goto err; } } beg = end; } while (work->size && work->data[work->size - 1] == '\n') work->size -= 1; if (!hbuf_putc(work, '\n')) goto err; if ((n = pushnode(doc, LOWDOWN_BLOCKCODE)) == NULL) goto err; if (!hbuf_createb(&n->rndr_blockcode.text, work)) goto err; popnode(doc, n); hbuf_free(work); return beg; err: hbuf_free(work); return -1; } /* * Parsing of a single list item assuming initial prefix is already * removed. 
*/
/*
 * The item's lines are gathered, with leading indentation removed,
 * into a working buffer that is then parsed inline or as a block.
 * "flags" is both read (list type) and updated: HLIST_FL_BLOCK is set
 * when the item contains blank lines, HLIST_LI_END when the enclosing
 * list ends with this item.  "ob" is not used here.
 * Returns <0 on failure, 0 if "data" does not start with a list item
 * prefix, otherwise the number of bytes consumed.
 */
static ssize_t
parse_listitem(struct lowdown_buf *ob, struct lowdown_doc *doc,
	char *data, size_t size, enum hlist_fl *flags, size_t num)
{
	struct lowdown_buf	*work = NULL;
	size_t			 beg = 0, end, pre, sublist = 0,
				 orgpre, i, has_next_uli = 0, dli_lines,
				 has_next_oli = 0, has_next_dli = 0;
	int			 in_empty = 0, has_inside_empty = 0,
				 in_fence = 0, ff, checked = -1;
	struct lowdown_node	*n;

	/* Keeping track of the first indentation prefix. */

	orgpre = countspaces(data, 0, size, 3);

	/* Try each kind of item prefix in turn. */

	beg = prefix_uli(doc, data, size, &checked);

	if (!beg)
		beg = prefix_oli(doc, data, size, NULL);
	if (!beg)
		beg = prefix_dli(doc, data, size);
	if (!beg)
		return 0;

	/* Skipping to the beginning of the following line. */

	end = beg;
	while (end < size && data[end - 1] != '\n')
		end++;

	/* Getting working buffers. */

	if ((work = hbuf_new(64)) == NULL)
		goto err;

	/* Putting the first line into the working buffer. */

	if (!hbuf_put(work, data + beg, end - beg))
		goto err;
	beg = end;
	dli_lines = 1;

	/*
	 * Process the following lines.
	 * Use the "dli_lines" variable to see if we should consider an
	 * opening dli prefix to be a valid dli token.
	 */

	while (beg < size) {
		has_next_uli = has_next_oli = has_next_dli = 0;
		end++;

		while (end < size && data[end - 1] != '\n')
			end++;

		/* Process an empty line. */

		if (is_empty(data + beg, end - beg)) {
			in_empty = 1;
			beg = end;
			dli_lines = 0;
			continue;
		}

		dli_lines++;

		/* Calculating the indentation. */

		pre = i = countspaces(data, beg, end, 4) - beg;

		/* Each fence line toggles whether we're in a fence. */

		if (doc->ext_flags & LOWDOWN_FENCED)
			if (is_codefence(data + beg + i,
			    end - beg - i, NULL, NULL))
				in_fence = !in_fence;

		/*
		 * Only check for new list items if we are **not**
		 * inside a fenced code block.
		 * We only allow dli if we've had a single line of
		 * content beforehand.
		 */

		if (!in_fence) {
			has_next_uli = prefix_uli(doc,
				data + beg + i, end - beg - i, NULL);
			has_next_dli = dli_lines <= 2 && prefix_dli
				(doc, data + beg + i, end - beg - i);
			has_next_oli = prefix_oli
				(doc, data + beg + i, end - beg - i,
				 NULL);
			if (has_next_uli || has_next_dli ||
			    has_next_oli)
				dli_lines = 0;
		}

		/*
		 * Checking for a new item.  An unordered prefix that
		 * is also a horizontal rule does not count.
		 */

		if ((has_next_uli &&
		     !is_hrule(data + beg + i, end - beg - i)) ||
		    has_next_oli || has_next_dli) {
			if (in_empty)
				has_inside_empty = 1;

			/*
			 * The following item must have the same (or
			 * less) indentation.
			 */

			if (pre <= orgpre) {
				/*
				 * If the following item has different
				 * list type, we end this list.
				 */

				ff = *flags & HLIST_FL_MASK;
				assert(ff == HLIST_FL_ORDERED ||
					ff == HLIST_FL_UNORDERED ||
					ff == HLIST_FL_DEF);

				if (in_empty &&
				    (((ff == HLIST_FL_ORDERED) &&
				      (has_next_uli || has_next_dli)) ||
				     ((ff == HLIST_FL_UNORDERED) &&
				      (has_next_oli || has_next_dli)) ||
				     ((ff == HLIST_FL_DEF) &&
				      (has_next_oli || has_next_uli)))) {
					*flags |= HLIST_LI_END;
				}

				break;
			}

			/* Deeper indent: remember where sublist starts. */

			if (!sublist)
				sublist = work->size;
		} else if (in_empty && pre == 0) {
			/*
			 * Joining only indented stuff after empty
			 * lines; note that now we only require 1 space
			 * of indentation to continue a list.
			 */

			*flags |= HLIST_LI_END;
			break;
		}

		/* Represent any skipped blank lines by one newline. */

		if (in_empty) {
			if (!hbuf_putc(work, '\n'))
				goto err;
			has_inside_empty = 1;
			in_empty = 0;
		}

		/*
		 * Adding the line without prefix into the working
		 * buffer.
		 */

		if (!hbuf_put(work, data + beg + i, end - beg - i))
			goto err;

		beg = end;
	}

	/* Render of li contents. */

	if (has_inside_empty)
		*flags |= HLIST_FL_BLOCK;

	if ((n = pushnode(doc, LOWDOWN_LISTITEM)) == NULL)
		goto err;

	n->rndr_listitem.flags = *flags;
	n->rndr_listitem.num = num;

	/* "checked" is <0 when the item has no checkbox. */

	if (checked > 0)
		n->rndr_listitem.flags |= HLIST_FL_CHECKED;
	else if (checked == 0)
		n->rndr_listitem.flags |= HLIST_FL_UNCHECKED;

	if (*flags & HLIST_FL_BLOCK) {
		/* Intermediate render of block li. */

		if (sublist && sublist < work->size) {
			if (!parse_block(doc,
			    work->data, sublist))
				goto err;
			if (!parse_block(doc,
			    work->data + sublist,
			    work->size - sublist))
				goto err;
		} else {
			if (!parse_block(doc,
			    work->data, work->size))
				goto err;
		}
	} else {
		/* Intermediate render of inline li. */

		if (sublist && sublist < work->size) {
			if (!parse_inline(doc,
			    work->data, sublist))
				goto err;
			if (!parse_block(doc,
			    work->data + sublist,
			    work->size - sublist))
				goto err;
		} else {
			if (!parse_inline(doc,
			    work->data, work->size))
				goto err;
		}
	}

	popnode(doc, n);
	hbuf_free(work);
	return beg;
err:
	hbuf_free(work);
	return -1;
}

/*
 * Parse definition list.
 * This must follow a single-line paragraph, which it integrates as the
 * title of the list.
 * (The paragraph can contain arbitrary styling.)
 * If the node before that paragraph is already a definition list, the
 * title and its data are appended to it instead of a new list.
 * Returns <0 on failure, otherwise the number of bytes consumed.
 */
static ssize_t
parse_definition(struct lowdown_doc *doc, char *data, size_t size)
{
	struct lowdown_buf	*work = NULL;
	size_t			 i = 0, k = 1;
	ssize_t			 ret;
	enum hlist_fl		 flags = HLIST_FL_DEF;
	struct lowdown_node	*n, *nn, *cur, *prev;

	if ((work = hbuf_new(256)) == NULL)
		goto err;

	/*
	 * Record whether we want to start in block mode.
	 * NOTE(review): "cur" is used without a NULL check, so this
	 * assumes the caller has just emitted the paragraph.
	 */

	cur = TAILQ_LAST(&doc->current->children, lowdown_nodeq);
	if (cur->rndr_paragraph.beoln)
		flags |= HLIST_FL_BLOCK;

	/* Do we need to merge into a previous definition list? */

	prev = TAILQ_PREV(cur, lowdown_nodeq, entries);

	if (prev != NULL && prev->type == LOWDOWN_DEFINITION) {
		/* Re-enter the list by hand, as pushnode is skipped. */
		n = doc->current = prev;
		flags |= n->rndr_definition.flags;
		doc->depth++;
	} else {
		n = pushnode(doc, LOWDOWN_DEFINITION);
		if (n == NULL)
			goto err;
		n->rndr_definition.flags = flags;
	}

	/* Re-parent the paragraph as the definition title. */

	TAILQ_REMOVE(&cur->parent->children, cur, entries);
	TAILQ_INSERT_TAIL(&n->children, cur, entries);
	cur->type = LOWDOWN_DEFINITION_TITLE;
	cur->parent = n;

	while (i < size) {
		nn = pushnode(doc, LOWDOWN_DEFINITION_DATA);
		if (nn == NULL)
			goto err;
		ret = parse_listitem(work, doc,
			data + i, size - i, &flags, k++);
		if (ret < 0)
			goto err;
		i += ret;
		popnode(doc, nn);
		if (ret == 0 || (flags & HLIST_LI_END))
			break;
	}

	if (flags & HLIST_FL_BLOCK)
		n->rndr_definition.flags |= HLIST_FL_BLOCK;

	popnode(doc, n);
	hbuf_free(work);
	return i;
err:
	hbuf_free(work);
	return -1;
}

/*
 * Parsing ordered or unordered list block.
 * If "oli_data" is not NULL, it's the numeric string prefix of the
 * ordered entry.  It's either zero-length or well-formed.
 * Returns <0 on failure, otherwise the number of bytes consumed.
 */
static ssize_t
parse_list(struct lowdown_doc *doc,
	char *data, size_t size, const char *oli_data)
{
	struct lowdown_buf	*work = NULL;
	size_t			 i = 0, pos;
	ssize_t			 ret;
	enum hlist_fl		 flags;
	struct lowdown_node	*n;

	flags = oli_data != NULL ?
		HLIST_FL_ORDERED : HLIST_FL_UNORDERED;

	if ((work = hbuf_new(256)) == NULL)
		goto err;

	if ((n = pushnode(doc, LOWDOWN_LIST)) == NULL)
		goto err;

	n->rndr_list.start = 1;
	n->rndr_list.flags = flags;

	/* A start of zero (including on conversion error) becomes 1. */

	if (oli_data != NULL && oli_data[0] != '\0') {
		n->rndr_list.start = strtonum
			(oli_data, 0, UINT32_MAX, NULL);
		if (n->rndr_list.start == 0)
			n->rndr_list.start = 1;
	}

	pos = n->rndr_list.start;

	while (i < size) {
		ret = parse_listitem(work, doc,
			data + i, size - i, &flags, pos++);
		if (ret < 0)
			goto err;
		i += ret;
		if (ret == 0 || (flags & HLIST_LI_END))
			break;
	}

	if (flags & HLIST_FL_BLOCK)
		n->rndr_list.flags |= HLIST_FL_BLOCK;

	popnode(doc, n);
	hbuf_free(work);
	return i;
err:
	hbuf_free(work);
	return -1;
}

/*
 * Parsing of atx-style headers.
*/ static ssize_t parse_atxheader(struct lowdown_doc *doc, char *data, size_t size) { size_t level = 0, i, end, skip; struct lowdown_node *n; while (level < size && level < 6 && data[level] == '#') level++; i = countspaces(data, level, size, 0); for (end = i; end < size && data[end] != '\n'; end++) continue; skip = end; while (end && data[end - 1] == '#') end--; while (end && data[end - 1] == ' ') end--; if (end > i) { if ((n = pushnode(doc, LOWDOWN_HEADER)) == NULL) return -1; assert(level > 0); n->rndr_header.level = level - 1; if (!parse_inline(doc, data + i, end - i)) return -1; popnode(doc, n); if ((doc->ext_flags & LOWDOWN_ATTRS) && !parse_header_ext_attrs(n)) return -1; } return skip; } /* * Check for end of HTML block : </tag>( *)\n * Returns tag length on match, 0 otherwise. * Assumes data starts with "<". */ static size_t htmlblock_is_end(const char *tag, size_t tag_len, struct lowdown_doc *doc, const char *data, size_t size) { size_t i = tag_len + 3, w; /* * Try to match the end tag * Note: we're not considering tags like "</tag >" which are * still valid. */ if (i > size || data[1] != '/' || strncasecmp(data + 2, tag, tag_len) != 0 || data[tag_len + 2] != '>') return 0; /* Rest of the line must be empty. */ if ((w = is_empty(data + i, size - i)) == 0 && i < size) return 0; return i + w; } /* * Try to find HTML block ending tag. * Returns the length on match, 0 otherwise. */ static size_t htmlblock_find_end(const char *tag, size_t tag_len, struct lowdown_doc *doc, const char *data, size_t size) { size_t i, w = 0; for (i = 0; ; i++) { while (i < size && data[i] != '<') i++; if (i >= size) return 0; w = htmlblock_is_end(tag, tag_len, doc, data + i, size - i); if (w) break; } return i + w; } /* * Try to find end of HTML block in strict mode (it must be an * unindented line, and have a blank line afterwards). * Returns the length on match, 0 otherwise. 
*/ static size_t htmlblock_find_end_strict(const char *tag, size_t tag_len, struct lowdown_doc *doc, const char *data, size_t size) { size_t i = 0, mark; while (1) { mark = i; while (i < size && data[i] != '\n') i++; if (i < size) i++; if (i == mark) return 0; if (data[mark] == ' ' && mark > 0) continue; mark += htmlblock_find_end(tag, tag_len, doc, data + mark, i - mark); if (mark == i && (is_empty(data + i, size - i) || i >= size)) break; } return i; } /* * Canonicalise a sequence of length "len" bytes in "str". * This returns NULL if the sequence is not recognised, or a * nil-terminated string of the sequence otherwise. */ static const char * hhtml_find_block(const char *str, size_t len) { size_t i; static const char *tags[] = { "address", "article", "aside", "blockquote", "del", "details", "dialog", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "iframe", "ins", "li", "main", "math", "nav", "noscript", "ol", "p", "pre", "section", "script", "style", "table", "ul", NULL, }; for (i = 0; tags[i] != NULL; i++) if (strncasecmp(tags[i], str, len) == 0) return tags[i]; return NULL; } /* * Parsing of inline HTML block. * Return <0 on failure, >0 on success, 0 if not a block. */ static ssize_t parse_htmlblock(struct lowdown_doc *doc, char *data, size_t size) { struct lowdown_buf work; size_t i, j = 0, tag_len, tag_end; const char *curtag = NULL; struct lowdown_node *n; memset(&work, 0, sizeof(struct lowdown_buf)); work.data = data; /* Identification of the opening tag. */ if (size < 2 || data[0] != '<') return 0; i = 1; while (i < size && data[i] != '>' && data[i] != ' ') i++; if (i < size) curtag = hhtml_find_block(data + 1, i - 1); /* Handling of special cases. */ if (!curtag) { /* HTML comment, laxist form. */ if (size > 5 && data[1] == '!' 
&& data[2] == '-' && data[3] == '-') { i = 5; while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>')) i++; i++; if (i < size) j = is_empty(data + i, size - i); if (j) { n = pushnode(doc, LOWDOWN_BLOCKHTML); if (n == NULL) return -1; work.size = i + j; if (!hbuf_createb (&n->rndr_blockhtml.text, &work)) return -1; popnode(doc, n); return work.size; } } /* * HR, which is the only self-closing block tag * considered. * FIXME: we should also do <br />. */ if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) { i = 3; while (i < size && data[i] != '>') i++; if (i + 1 < size) { i++; j = is_empty(data + i, size - i); if (j) { n = pushnode(doc, LOWDOWN_BLOCKHTML); if (n == NULL) return -1; work.size = i + j; if (!hbuf_createb (&n->rndr_blockhtml.text, &work)) return -1; popnode(doc, n); return work.size; } } } /* No special case recognised. */ return 0; } /* Looking for a matching closing tag in strict mode. */ tag_len = strlen(curtag); tag_end = htmlblock_find_end_strict (curtag, tag_len, doc, data, size); /* * If not found, trying a second pass looking for indented match * but not if tag is "ins" or "del" (following original * Markdown.pl). */ if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) tag_end = htmlblock_find_end(curtag, tag_len, doc, data, size); if (!tag_end) return 0; /* The end of the block has been found. */ n = pushnode(doc, LOWDOWN_BLOCKHTML); if (n == NULL) return -1; work.size = tag_end; if (!hbuf_createb(&n->rndr_blockhtml.text, &work)) return -1; popnode(doc, n); return tag_end; } /* * Parse a table row. * Return zero on failure, non-zero on success. 
*/
static int
parse_table_row(struct lowdown_buf *ob, struct lowdown_doc *doc,
	char *data, size_t size, size_t columns,
	const enum htbl_flags *col_data, enum htbl_flags header_flag)
{
	size_t			 i = 0, col, len, cell_start, cell_end;
	struct lowdown_node	*n, *nn;

	/* Skip an optional leading pipe. */

	if (i < size && data[i] == '|')
		i++;

	if ((n = pushnode(doc, LOWDOWN_TABLE_ROW)) == NULL)
		return 0;

	for (col = 0; col < columns && i < size; ++col) {
		while (i < size && xisspace(data[i]))
			i++;

		cell_start = i;

		len = find_emph_char(data + i, size - i, '|');

		/*
		 * Two possibilities for len == 0:
		 * (1) No more pipe char found in the current line.
		 * (2) The next pipe is right after the current one,
		 * i.e. empty cell.
		 * For case 1, we skip to the end of line; for case 2 we
		 * just continue.
		 */

		if (len == 0 && i < size && data[i] != '|')
			len = size - i;
		i += len;

		/* Trim trailing spacing from the cell. */

		cell_end = i - 1;

		while (cell_end > cell_start &&
		    xisspace(data[cell_end]))
			cell_end--;

		nn = pushnode(doc, LOWDOWN_TABLE_CELL);
		if (nn == NULL)
			return 0;
		nn->rndr_table_cell.flags = col_data[col] | header_flag;
		nn->rndr_table_cell.col = col;
		nn->rndr_table_cell.columns = columns;

		if (!parse_inline(doc, data + cell_start,
		    1 + cell_end - cell_start))
			return 0;
		popnode(doc, nn);
		i++;
	}

	/* Pad a short row with empty cells up to "columns". */

	for ( ; col < columns; ++col) {
		nn = pushnode(doc, LOWDOWN_TABLE_CELL);
		if (nn == NULL)
			return 0;
		nn->rndr_table_cell.flags = col_data[col] | header_flag;
		nn->rndr_table_cell.col = col;
		nn->rndr_table_cell.columns = columns;
		popnode(doc, nn);
	}

	popnode(doc, n);
	return 1;
}

/*
 * Parse the initial line of a table.
 * Return <0 on failure, 0 if not a table row, >0 for the offset.
*/ static ssize_t parse_table_header(struct lowdown_node **np, struct lowdown_buf *ob, struct lowdown_doc *doc, char *data, size_t size, size_t *columns, enum htbl_flags **column_data) { size_t i = 0, col, header_end, under_end, dashes; ssize_t pipes = 0; struct lowdown_node *n; while (i < size && data[i] != '\n') if (data[i++] == '|') pipes++; if (i == size || pipes == 0) return 0; header_end = i; while (header_end > 0 && xisspace(data[header_end - 1])) header_end--; if (data[0] == '|') pipes--; if (header_end && data[header_end - 1] == '|') pipes--; if (pipes < 0) return 0; *columns = pipes + 1; *column_data = calloc(*columns, sizeof(enum htbl_flags)); if (*column_data == NULL) return -1; /* Parse the header underline */ i++; if (i < size && data[i] == '|') i++; under_end = i; while (under_end < size && data[under_end] != '\n') under_end++; for (col = 0; col < *columns && i < under_end; ++col) { dashes = 0; i = countspaces(data, i, under_end, 0); if (data[i] == ':') { i++; (*column_data)[col] |= HTBL_FL_ALIGN_LEFT; dashes++; } while (i < under_end && data[i] == '-') { i++; dashes++; } if (i < under_end && data[i] == ':') { i++; (*column_data)[col] |= HTBL_FL_ALIGN_RIGHT; dashes++; } i = countspaces(data, i, under_end, 0); if (i < under_end && data[i] != '|' && data[i] != '+') break; if (dashes < 3) break; i++; } if (col < *columns) return 0; /* (This calls pushnode for the table row.) 
*/ *np = pushnode(doc, LOWDOWN_TABLE_BLOCK); if (*np == NULL) return -1; (*np)->rndr_table.columns = *columns; n = pushnode(doc, LOWDOWN_TABLE_HEADER); if (n == NULL) return -1; n->rndr_table_header.flags = calloc (*columns, sizeof(enum htbl_flags)); if (n->rndr_table_header.flags == NULL) return -1; for (i = 0; i < *columns; i++) n->rndr_table_header.flags[i] = (*column_data)[i]; n->rndr_table_header.columns = *columns; if (!parse_table_row(ob, doc, data, header_end, *columns, *column_data, HTBL_FL_HEADER)) return -1; popnode(doc, n); return under_end + 1; } /* * Parse a table block. * Return <0 on failure, zero if not a table, >0 offset otherwise. */ static ssize_t parse_table(struct lowdown_doc *doc, char *data, size_t size) { size_t i, columns, row_start, pipes; ssize_t ret; struct lowdown_buf *header_work = NULL, *body_work = NULL; enum htbl_flags *col_data = NULL; struct lowdown_node *n = NULL, *nn; if ((header_work = hbuf_new(64)) == NULL || (body_work = hbuf_new(256)) == NULL) goto err; ret = parse_table_header(&n, header_work, doc, data, size, &columns, &col_data); if (ret < 0) goto err; if ((i = ret) > 0) { nn = pushnode(doc, LOWDOWN_TABLE_BODY); if (nn == NULL) goto err; while (i < size) { pipes = 0; row_start = i; while (i < size && data[i] != '\n') if (data[i++] == '|') pipes++; if (pipes == 0 || i == size) { i = row_start; break; } if (!parse_table_row(body_work, doc, data + row_start, i - row_start, columns, col_data, 0)) goto err; i++; } popnode(doc, nn); popnode(doc, n); } free(col_data); hbuf_free(header_work); hbuf_free(body_work); return i; err: free(col_data); hbuf_free(header_work); hbuf_free(body_work); return -1; } /* * Parsing of one block, returning next char to parse. * We can assume, entering the block, that our output is newline * aligned. * Return zero on failure, non-zero on success. 
*/ static int parse_block(struct lowdown_doc *doc, char *data, size_t size) { size_t beg = 0, end, i; char *txt_data; char oli_data[10]; struct lowdown_node *n; ssize_t rc; /* * What kind of block are we? * Go through all types of blocks, one by one. */ while (beg < size) { txt_data = data + beg; end = size - beg; /* We are at a #header. */ if (is_atxheader(doc, txt_data, end)) { rc = parse_atxheader(doc, txt_data, end); if (rc < 0) return 0; assert(rc > 0); beg += rc; continue; } /* We have some <HTML>. */ if (data[beg] == '<') { rc = parse_htmlblock(doc, txt_data, end); if (rc > 0) { beg += rc; continue; } else if (rc < 0) return 0; } /* Empty line. */ if ((i = is_empty(txt_data, end)) != 0) { beg += i; continue; } /* Horizontal rule. */ if (is_hrule(txt_data, end)) { n = pushnode(doc, LOWDOWN_HRULE); if (n == NULL) return 0; while (beg < size && data[beg] != '\n') beg++; beg++; popnode(doc, n); continue; } /* Fenced code. */ if (doc->ext_flags & LOWDOWN_FENCED) { rc = parse_fencedcode(doc, txt_data, end); if (rc > 0) { beg += rc; continue; } else if (rc < 0) return 0; } /* Table parsing. */ if (doc->ext_flags & LOWDOWN_TABLES) { rc = parse_table(doc, txt_data, end); if (rc > 0) { beg += rc; continue; } else if (rc < 0) return 0; } /* We're a > block quote. */ if (prefix_quote(txt_data, end)) { rc = parse_blockquote(doc, txt_data, end); if (rc < 0) return 0; beg += rc; continue; } /* Prefixed code (like block-quotes). */ if (!(doc->ext_flags & LOWDOWN_NOCODEIND) && prefix_code(txt_data, end)) { rc = parse_blockcode(doc, txt_data, end); if (rc < 0) return 0; beg += rc; continue; } /* Some sort of unordered list. */ if (prefix_uli(doc, txt_data, end, NULL)) { rc = parse_list(doc, txt_data, end, NULL); if (rc < 0) return 0; beg += rc; continue; } /* * A definition list. * Only use this is preceded by a one-line paragraph. 
*/ if (doc->current != NULL && prefix_dli(doc, txt_data, end)) { n = TAILQ_LAST(&doc->current->children, lowdown_nodeq); if (n != NULL && n->type == LOWDOWN_PARAGRAPH && n->rndr_paragraph.lines == 1) { rc = parse_definition(doc, txt_data, end); if (rc < 0) return 0; beg += rc; continue; } } /* An ordered list. */ if (prefix_oli(doc, txt_data, end, oli_data)) { rc = parse_list(doc, txt_data, end, oli_data); if (rc < 0) return 0; beg += rc; continue; } /* No match: just a regular paragraph. */ if ((rc = parse_paragraph(doc, txt_data, end)) < 0) return 0; beg += rc; } return 1; } /* * Returns >0 if a line is a footnote definition, 0 if not, <0 on * failure. This gathers any footnote content into the footq footnote * queue. */ static int is_footnote(struct lowdown_doc *doc, const char *data, size_t beg, size_t end, size_t *last) { size_t i = 0, ind = 0, start = 0, id_offs, id_end; struct lowdown_buf *contents = NULL; int in_empty = 0; struct foot_ref *ref = NULL; /* up to 3 optional leading spaces */ if (beg + 3 >= end) return 0; i = countspaces(data, beg, end, 3); /* id part: caret followed by anything between brackets */ if (data[i] != '[') return 0; i++; if (i >= end || data[i] != '^') return 0; i++; id_offs = i; while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']') i++; if (i >= end || data[i] != ']') return 0; id_end = i; /* spacer: colon (space | tab)* newline? 
(space | tab)* */ i++; if (i >= end || data[i] != ':') return 0; i++; /* getting content buffer */ if ((contents = hbuf_new(64)) == NULL) return -1; start = i; /* process lines similar to a list item */ while (i < end) { while (i < end && data[i] != '\n' && data[i] != '\r') i++; /* process an empty line */ if (is_empty(data + start, i - start)) { in_empty = 1; if (i < end && (data[i] == '\n' || data[i] == '\r')) { i++; if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++; } start = i; continue; } /* calculating the indentation */ ind = countspaces(data, start, end, 4) - start; /* joining only indented stuff after empty lines; * note that now we only require 1 space of indentation * to continue, just like lists */ if (ind == 0) { if (start == id_end + 2 && data[start] == '\t') { /* XXX: wtf? */ } else break; } else if (in_empty) if (!hbuf_putc(contents, '\n')) goto err; in_empty = 0; /* adding the line into the content buffer */ if (!hbuf_put(contents, data + start + ind, i - start - ind)) goto err; /* add carriage return */ if (i < end) { if (!hbuf_putc(contents, '\n')) goto err; if (i < end && (data[i] == '\n' || data[i] == '\r')) { i++; if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++; } } start = i; } if (last) *last = start; if ((ref = calloc(1, sizeof(struct foot_ref))) == NULL) goto err; TAILQ_INSERT_TAIL(&doc->footq, ref, entries); if (!hbuf_createb(&ref->contents, contents)) return -1; if (!hbuf_create(&ref->name, data + id_offs, id_end - id_offs)) return -1; hbuf_free(contents); return 1; err: hbuf_free(contents); return -1; } /* * Returns >0 if the line is a reference, 0 if not, <0 on failure. */ static int is_ref(struct lowdown_doc *doc, const char *data, size_t beg, size_t end, size_t *last) { size_t i, id_offset, id_end, link_offset, link_end, title_offset = 0, title_end = 0, line_end, garbage, attr_offset = 0, attr_end = 0; struct link_ref *ref; /* Up to 3 optional leading spaces. 
*/ if (beg + 3 >= end) return 0; i = countspaces(data, beg, end, 3); /* Id part: anything but a newline between brackets. */ if (data[i] != '[') return 0; i++; id_offset = i; while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']') i++; if (i >= end || data[i] != ']') return 0; id_end = i; /* Spacer: colon (space | tab)* newline? (space | tab)* */ i++; if (i >= end || data[i] != ':') return 0; i++; i = countspaces(data, i, end, 0); if (i < end && (data[i] == '\n' || data[i] == '\r')) { i++; if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; } i = countspaces(data, i, end, 0); if (i >= end) return 0; /* * Link: spacing-free sequence, optionally between angle * brackets. */ if (data[i] == '<') i++; link_offset = i; while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r') i++; if (data[i - 1] == '>') link_end = i - 1; else link_end = i; /* * Space: (space | tab)* (newline | '\'' | '"' | '(' ) * Optionally '{' for attributes. */ i = countspaces(data, i, end, 0); if (doc->ext_flags & LOWDOWN_ATTRS) { if (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' && data[i] != '{') return 0; } else { if (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(') return 0; } line_end = 0; /* computing end-of-line */ if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i; if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') line_end = i + 1; /* optional (space|tab)* spacer after a newline */ if (line_end) i = countspaces(data, line_end + 1, end, 0); /* * Optional title: any non-newline sequence enclosed in '"() * alone on its line. This is... confusing, because we can have * any number of embedded delimiters in the text and only the * last one is valid. 
* * [link1]: moo.com "hello "world" (hi) there * ^last * * The rule is that there must be only spaces between the last * delimiter, whatever it is, and the newline OR opening curly * brace (if parsing), which signifies extended attributes. */ if (i + 1 < end && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) { title_offset = ++i; for (garbage = 0; i < end; i++) { if (data[i] == '\'' || data[i] == '"' || data[i] == ')') { title_end = i; garbage = 0; continue; } if (data[i] == '\n' || data[i] == '\r' || ((doc->ext_flags & LOWDOWN_ATTRS) && data[i] == '{')) break; if (data[i] != ' ') garbage = 1; } if (garbage) return 0; } /* * Now optionally the attributes. These use similar semantics * where there can be any number of embedded delimiters: only * the last one is recorded, and there may be no garbage between * it and the newline. */ if ((doc->ext_flags & LOWDOWN_ATTRS) && i + 1 < end && data[i] == '{') { attr_offset = ++i; for (garbage = 0; i < end; i++) { if (data[i] == '}') { attr_end = i; garbage = 0; continue; } if (data[i] == '\n' || data[i] == '\r') break; if (data[i] != ' ') garbage = 1; } if (garbage) return 0; } if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') line_end = i + 1; else line_end = i; /* Garbage after the link or empty link. */ if (!line_end || link_end == link_offset) return 0; /* A valid ref has been found, filling-in return structures. 
*/ if (last) *last = line_end; if ((ref = calloc(1, sizeof(struct link_ref))) == NULL) return -1; TAILQ_INSERT_TAIL(&doc->refq, ref, entries); if (id_end - id_offset) { ref->name = hbuf_new(id_end - id_offset); if (ref->name == NULL) return -1; if (!hbuf_put(ref->name, data + id_offset, id_end - id_offset)) return -1; } ref->link = hbuf_new(link_end - link_offset); if (ref->link == NULL) return -1; if (!hbuf_put(ref->link, data + link_offset, link_end - link_offset)) return -1; if (title_end > title_offset) { ref->title = hbuf_new(title_end - title_offset); if (ref->title == NULL) return -1; if (!hbuf_put(ref->title, data + title_offset, title_end - title_offset)) return -1; } if (attr_end > attr_offset) { ref->attrs = hbuf_new(attr_end - attr_offset); if (ref->attrs == NULL) return -1; if (!hbuf_put(ref->attrs, data + attr_offset, attr_end - attr_offset)) return -1; } return 1; } /* * Replace tabs with 4 spaces. * Return zero on failure (memory), non-zero on success. */ static int expand_tabs(struct lowdown_buf *ob, const char *line, size_t size) { size_t i, tab = 0, org; /* * This code makes two assumptions: * * (1) Input is valid UTF-8. (Any byte with top two bits 10 is * skipped, whether or not it is a valid UTF-8 continuation * byte.) * (2) Input contains no combining characters. (Combining * characters should be skipped but are not.) */ for (i = 0; i < size; i++) { org = i; while (i < size && line[i] != '\t') { /* ignore UTF-8 continuation bytes */ if ((line[i] & 0xc0) != 0x80) tab++; i++; } if (i > org && !hbuf_put(ob, line + org, i - org)) return 0; if (i >= size) break; do { if (!hbuf_putc(ob, ' ')) return 0; tab++; } while (tab % 4); } return 1; } struct lowdown_doc * lowdown_doc_new(const struct lowdown_opts *opts) { struct lowdown_doc *doc; unsigned int extensions = opts ? opts->feat : 0; size_t i; doc = calloc(1, sizeof(struct lowdown_doc)); if (doc == NULL) return NULL; doc->maxdepth = opts == NULL ? 
128 : opts->maxdepth; doc->active_char['*'] = MD_CHAR_EMPHASIS; doc->active_char['_'] = MD_CHAR_EMPHASIS; if (extensions & LOWDOWN_STRIKE) doc->active_char['~'] = MD_CHAR_EMPHASIS; if (extensions & LOWDOWN_HILITE) doc->active_char['='] = MD_CHAR_EMPHASIS; doc->active_char['`'] = MD_CHAR_CODESPAN; doc->active_char['\n'] = MD_CHAR_LINEBREAK; doc->active_char['['] = MD_CHAR_LINK; doc->active_char['!'] = MD_CHAR_IMAGE; doc->active_char['<'] = MD_CHAR_LANGLE; doc->active_char['\\'] = MD_CHAR_ESCAPE; doc->active_char['&'] = MD_CHAR_ENTITY; if (extensions & LOWDOWN_AUTOLINK) { doc->active_char[':'] = MD_CHAR_AUTOLINK_URL; doc->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL; doc->active_char['w'] = MD_CHAR_AUTOLINK_WWW; } if (extensions & LOWDOWN_SUPER) doc->active_char['^'] = MD_CHAR_SUPERSCRIPT; if (extensions & LOWDOWN_MATH) doc->active_char['$'] = MD_CHAR_MATH; doc->ext_flags = extensions; if (opts != NULL && opts->metasz > 0) { doc->meta = calloc(opts->metasz, sizeof(char *)); if (doc->meta == NULL) goto err; doc->metasz = opts->metasz; for (i = 0; i < doc->metasz; i++) { doc->meta[i] = strdup(opts->meta[i]); if (doc->meta[i] == NULL) goto err; } } if (opts != NULL && opts->metaovrsz > 0) { doc->metaovr = calloc(opts->metaovrsz, sizeof(char *)); if (doc->metaovr == NULL) goto err; doc->metaovrsz = opts->metaovrsz; for (i = 0; i < doc->metaovrsz; i++) { doc->metaovr[i] = strdup(opts->metaovr[i]); if (doc->metaovr[i] == NULL) goto err; } } return doc; err: lowdown_doc_free(doc); return NULL; } /* * Parse a MMD meta-data value. * If the value is a single line, both leading and trailing whitespace * will be stripped. * If the value spans multiple lines, leading whitespace from the first * line will be stripped and any following lines will be taken as is. 
* Returns a pointer to the value and the length of the value will be * written to "len"; */ static const char * parse_metadata_val(const char *data, size_t sz, size_t *len) { const char *val; size_t i, nlines = 0, nspaces, peek = 0; int startws; /* Skip leading whitespace. */ i = countspaces(data, 0, sz, 0); val = data; sz -= i; /* Find end of line and count trailing whitespace. */ for (i = nspaces = 0; i < sz && data[i] != '\n'; i++) if (data[i] == ' ') nspaces++; else nspaces = 0; *len = i; /* * Iterate through zero or more following multilines. * Multilines are terminated by a line containing a colon (that * is not offset by whitespace) or a blank line. */ startws = i + 1 < sz && (data[i + 1] == ' ' || data[i + 1] == '\t'); for (i++; i < sz; i++) { /* * This block is executed within the line. * We use "peek" to see how far into the line we are; * thus, if we encounter a colon without leading * whitespace, we know that we're in the next metadata * and should stop. */ if (startws == 0 && data[i] == ':') break; peek++; if (data[i] != '\n') continue; /* * We're at a newline: start the loop again by seeing if * the next line starts with whitespace. */ nlines++; *len += peek; peek = 0; /* (Filtered out prior to calling parse_metdata().) */ assert(!(i + 1 < sz && data[i + 1] == '\n')); /* Check if the next line has leading whitespace. */ startws = i + 1 < sz && (data[i + 1] == ' ' || data[i + 1] == '\t'); } /* Last metadata in section. */ if (i == sz && peek) *len += peek + 1; /* Only remove trailing whitespace from a single line. */ if (nlines == 0) *len -= nspaces; return val; } /* * Parse MMD key-value meta-data pairs. * Store the output in the doc's "metaq", as we might be using the * values for variable replacement elsewhere in this document. * Returns 0 if this is not metadata, >0 of it is, <0 on failure. 
*/
static int
parse_metadata(struct lowdown_doc *doc, const char *data, size_t sz)
{
	struct lowdown_meta	*meta;
	struct lowdown_node	*node, *text;
	const char		*key, *val;
	char			*keybuf, *out;
	size_t			 i, j, pos, keysz, vsz;
	int			 c;

	/* The input must be non-empty and newline-terminated. */

	if (sz == 0 || data[sz - 1] != '\n')
		return 0;

	/*
	 * Only treat the input as metadata if its first line contains
	 * a colon; this keeps ordinary leading lines of a regular
	 * markdown document from being consumed as metadata.
	 */

	pos = 0;
	while (pos < sz && data[pos] != '\n' && data[pos] != ':')
		pos++;

	if (pos == sz || data[pos] == '\n')
		return 0;

	/*
	 * Each pass handles one key-value pair, recording it both as a
	 * node and in the document's metaq (for variables).
	 */

	pos = 0;
	while (pos < sz) {
		/* The key runs up to the next colon (or the end). */

		key = &data[pos];
		for (i = pos; i < sz && data[i] != ':'; i++)
			continue;
		keysz = i - pos;

		keybuf = malloc(keysz + 1);
		if (keybuf == NULL)
			return -1;
		out = keybuf;

		/*
		 * Normalised key: alphanumerics (lowercased), "-" and
		 * "_" are kept, whitespace is dropped, and anything
		 * else becomes a question mark.
		 */

		for (j = 0; j < keysz; j++) {
			c = (unsigned char)key[j];
			if (isalnum(c) || c == '-' || c == '_')
				*out++ = tolower(c);
			else if (!isspace(c))
				*out++ = '?';
		}
		*out = '\0';

		/*
		 * A repeated key replaces the earlier one: drop the
		 * old entry from the metaq and from the header's
		 * children.
		 */

		TAILQ_FOREACH(meta, doc->metaq, entries) {
			if (strcmp(meta->key, keybuf) != 0)
				continue;
			TAILQ_REMOVE(doc->metaq, meta, entries);
			free(meta->key);
			free(meta->value);
			free(meta);
			break;
		}

		assert(doc->current->type == LOWDOWN_DOC_HEADER);

		TAILQ_FOREACH(node, &doc->current->children, entries) {
			assert(node->type == LOWDOWN_META);
			if (!hbuf_streq(&node->rndr_meta.key, keybuf))
				continue;
			TAILQ_REMOVE(&doc->current->children,
				node, entries);
			lowdown_node_free(node);
			break;
		}

		/* Create the node; the scratch key is then done. */

		node = pushnode(doc, LOWDOWN_META);
		if (node == NULL) {
			free(keybuf);
			return -1;
		}
		if (!hbuf_create(&node->rndr_meta.key,
		    keybuf, out - keybuf)) {
			free(keybuf);
			return -1;
		}
		free(keybuf);

		/* Queue the entry first so it stays reachable. */

		meta = calloc(1, sizeof(struct lowdown_meta));
		if (meta == NULL)
			return -1;
		TAILQ_INSERT_TAIL(doc->metaq, meta, entries);

		meta->key = strndup(node->rndr_meta.key.data,
			node->rndr_meta.key.size);
		if (meta->key == NULL)
			return -1;

		/* No colon at all: empty value, and we're done. */

		if (i == sz) {
			meta->value = strdup("");
			if (meta->value == NULL)
				return -1;
			popnode(doc, node);
			break;
		}

		/*
		 * Skip the colon and any whitespace after it.  If
		 * nothing remains, the value is empty and no text node
		 * is created.
		 */

		assert(data[i] == ':');
		for (i++; i < sz; i++)
			if (!isspace((unsigned char)data[i]))
				break;

		if (i == sz) {
			meta->value = strdup("");
			if (meta->value == NULL)
				return -1;
			popnode(doc, node);
			break;
		}

		/* Non-empty value: record it as a text node, too. */

		val = parse_metadata_val(&data[i], sz - i, &vsz);

		meta->value = strndup(val, vsz);
		if (meta->value == NULL)
			return -1;

		text = pushnode(doc, LOWDOWN_NORMAL_TEXT);
		if (text == NULL)
			return -1;
		if (!hbuf_push(&text->rndr_normal_text.text, val, vsz))
			return -1;

		popnode(doc, text);
		popnode(doc, node);

		/* Resume after the value and its terminating byte. */

		pos = i + vsz + 1;
	}

	return 1;
}

/*
 * Parse the buffer in data of length size.
 * If metaq is not NULL, it is filled with the meta information;
 * otherwise the meta information is kept in a temporary queue that is
 * destroyed before returning.
 * (Obviously only applicable if LOWDOWN_METADATA has been set.)
*/ struct lowdown_node * lowdown_doc_parse(struct lowdown_doc *doc, size_t *maxn, const char *data, size_t size, struct lowdown_metaq *metaq) { static const char UTF8_BOM[] = { 0xEF, 0xBB, 0xBF }; struct lowdown_buf *text; size_t beg, end, i; const char *sv; struct lowdown_node *n, *root = NULL; struct lowdown_metaq mq; int c, rc = 0; /* * Have a temporary "mq" if "metaq" is not set. We clear this * automatically at the tail of the function. */ TAILQ_INIT(&mq); if (metaq == NULL) metaq = &mq; /* Initialise the parser. */ doc->nodes = 0; doc->depth = 0; doc->current = NULL; doc->in_link_body = 0; doc->foots = 0; doc->metaq = metaq; TAILQ_INIT(doc->metaq); TAILQ_INIT(&doc->refq); TAILQ_INIT(&doc->footq); if ((text = hbuf_new(64)) == NULL) goto out; if (!hbuf_grow(text, size)) goto out; if ((root = pushnode(doc, LOWDOWN_ROOT)) == NULL) goto out; /* * Skip a possible UTF-8 BOM, even though the Unicode standard * discourages having these in UTF-8 documents. */ beg = 0; if (size >= 3 && memcmp(data, UTF8_BOM, 3) == 0) beg += 3; /* * Zeroth pass: metadata. First process given metadata, then * in-document metadata, then overriding metadata. The * in-document metadata is conditionally processed. */ if ((n = pushnode(doc, LOWDOWN_DOC_HEADER)) == NULL) goto out; for (i = 0; i < doc->metasz; i++) if (parse_metadata(doc, doc->meta[i], strlen(doc->meta[i])) < 0) goto out; /* FIXME: CRLF EOLNs. */ if ((doc->ext_flags & LOWDOWN_METADATA) && beg < size - 1 && isalnum((unsigned char)data[beg])) { sv = &data[beg]; for (end = beg + 1; end < size; end++) { if (data[end] == '\n' && data[end - 1] == '\n') break; } if ((c = parse_metadata(doc, sv, end - beg)) > 0) beg = end + 1; else if (c < 0) goto out; } for (i = 0; i < doc->metaovrsz; i++) if (parse_metadata(doc, doc->metaovr[i], strlen(doc->metaovr[i])) < 0) goto out; popnode(doc, n); /* * First pass: looking for references and footnotes, copying * everything else. 
*/ while (beg < size) { if (doc->ext_flags & LOWDOWN_FOOTNOTES) { c = is_footnote(doc, data, beg, size, &end); if (c > 0) { beg = end; continue; } else if (c < 0) goto out; } if ((c = is_ref(doc, data, beg, size, &end)) > 0) { beg = end; continue; } else if (c < 0) goto out; /* Skipping to the next line. */ end = beg; while (end < size && data[end] != '\n' && data[end] != '\r') end++; /* Adding the line body if present. */ if (end > beg && !expand_tabs(text, data + beg, end - beg)) goto out; /* Add one \n per newline. */ while (end < size && (data[end] == '\n' || data[end] == '\r')) { if (data[end] == '\n' || (end + 1 < size && data[end + 1] != '\n')) if (!hbuf_putc(text, '\n')) goto out; end++; } beg = end; } /* * Second pass (after header): rendering the document body and * footnotes. */ if (text->size) { /* Adding a final newline if not already present. */ if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r') if (!hbuf_putc(text, '\n')) goto out; if (!parse_block(doc, text->data, text->size)) goto out; } rc = 1; out: hbuf_free(text); free_link_refs(&doc->refq); free_foot_refq(&doc->footq); lowdown_metaq_free(&mq); if (rc) { if (maxn != NULL) *maxn = doc->nodes; popnode(doc, root); assert(doc->depth == 0); } else { lowdown_node_free(root); root = NULL; } return root; } void lowdown_node_free(struct lowdown_node *p) { struct lowdown_node *n; if (p == NULL) return; switch (p->type) { case LOWDOWN_BLOCKCODE: hbuf_free(&p->rndr_blockcode.text); hbuf_free(&p->rndr_blockcode.lang); break; case LOWDOWN_BLOCKHTML: hbuf_free(&p->rndr_blockhtml.text); break; case LOWDOWN_CODESPAN: hbuf_free(&p->rndr_codespan.text); break; case LOWDOWN_ENTITY: hbuf_free(&p->rndr_entity.text); break; case LOWDOWN_HEADER: hbuf_free(&p->rndr_header.attr_cls); hbuf_free(&p->rndr_header.attr_id); break; case LOWDOWN_IMAGE: hbuf_free(&p->rndr_image.link); hbuf_free(&p->rndr_image.title); hbuf_free(&p->rndr_image.dims); hbuf_free(&p->rndr_image.alt); 
hbuf_free(&p->rndr_image.attr_width); hbuf_free(&p->rndr_image.attr_height); hbuf_free(&p->rndr_image.attr_cls); hbuf_free(&p->rndr_image.attr_id); break; case LOWDOWN_LINK: hbuf_free(&p->rndr_link.link); hbuf_free(&p->rndr_link.title); hbuf_free(&p->rndr_link.attr_cls); hbuf_free(&p->rndr_link.attr_id); break; case LOWDOWN_LINK_AUTO: hbuf_free(&p->rndr_autolink.link); break; case LOWDOWN_MATH_BLOCK: hbuf_free(&p->rndr_math.text); break; case LOWDOWN_META: hbuf_free(&p->rndr_meta.key); break; case LOWDOWN_NORMAL_TEXT: hbuf_free(&p->rndr_normal_text.text); break; case LOWDOWN_RAW_HTML: hbuf_free(&p->rndr_raw_html.text); break; case LOWDOWN_TABLE_HEADER: free(p->rndr_table_header.flags); break; default: break; } while ((n = TAILQ_FIRST(&p->children)) != NULL) { TAILQ_REMOVE(&p->children, n, entries); lowdown_node_free(n); } free(p); } void lowdown_metaq_free(struct lowdown_metaq *q) { struct lowdown_meta *m; if (q == NULL) return; while ((m = TAILQ_FIRST(q)) != NULL) { TAILQ_REMOVE(q, m, entries); free(m->key); free(m->value); free(m); } } void lowdown_doc_free(struct lowdown_doc *doc) { size_t i; if (doc == NULL) return; for (i = 0; i < doc->metasz; i++) free(doc->meta[i]); for (i = 0; i < doc->metaovrsz; i++) free(doc->metaovr[i]); free(doc->meta); free(doc->metaovr); free(doc); }