commit a0489f374d50206cd7ed1608b805e912f9366450 Author: root <root> Date: Sun, 14 Jan 2024 22:12:12 +0100 add files Diffstat:
513 files changed, 45332 insertions(+), 0 deletions(-)
diff --git a/LICENSE.md b/LICENSE.md @@ -0,0 +1,17 @@ +Copyright (c) 2008, Natacha Porté +Copyright (c) 2011, Vicent Martí +Copyright (c) 2014, Xavier Mendez, Devin Torres and the Hoedown authors +Copyright (c) 2016, Kristaps Dzonsons + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. + diff --git a/Makefile b/Makefile @@ -0,0 +1,446 @@ +.PHONY: regress +.SUFFIXES: .xml .md .html .pdf .1 .1.html .3 .3.html .5 .5.html .thumb.jpg .png .in.pc .pc .valgrind .old.md .diff-valgrind + +include Makefile.configure + +VERSION = 1.0.0 +LIBVER = 1 +OBJS = autolink.o \ + buffer.o \ + diff.o \ + document.o \ + entity.o \ + gemini.o \ + html.o \ + html_escape.o \ + latex.o \ + library.o \ + libdiff.o \ + nroff.o \ + odt.o \ + smartypants.o \ + term.o \ + tree.o \ + util.o +COMPAT_OBJS = compats.o +WWWDIR = /var/www/vhosts/kristaps.bsd.lv/htdocs/lowdown +HTMLS = archive.html \ + atom.xml \ + diff.html \ + diff.diff.html \ + index.html \ + README.html \ + $(MANS) +MANS = $(MAN1S) $(MAN3S) $(MAN5S) +MAN1S = man/lowdown.1.html \ + man/lowdown-diff.1.html +MAN5S = man/lowdown.5.html +MAN3S = man/lowdown.3.html \ + man/lowdown_buf.3.html \ + man/lowdown_buf_diff.3.html \ + man/lowdown_buf_free.3.html \ + man/lowdown_buf_new.3.html \ + man/lowdown_diff.3.html \ + man/lowdown_doc_free.3.html \ + man/lowdown_doc_new.3.html \ + 
man/lowdown_doc_parse.3.html \ + man/lowdown_file.3.html \ + man/lowdown_file_diff.3.html \ + man/lowdown_gemini_free.3.html \ + man/lowdown_gemini_new.3.html \ + man/lowdown_gemini_rndr.3.html \ + man/lowdown_html_free.3.html \ + man/lowdown_html_new.3.html \ + man/lowdown_html_rndr.3.html \ + man/lowdown_latex_free.3.html \ + man/lowdown_latex_new.3.html \ + man/lowdown_latex_rndr.3.html \ + man/lowdown_metaq_free.3.html \ + man/lowdown_node_free.3.html \ + man/lowdown_nroff_free.3.html \ + man/lowdown_nroff_new.3.html \ + man/lowdown_nroff_rndr.3.html \ + man/lowdown_odt_free.3.html \ + man/lowdown_odt_new.3.html \ + man/lowdown_odt_rndr.3.html \ + man/lowdown_term_free.3.html \ + man/lowdown_term_new.3.html \ + man/lowdown_term_rndr.3.html \ + man/lowdown_tree_rndr.3.html +SOURCES = autolink.c \ + buffer.c \ + compats.c \ + diff.c \ + document.c \ + entity.c \ + gemini.c \ + html.c \ + html_escape.c \ + latex.c \ + libdiff.c \ + library.c \ + main.c \ + nroff.c \ + odt.c \ + smartypants.c \ + term.c \ + tests.c \ + tree.c \ + util.c +HEADERS = extern.h \ + libdiff.h \ + lowdown.h \ + term.h +PDFS = diff.pdf \ + diff.diff.pdf \ + index.latex.pdf \ + index.mandoc.pdf \ + index.nroff.pdf +MDS = index.md README.md +CSSS = diff.css template.css +JSS = diff.js +IMAGES = screen-mandoc.png \ + screen-groff.png \ + screen-term.png +THUMBS = screen-mandoc.thumb.jpg \ + screen-groff.thumb.jpg \ + screen-term.thumb.jpg +VALGRINDS != for f in `find regress -name \*.md` ; do echo `dirname $$f`/`basename $$f .md`.valgrind ; done +VALGRINDDIFFS != for f in `find regress/diff -name \*.old.md` ; do echo `dirname $$f`/`basename $$f .old.md`.diff-valgrind ; done +CFLAGS += -fPIC + +# Only for MarkdownTestv1.0.3 in regress/original. 
+ +REGRESS_ARGS = "--out-no-smarty" +REGRESS_ARGS += "--parse-no-img-ext" +REGRESS_ARGS += "--parse-no-metadata" +REGRESS_ARGS += "--html-no-head-ids" +REGRESS_ARGS += "--html-no-skiphtml" +REGRESS_ARGS += "--html-no-escapehtml" +REGRESS_ARGS += "--html-no-owasp" +REGRESS_ARGS += "--html-no-num-ent" +REGRESS_ARGS += "--parse-no-autolink" +REGRESS_ARGS += "--parse-no-cmark" +REGRESS_ARGS += "--parse-no-deflists" + +VALGRIND_ARGS = -q --leak-check=full --leak-resolution=high --show-reachable=yes + +all: bins lowdown.pc liblowdown.so +bins: lowdown lowdown-diff + +valgrind: $(VALGRINDS) $(VALGRINDDIFFS) + @for f in $(VALGRINDS) ; do \ + if [ -s $$f ]; then \ + echo `dirname $$f`/`basename $$f .valgrind`.md ; \ + cat $$f ; \ + fi ; \ + done + @for f in $(VALGRINDDIFFS) ; do \ + if [ -s $$f ]; then \ + echo `dirname $$f`/`basename $$f .diff-valgrind`.old.md ; \ + cat $$f ; \ + fi ; \ + done + +$(VALGRINDS) $(VALGRINDDIFFS): bins + +.old.md.diff-valgrind: + @rm -f $@ + valgrind $(VALGRIND_ARGS) ./lowdown-diff -s -tfodt $< `dirname $<`/`basename $< .old.md`.new.md >/dev/null 2>>$@ + valgrind $(VALGRIND_ARGS) ./lowdown-diff -s -thtml $< `dirname $<`/`basename $< .old.md`.new.md >/dev/null 2>>$@ + valgrind $(VALGRIND_ARGS) ./lowdown-diff -s -tms $< `dirname $<`/`basename $< .old.md`.new.md >/dev/null 2>>$@ + valgrind $(VALGRIND_ARGS) ./lowdown-diff -s -tman $< `dirname $<`/`basename $< .old.md`.new.md >/dev/null 2>>$@ + valgrind $(VALGRIND_ARGS) ./lowdown-diff -s -tterm $< `dirname $<`/`basename $< .old.md`.new.md >/dev/null 2>>$@ + valgrind $(VALGRIND_ARGS) ./lowdown-diff -s -tgemini $< `dirname $<`/`basename $< .old.md`.new.md >/dev/null 2>>$@ + +.md.valgrind: + @rm -f $@ + valgrind $(VALGRIND_ARGS) ./lowdown -s -tfodt $< >/dev/null 2>>$@ + valgrind $(VALGRIND_ARGS) ./lowdown -s -thtml $< >/dev/null 2>>$@ + valgrind $(VALGRIND_ARGS) ./lowdown -s -tms $< >/dev/null 2>>$@ + valgrind $(VALGRIND_ARGS) ./lowdown -s -tman $< >/dev/null 2>>$@ + valgrind $(VALGRIND_ARGS) 
./lowdown -s -tterm $< >/dev/null 2>>$@ + valgrind $(VALGRIND_ARGS) ./lowdown -s -tgemini $< >/dev/null 2>>$@ + valgrind $(VALGRIND_ARGS) ./lowdown -s -tlatex $< >/dev/null 2>>$@ + +www: all $(HTMLS) $(PDFS) $(THUMBS) lowdown.tar.gz lowdown.tar.gz.sha512 + +installwww: www + mkdir -p $(WWWDIR)/snapshots + $(INSTALL) -m 0444 $(THUMBS) $(IMAGES) $(MDS) $(HTMLS) $(CSSS) $(JSS) $(PDFS) $(WWWDIR) + $(INSTALL) -m 0444 lowdown.tar.gz $(WWWDIR)/snapshots/lowdown-$(VERSION).tar.gz + $(INSTALL) -m 0444 lowdown.tar.gz.sha512 $(WWWDIR)/snapshots/lowdown-$(VERSION).tar.gz.sha512 + $(INSTALL) -m 0444 lowdown.tar.gz $(WWWDIR)/snapshots + $(INSTALL) -m 0444 lowdown.tar.gz.sha512 $(WWWDIR)/snapshots + +lowdown: liblowdown.a main.o + $(CC) -o $@ main.o liblowdown.a $(LDFLAGS) $(LDADD_MD5) -lm + +lowdown-diff: lowdown + ln -f lowdown lowdown-diff + +liblowdown.a: $(OBJS) $(COMPAT_OBJS) + $(AR) rs $@ $(OBJS) $(COMPAT_OBJS) + +liblowdown.so: $(OBJS) $(COMPAT_OBJS) + $(CC) -shared -o $@.$(LIBVER) $(OBJS) $(COMPAT_OBJS) $(LDFLAGS) $(LDADD_MD5) -Wl,-soname,$@.$(LIBVER) + ln -sf $@.$(LIBVER) $@ + +install: bins + mkdir -p $(DESTDIR)$(BINDIR) + mkdir -p $(DESTDIR)$(MANDIR)/man1 + mkdir -p $(DESTDIR)$(MANDIR)/man5 + mkdir -p $(DESTDIR)$(SHAREDIR)/lowdown/odt + $(INSTALL_DATA) share/odt/styles.xml $(DESTDIR)$(SHAREDIR)/lowdown/odt + $(INSTALL_PROGRAM) lowdown $(DESTDIR)$(BINDIR) + $(INSTALL_PROGRAM) lowdown-diff $(DESTDIR)$(BINDIR) + for f in $(MAN1S) $(MAN5S) ; do \ + name=`basename $$f .html` ; \ + section=$${name##*.} ; \ + $(INSTALL_MAN) man/$$name $(DESTDIR)$(MANDIR)/man$$section ; \ + done + +install_lib_common: lowdown.pc + mkdir -p $(DESTDIR)$(MANDIR)/man3 + mkdir -p $(DESTDIR)$(LIBDIR)/pkgconfig + mkdir -p $(DESTDIR)$(INCLUDEDIR) + $(INSTALL_DATA) lowdown.pc $(DESTDIR)$(LIBDIR)/pkgconfig + $(INSTALL_DATA) lowdown.h $(DESTDIR)$(INCLUDEDIR) + for f in $(MAN3S) ; do \ + name=`basename $$f .html` ; \ + section=$${name##*.} ; \ + $(INSTALL_MAN) man/$$name $(DESTDIR)$(MANDIR)/man$$section 
; \ + done + +install_shared: liblowdown.so install_lib_common + $(INSTALL_LIB) liblowdown.so.$(LIBVER) $(DESTDIR)$(LIBDIR) + +install_static: liblowdown.a install_lib_common + $(INSTALL_LIB) liblowdown.a $(DESTDIR)$(LIBDIR) + +install_libs: install_shared install_static + +distcheck: lowdown.tar.gz.sha512 + mandoc -Tlint -Werror man/*.[135] + newest=`grep "<h1>" versions.xml | tail -1 | sed 's![ ]*!!g'` ; \ + [ "$$newest" = "<h1>$(VERSION)</h1>" ] || \ + { echo "Version $(VERSION) not newest in versions.xml" 1>&2 ; exit 1 ; } + [ "`openssl dgst -sha512 -hex lowdown.tar.gz`" = "`cat lowdown.tar.gz.sha512`" ] || \ + { echo "Checksum does not match." 1>&2 ; exit 1 ; } + rm -rf .distcheck + mkdir -p .distcheck + ( cd .distcheck && tar -zvxpf ../lowdown.tar.gz ) + ( cd .distcheck/lowdown-$(VERSION) && ./configure PREFIX=prefix ) + ( cd .distcheck/lowdown-$(VERSION) && $(MAKE) ) + ( cd .distcheck/lowdown-$(VERSION) && $(MAKE) regress ) + ( cd .distcheck/lowdown-$(VERSION) && $(MAKE) install ) + rm -rf .distcheck + +$(PDFS) index.xml README.xml: lowdown + +index.html README.html: template.xml + +.md.pdf: + ./lowdown --nroff-no-numbered -s -tms $< | \ + pdfroff -i -mspdf -t -k > $@ + +index.latex.pdf: index.md $(THUMBS) + ./lowdown -s -tlatex index.md >index.latex.latex + pdflatex index.latex.latex + pdflatex index.latex.latex + +index.mandoc.pdf: index.md + ./lowdown --nroff-no-numbered -s -tman index.md | \ + mandoc -Tpdf > $@ + +index.nroff.pdf: index.md + ./lowdown --nroff-no-numbered -s -tms index.md | \ + pdfroff -i -mspdf -t -k > $@ + +.xml.html: + sblg -t template.xml -s date -o $@ -C $< $< versions.xml + +archive.html: archive.xml versions.xml + sblg -t archive.xml -s date -o $@ versions.xml + +atom.xml: atom-template.xml versions.xml + sblg -a -t atom-template.xml -s date -o $@ versions.xml + +diff.html: diff.md lowdown + ./lowdown -s diff.md >$@ + +diff.diff.html: diff.md diff.old.md lowdown-diff + ./lowdown-diff -s diff.old.md diff.md >$@ + +diff.diff.pdf: 
diff.md diff.old.md lowdown-diff + ./lowdown-diff --nroff-no-numbered -s -tms diff.old.md diff.md | \ + pdfroff -i -mspdf -t -k > $@ + +$(HTMLS): versions.xml lowdown + +.md.xml: + ( echo "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" ; \ + echo "<article data-sblg-article=\"1\">" ; \ + ./lowdown $< ; \ + echo "</article>" ; ) >$@ + +.1.1.html .3.3.html .5.5.html: + mandoc -Thtml -Ostyle=https://bsd.lv/css/mandoc.css $< >$@ + +lowdown.tar.gz.sha512: lowdown.tar.gz + openssl dgst -sha512 -hex lowdown.tar.gz >$@ + +lowdown.tar.gz: + mkdir -p .dist/lowdown-$(VERSION)/ + mkdir -p .dist/lowdown-$(VERSION)/man + mkdir -p .dist/lowdown-$(VERSION)/share/odt + mkdir -p .dist/lowdown-$(VERSION)/regress/original + mkdir -p .dist/lowdown-$(VERSION)/regress/standalone + mkdir -p .dist/lowdown-$(VERSION)/regress/metadata + mkdir -p .dist/lowdown-$(VERSION)/regress/diff + $(INSTALL) -m 0644 $(HEADERS) .dist/lowdown-$(VERSION) + $(INSTALL) -m 0644 $(SOURCES) .dist/lowdown-$(VERSION) + $(INSTALL) -m 0644 share/odt/* .dist/lowdown-$(VERSION)/share/odt + $(INSTALL) -m 0644 lowdown.in.pc Makefile LICENSE.md .dist/lowdown-$(VERSION) + $(INSTALL) -m 0644 man/*.1 man/*.3 man/*.5 .dist/lowdown-$(VERSION)/man + $(INSTALL) -m 0755 configure .dist/lowdown-$(VERSION) + $(INSTALL) -m 644 regress/original/* .dist/lowdown-$(VERSION)/regress/original + $(INSTALL) -m 644 regress/*.* .dist/lowdown-$(VERSION)/regress + $(INSTALL) -m 644 regress/standalone/* .dist/lowdown-$(VERSION)/regress/standalone + $(INSTALL) -m 644 regress/metadata/* .dist/lowdown-$(VERSION)/regress/metadata + $(INSTALL) -m 644 regress/diff/* .dist/lowdown-$(VERSION)/regress/diff + ( cd .dist/ && tar zcf ../$@ lowdown-$(VERSION) ) + rm -rf .dist/ + +$(OBJS) $(COMPAT_OBJS) main.o: config.h + +$(OBJS): extern.h lowdown.h + +term.o: term.h + +main.o: lowdown.h + +clean: + rm -f $(OBJS) $(COMPAT_OBJS) main.o + rm -f lowdown lowdown-diff liblowdown.a liblowdown.so liblowdown.so.$(LIBVER) lowdown.pc + rm -f index.xml diff.xml 
diff.diff.xml README.xml lowdown.tar.gz.sha512 lowdown.tar.gz + rm -f $(PDFS) $(HTMLS) $(THUMBS) $(VALGRINDS) $(VALGRINDDIFFS) + rm -f index.latex.aux index.latex.latex index.latex.log index.latex.out + +distclean: clean + rm -f Makefile.configure config.h config.log config.h.old config.log.old + +regress: bins + tmp1=`mktemp` ; \ + tmp2=`mktemp` ; \ + for f in regress/original/*.text ; do \ + echo "$$f" ; \ + want="`dirname \"$$f\"`/`basename \"$$f\" .text`.html" ; \ + sed -e '/^[ ]*$$/d' "$$want" > $$tmp1 ; \ + ./lowdown $(REGRESS_ARGS) "$$f" | \ + sed -e 's! ! !g' | sed -e '/^[ ]*$$/d' > $$tmp2 ; \ + diff -uw $$tmp1 $$tmp2 ; \ + ./lowdown -s -thtml "$$f" >/dev/null 2>&1 ; \ + ./lowdown -s -tlatex "$$f" >/dev/null 2>&1 ; \ + ./lowdown -s -tman "$$f" >/dev/null 2>&1 ; \ + ./lowdown -s -tms "$$f" >/dev/null 2>&1 ; \ + ./lowdown -s -tfodt "$$f" >/dev/null 2>&1 ; \ + ./lowdown -s -tterm "$$f" >/dev/null 2>&1 ; \ + ./lowdown -s -ttree "$$f" >/dev/null 2>&1 ; \ + done ; \ + for f in regress/*.md ; do \ + echo "$$f" ; \ + if [ -f regress/`basename $$f .md`.html ]; then \ + ./lowdown -thtml $$f >$$tmp1 2>&1 ; \ + diff -uw regress/`basename $$f .md`.html $$tmp1 ; \ + fi ; \ + if [ -f regress/`basename $$f .md`.fodt ]; then \ + ./lowdown -tfodt $$f >$$tmp1 2>&1 ; \ + diff -uw regress/`basename $$f .md`.fodt $$tmp1 ; \ + fi ; \ + if [ -f regress/`basename $$f .md`.term ]; then \ + ./lowdown -tterm $$f >$$tmp1 2>&1 ; \ + diff -uw regress/`basename $$f .md`.term $$tmp1 ; \ + fi ; \ + if [ -f regress/`basename $$f .md`.latex ]; then \ + ./lowdown -tlatex $$f >$$tmp1 2>&1 ; \ + diff -uw regress/`basename $$f .md`.latex $$tmp1 ; \ + fi ; \ + if [ -f regress/`basename $$f .md`.ms ]; then \ + ./lowdown -tms $$f >$$tmp1 2>&1 ; \ + diff -uw regress/`basename $$f .md`.ms $$tmp1 ; \ + fi ; \ + if [ -f regress/`basename $$f .md`.man ]; then \ + ./lowdown -tman $$f >$$tmp1 2>&1 ; \ + diff -uw regress/`basename $$f .md`.man $$tmp1 ; \ + fi ; \ + if [ -f regress/`basename $$f .md`.gemini 
]; then \ + ./lowdown -tgemini $$f >$$tmp1 2>&1 ; \ + diff -uw regress/`basename $$f .md`.gemini $$tmp1 ; \ + fi ; \ + done ; \ + for f in regress/standalone/*.md ; do \ + echo "$$f" ; \ + if [ -f regress/standalone/`basename $$f .md`.html ]; then \ + ./lowdown -s -thtml $$f >$$tmp1 2>&1 ; \ + diff -uw regress/standalone/`basename $$f .md`.html $$tmp1 ; \ + fi ; \ + if [ -f regress/standalone/`basename $$f .md`.fodt ]; then \ + ./lowdown -s -tfodt $$f >$$tmp1 2>&1 ; \ + diff -uw regress/standalone/`basename $$f .md`.fodt $$tmp1 ; \ + fi ; \ + if [ -f regress/standalone/`basename $$f .md`.latex ]; then \ + ./lowdown -s -tlatex $$f >$$tmp1 2>&1 ; \ + diff -uw regress/standalone/`basename $$f .md`.latex $$tmp1 ; \ + fi ; \ + if [ -f regress/standalone/`basename $$f .md`.ms ]; then \ + ./lowdown -s -tms $$f >$$tmp1 2>&1 ; \ + diff -uw regress/standalone/`basename $$f .md`.ms $$tmp1 ; \ + fi ; \ + if [ -f regress/standalone/`basename $$f .md`.man ]; then \ + ./lowdown -s -tman $$f >$$tmp1 2>&1 ; \ + diff -uw regress/standalone/`basename $$f .md`.man $$tmp1 ; \ + fi ; \ + if [ -f regress/standalone/`basename $$f .md`.gemini ]; then \ + ./lowdown -s -tgemini $$f >$$tmp1 2>&1 ; \ + diff -uw regress/standalone/`basename $$f .md`.gemini $$tmp1 ; \ + fi ; \ + done ; \ + for f in regress/metadata/*.md ; do \ + echo "$$f" ; \ + if [ -f regress/metadata/`basename $$f .md`.txt ]; then \ + ./lowdown -X test $$f >$$tmp1 2>&1 ; \ + diff -uw regress/metadata/`basename $$f .md`.txt $$tmp1 ; \ + fi ; \ + done ; \ + for f in regress/diff/*.old.md ; do \ + bf=`dirname $$f`/`basename $$f .old.md` ; \ + echo "$$f -> $$bf.new.md" ; \ + if [ -f $$bf.html ]; then \ + ./lowdown-diff -s -thtml $$f $$bf.new.md >$$tmp1 2>&1 ; \ + diff -uw $$bf.html $$tmp1 ; \ + fi ; \ + if [ -f $$bf.ms ]; then \ + ./lowdown-diff -s -tms $$f $$bf.new.md >$$tmp1 2>&1 ; \ + diff -uw $$bf.ms $$tmp1 ; \ + fi ; \ + if [ -f $$bf.man ]; then \ + ./lowdown-diff -s -tman $$f $$bf.new.md >$$tmp1 2>&1 ; \ + diff -uw $$bf.man 
$$tmp1 ; \ + fi ; \ + if [ -f $$bf.latex ]; then \ + ./lowdown-diff -s -tlatex $$f $$bf.new.md >$$tmp1 2>&1 ; \ + diff -uw $$bf.latex $$tmp1 ; \ + fi ; \ + done ; \ + rm -f $$tmp1 ; \ + rm -f $$tmp2 + +.png.thumb.jpg: + convert $< -thumbnail 350 -quality 50 $@ + +.in.pc.pc: + sed -e "s!@PREFIX@!$(PREFIX)!g" \ + -e "s!@LIBDIR@!$(LIBDIR)!g" \ + -e "s!@INCLUDEDIR@!$(INCLUDEDIR)!g" \ + -e "s!@VERSION@!$(VERSION)!g" $< >$@ diff --git a/Makefile.configure b/Makefile.configure @@ -0,0 +1,24 @@ +CC = cc +CFLAGS = -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter +CPPFLAGS = +LDADD = +LDADD_B64_NTOP = -lresolv +LDADD_CRYPT = -lcrypt +LDADD_LIB_SOCKET = +LDADD_MD5 = +LDADD_SHA2 = +LDADD_STATIC = -static +LDFLAGS = +STATIC = +PREFIX = /usr/local +BINDIR = /usr/local/bin +SHAREDIR = /usr/local/share +SBINDIR = /usr/local/sbin +INCLUDEDIR = /usr/local/include +LIBDIR = /usr/local/lib +MANDIR = /usr/local/man +INSTALL = install +INSTALL_PROGRAM = install -m 0555 +INSTALL_LIB = install -m 0444 +INSTALL_MAN = install -m 0444 +INSTALL_DATA = install -m 0444 diff --git a/autolink.c b/autolink.c @@ -0,0 +1,327 @@ +/* $Id$ */ +/* + * Copyright (c) 2008, Natacha Porté + * Copyright (c) 2011, Vicent Martí + * Copyright (c) 2014, Xavier Mendez, Devin Torres and the Hoedown authors + * Copyright (c) 2016--2017, 2021 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <ctype.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "lowdown.h" +#include "extern.h" + +#define VALID_URIS_SZ 6 + +/* + * List of URI prefixes that are considered "valid". + */ +static const char *const valid_uris[VALID_URIS_SZ] = { + "http://", + "https://", + "/", + "#", + "ftp://", + "mailto:" +}; + +/* + * Verify that a URL has a safe protocol. + */ +static int +halink_is_safe(const char *data, size_t size) +{ + size_t i, len; + + for (i = 0; i < VALID_URIS_SZ; ++i) { + len = strlen(valid_uris[i]); + if (size > len && + strncasecmp(data, valid_uris[i], len) == 0 && + isalnum((unsigned char)data[len])) + return 1; + } + + return 0; +} + +/* + * Find the end of a hyperlink. + * Returns the position of the end. 
+ */ +static size_t +autolink_delim(char *data, + size_t link_end, size_t max_rewind, size_t size) +{ + char cclose, copen = 0; + size_t closing, opening, i, new_end; + + for (i = 0; i < link_end; ++i) + if (data[i] == '<') { + link_end = i; + break; + } + + while (link_end > 0) + if (strchr("?!.,:", data[link_end - 1]) != NULL) + link_end--; + else if (data[link_end - 1] == ';') { + new_end = link_end - 2; + + while (new_end > 0 && + isalpha((unsigned char)data[new_end])) + new_end--; + + if (new_end < link_end - 2 && + data[new_end] == '&') + link_end = new_end; + else + link_end--; + } else + break; + + if (link_end == 0) + return 0; + + cclose = data[link_end - 1]; + + switch (cclose) { + case '"': + copen = '"'; + break; + case '\'': + copen = '\''; + break; + case ')': + copen = '('; + break; + case ']': + copen = '['; + break; + case '}': + copen = '{'; + break; + } + + if (copen != 0) { + closing = opening = i = 0; + + /* + * Try to close the final punctuation sign in this same + * line; if we managed to close it outside of the URL, + * that means that it's not part of the URL. If it + * closes inside the URL, that means it is part of the + * URL. + * + * Examples: + * + * foo http://www.pokemon.com/Pikachu_(Electric) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo (http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric)) + * + * (foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => foo http://www.pokemon.com/Pikachu_(Electric) + */ + + while (i < link_end) { + if (data[i] == copen) + opening++; + else if (data[i] == cclose) + closing++; + i++; + } + + if (closing != opening) + link_end--; + } + + return link_end; +} + +/* + * To make sure that a domain is well-formed. + * Returns zero on failure, non-zero on success. + * XXX: this function needs to be replaced. 
+ */ +static size_t +check_domain(char *data, size_t size) +{ + size_t i, np = 0; + + if (!isalnum((unsigned char)data[0])) + return 0; + + for (i = 1; i < size - 1; ++i) { + if (strchr(".:", data[i]) != NULL) + np++; + else if (!isalnum((unsigned char)data[i]) && + data[i] != '-') + break; + } + + /* A valid domain needs to have at least a dot. */ + + return np ? i : 0; +} + +/* + * Search for the next www link in data. + */ +ssize_t +halink_www(size_t *rewind_p, struct lowdown_buf *link, + char *data, size_t max_rewind, size_t size) +{ + size_t link_end; + + if (max_rewind > 0 && + !ispunct((unsigned char)data[-1]) && + !isspace((unsigned char)data[-1])) + return 0; + + if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0) + return 0; + + link_end = check_domain(data, size); + + if (link_end == 0) + return 0; + + while (link_end < size && + !isspace((unsigned char)data[link_end])) + link_end++; + + link_end = autolink_delim(data, link_end, max_rewind, size); + + if (link_end == 0) + return 0; + + if (!hbuf_put(link, data, link_end)) + return -1; + *rewind_p = 0; + + return link_end; +} + +/* + * Search for the next email in data. + */ +ssize_t +halink_email(size_t *rewind_p, struct lowdown_buf *link, + char *data, size_t max_rewind, size_t size) +{ + size_t link_end, rewind; + int nb = 0, np = 0; + char c; + + for (rewind = 0; rewind < max_rewind; ++rewind) { + c = data[-1 - rewind]; + + if (isalnum((unsigned char)c)) + continue; + + if (strchr(".+-_", c) != NULL) + continue; + + break; + } + + if (rewind == 0) + return 0; + + for (link_end = 0; link_end < size; ++link_end) { + c = data[link_end]; + + if (isalnum(c)) + continue; + + if (c == '@') + nb++; + else if (c == '.' 
&& link_end < size - 1) + np++; + else if (c != '-' && c != '_') + break; + } + + if (link_end < 2 || nb != 1 || np == 0 || + !isalpha((unsigned char)data[link_end - 1])) + return 0; + + link_end = autolink_delim(data, link_end, max_rewind, size); + + if (link_end == 0) + return 0; + + if (!hbuf_put(link, data - rewind, link_end + rewind)) + return -1; + *rewind_p = rewind; + + return link_end; +} + +/* + * Search for the next URL in data. + */ +ssize_t +halink_url(size_t *rewind_p, struct lowdown_buf *link, + char *data, size_t max_rewind, size_t size) +{ + size_t link_end, rewind = 0, domain_len; + + if (size < 4 || data[1] != '/' || data[2] != '/') + return 0; + + while (rewind < max_rewind && + isalpha((unsigned char)data[-1 - rewind])) + rewind++; + + if (!halink_is_safe(data - rewind, size + rewind)) + return 0; + + link_end = strlen("://"); + + domain_len = check_domain(data + link_end, size - link_end); + + if (domain_len == 0) + return 0; + + link_end += domain_len; + while (link_end < size && + !isspace((unsigned char)data[link_end])) + link_end++; + + link_end = autolink_delim(data, link_end, max_rewind, size); + + if (link_end == 0) + return 0; + + if (!hbuf_put(link, data - rewind, link_end + rewind)) + return -1; + *rewind_p = rewind; + + return link_end; +} diff --git a/autolink.o b/autolink.o Binary files differ. diff --git a/buffer.c b/buffer.c @@ -0,0 +1,493 @@ +/* $Id$ */ +/* + * Copyright (c) 2008, Natacha Porté + * Copyright (c) 2011, Vicent Martí + * Copyright (c) 2014, Xavier Mendez, Devin Torres and the Hoedown authors + * Copyright (c) 2016, 2021, Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "lowdown.h" +#include "extern.h" + +static void +hbuf_init(struct lowdown_buf *buf, size_t unit, int buffer_free) +{ + + assert(buf != NULL); + buf->data = NULL; + buf->size = buf->maxsize = 0; + buf->unit = unit; + buf->buffer_free = buffer_free; +} + +/* + * Return a buffer that deep-copies "buf". Returns the pointer of NULL + * on memory allocation failure. + */ +struct lowdown_buf * +hbuf_dup(const struct lowdown_buf *buf) +{ + struct lowdown_buf *v; + + v = calloc(1, sizeof(struct lowdown_buf)); + if (v != NULL && hbuf_clone(buf, v)) + return v; + free(v); + return NULL; +} + +/* + * Deep-copies "buf" into "v", wiping its contents. Returns TRUE on + * success or FALSE on memory allocation failure. 
+ */ +int +hbuf_clone(const struct lowdown_buf *buf, struct lowdown_buf *v) +{ + + *v = *buf; + if (buf->size) { + if ((v->data = malloc(buf->size)) == NULL) + return 0; + memcpy(v->data, buf->data, buf->size); + } else + v->data = NULL; + + return 1; +} + +void +hbuf_truncate(struct lowdown_buf *buf) +{ + + buf->size = 0; +} + +int +hbuf_streq(const struct lowdown_buf *buf1, const char *buf2) +{ + size_t sz; + + sz = strlen(buf2); + return buf1->size == sz && + memcmp(buf1->data, buf2, sz) == 0; +} + +int +hbuf_strprefix(const struct lowdown_buf *buf1, const char *buf2) +{ + size_t sz; + + sz = strlen(buf2); + return buf1->size >= sz && + memcmp(buf1->data, buf2, sz) == 0; +} + +int +hbuf_eq(const struct lowdown_buf *buf1, const struct lowdown_buf *buf2) +{ + + return buf1->size == buf2->size && + memcmp(buf1->data, buf2->data, buf1->size) == 0; +} + +struct lowdown_buf * +hbuf_new(size_t unit) +{ + struct lowdown_buf *ret; + + if ((ret = malloc(sizeof(struct lowdown_buf))) == NULL) + return NULL; + hbuf_init(ret, unit, 1); + return ret; +} + +struct lowdown_buf * +lowdown_buf_new(size_t unit) +{ + + return hbuf_new(unit); +} + +void +hbuf_free(struct lowdown_buf *buf) +{ + + if (buf == NULL) + return; + free(buf->data); + if (buf->buffer_free) + free(buf); +} + +void +lowdown_buf_free(struct lowdown_buf *buf) +{ + + hbuf_free(buf); +} + +int +hbuf_grow(struct lowdown_buf *buf, size_t neosz) +{ + size_t neoasz; + void *pp; + + if (buf->maxsize >= neosz) + return 1; + + neoasz = (neosz/buf->unit + (neosz%buf->unit > 0)) * buf->unit; + + if ((pp = realloc(buf->data, neoasz)) == NULL) + return 0; + buf->data = pp; + buf->maxsize = neoasz; + return 1; +} + +int +hbuf_putb(struct lowdown_buf *buf, const struct lowdown_buf *b) +{ + + assert(buf != NULL && b != NULL); + return hbuf_put(buf, b->data, b->size); +} + +int +hbuf_put(struct lowdown_buf *buf, const char *data, size_t size) +{ + assert(buf != NULL && buf->unit); + + if (data == NULL || size == 0) + return 1; + 
+ if (buf->size + size > buf->maxsize && + !hbuf_grow(buf, buf->size + size)) + return 0; + + memcpy(buf->data + buf->size, data, size); + buf->size += size; + return 1; +} + +int +hbuf_puts(struct lowdown_buf *buf, const char *str) +{ + + assert(buf != NULL && str != NULL); + return hbuf_put(buf, str, strlen(str)); +} + +int +hbuf_putc(struct lowdown_buf *buf, char c) +{ + assert(buf && buf->unit); + + if (buf->size >= buf->maxsize && + !hbuf_grow(buf, buf->size + 1)) + return 0; + + buf->data[buf->size] = c; + buf->size += 1; + return 1; +} + +int +hbuf_putf(struct lowdown_buf *buf, FILE *file) +{ + + assert(buf != NULL && buf->unit); + while (!(feof(file) || ferror(file))) { + if (!hbuf_grow(buf, buf->size + buf->unit)) + return 0; + buf->size += fread(buf->data + buf->size, + 1, buf->unit, file); + } + + return ferror(file) == 0; +} + +int +hbuf_printf(struct lowdown_buf *buf, const char *fmt, ...) +{ + va_list ap; + int n; + + assert(buf != NULL && buf->unit); + + if (buf->size >= buf->maxsize && + !hbuf_grow(buf, buf->size + 1)) + return 0; + + va_start(ap, fmt); + n = vsnprintf(buf->data + buf->size, + buf->maxsize - buf->size, fmt, ap); + va_end(ap); + + if (n < 0) + return 0; + + if ((size_t)n >= buf->maxsize - buf->size) { + if (!hbuf_grow(buf, buf->size + n + 1)) + return 0; + va_start(ap, fmt); + n = vsnprintf(buf->data + buf->size, + buf->maxsize - buf->size, fmt, ap); + va_end(ap); + } + + if (n < 0) + return 0; + + buf->size += n; + return 1; +} + +/* + * Link shortener. + * This only shows the domain name and last path/filename. + * It uses the following algorithm: + * (1) strip schema (if none, print in full) + * (2) print domain following + * (3) if no path, return + * (4) if path, look for final path component + * (5) print final path component with /.../ if shortened + * Return zero on failure (memory), non-zero on success. 
+ */ +int +hbuf_shortlink(struct lowdown_buf *out, const struct lowdown_buf *link) +{ + size_t start = 0, sz; + const char *cp, *rcp; + + /* + * Skip the leading protocol. + * If we don't find a protocol, leave it be. + */ + + if (link->size > 7 && strncmp(link->data, "http://", 7) == 0) + start = 7; + else if (link->size > 8 && strncmp(link->data, "https://", 8) == 0) + start = 8; + else if (link->size > 7 && strncmp(link->data, "file://", 7) == 0) + start = 7; + else if (link->size > 7 && strncmp(link->data, "mailto:", 7) == 0) + start = 7; + else if (link->size > 6 && strncmp(link->data, "ftp://", 6) == 0) + start = 6; + + if (start == 0) + return hbuf_putb(out, link); + + sz = link->size; + if (link->data[link->size - 1] == '/') + sz--; + + /* + * Look for the end of the domain name. + * If we don't have an end, then print the whole thing. + */ + + cp = memchr(link->data + start, '/', sz - start); + if (cp == NULL) + return hbuf_put(out, link->data + start, sz - start); + + if (!hbuf_put(out, + link->data + start, cp - (link->data + start))) + return 0; + + /* + * Look for the filename. + * If it's the same as the end of the domain, then print the + * whole thing. + * Otherwise, use a "..." between. + */ + + rcp = memrchr(link->data + start, '/', sz - start); + + if (rcp == cp) + return hbuf_put(out, cp, sz - (cp - link->data)); + + return HBUF_PUTSL(out, "/...") && + hbuf_put(out, rcp, sz - (rcp - link->data)); +} + +/** + * Convert the buffer into an identifier. These are used in various + * front-ends for linking to a section identifier. Use pandoc's format + * for these identifiers: lowercase, no specials except some, and + * collapsing whitespace into a dash. 
+ */ +struct lowdown_buf * +hbuf_dupname(const struct lowdown_buf *buf) +{ + struct lowdown_buf *nbuf; + size_t i; + int last_space = 1; + char c; + + if ((nbuf = hbuf_new(32)) == NULL) + goto err; + + for (i = 0; i < buf->size; i++) { + if (isalnum((unsigned char)buf->data[i]) || + buf->data[i] == '-' || + buf->data[i] == '.' || + buf->data[i] == '_') { + c = tolower((unsigned char)buf->data[i]); + if (!hbuf_putc(nbuf, c)) + goto err; + last_space = 0; + } else if (isspace((unsigned char)buf->data[i])) { + if (!last_space) { + if (!HBUF_PUTSL(nbuf, "-")) + goto err; + last_space = 1; + } + } + } + + if (nbuf->size == 0 && !HBUF_PUTSL(nbuf, "section")) + goto err; + + return nbuf; +err: + hbuf_free(nbuf); + return NULL; +} + +/* + * Format the raw string used for creating header identifiers. This + * recursively drops through the header contents extracting text along + * the way. + */ +int +hbuf_extract_text(struct lowdown_buf *ob, const struct lowdown_node *n) +{ + const struct lowdown_node *child; + + if (n->type == LOWDOWN_NORMAL_TEXT) + if (!hbuf_putb(ob, &n->rndr_normal_text.text)) + return 0; + if (n->type == LOWDOWN_IMAGE) + if (!hbuf_putb(ob, &n->rndr_image.alt)) + return 0; + if (n->type == LOWDOWN_LINK_AUTO) + if (!hbuf_putb(ob, &n->rndr_autolink.link)) + return 0; + TAILQ_FOREACH(child, &n->children, entries) + if (!hbuf_extract_text(ob, child)) + return 0; + + return 1; +} + +/* + * Return a unique header identifier for "header". Return zero on + * failure (memory), non-zero on success. The new value is appended to + * the queue, which must be freed with hentryq_clear at some point. 
+ */ +const struct lowdown_buf * +hbuf_id(const struct lowdown_buf *header, const struct lowdown_node *n, + struct hentryq *q) +{ + struct lowdown_buf *buf = NULL, *nbuf = NULL; + const struct lowdown_node *child; + size_t count; + struct hentry *he = NULL, *entry; + + if (header == NULL) { + if ((nbuf = hbuf_new(32)) == NULL) + goto out; + TAILQ_FOREACH(child, &n->children, entries) + if (!hbuf_extract_text(nbuf, child)) + goto out; + if ((buf = hbuf_dupname(nbuf)) == NULL) + goto out; + hbuf_free(nbuf); + nbuf = NULL; + } else + if ((buf = hbuf_dupname(header)) == NULL) + goto out; + + TAILQ_FOREACH(entry, q, entries) + if (hbuf_eq(entry->buf, buf)) + break; + + if (entry == NULL) { + he = calloc(1, sizeof(struct hentry)); + if (he == NULL) + goto out; + TAILQ_INSERT_TAIL(q, he, entries); + he->buf = buf; + return buf; + } + + if ((nbuf = hbuf_new(32)) == NULL) + goto out; + + for (count = 1;; count++) { + hbuf_truncate(nbuf); + if (!hbuf_putb(nbuf, buf)) + goto out; + if (!hbuf_printf(nbuf, "-%zu", count)) + goto out; + TAILQ_FOREACH(entry, q, entries) + if (hbuf_eq(entry->buf, nbuf)) + break; + if (entry == NULL) { + he = calloc(1, sizeof(struct hentry)); + if (he == NULL) + goto out; + TAILQ_INSERT_TAIL(q, he, entries); + he->buf = nbuf; + hbuf_free(buf); + return nbuf; + } + } +out: + hbuf_free(buf); + hbuf_free(nbuf); + free(he); + return NULL; +} + +void +hentryq_clear(struct hentryq *q) +{ + struct hentry *he; + + if (q == NULL) + return; + + while ((he = TAILQ_FIRST(q)) != NULL) { + TAILQ_REMOVE(q, he, entries); + hbuf_free(he->buf); + free(he); + } +} + diff --git a/buffer.o b/buffer.o Binary files differ. diff --git a/compats.c b/compats.c @@ -0,0 +1,3906 @@ +#include "config.h" +#if !HAVE_ERR +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +void +vwarnx(const char *fmt, va_list ap) +{ + fprintf(stderr, "%s: ", getprogname()); + if (fmt != NULL) + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); +} + +void +vwarnc(int code, const char *fmt, va_list ap) +{ + fprintf(stderr, "%s: ", getprogname()); + if (fmt != NULL) { + vfprintf(stderr, fmt, ap); + fprintf(stderr, ": "); + } + fprintf(stderr, "%s\n", strerror(code)); +} + +void +vwarn(const char *fmt, va_list ap) +{ + int sverrno; + + sverrno = errno; + fprintf(stderr, "%s: ", getprogname()); + if (fmt != NULL) { + vfprintf(stderr, fmt, ap); + fprintf(stderr, ": "); + } + fprintf(stderr, "%s\n", strerror(sverrno)); +} + +void +verrc(int eval, int code, const char *fmt, va_list ap) +{ + fprintf(stderr, "%s: ", getprogname()); + if (fmt != NULL) { + vfprintf(stderr, fmt, ap); + fprintf(stderr, ": "); + } + fprintf(stderr, "%s\n", strerror(code)); + exit(eval); +} + +void +verrx(int eval, const char *fmt, va_list ap) +{ + fprintf(stderr, "%s: ", getprogname()); + if (fmt != NULL) + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + exit(eval); +} + +void +verr(int eval, const char *fmt, va_list ap) +{ + int sverrno; + + sverrno = errno; + fprintf(stderr, "%s: ", getprogname()); + if (fmt != NULL) { + vfprintf(stderr, fmt, ap); + fprintf(stderr, ": "); + } + fprintf(stderr, "%s\n", strerror(sverrno)); + exit(eval); +} + +void +err(int eval, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + verr(eval, fmt, ap); + va_end(ap); +} + +void +errc(int eval, int code, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + verrc(eval, code, fmt, ap); + va_end(ap); +} + +void +errx(int eval, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + verrx(eval, fmt, ap); + va_end(ap); +} + +void +warn(const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); +} + +void +warnc(int code, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vwarnc(code, fmt, ap); + va_end(ap); +} + +void +warnx(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vwarnx(fmt, ap); + va_end(ap); +} +#endif /* !HAVE_ERR */ +#if !HAVE_B64_NTOP +/* $OpenBSD$ */ + +/* + * Copyright (c) 1996 by Internet Software Consortium. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS + * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE + * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL + * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR + * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ + +/* + * Portions Copyright (c) 1995 by International Business Machines, Inc. + * + * International Business Machines, Inc. (hereinafter called IBM) grants + * permission under its copyrights to use, copy, modify, and distribute this + * Software with or without fee, provided that the above copyright notice and + * all paragraphs of this notice appear in all copies, and that the name of IBM + * not be used in connection with the marketing of any product incorporating + * the Software or modifications thereof, without specific, written prior + * permission. 
+ * + * To the extent it has a right to do so, IBM grants an immunity from suit + * under its patents, if any, for the use, sale or manufacture of products to + * the extent that such products are used for performing Domain Name System + * dynamic updates in TCP/IP networks by means of the Software. No immunity is + * granted for any product per se or for any other function of any product. + * + * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL, + * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN + * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES. + */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <arpa/nameser.h> + +#include <ctype.h> +#include <resolv.h> +#include <stdio.h> + +#include <stdlib.h> +#include <string.h> + +static const char b64_Base64[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const char b64_Pad64 = '='; + +/* (From RFC1521 and draft-ietf-dnssec-secext-03.txt) + The following encoding technique is taken from RFC 1521 by Borenstein + and Freed. It is reproduced here in a slightly edited form for + convenience. + + A 65-character subset of US-ASCII is used, enabling 6 bits to be + represented per printable character. (The extra 65th character, "=", + is used to signify a special processing function.) + + The encoding process represents 24-bit groups of input bits as output + strings of 4 encoded characters. Proceeding from left to right, a + 24-bit input group is formed by concatenating 3 8-bit input groups. + These 24 bits are then treated as 4 concatenated 6-bit groups, each + of which is translated into a single digit in the base64 alphabet. 
+ + Each 6-bit group is used as an index into an array of 64 printable + characters. The character referenced by the index is placed in the + output string. + + Table 1: The Base64 Alphabet + + Value Encoding Value Encoding Value Encoding Value Encoding + 0 A 17 R 34 i 51 z + 1 B 18 S 35 j 52 0 + 2 C 19 T 36 k 53 1 + 3 D 20 U 37 l 54 2 + 4 E 21 V 38 m 55 3 + 5 F 22 W 39 n 56 4 + 6 G 23 X 40 o 57 5 + 7 H 24 Y 41 p 58 6 + 8 I 25 Z 42 q 59 7 + 9 J 26 a 43 r 60 8 + 10 K 27 b 44 s 61 9 + 11 L 28 c 45 t 62 + + 12 M 29 d 46 u 63 / + 13 N 30 e 47 v + 14 O 31 f 48 w (pad) = + 15 P 32 g 49 x + 16 Q 33 h 50 y + + Special processing is performed if fewer than 24 bits are available + at the end of the data being encoded. A full encoding quantum is + always completed at the end of a quantity. When fewer than 24 input + bits are available in an input group, zero bits are added (on the + right) to form an integral number of 6-bit groups. Padding at the + end of the data is performed using the '=' character. + + Since all base64 input is an integral number of octets, only the + ------------------------------------------------- + following cases can arise: + + (1) the final quantum of encoding input is an integral + multiple of 24 bits; here, the final unit of encoded + output will be an integral multiple of 4 characters + with no "=" padding, + (2) the final quantum of encoding input is exactly 8 bits; + here, the final unit of encoded output will be two + characters followed by two "=" padding characters, or + (3) the final quantum of encoding input is exactly 16 bits; + here, the final unit of encoded output will be three + characters followed by one "=" padding character. 
+ */ + +int +b64_ntop(u_char const *src, size_t srclength, char *target, size_t targsize) +{ + size_t datalength = 0; + u_char input[3]; + u_char output[4]; + size_t i; + + while (2 < srclength) { + input[0] = *src++; + input[1] = *src++; + input[2] = *src++; + srclength -= 3; + + output[0] = input[0] >> 2; + output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4); + output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6); + output[3] = input[2] & 0x3f; + + if (datalength + 4 > targsize) + return (-1); + target[datalength++] = b64_Base64[output[0]]; + target[datalength++] = b64_Base64[output[1]]; + target[datalength++] = b64_Base64[output[2]]; + target[datalength++] = b64_Base64[output[3]]; + } + + /* Now we worry about padding. */ + if (0 != srclength) { + /* Get what's left. */ + input[0] = input[1] = input[2] = '\0'; + for (i = 0; i < srclength; i++) + input[i] = *src++; + + output[0] = input[0] >> 2; + output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4); + output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6); + + if (datalength + 4 > targsize) + return (-1); + target[datalength++] = b64_Base64[output[0]]; + target[datalength++] = b64_Base64[output[1]]; + if (srclength == 1) + target[datalength++] = b64_Pad64; + else + target[datalength++] = b64_Base64[output[2]]; + target[datalength++] = b64_Pad64; + } + if (datalength >= targsize) + return (-1); + target[datalength] = '\0'; /* Returned value doesn't count \0. */ + return (datalength); +} + +/* skips all whitespace anywhere. + converts characters, four at a time, starting at (or after) + src from base - 64 numbers into three 8 bit bytes in the target area. + it returns the number of data bytes stored at the target, or -1 on error. + */ + +int +b64_pton(char const *src, u_char *target, size_t targsize) +{ + int state, ch; + size_t tarindex; + u_char nextbyte; + char *pos; + + state = 0; + tarindex = 0; + + while ((ch = (unsigned char)*src++) != '\0') { + if (isspace(ch)) /* Skip whitespace anywhere. 
*/ + continue; + + if (ch == b64_Pad64) + break; + + pos = strchr(b64_Base64, ch); + if (pos == 0) /* A non-base64 character. */ + return (-1); + + switch (state) { + case 0: + if (target) { + if (tarindex >= targsize) + return (-1); + target[tarindex] = (pos - b64_Base64) << 2; + } + state = 1; + break; + case 1: + if (target) { + if (tarindex >= targsize) + return (-1); + target[tarindex] |= (pos - b64_Base64) >> 4; + nextbyte = ((pos - b64_Base64) & 0x0f) << 4; + if (tarindex + 1 < targsize) + target[tarindex+1] = nextbyte; + else if (nextbyte) + return (-1); + } + tarindex++; + state = 2; + break; + case 2: + if (target) { + if (tarindex >= targsize) + return (-1); + target[tarindex] |= (pos - b64_Base64) >> 2; + nextbyte = ((pos - b64_Base64) & 0x03) << 6; + if (tarindex + 1 < targsize) + target[tarindex+1] = nextbyte; + else if (nextbyte) + return (-1); + } + tarindex++; + state = 3; + break; + case 3: + if (target) { + if (tarindex >= targsize) + return (-1); + target[tarindex] |= (pos - b64_Base64); + } + tarindex++; + state = 0; + break; + } + } + + /* + * We are done decoding Base-64 chars. Let's see if we ended + * on a byte boundary, and/or with erroneous trailing characters. + */ + + if (ch == b64_Pad64) { /* We got a pad char. */ + ch = (unsigned char)*src++; /* Skip it, get next. */ + switch (state) { + case 0: /* Invalid = in first position */ + case 1: /* Invalid = in second position */ + return (-1); + + case 2: /* Valid, means one byte of info */ + /* Skip any number of spaces. */ + for (; ch != '\0'; ch = (unsigned char)*src++) + if (!isspace(ch)) + break; + /* Make sure there is another trailing = sign. */ + if (ch != b64_Pad64) + return (-1); + ch = (unsigned char)*src++; /* Skip the = */ + /* Fall through to "single trailing =" case. */ + /* FALLTHROUGH */ + + case 3: /* Valid, means two bytes of info */ + /* + * We know this char is an =. Is there anything but + * whitespace after it? 
+ */ + for (; ch != '\0'; ch = (unsigned char)*src++) + if (!isspace(ch)) + return (-1); + + /* + * Now make sure for cases 2 and 3 that the "extra" + * bits that slopped past the last full byte were + * zeros. If we don't check them, they become a + * subliminal channel. + */ + if (target && tarindex < targsize && + target[tarindex] != 0) + return (-1); + } + } else { + /* + * We ended by seeing the end of the string. Make sure we + * have no partial bytes lying around. + */ + if (state != 0) + return (-1); + } + + return (tarindex); +} +#endif /* !HAVE_B64_NTOP */ +#if !HAVE_EXPLICIT_BZERO +/* OPENBSD ORIGINAL: lib/libc/string/explicit_bzero.c */ +/* + * Public domain. + * Written by Ted Unangst + */ + +#include <string.h> + +/* + * explicit_bzero - don't let the compiler optimize away bzero + */ + +#if HAVE_MEMSET_S + +void +explicit_bzero(void *p, size_t n) +{ + if (n == 0) + return; + (void)memset_s(p, n, 0, n); +} + +#else /* HAVE_MEMSET_S */ + +#include <strings.h> + +/* + * Indirect memset through a volatile pointer to hopefully avoid + * dead-store optimisation eliminating the call. + */ +static void (* volatile ssh_memset)(void *, int, size_t) = memset; + +void +explicit_bzero(void *p, size_t n) +{ + if (n == 0) + return; + /* + * clang -fsanitize=memory needs to intercept memset-like functions + * to correctly detect memory initialisation. Make sure one is called + * directly since our indirection trick above sucessfully confuses it. + */ +#if defined(__has_feature) +# if __has_feature(memory_sanitizer) + memset(p, 0, n); +# endif +#endif + + ssh_memset(p, 0, n); +} + +#endif /* HAVE_MEMSET_S */ +#endif /* !HAVE_EXPLICIT_BZERO */ +#if !HAVE_FTS +/* $OpenBSD$ */ + +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/stat.h> +#include <sys/types.h> + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/* + * oconfigure: Adapted from sys/_types.h. + * oconfigure: Be conservative with ALIGNBYTES. 
 */
#define FTS_ALIGNBYTES (sizeof(long) - 1)
#define FTS_ALIGN(p) (((unsigned long)(p) + FTS_ALIGNBYTES) &~ FTS_ALIGNBYTES)

static FTSENT *fts_alloc(FTS *, char *, size_t);
static FTSENT *fts_build(FTS *, int);
static void fts_lfree(FTSENT *);
static void fts_load(FTS *, FTSENT *);
static size_t fts_maxarglen(char * const *);
static void fts_padjust(FTS *, FTSENT *);
static int fts_palloc(FTS *, size_t);
static FTSENT *fts_sort(FTS *, FTSENT *, int);
static u_short fts_stat(FTS *, FTSENT *, int, int);
static int fts_safe_changedir(FTS *, FTSENT *, int, char *);

/* oconfigure: Prefix with FTS_. */

/*
 * Note that FTS_CLR, FTS_ISSET and FTS_SET expand to an expression on
 * a variable literally named "sp", so they only work inside functions
 * that have the stream in scope under that name.
 */
#define FTS_MAX(a, b) (((a) > (b)) ? (a) : (b))
#define FTS_ISDOT(a) (a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2])))
#define FTS_CLR(opt) (sp->fts_options &= ~(opt))
#define FTS_ISSET(opt) (sp->fts_options & (opt))
#define FTS_SET(opt) (sp->fts_options |= (opt))
#define FTS_FCHDIR(sp, fd) (!FTS_ISSET(FTS_NOCHDIR) && fchdir(fd))
/* fts_build flags */
#define FTS_BCHILD 1 /* fts_children */
#define FTS_BNAMES 2 /* fts_children, names only */
#define FTS_BREAD 3 /* fts_read */

/*
 * Create a traversal stream over the NULL-terminated path list "argv".
 * "options" must be a subset of FTS_OPTIONMASK and "argv" must hold at
 * least one path, otherwise errno is set to EINVAL and NULL is
 * returned.  If "compar" is non-NULL and more than one path was given,
 * the root entries are ordered with fts_sort().  On allocation failure
 * everything allocated so far is released and NULL is returned.
 */
FTS *
fts_open(char * const *argv, int options,
    int (*compar)(const FTSENT **, const FTSENT **))
{
	FTS *sp;
	FTSENT *p, *root;
	int nitems;
	FTSENT *parent, *prev;
	char empty[1] = { '\0' };

	/* Options check. */
	if (options & ~FTS_OPTIONMASK) {
		errno = EINVAL;
		return (NULL);
	}

	/* At least one path must be specified. */
	if (*argv == NULL) {
		errno = EINVAL;
		return (NULL);
	}

	/* Allocate/initialize the stream */
	if ((sp = calloc(1, sizeof(FTS))) == NULL)
		return (NULL);
	sp->fts_compar = compar;
	sp->fts_options = options;

	/* Logical walks turn on NOCHDIR; symbolic links are too hard. */
	if (FTS_ISSET(FTS_LOGICAL))
		FTS_SET(FTS_NOCHDIR);

	/*
	 * Start out with 1K of path space, and enough, in any case,
	 * to hold the user's paths.
	 */
	if (fts_palloc(sp, FTS_MAX(fts_maxarglen(argv), PATH_MAX)))
		goto mem1;

	/* Allocate/initialize root's parent. */
	if ((parent = fts_alloc(sp, empty, 0)) == NULL)
		goto mem2;
	parent->fts_level = FTS_ROOTPARENTLEVEL;

	/* Allocate/initialize root(s). */
	for (root = prev = NULL, nitems = 0; *argv; ++argv, ++nitems) {
		if ((p = fts_alloc(sp, *argv, strlen(*argv))) == NULL)
			goto mem3;
		p->fts_level = FTS_ROOTLEVEL;
		p->fts_parent = parent;
		p->fts_accpath = p->fts_name;
		p->fts_info = fts_stat(sp, p, FTS_ISSET(FTS_COMFOLLOW), -1);

		/* Command-line "." and ".." are real directories. */
		if (p->fts_info == FTS_DOT)
			p->fts_info = FTS_D;

		/*
		 * If comparison routine supplied, traverse in sorted
		 * order; otherwise traverse in the order specified.
		 */
		if (compar) {
			p->fts_link = root;
			root = p;
		} else {
			p->fts_link = NULL;
			if (root == NULL)
				root = p;
			else
				prev->fts_link = p;
			prev = p;
		}
	}
	if (compar && nitems > 1)
		root = fts_sort(sp, root, nitems);

	/*
	 * Allocate a dummy pointer and make fts_read think that we've just
	 * finished the node before the root(s); set p->fts_info to FTS_INIT
	 * so that everything about the "current" node is ignored.
	 */
	if ((sp->fts_cur = fts_alloc(sp, empty, 0)) == NULL)
		goto mem3;
	sp->fts_cur->fts_link = root;
	sp->fts_cur->fts_info = FTS_INIT;

	/*
	 * If using chdir(2), grab a file descriptor pointing to dot to ensure
	 * that we can get back here; this could be avoided for some paths,
	 * but almost certainly not worth the effort. Slashes, symbolic links,
	 * and ".." are all fairly nasty problems. Note, if we can't get the
	 * descriptor we run anyway, just more slowly.
	 */
	if (!FTS_ISSET(FTS_NOCHDIR) &&
	    (sp->fts_rfd = open(".", O_RDONLY | O_CLOEXEC)) == -1)
		FTS_SET(FTS_NOCHDIR);

	/*
	 * NOTE(review): *argv was checked to be non-NULL above and the
	 * loop counts every argument, so nitems looks to be at least 1
	 * here and this branch appears unreachable.
	 */
	if (nitems == 0)
		free(parent);

	return (sp);

	/* Error unwinding: each label releases what was allocated before it. */
mem3: fts_lfree(root);
	free(parent);
mem2: free(sp->fts_path);
mem1: free(sp);
	return (NULL);
}

/*
 * Prepare the stream for traversing the root entry "p": copy its name
 * into the stream's path buffer and reduce the entry's own name to the
 * component after the last slash.
 */
static void
fts_load(FTS *sp, FTSENT *p)
{
	size_t len;
	char *cp;

	/*
	 * Load the stream structure for the next traversal. Since we don't
	 * actually enter the directory until after the preorder visit, set
	 * the fts_accpath field specially so the chdir gets done to the right
	 * place and the user can access the first node. From fts_open it's
	 * known that the path will fit.
	 */
	len = p->fts_pathlen = p->fts_namelen;
	memmove(sp->fts_path, p->fts_name, len + 1);
	/* Leave a name that is exactly "/" alone (slash first, nothing after). */
	if ((cp = strrchr(p->fts_name, '/')) && (cp != p->fts_name || cp[1])) {
		len = strlen(++cp);
		memmove(p->fts_name, cp, len + 1);
		p->fts_namelen = len;
	}
	p->fts_accpath = p->fts_path = sp->fts_path;
	sp->fts_dev = p->fts_dev;
}

/*
 * Release everything owned by the stream and, unless FTS_NOCHDIR is
 * set, fchdir(2) back to the descriptor saved in fts_rfd.  Returns the
 * result of that fchdir(), or 0 if none was attempted; errno from a
 * failed fchdir() is preserved across the close().
 */
int
fts_close(FTS *sp)
{
	FTSENT *freep, *p;
	int rfd, error = 0;

	/*
	 * This still works if we haven't read anything -- the dummy structure
	 * points to the root list, so we step through to the end of the root
	 * list which has a valid parent pointer.
	 */
	if (sp->fts_cur) {
		for (p = sp->fts_cur; p->fts_level >= FTS_ROOTLEVEL;) {
			freep = p;
			p = p->fts_link ? p->fts_link : p->fts_parent;
			free(freep);
		}
		free(p);
	}

	/* Stash the original directory fd if needed. */
	rfd = FTS_ISSET(FTS_NOCHDIR) ? -1 : sp->fts_rfd;

	/* Free up child linked list, sort array, path buffer, stream ptr.*/
	if (sp->fts_child)
		fts_lfree(sp->fts_child);
	free(sp->fts_array);
	free(sp->fts_path);
	free(sp);

	/* Return to original directory, checking for error. */
	if (rfd != -1) {
		int saved_errno;
		error = fchdir(rfd);
		saved_errno = errno;
		(void)close(rfd);
		errno = saved_errno;
	}

	return (error);
}

/*
 * Special case of "/" at the end of the path so that slashes aren't
 * appended which would cause paths to be written as "....//foo".
 */
#define NAPPEND(p) \
	(p->fts_path[p->fts_pathlen - 1] == '/' \
	    ? p->fts_pathlen - 1 : p->fts_pathlen)

/*
 * Return the next entry of the traversal.  Returns NULL when the
 * stream is finished (errno is then set to 0) or when FTS_STOP has
 * been set because of an unrecoverable error.  The instruction stored
 * on the current node (fts_instr) is consumed and reset first.
 */
FTSENT *
fts_read(FTS *sp)
{
	FTSENT *p, *tmp;
	int instr;
	char *t;
	char up[3] = { '.', '.', '\0' };
	int saved_errno;

	/* If finished or unrecoverable error, return NULL. */
	if (sp->fts_cur == NULL || FTS_ISSET(FTS_STOP))
		return (NULL);

	/* Set current node pointer. */
	p = sp->fts_cur;

	/* Save and zero out user instructions. */
	instr = p->fts_instr;
	p->fts_instr = FTS_NOINSTR;

	/* Any type of file may be re-visited; re-stat and re-turn. */
	if (instr == FTS_AGAIN) {
		p->fts_info = fts_stat(sp, p, 0, -1);
		return (p);
	}

	/*
	 * Following a symlink -- SLNONE test allows application to see
	 * SLNONE and recover. If indirecting through a symlink, have
	 * keep a pointer to current location. If unable to get that
	 * pointer, follow fails.
	 */
	if (instr == FTS_FOLLOW &&
	    (p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) {
		p->fts_info = fts_stat(sp, p, 1, -1);
		if (p->fts_info == FTS_D && !FTS_ISSET(FTS_NOCHDIR)) {
			if ((p->fts_symfd =
			    open(".", O_RDONLY | O_CLOEXEC)) == -1) {
				p->fts_errno = errno;
				p->fts_info = FTS_ERR;
			} else
				p->fts_flags |= FTS_SYMFOLLOW;
		}
		return (p);
	}

	/* Directory in pre-order. */
	if (p->fts_info == FTS_D) {
		/* If skipped or crossed mount point, do post-order visit. */
		if (instr == FTS_SKIP ||
		    (FTS_ISSET(FTS_XDEV) && p->fts_dev != sp->fts_dev)) {
			if (p->fts_flags & FTS_SYMFOLLOW)
				(void)close(p->fts_symfd);
			if (sp->fts_child) {
				fts_lfree(sp->fts_child);
				sp->fts_child = NULL;
			}
			p->fts_info = FTS_DP;
			return (p);
		}

		/* Rebuild if only read the names and now traversing. */
		if (sp->fts_child && FTS_ISSET(FTS_NAMEONLY)) {
			FTS_CLR(FTS_NAMEONLY);
			fts_lfree(sp->fts_child);
			sp->fts_child = NULL;
		}

		/*
		 * Cd to the subdirectory.
		 *
		 * If have already read and now fail to chdir, whack the list
		 * to make the names come out right, and set the parent errno
		 * so the application will eventually get an error condition.
		 * Set the FTS_DONTCHDIR flag so that when we logically change
		 * directories back to the parent we don't do a chdir.
		 *
		 * If haven't read do so. If the read fails, fts_build sets
		 * FTS_STOP or the fts_info field of the node.
		 */
		if (sp->fts_child) {
			if (fts_safe_changedir(sp, p, -1, p->fts_accpath)) {
				p->fts_errno = errno;
				p->fts_flags |= FTS_DONTCHDIR;
				for (p = sp->fts_child; p; p = p->fts_link)
					p->fts_accpath =
					    p->fts_parent->fts_accpath;
			}
		} else if ((sp->fts_child = fts_build(sp, FTS_BREAD)) == NULL) {
			if (FTS_ISSET(FTS_STOP))
				return (NULL);
			return (p);
		}
		/* Descend: the first child becomes the current node. */
		p = sp->fts_child;
		sp->fts_child = NULL;
		goto name;
	}

	/* Move to the next node on this level. */
next: tmp = p;
	if ((p = p->fts_link)) {
		free(tmp);

		/*
		 * If reached the top, return to the original directory (or
		 * the root of the tree), and load the paths for the next root.
		 */
		if (p->fts_level == FTS_ROOTLEVEL) {
			if (FTS_FCHDIR(sp, sp->fts_rfd)) {
				FTS_SET(FTS_STOP);
				return (NULL);
			}
			fts_load(sp, p);
			return (sp->fts_cur = p);
		}

		/*
		 * User may have called fts_set on the node. If skipped,
		 * ignore. If followed, get a file descriptor so we can
		 * get back if necessary.
		 */
		if (p->fts_instr == FTS_SKIP)
			goto next;
		if (p->fts_instr == FTS_FOLLOW) {
			p->fts_info = fts_stat(sp, p, 1, -1);
			if (p->fts_info == FTS_D && !FTS_ISSET(FTS_NOCHDIR)) {
				if ((p->fts_symfd =
				    open(".", O_RDONLY | O_CLOEXEC)) == -1) {
					p->fts_errno = errno;
					p->fts_info = FTS_ERR;
				} else
					p->fts_flags |= FTS_SYMFOLLOW;
			}
			p->fts_instr = FTS_NOINSTR;
		}

		/* Append "/<name>" to the parent's portion of the path buffer. */
name: t = sp->fts_path + NAPPEND(p->fts_parent);
		*t++ = '/';
		memmove(t, p->fts_name, p->fts_namelen + 1);
		return (sp->fts_cur = p);
	}

	/* Move up to the parent node. */
	p = tmp->fts_parent;
	free(tmp);

	if (p->fts_level == FTS_ROOTPARENTLEVEL) {
		/*
		 * Done; free everything up and set errno to 0 so the user
		 * can distinguish between error and EOF.
		 */
		free(p);
		errno = 0;
		return (sp->fts_cur = NULL);
	}

	/* NUL terminate the pathname. */
	sp->fts_path[p->fts_pathlen] = '\0';

	/*
	 * Return to the parent directory. If at a root node or came through
	 * a symlink, go back through the file descriptor. Otherwise, cd up
	 * one directory.
	 */
	if (p->fts_level == FTS_ROOTLEVEL) {
		if (FTS_FCHDIR(sp, sp->fts_rfd)) {
			FTS_SET(FTS_STOP);
			sp->fts_cur = p;
			return (NULL);
		}
	} else if (p->fts_flags & FTS_SYMFOLLOW) {
		if (FTS_FCHDIR(sp, p->fts_symfd)) {
			/* Keep the fchdir() errno across the close(). */
			saved_errno = errno;
			(void)close(p->fts_symfd);
			errno = saved_errno;
			FTS_SET(FTS_STOP);
			sp->fts_cur = p;
			return (NULL);
		}
		(void)close(p->fts_symfd);
	} else if (!(p->fts_flags & FTS_DONTCHDIR) &&
	    fts_safe_changedir(sp, p->fts_parent, -1, up)) {
		FTS_SET(FTS_STOP);
		sp->fts_cur = p;
		return (NULL);
	}
	/* Post-order visit of the directory, or the error deferred earlier. */
	p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP;
	return (sp->fts_cur = p);
}

/*
 * Fts_set takes the stream as an argument although it's not used in this
 * implementation; it would be necessary if anyone wanted to add global
 * semantics to fts using fts_set. An error return is allowed for similar
 * reasons.
+ */ +int +fts_set(FTS *sp, FTSENT *p, int instr) +{ + if (instr && instr != FTS_AGAIN && instr != FTS_FOLLOW && + instr != FTS_NOINSTR && instr != FTS_SKIP) { + errno = EINVAL; + return (1); + } + p->fts_instr = instr; + return (0); +} + +FTSENT * +fts_children(FTS *sp, int instr) +{ + FTSENT *p; + int fd; + + if (instr && instr != FTS_NAMEONLY) { + errno = EINVAL; + return (NULL); + } + + /* Set current node pointer. */ + p = sp->fts_cur; + + /* + * Errno set to 0 so user can distinguish empty directory from + * an error. + */ + errno = 0; + + /* Fatal errors stop here. */ + if (FTS_ISSET(FTS_STOP)) + return (NULL); + + /* Return logical hierarchy of user's arguments. */ + if (p->fts_info == FTS_INIT) + return (p->fts_link); + + /* + * If not a directory being visited in pre-order, stop here. Could + * allow FTS_DNR, assuming the user has fixed the problem, but the + * same effect is available with FTS_AGAIN. + */ + if (p->fts_info != FTS_D /* && p->fts_info != FTS_DNR */) + return (NULL); + + /* Free up any previous child list. */ + if (sp->fts_child) + fts_lfree(sp->fts_child); + + if (instr == FTS_NAMEONLY) { + FTS_SET(FTS_NAMEONLY); + instr = FTS_BNAMES; + } else + instr = FTS_BCHILD; + + /* + * If using chdir on a relative path and called BEFORE fts_read does + * its chdir to the root of a traversal, we can lose -- we need to + * chdir into the subdirectory, and we don't know where the current + * directory is, so we can't get back so that the upcoming chdir by + * fts_read will work. + */ + if (p->fts_level != FTS_ROOTLEVEL || p->fts_accpath[0] == '/' || + FTS_ISSET(FTS_NOCHDIR)) + return (sp->fts_child = fts_build(sp, instr)); + + if ((fd = open(".", O_RDONLY | O_CLOEXEC)) == -1) + return (NULL); + sp->fts_child = fts_build(sp, instr); + if (fchdir(fd)) { + (void)close(fd); + return (NULL); + } + (void)close(fd); + return (sp->fts_child); +} + +/* + * This is the tricky part -- do not casually change *anything* in here. 
The + * idea is to build the linked list of entries that are used by fts_children + * and fts_read. There are lots of special cases. + * + * The real slowdown in walking the tree is the stat calls. If FTS_NOSTAT is + * set and it's a physical walk (so that symbolic links can't be directories), + * we can do things quickly. First, if it's a 4.4BSD file system, the type + * of the file is in the directory entry. Otherwise, we assume that the number + * of subdirectories in a node is equal to the number of links to the parent. + * The former skips all stat calls. The latter skips stat calls in any leaf + * directories and for any files after the subdirectories in the directory have + * been found, cutting the stat calls by about 2/3. + */ +static FTSENT * +fts_build(FTS *sp, int type) +{ + struct dirent *dp; + FTSENT *p, *head; + FTSENT *cur, *tail; + DIR *dirp; + void *oldaddr; + size_t len, maxlen, namlen; + int nitems, cderrno, descend, level, nlinks, nostat, doadjust; + int saved_errno; + char *cp; + char up[3] = { '.', '.', '\0' }; + + /* Set current node pointer. */ + cur = sp->fts_cur; + + /* + * Open the directory for reading. If this fails, we're done. + * If being called from fts_read, set the fts_info field. + */ + if ((dirp = opendir(cur->fts_accpath)) == NULL) { + if (type == FTS_BREAD) { + cur->fts_info = FTS_DNR; + cur->fts_errno = errno; + } + return (NULL); + } + + /* + * Nlinks is the number of possible entries of type directory in the + * directory if we're cheating on stat calls, 0 if we're not doing + * any stat calls at all, -1 if we're doing stats on everything. + */ + if (type == FTS_BNAMES) + nlinks = 0; + else if (FTS_ISSET(FTS_NOSTAT) && FTS_ISSET(FTS_PHYSICAL)) { + nlinks = cur->fts_nlink - (FTS_ISSET(FTS_SEEDOT) ? 
0 : 2); + nostat = 1; + } else { + nlinks = -1; + nostat = 0; + } + +#ifdef notdef + (void)printf("nlinks == %d (cur: %u)\n", nlinks, cur->fts_nlink); + (void)printf("NOSTAT %d PHYSICAL %d SEEDOT %d\n", + FTS_ISSET(FTS_NOSTAT), FTS_ISSET(FTS_PHYSICAL), FTS_ISSET(FTS_SEEDOT)); +#endif + /* + * If we're going to need to stat anything or we want to descend + * and stay in the directory, chdir. If this fails we keep going, + * but set a flag so we don't chdir after the post-order visit. + * We won't be able to stat anything, but we can still return the + * names themselves. Note, that since fts_read won't be able to + * chdir into the directory, it will have to return different path + * names than before, i.e. "a/b" instead of "b". Since the node + * has already been visited in pre-order, have to wait until the + * post-order visit to return the error. There is a special case + * here, if there was nothing to stat then it's not an error to + * not be able to stat. This is all fairly nasty. If a program + * needed sorted entries or stat information, they had better be + * checking FTS_NS on the returned nodes. + */ + cderrno = 0; + if (nlinks || type == FTS_BREAD) { + if (fts_safe_changedir(sp, cur, dirfd(dirp), NULL)) { + if (nlinks && type == FTS_BREAD) + cur->fts_errno = errno; + cur->fts_flags |= FTS_DONTCHDIR; + descend = 0; + cderrno = errno; + (void)closedir(dirp); + dirp = NULL; + } else + descend = 1; + } else + descend = 0; + + /* + * Figure out the max file name length that can be stored in the + * current path -- the inner loop allocates more path as necessary. + * We really wouldn't have to do the maxlen calculations here, we + * could do them in fts_read before returning the path, but it's a + * lot easier here since the length is part of the dirent structure. + * + * If not changing directories set a pointer so that can just append + * each new name into the path. 
+ */ + len = NAPPEND(cur); + if (FTS_ISSET(FTS_NOCHDIR)) { + cp = sp->fts_path + len; + *cp++ = '/'; + } + len++; + maxlen = sp->fts_pathlen - len; + + /* + * fts_level is signed so we must prevent it from wrapping + * around to FTS_ROOTLEVEL and FTS_ROOTPARENTLEVEL. + */ + level = cur->fts_level; + if (level < FTS_MAXLEVEL) + level++; + + /* Read the directory, attaching each entry to the `link' pointer. */ + doadjust = 0; + for (head = tail = NULL, nitems = 0; dirp && (dp = readdir(dirp));) { + if (!FTS_ISSET(FTS_SEEDOT) && FTS_ISDOT(dp->d_name)) + continue; + + namlen = strlen(dp->d_name); + + if (!(p = fts_alloc(sp, dp->d_name, namlen))) + goto mem1; + if (namlen >= maxlen) { /* include space for NUL */ + oldaddr = sp->fts_path; + if (fts_palloc(sp, namlen +len + 1)) { + /* + * No more memory for path or structures. Save + * errno, free up the current structure and the + * structures already allocated. + */ +mem1: saved_errno = errno; + free(p); + fts_lfree(head); + (void)closedir(dirp); + cur->fts_info = FTS_ERR; + FTS_SET(FTS_STOP); + errno = saved_errno; + return (NULL); + } + /* Did realloc() change the pointer? */ + if (oldaddr != sp->fts_path) { + doadjust = 1; + if (FTS_ISSET(FTS_NOCHDIR)) + cp = sp->fts_path + len; + } + maxlen = sp->fts_pathlen - len; + } + + p->fts_level = level; + p->fts_parent = sp->fts_cur; + p->fts_pathlen = len + namlen; + if (p->fts_pathlen < len) { + /* + * If we wrap, free up the current structure and + * the structures already allocated, then error + * out with ENAMETOOLONG. 
+ */ + free(p); + fts_lfree(head); + (void)closedir(dirp); + cur->fts_info = FTS_ERR; + FTS_SET(FTS_STOP); + errno = ENAMETOOLONG; + return (NULL); + } + + if (cderrno) { + if (nlinks) { + p->fts_info = FTS_NS; + p->fts_errno = cderrno; + } else + p->fts_info = FTS_NSOK; + p->fts_accpath = cur->fts_accpath; + } else if (nlinks == 0 +#ifdef DT_DIR + || (nostat && + dp->d_type != DT_DIR && dp->d_type != DT_UNKNOWN) +#endif + ) { + p->fts_accpath = + FTS_ISSET(FTS_NOCHDIR) ? p->fts_path : p->fts_name; + p->fts_info = FTS_NSOK; + } else { + /* Build a file name for fts_stat to stat. */ + if (FTS_ISSET(FTS_NOCHDIR)) { + p->fts_accpath = p->fts_path; + memmove(cp, p->fts_name, p->fts_namelen + 1); + p->fts_info = fts_stat(sp, p, 0, dirfd(dirp)); + } else { + p->fts_accpath = p->fts_name; + p->fts_info = fts_stat(sp, p, 0, -1); + } + + /* Decrement link count if applicable. */ + if (nlinks > 0 && (p->fts_info == FTS_D || + p->fts_info == FTS_DC || p->fts_info == FTS_DOT)) + --nlinks; + } + + /* We walk in directory order so "ls -f" doesn't get upset. */ + p->fts_link = NULL; + if (head == NULL) + head = tail = p; + else { + tail->fts_link = p; + tail = p; + } + ++nitems; + } + if (dirp) + (void)closedir(dirp); + + /* + * If realloc() changed the address of the path, adjust the + * addresses for the rest of the tree and the dir list. + */ + if (doadjust) + fts_padjust(sp, head); + + /* + * If not changing directories, reset the path back to original + * state. + */ + if (FTS_ISSET(FTS_NOCHDIR)) { + if (len == sp->fts_pathlen || nitems == 0) + --cp; + *cp = '\0'; + } + + /* + * If descended after called from fts_children or after called from + * fts_read and nothing found, get back. At the root level we use + * the saved fd; if one of fts_open()'s arguments is a relative path + * to an empty directory, we wind up here with no other way back. If + * can't get back, we're done. + */ + if (descend && (type == FTS_BCHILD || !nitems) && + (cur->fts_level == FTS_ROOTLEVEL ? 
FTS_FCHDIR(sp, sp->fts_rfd) : + fts_safe_changedir(sp, cur->fts_parent, -1, up))) { + cur->fts_info = FTS_ERR; + FTS_SET(FTS_STOP); + return (NULL); + } + + /* If didn't find anything, return NULL. */ + if (!nitems) { + if (type == FTS_BREAD) + cur->fts_info = FTS_DP; + return (NULL); + } + + /* Sort the entries. */ + if (sp->fts_compar && nitems > 1) + head = fts_sort(sp, head, nitems); + return (head); +} + +static u_short +fts_stat(FTS *sp, FTSENT *p, int follow, int dfd) +{ + FTSENT *t; + dev_t dev; + ino_t ino; + struct stat *sbp, sb; + int saved_errno; + const char *path; + + if (dfd == -1) { + path = p->fts_accpath; + dfd = AT_FDCWD; + } else + path = p->fts_name; + + /* If user needs stat info, stat buffer already allocated. */ + sbp = FTS_ISSET(FTS_NOSTAT) ? &sb : p->fts_statp; + + /* + * If doing a logical walk, or application requested FTS_FOLLOW, do + * a stat(2). If that fails, check for a non-existent symlink. If + * fail, set the errno from the stat call. + */ + if (FTS_ISSET(FTS_LOGICAL) || follow) { + if (fstatat(dfd, path, sbp, 0)) { + saved_errno = errno; + if (!fstatat(dfd, path, sbp, AT_SYMLINK_NOFOLLOW)) { + errno = 0; + return (FTS_SLNONE); + } + p->fts_errno = saved_errno; + goto err; + } + } else if (fstatat(dfd, path, sbp, AT_SYMLINK_NOFOLLOW)) { + p->fts_errno = errno; +err: memset(sbp, 0, sizeof(struct stat)); + return (FTS_NS); + } + + if (S_ISDIR(sbp->st_mode)) { + /* + * Set the device/inode. Used to find cycles and check for + * crossing mount points. Also remember the link count, used + * in fts_build to limit the number of stat calls. It is + * understood that these fields are only referenced if fts_info + * is set to FTS_D. + */ + dev = p->fts_dev = sbp->st_dev; + ino = p->fts_ino = sbp->st_ino; + p->fts_nlink = sbp->st_nlink; + + if (FTS_ISDOT(p->fts_name)) + return (FTS_DOT); + + /* + * Cycle detection is done by brute force when the directory + * is first encountered. 
If the tree gets deep enough or the + * number of symbolic links to directories is high enough, + * something faster might be worthwhile. + */ + for (t = p->fts_parent; + t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent) + if (ino == t->fts_ino && dev == t->fts_dev) { + p->fts_cycle = t; + return (FTS_DC); + } + return (FTS_D); + } + if (S_ISLNK(sbp->st_mode)) + return (FTS_SL); + if (S_ISREG(sbp->st_mode)) + return (FTS_F); + return (FTS_DEFAULT); +} + +static FTSENT * +fts_sort(FTS *sp, FTSENT *head, int nitems) +{ + FTSENT **ap, *p; + + /* + * Construct an array of pointers to the structures and call qsort(3). + * Reassemble the array in the order returned by qsort. If unable to + * sort for memory reasons, return the directory entries in their + * current order. Allocate enough space for the current needs plus + * 40 so don't realloc one entry at a time. + */ + if (nitems > sp->fts_nitems) { + struct _ftsent **a; + + if ((a = reallocarray(sp->fts_array, + nitems + 40, sizeof(FTSENT *))) == NULL) { + free(sp->fts_array); + sp->fts_array = NULL; + sp->fts_nitems = 0; + return (head); + } + sp->fts_nitems = nitems + 40; + sp->fts_array = a; + } + for (ap = sp->fts_array, p = head; p; p = p->fts_link) + *ap++ = p; + qsort(sp->fts_array, nitems, sizeof(FTSENT *), + (int(*)(const void *, const void *))sp->fts_compar); + for (head = *(ap = sp->fts_array); --nitems; ++ap) + ap[0]->fts_link = ap[1]; + ap[0]->fts_link = NULL; + return (head); +} + +static FTSENT * +fts_alloc(FTS *sp, char *name, size_t namelen) +{ + FTSENT *p; + size_t len; + + /* + * The file name is a variable length array and no stat structure is + * necessary if the user has set the nostat bit. Allocate the FTSENT + * structure, the file name and the stat structure in one chunk, but + * be careful that the stat structure is reasonably aligned. Since the + * fts_name field is declared to be of size 1, the fts_name pointer is + * namelen + 2 before the first possible address of the stat structure. 
+ */ + len = sizeof(FTSENT) + namelen; + if (!FTS_ISSET(FTS_NOSTAT)) + len += sizeof(struct stat) + FTS_ALIGNBYTES; + if ((p = calloc(1, len)) == NULL) + return (NULL); + + p->fts_path = sp->fts_path; + p->fts_namelen = namelen; + p->fts_instr = FTS_NOINSTR; + if (!FTS_ISSET(FTS_NOSTAT)) + p->fts_statp = (struct stat *)FTS_ALIGN(p->fts_name + namelen + 2); + memcpy(p->fts_name, name, namelen); + + return (p); +} + +static void +fts_lfree(FTSENT *head) +{ + FTSENT *p; + + /* Free a linked list of structures. */ + while ((p = head)) { + head = head->fts_link; + free(p); + } +} + +/* + * Allow essentially unlimited paths; find, rm, ls should all work on any tree. + * Most systems will allow creation of paths much longer than PATH_MAX, even + * though the kernel won't resolve them. Add the size (not just what's needed) + * plus 256 bytes so don't realloc the path 2 bytes at a time. + */ +static int +fts_palloc(FTS *sp, size_t more) +{ + char *p; + + /* + * Check for possible wraparound. + */ + more += 256; + if (sp->fts_pathlen + more < sp->fts_pathlen) { + free(sp->fts_path); + sp->fts_path = NULL; + errno = ENAMETOOLONG; + return (1); + } + p = recallocarray(sp->fts_path, sp->fts_pathlen, + sp->fts_pathlen + more, 1); + if (p == NULL) { + free(sp->fts_path); + sp->fts_path = NULL; + return (1); + } + sp->fts_pathlen += more; + sp->fts_path = p; + return (0); +} + +/* + * When the path is realloc'd, have to fix all of the pointers in structures + * already returned. + */ +static void +fts_padjust(FTS *sp, FTSENT *head) +{ + FTSENT *p; + char *addr = sp->fts_path; + +#define ADJUST(p) { \ + if ((p)->fts_accpath != (p)->fts_name) { \ + (p)->fts_accpath = \ + (char *)addr + ((p)->fts_accpath - (p)->fts_path); \ + } \ + (p)->fts_path = addr; \ +} + /* Adjust the current set of children. */ + for (p = sp->fts_child; p; p = p->fts_link) + ADJUST(p); + + /* Adjust the rest of the tree, including the current level. 
*/ + for (p = head; p->fts_level >= FTS_ROOTLEVEL;) { + ADJUST(p); + p = p->fts_link ? p->fts_link : p->fts_parent; + } +} + +static size_t +fts_maxarglen(char * const *argv) +{ + size_t len, max; + + for (max = 0; *argv; ++argv) + if ((len = strlen(*argv)) > max) + max = len; + return (max + 1); +} + +/* + * Change to dir specified by fd or p->fts_accpath without getting + * tricked by someone changing the world out from underneath us. + * Assumes p->fts_dev and p->fts_ino are filled in. + */ +static int +fts_safe_changedir(FTS *sp, FTSENT *p, int fd, char *path) +{ + int ret, oerrno, newfd; + struct stat sb; + + newfd = fd; + if (FTS_ISSET(FTS_NOCHDIR)) + return (0); + if (fd == -1 && (newfd = open(path, O_RDONLY|O_DIRECTORY|O_CLOEXEC)) == -1) + return (-1); + if (fstat(newfd, &sb) == -1) { + ret = -1; + goto bail; + } + if (p->fts_dev != sb.st_dev || p->fts_ino != sb.st_ino) { + errno = ENOENT; /* disinformation */ + ret = -1; + goto bail; + } + ret = fchdir(newfd); +bail: + oerrno = errno; + if (fd == -1) + (void)close(newfd); + errno = oerrno; + return (ret); +} +#endif /* !HAVE_FTS */ +#if !HAVE_GETPROGNAME +/* + * Copyright (c) 2016 Nicholas Marriott <nicholas.marriott@gmail.com> + * Copyright (c) 2017 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2020 Stephen Gregoratto <dev@sgregoratto.me> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> + +#include <errno.h> + +#if HAVE_GETEXECNAME +#include <stdlib.h> +const char * +getprogname(void) +{ + return getexecname(); +} +#elif HAVE_PROGRAM_INVOCATION_SHORT_NAME +const char * +getprogname(void) +{ + return (program_invocation_short_name); +} +#elif HAVE___PROGNAME +const char * +getprogname(void) +{ + extern char *__progname; + + return (__progname); +} +#else +#error No getprogname available. +#endif +#endif /* !HAVE_GETPROGNAME */ +#if !HAVE_MD5 +/* + * This code implements the MD5 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. + * + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. + * + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. + */ + +#include <sys/types.h> +#include <stdlib.h> +#include <string.h> + +#ifndef BYTE_ORDER +# if defined(LITTLE_ENDIAN) || defined(BIG_ENDIAN) +# error Confusion in endian macros. +# endif +# if !defined(__BYTE_ORDER__) +# error Byte order macro not found. +# endif +# if !defined(__ORDER_LITTLE_ENDIAN__) || !defined(__ORDER_BIG_ENDIAN__) +# error Little/big endian macros not found. 
+# endif +# define BYTE_ORDER __BYTE_ORDER__ +# define LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__ +# define BIG_ENDIAN __ORDER_BIG_ENDIAN__ +#endif /*!BYTE_ORDER*/ + +#define PUT_64BIT_LE(cp, value) do { \ + (cp)[7] = (value) >> 56; \ + (cp)[6] = (value) >> 48; \ + (cp)[5] = (value) >> 40; \ + (cp)[4] = (value) >> 32; \ + (cp)[3] = (value) >> 24; \ + (cp)[2] = (value) >> 16; \ + (cp)[1] = (value) >> 8; \ + (cp)[0] = (value); } while (0) + +#define PUT_32BIT_LE(cp, value) do { \ + (cp)[3] = (value) >> 24; \ + (cp)[2] = (value) >> 16; \ + (cp)[1] = (value) >> 8; \ + (cp)[0] = (value); } while (0) + +static uint8_t PADDING[MD5_BLOCK_LENGTH] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* + * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious + * initialization constants. + */ +void +MD5Init(MD5_CTX *ctx) +{ + ctx->count = 0; + ctx->state[0] = 0x67452301; + ctx->state[1] = 0xefcdab89; + ctx->state[2] = 0x98badcfe; + ctx->state[3] = 0x10325476; +} + +/* + * Update context to reflect the concatenation of another buffer full + * of bytes. + */ +void +MD5Update(MD5_CTX *ctx, const unsigned char *input, size_t len) +{ + size_t have, need; + + /* Check how many bytes we already have and how many more we need. */ + have = (size_t)((ctx->count >> 3) & (MD5_BLOCK_LENGTH - 1)); + need = MD5_BLOCK_LENGTH - have; + + /* Update bitcount */ + ctx->count += (uint64_t)len << 3; + + if (len >= need) { + if (have != 0) { + memcpy(ctx->buffer + have, input, need); + MD5Transform(ctx->state, ctx->buffer); + input += need; + len -= need; + have = 0; + } + + /* Process data in MD5_BLOCK_LENGTH-byte chunks. */ + while (len >= MD5_BLOCK_LENGTH) { + MD5Transform(ctx->state, input); + input += MD5_BLOCK_LENGTH; + len -= MD5_BLOCK_LENGTH; + } + } + + /* Handle any remaining bytes of data. 
*/ + if (len != 0) + memcpy(ctx->buffer + have, input, len); +} + +/* + * Pad pad to 64-byte boundary with the bit pattern + * 1 0* (64-bit count of bits processed, MSB-first) + */ +void +MD5Pad(MD5_CTX *ctx) +{ + uint8_t count[8]; + size_t padlen; + + /* Convert count to 8 bytes in little endian order. */ + PUT_64BIT_LE(count, ctx->count); + + /* Pad out to 56 mod 64. */ + padlen = MD5_BLOCK_LENGTH - + ((ctx->count >> 3) & (MD5_BLOCK_LENGTH - 1)); + if (padlen < 1 + 8) + padlen += MD5_BLOCK_LENGTH; + MD5Update(ctx, PADDING, padlen - 8); /* padlen - 8 <= 64 */ + MD5Update(ctx, count, 8); +} + +/* + * Final wrapup--call MD5Pad, fill in digest and zero out ctx. + */ +void +MD5Final(unsigned char digest[MD5_DIGEST_LENGTH], MD5_CTX *ctx) +{ + int i; + + MD5Pad(ctx); + for (i = 0; i < 4; i++) + PUT_32BIT_LE(digest + i * 4, ctx->state[i]); + memset(ctx, 0, sizeof(*ctx)); +} + + +/* The four core functions - F1 is optimized somewhat */ + +/* #define F1(x, y, z) (x & y | ~x & z) */ +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +/* This is the central step in the MD5 algorithm. */ +#define MD5STEP(f, w, x, y, z, data, s) \ + ( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x ) + +/* + * The core of the MD5 algorithm, this alters an existing MD5 hash to + * reflect the addition of 16 longwords of new data. MD5Update blocks + * the data and converts bytes into longwords for this routine. 
+ */ +void +MD5Transform(uint32_t state[4], const uint8_t block[MD5_BLOCK_LENGTH]) +{ + uint32_t a, b, c, d, in[MD5_BLOCK_LENGTH / 4]; + +#if BYTE_ORDER == LITTLE_ENDIAN + memcpy(in, block, sizeof(in)); +#else + for (a = 0; a < MD5_BLOCK_LENGTH / 4; a++) { + in[a] = (uint32_t)( + (uint32_t)(block[a * 4 + 0]) | + (uint32_t)(block[a * 4 + 1]) << 8 | + (uint32_t)(block[a * 4 + 2]) << 16 | + (uint32_t)(block[a * 4 + 3]) << 24); + } +#endif + + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + + MD5STEP(F1, a, b, c, d, in[ 0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[ 1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[ 2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[ 3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[ 4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[ 5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[ 6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[ 7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[ 8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[ 9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[ 1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[ 6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[ 0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[ 5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[ 4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[ 9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[ 3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[ 8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 
0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[ 2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[ 7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[ 5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[ 8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[ 1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[ 4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[ 7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[ 0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[ 3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[ 6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[ 9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2 ] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[ 0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7 ] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5 ] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3 ] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1 ] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8 ] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6 ] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4 ] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2 ] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9 ] + 0xeb86d391, 21); + + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; +} + +char * +MD5End(MD5_CTX *ctx, char *buf) +{ + int i; + unsigned char 
digest[MD5_DIGEST_LENGTH]; + static const char hex[]="0123456789abcdef"; + + if (!buf) + buf = malloc(2*MD5_DIGEST_LENGTH + 1); + if (!buf) + return 0; + MD5Final(digest, ctx); + for (i = 0; i < MD5_DIGEST_LENGTH; i++) { + buf[i+i] = hex[digest[i] >> 4]; + buf[i+i+1] = hex[digest[i] & 0x0f]; + } + buf[i+i] = '\0'; + return buf; +} +#endif /* !HAVE_MD5 */ +#if !HAVE_MEMMEM +/*- + * Copyright (c) 2005 Pascal Gloor <pascal.gloor@spale.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Find the first occurrence of the byte string s in byte string l. 
/*
 * memmem(): compatibility implementation for systems without one.
 *
 * Searches the l_len bytes at l (the haystack) for the first occurrence
 * of the s_len bytes at s (the needle).
 *
 * Returns a pointer to the start of the match inside l; l itself when
 * s_len is zero, since an empty needle matches at the very beginning;
 * or NULL when there is no match, which includes a needle longer than
 * the haystack.
 */
#if !HAVE_MEMMEM
void *
memmem(const void *l, size_t l_len, const void *s, size_t s_len)
{
	const char *cur, *last;
	const char *cl = l;
	const char *cs = s;

	/*
	 * A zero length needle should just return the haystack.  This has
	 * to be decided on s_len: falling through with an empty needle
	 * would read cs[0] and the byte one past the end of the haystack.
	 */
	if (s_len == 0)
		return (void *)cl;

	/* "s" must be smaller or equal to "l" */
	if (l_len < s_len)
		return NULL;

	/* special case where s_len == 1 */
	if (s_len == 1)
		return memchr(l, *cs, l_len);

	/* the last position where it's possible to find "s" in "l" */
	last = cl + l_len - s_len;

	/* Cheap first-byte test before paying for the full comparison. */
	for (cur = cl; cur <= last; cur++)
		if (cur[0] == cs[0] && memcmp(cur, cs, s_len) == 0)
			return (void *)cur;

	return NULL;
}
#endif /* !HAVE_MEMMEM */
#if !HAVE_MEMRCHR
/*
 * Copyright (c) 2007 Todd C. Miller <Todd.Miller@courtesan.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 */

#include <string.h>

/*
 * Reverse memchr()
 * Find the last occurrence of 'c' in the buffer 's' of size 'n'.
 */
#endif /* !HAVE_MEMRCHR */
+ */ +void * +memrchr(const void *s, int c, size_t n) +{ + const unsigned char *cp; + + if (n != 0) { + cp = (unsigned char *)s + n; + do { + if (*(--cp) == (unsigned char)c) + return((void *)cp); + } while (--n != 0); + } + return(NULL); +} +#endif /* !HAVE_MEMRCHR */ +#if !HAVE_MKFIFOAT +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> + +int +mkfifoat(int fd, const char *path, mode_t mode) +{ + int er, curfd = -1, newfd = -1; + + /* Get our current directory then switch to the given one. */ + + if (fd != AT_FDCWD) { + if ((curfd = open(".", O_RDONLY | O_DIRECTORY, 0)) == -1) + return -1; + if (fchdir(fd) == -1) + goto out; + } + + if ((newfd = mkfifo(path, mode)) == -1) + goto out; + + /* This leaves the fifo if it fails. */ + + if (curfd != -1 && fchdir(curfd) == -1) + goto out; + if (curfd != -1) + close(curfd); + + return newfd; +out: + /* Ignore errors in close(2). */ + + er = errno; + if (curfd != -1) + fchdir(curfd); + if (curfd != -1) + close(curfd); + if (newfd != -1) + close(newfd); + errno = er; + return -1; +} +#endif /* !HAVE_MKFIFOAT */ +#if !HAVE_MKNODAT +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> + +int +mknodat(int fd, const char *path, mode_t mode, dev_t dev) +{ + int er, curfd = -1, newfd = -1; + + /* Get our current directory then switch to the given one. */ + + if (fd != AT_FDCWD) { + if ((curfd = open(".", O_RDONLY | O_DIRECTORY, 0)) == -1) + return -1; + if (fchdir(fd) == -1) + goto out; + } + + if ((newfd = mknod(path, mode, dev)) == -1) + goto out; + + /* This leaves the node if it fails. */ + + if (curfd != -1 && fchdir(curfd) == -1) + goto out; + if (curfd != -1) + close(curfd); + + return newfd; +out: + + /* Ignore errors in close(2). 
*/ + + er = errno; + if (curfd != -1) + fchdir(curfd); + if (curfd != -1) + close(curfd); + if (newfd != -1) + close(newfd); + errno = er; + return -1; +} +#endif /* !HAVE_MKNODAT */ +#if !HAVE_READPASSPHRASE +/* + * Original: readpassphrase.c in OpenSSH portable + */ +/* + * Copyright (c) 2000-2002, 2007, 2010 + * Todd C. Miller <millert@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. 
 */

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <paths.h>
#include <pwd.h>
#include <signal.h>
#include <string.h>
#include <termios.h>
#include <unistd.h>

#if !defined(_NSIG) && defined(NSIG)
# define _NSIG NSIG
#endif

/*
 * One flag per signal number.  readpassphrase_handler() sets the entry
 * for every signal it catches; readpassphrase() clears all entries
 * before reading and inspects them afterwards.
 */
static volatile sig_atomic_t readpassphrase_signo[_NSIG];

/*
 * Signal handler installed by readpassphrase() while it is reading.
 * It only records that signal "s" arrived; the signal is re-sent by
 * readpassphrase() after the previous handlers have been restored.
 */
static void
readpassphrase_handler(int s)
{

	readpassphrase_signo[s] = 1;
}

/*
 * Write "prompt" and read one line of input into "buf".
 *
 * At most bufsiz - 1 characters are stored and the result is always
 * NUL-terminated; further characters on the line are read and dropped.
 * Reading stops at '\n', '\r', or when read(2) does not return 1.
 *
 * Input comes from _PATH_TTY opened read/write unless RPP_STDIN is set
 * or the open fails; in that case stdin is read and stderr is used for
 * output.  If that fallback is taken while RPP_REQUIRE_TTY is set the
 * call fails with ENOTTY.  The prompt is not written when RPP_STDIN
 * is set.
 *
 * Other flags handled here:
 *   RPP_ECHO_ON      leave terminal echo enabled
 *   RPP_SEVENBIT     clear the high bit of every stored character
 *   RPP_FORCELOWER   store alphabetic characters in lower case
 *   RPP_FORCEUPPER   store alphabetic characters in upper case
 *
 * Returns "buf", or NULL when bufsiz is 0 (errno = EINVAL), when a
 * required tty is unavailable (errno = ENOTTY), or when the final
 * read(2) returned -1.
 *
 * Signals caught while reading are re-delivered to the process once the
 * terminal settings and handlers are restored; after SIGTSTP, SIGTTIN
 * or SIGTTOU the whole prompt/read sequence starts over.
 */
char *
readpassphrase(const char *prompt, char *buf, size_t bufsiz, int flags)
{
	ssize_t nr;
	int input, output, save_errno, i, need_restart;
	char ch, *p, *end;
	struct termios term, oterm;
	struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm;
	struct sigaction savetstp, savettin, savettou, savepipe;
/* If we don't have TCSASOFT define it so that ORing it in below is a no-op. */
#ifndef TCSASOFT
	const int tcasoft = 0;
#else
	const int tcasoft = TCSASOFT;
#endif

	/* I suppose we could alloc on demand in this case (XXX). */
	if (bufsiz == 0) {
		errno = EINVAL;
		return(NULL);
	}

restart:
	/* Forget any signals recorded during a previous pass. */
	for (i = 0; i < _NSIG; i++)
		readpassphrase_signo[i] = 0;
	nr = -1;
	save_errno = 0;
	need_restart = 0;
	/*
	 * Read and write to /dev/tty if available.  If not, read from
	 * stdin and write to stderr unless a tty is required.
	 */
	if ((flags & RPP_STDIN) ||
	    (input = output = open(_PATH_TTY, O_RDWR)) == -1) {
		if (flags & RPP_REQUIRE_TTY) {
			errno = ENOTTY;
			return(NULL);
		}
		input = STDIN_FILENO;
		output = STDERR_FILENO;
	}

	/*
	 * Turn off echo if possible.
	 * If we are using a tty but are not the foreground pgrp this will
	 * generate SIGTTOU, so do it *before* installing the signal handlers.
	 */
	if (input != STDIN_FILENO && tcgetattr(input, &oterm) == 0) {
		memcpy(&term, &oterm, sizeof(term));
		if (!(flags & RPP_ECHO_ON))
			term.c_lflag &= ~(ECHO | ECHONL);
#ifdef VSTATUS
		if (term.c_cc[VSTATUS] != _POSIX_VDISABLE)
			term.c_cc[VSTATUS] = _POSIX_VDISABLE;
#endif
		(void)tcsetattr(input, TCSAFLUSH|tcasoft, &term);
	} else {
		/*
		 * No terminal settings to change: make term and oterm
		 * identical (with ECHO set) so that the newline echo and
		 * the restore step below are both skipped.
		 */
		memset(&term, 0, sizeof(term));
		term.c_lflag |= ECHO;
		memset(&oterm, 0, sizeof(oterm));
		oterm.c_lflag |= ECHO;
	}

	/*
	 * Catch signals that would otherwise cause the user to end
	 * up with echo turned off in the shell.  Don't worry about
	 * things like SIGXCPU and SIGVTALRM for now.
	 */
	sigemptyset(&sa.sa_mask);
	sa.sa_flags = 0;		/* don't restart system calls */
	sa.sa_handler = readpassphrase_handler;
	(void)sigaction(SIGALRM, &sa, &savealrm);
	(void)sigaction(SIGHUP, &sa, &savehup);
	(void)sigaction(SIGINT, &sa, &saveint);
	(void)sigaction(SIGPIPE, &sa, &savepipe);
	(void)sigaction(SIGQUIT, &sa, &savequit);
	(void)sigaction(SIGTERM, &sa, &saveterm);
	(void)sigaction(SIGTSTP, &sa, &savetstp);
	(void)sigaction(SIGTTIN, &sa, &savettin);
	(void)sigaction(SIGTTOU, &sa, &savettou);

	if (!(flags & RPP_STDIN))
		(void)write(output, prompt, strlen(prompt));
	/* "end" is the last slot, reserved for the terminating NUL. */
	end = buf + bufsiz - 1;
	p = buf;
	while ((nr = read(input, &ch, 1)) == 1 && ch != '\n' && ch != '\r') {
		/* Once the buffer is full, keep reading but drop the input. */
		if (p < end) {
			if ((flags & RPP_SEVENBIT))
				ch &= 0x7f;
			if (isalpha((unsigned char)ch)) {
				if ((flags & RPP_FORCELOWER))
					ch = (char)tolower((unsigned char)ch);
				if ((flags & RPP_FORCEUPPER))
					ch = (char)toupper((unsigned char)ch);
			}
			*p++ = ch;
		}
	}
	*p = '\0';
	/* Remember errno from read(2) before later calls can change it. */
	save_errno = errno;
	/* With echo off the user's newline was not shown; emit one. */
	if (!(term.c_lflag & ECHO))
		(void)write(output, "\n", 1);

	/* Restore old terminal settings and signals. */
	if (memcmp(&term, &oterm, sizeof(term)) != 0) {
		const int sigttou = readpassphrase_signo[SIGTTOU];

		/* Ignore SIGTTOU generated when we are not the fg pgrp. */
		while (tcsetattr(input, TCSAFLUSH|tcasoft, &oterm) == -1 &&
		    errno == EINTR && !readpassphrase_signo[SIGTTOU])
			continue;
		readpassphrase_signo[SIGTTOU] = sigttou;
	}
	(void)sigaction(SIGALRM, &savealrm, NULL);
	(void)sigaction(SIGHUP, &savehup, NULL);
	(void)sigaction(SIGINT, &saveint, NULL);
	(void)sigaction(SIGQUIT, &savequit, NULL);
	(void)sigaction(SIGPIPE, &savepipe, NULL);
	(void)sigaction(SIGTERM, &saveterm, NULL);
	(void)sigaction(SIGTSTP, &savetstp, NULL);
	(void)sigaction(SIGTTIN, &savettin, NULL);
	(void)sigaction(SIGTTOU, &savettou, NULL);
	if (input != STDIN_FILENO)
		(void)close(input);

	/*
	 * If we were interrupted by a signal, resend it to ourselves
	 * now that we have restored the signal handlers.
	 */
	for (i = 0; i < _NSIG; i++) {
		if (readpassphrase_signo[i]) {
			kill(getpid(), i);
			/* Job-control signals: prompt again afterwards. */
			switch (i) {
			case SIGTSTP:
			case SIGTTIN:
			case SIGTTOU:
				need_restart = 1;
			}
		}
	}
	if (need_restart)
		goto restart;

	if (save_errno)
		errno = save_errno;
	return(nr == -1 ? NULL : buf);
}
#endif /* !HAVE_READPASSPHRASE */
#if !HAVE_REALLOCARRAY
/*
 * Copyright (c) 2008 Otto Moerbeek <otto@drijf.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ + +#include <sys/types.h> +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> + +/* + * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX + * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW + */ +#define MUL_NO_OVERFLOW ((size_t)1 << (sizeof(size_t) * 4)) + +void * +reallocarray(void *optr, size_t nmemb, size_t size) +{ + if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && + nmemb > 0 && SIZE_MAX / nmemb < size) { + errno = ENOMEM; + return NULL; + } + return realloc(optr, size * nmemb); +} +#endif /* !HAVE_REALLOCARRAY */ +#if !HAVE_RECALLOCARRAY +/* + * Copyright (c) 2008, 2017 Otto Moerbeek <otto@drijf.net> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/* OPENBSD ORIGINAL: lib/libc/stdlib/recallocarray.c */ + +#include <errno.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> + +/* + * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX + * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW + */ +#define MUL_NO_OVERFLOW ((size_t)1 << (sizeof(size_t) * 4)) + +void * +recallocarray(void *ptr, size_t oldnmemb, size_t newnmemb, size_t size) +{ + size_t oldsize, newsize; + void *newptr; + + if (ptr == NULL) + return calloc(newnmemb, size); + + if ((newnmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && + newnmemb > 0 && SIZE_MAX / newnmemb < size) { + errno = ENOMEM; + return NULL; + } + newsize = newnmemb * size; + + if ((oldnmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && + oldnmemb > 0 && SIZE_MAX / oldnmemb < size) { + errno = EINVAL; + return NULL; + } + oldsize = oldnmemb * size; + + /* + * Don't bother too much if we're shrinking just a bit, + * we do not shrink for series of small steps, oh well. + */ + if (newsize <= oldsize) { + size_t d = oldsize - newsize; + + if (d < oldsize / 2 && d < (size_t)getpagesize()) { + memset((char *)ptr + newsize, 0, d); + return ptr; + } + } + + newptr = malloc(newsize); + if (newptr == NULL) + return NULL; + + if (newsize > oldsize) { + memcpy(newptr, ptr, oldsize); + memset((char *)newptr + oldsize, 0, newsize - oldsize); + } else + memcpy(newptr, ptr, newsize); + + explicit_bzero(ptr, oldsize); + free(ptr); + + return newptr; +} +#endif /* !HAVE_RECALLOCARRAY */ +#if !HAVE_SETRESGID +/* + * Copyright (c) 2004, 2005 Darren Tucker (dtucker at zip com au). + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <unistd.h> + +int +setresgid(gid_t rgid, gid_t egid, gid_t sgid) +{ + /* this is the only configuration tested */ + + if (rgid != egid || egid != sgid) + return -1; + + if (setregid(rgid, egid) == -1) + return -1; + + return 0; +} +#endif /* !HAVE_SETRESGID */ +#if !HAVE_SETRESUID +/* + * Copyright (c) 2004, 2005 Darren Tucker (dtucker at zip com au). + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> + +#include <errno.h> +#include <unistd.h> + +int +setresuid(uid_t ruid, uid_t euid, uid_t suid) +{ + uid_t ouid; + int ret = -1; + + /* Allow only the tested configuration. 
*/ + + if (ruid != euid || euid != suid) { + errno = ENOSYS; + return -1; + } + ouid = getuid(); + + if ((ret = setreuid(euid, euid)) == -1) + return -1; + + /* + * When real, effective and saved uids are the same and we have + * changed uids, sanity check that we cannot restore the old uid. + */ + + if (ruid == euid && euid == suid && ouid != ruid && + setuid(ouid) != -1 && seteuid(ouid) != -1) { + errno = EINVAL; + return -1; + } + + /* + * Finally, check that the real and effective uids are what we + * expect. + */ + if (getuid() != ruid || geteuid() != euid) { + errno = EACCES; + return -1; + } + + return ret; +} +#endif /* !HAVE_SETRESUID */ +#if !HAVE_SHA2 +/* $OpenBSD$ */ + +/* + * FILE: sha2.c + * AUTHOR: Aaron D. Gifford <me@aarongifford.com> + * + * Copyright (c) 2000-2001, Aaron D. Gifford + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $From: sha2.c,v 1.1 2001/11/08 00:01:51 adg Exp adg $
 */

/* OPENBSD ORIGINAL: lib/libc/hash/sha2.c */

/*
 * no-op out, similar to DEF_WEAK but only needed here
 *
 * Expands to a prototype for a dummy function, so each use is just a
 * harmless declaration.
 * NOTE(review): in the replacement list "x_" is a single identifier
 * token rather than the parameter x, so only y appears to be pasted
 * into the generated name -- confirm that this is intended.
 */
#define MAKE_CLONE(x, y)	void __ssh_compat_make_clone_##x_##y(void)

#include <sys/types.h>
#include <sys/stat.h>

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Smaller of two values; both arguments may be evaluated twice. */
#ifndef MINIMUM
# define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
#endif

/*
 * If the system headers did not provide BYTE_ORDER, derive it together
 * with LITTLE_ENDIAN and BIG_ENDIAN from the compiler's predefined
 * __BYTE_ORDER__ / __ORDER_*_ENDIAN__ macros, and stop the build if
 * those are missing or if only part of the set is already defined.
 */
#ifndef BYTE_ORDER
# if defined(LITTLE_ENDIAN) || defined(BIG_ENDIAN)
# error Confusion in endian macros.
# endif
# if !defined(__BYTE_ORDER__)
# error Byte order macro not found.
# endif
# if !defined(__ORDER_LITTLE_ENDIAN__) || !defined(__ORDER_BIG_ENDIAN__)
# error Little/big endian macros not found.
# endif
# define BYTE_ORDER __BYTE_ORDER__
# define LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__
# define BIG_ENDIAN __ORDER_BIG_ENDIAN__
#endif /*!BYTE_ORDER*/

/*
 * UNROLLED TRANSFORM LOOP NOTE:
 * You can define SHA2_UNROLL_TRANSFORM to use the unrolled transform
 * loop version for the hash transform rounds (defined using macros
 * later in this file).  Either define on the command line, for example:
 *
 *   cc -DSHA2_UNROLL_TRANSFORM -o sha2 sha2.c sha2prog.c
 *
 * or define below:
 *
 *   #define SHA2_UNROLL_TRANSFORM
 *
 */
/* The unrolled variant is selected automatically on amd64 and i386. */
#if defined(__amd64__) || defined(__i386__)
#define SHA2_UNROLL_TRANSFORM
#endif

/*** SHA-224/256/384/512 Machine Architecture Definitions *****************/
/*
 * BYTE_ORDER NOTE:
 *
 * Please make sure that your system defines BYTE_ORDER.  If your
 * architecture is little-endian, make sure it also defines
 * LITTLE_ENDIAN and that the two (BYTE_ORDER and LITTLE_ENDIAN) are
 * equivalent.
 *
 * If your system does not define the above, then you can do so by
 * hand like this:
 *
 *   #define LITTLE_ENDIAN 1234
 *   #define BIG_ENDIAN    4321
 *
 * And for little-endian machines, add:
 *
 *   #define BYTE_ORDER LITTLE_ENDIAN
 *
 * Or for big-endian machines:
 *
 *   #define BYTE_ORDER BIG_ENDIAN
 *
 * The FreeBSD machine this was written on defines BYTE_ORDER
 * appropriately by including <sys/types.h> (which in turn includes
 * <machine/endian.h> where the appropriate definitions are actually
 * made).
 */
#if !defined(BYTE_ORDER) || (BYTE_ORDER != LITTLE_ENDIAN && BYTE_ORDER != BIG_ENDIAN)
#error Define BYTE_ORDER to be equal to either LITTLE_ENDIAN or BIG_ENDIAN
#endif


/*** SHA-224/256/384/512 Various Length Definitions ***********************/
/* NOTE: Most of these are in sha2.h */
/*
 * Block length minus the bytes reserved at the end of the final block
 * for the message bit count (8 for SHA-224/256, 16 for SHA-384/512).
 */
#define SHA224_SHORT_BLOCK_LENGTH	(SHA224_BLOCK_LENGTH - 8)
#define SHA256_SHORT_BLOCK_LENGTH	(SHA256_BLOCK_LENGTH - 8)
#define SHA384_SHORT_BLOCK_LENGTH	(SHA384_BLOCK_LENGTH - 16)
#define SHA512_SHORT_BLOCK_LENGTH	(SHA512_BLOCK_LENGTH - 16)

/*** ENDIAN SPECIFIC COPY MACROS **************************************/
/* Load the 4 bytes at cp, most significant byte first, into dst. */
#define BE_8_TO_32(dst, cp) do {					\
	(dst) = (uint32_t)(cp)[3] | ((uint32_t)(cp)[2] << 8) |		\
	    ((uint32_t)(cp)[1] << 16) | ((uint32_t)(cp)[0] << 24);	\
} while(0)

/* Load the 8 bytes at cp, most significant byte first, into dst. */
#define BE_8_TO_64(dst, cp) do {					\
	(dst) = (uint64_t)(cp)[7] | ((uint64_t)(cp)[6] << 8) |		\
	    ((uint64_t)(cp)[5] << 16) | ((uint64_t)(cp)[4] << 24) |	\
	    ((uint64_t)(cp)[3] << 32) | ((uint64_t)(cp)[2] << 40) |	\
	    ((uint64_t)(cp)[1] << 48) | ((uint64_t)(cp)[0] << 56);	\
} while (0)

/* Store the 64-bit value src at cp, most significant byte first. */
#define BE_64_TO_8(cp, src) do {					\
	(cp)[0] = (src) >> 56;						\
	(cp)[1] = (src) >> 48;						\
	(cp)[2] = (src) >> 40;						\
	(cp)[3] = (src) >> 32;						\
	(cp)[4] = (src) >> 24;						\
	(cp)[5] = (src) >> 16;						\
	(cp)[6] = (src) >> 8;						\
	(cp)[7] = (src);						\
} while (0)

/* Store the 32-bit value src at cp, most significant byte first. */
#define BE_32_TO_8(cp, src) do {					\
	(cp)[0] = (src) >> 24;						\
	(cp)[1] = (src) >> 16;						\
	(cp)[2] = (src) >> 8;						\
	(cp)[3] = (src);						\
} while (0)

/*
 * Macro for incrementally adding the unsigned 64-bit integer n to the
 * unsigned 128-bit integer (represented using a two-element array of
 * 64-bit words):
 *
 * w[0] is the low word and w[1] the high word; a carry is detected by
 * the low word ending up smaller than n.  Note that n is evaluated
 * twice.
 */
#define ADDINC128(w,n) do {						\
	(w)[0] += (uint64_t)(n);					\
	if ((w)[0] < (n)) {						\
		(w)[1]++;						\
	}								\
} while (0)

/*** THE SIX LOGICAL FUNCTIONS ****************************************/
/*
 * Bit shifting and rotation (used by the six SHA-XYZ logical functions):
 *
 * NOTE: The naming of R and S appears backwards here (R is a
SHIFT and + * S is a ROTATION) because the SHA-224/256/384/512 description document + * (see http://csrc.nist.gov/cryptval/shs/sha256-384-512.pdf) uses this + * same "backwards" definition. + */ +/* Shift-right (used in SHA-224, SHA-256, SHA-384, and SHA-512): */ +#define R(b,x) ((x) >> (b)) +/* 32-bit Rotate-right (used in SHA-224 and SHA-256): */ +#define S32(b,x) (((x) >> (b)) | ((x) << (32 - (b)))) +/* 64-bit Rotate-right (used in SHA-384 and SHA-512): */ +#define S64(b,x) (((x) >> (b)) | ((x) << (64 - (b)))) + +/* Two of six logical functions used in SHA-224, SHA-256, SHA-384, and SHA-512: */ +#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) +#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +/* Four of six logical functions used in SHA-224 and SHA-256: */ +#define Sigma0_256(x) (S32(2, (x)) ^ S32(13, (x)) ^ S32(22, (x))) +#define Sigma1_256(x) (S32(6, (x)) ^ S32(11, (x)) ^ S32(25, (x))) +#define sigma0_256(x) (S32(7, (x)) ^ S32(18, (x)) ^ R(3 , (x))) +#define sigma1_256(x) (S32(17, (x)) ^ S32(19, (x)) ^ R(10, (x))) + +/* Four of six logical functions used in SHA-384 and SHA-512: */ +#define Sigma0_512(x) (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x))) +#define Sigma1_512(x) (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x))) +#define sigma0_512(x) (S64( 1, (x)) ^ S64( 8, (x)) ^ R( 7, (x))) +#define sigma1_512(x) (S64(19, (x)) ^ S64(61, (x)) ^ R( 6, (x))) + + +/*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/ +/* Hash constant words K for SHA-224 and SHA-256: */ +static const uint32_t K256[64] = { + 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, + 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, + 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, + 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, + 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, + 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, + 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, + 0xc6e00bf3UL, 0xd5a79147UL, 
0x06ca6351UL, 0x14292967UL, + 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, + 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, + 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, + 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, + 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, + 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, + 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL +}; + +/* Initial hash value H for SHA-256: */ +static const uint32_t sha256_initial_hash_value[8] = { + 0x6a09e667UL, + 0xbb67ae85UL, + 0x3c6ef372UL, + 0xa54ff53aUL, + 0x510e527fUL, + 0x9b05688cUL, + 0x1f83d9abUL, + 0x5be0cd19UL +}; + +/* Hash constant words K for SHA-384 and SHA-512: */ +static const uint64_t K512[80] = { + 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, + 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, + 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, + 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, + 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, + 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, + 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, + 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, + 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, + 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, + 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, + 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, + 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, + 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, + 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, + 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, + 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, + 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, + 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, + 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, + 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, + 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, + 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, + 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, + 0x19a4c116b8d2d0c8ULL, 
0x1e376c085141ab53ULL, + 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, + 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, + 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, + 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, + 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, + 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, + 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, + 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, + 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, + 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, + 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, + 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, + 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, + 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL +}; + +/* Initial hash value H for SHA-512 */ +static const uint64_t sha512_initial_hash_value[8] = { + 0x6a09e667f3bcc908ULL, + 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, + 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, + 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, + 0x5be0cd19137e2179ULL +}; + +/* Initial hash value H for SHA-384 */ +static const uint64_t sha384_initial_hash_value[8] = { + 0xcbbb9d5dc1059ed8ULL, + 0x629a292a367cd507ULL, + 0x9159015a3070dd17ULL, + 0x152fecd8f70e5939ULL, + 0x67332667ffc00b31ULL, + 0x8eb44a8768581511ULL, + 0xdb0c2e0d64f98fa7ULL, + 0x47b5481dbefa4fa4ULL +}; + +/*** SHA-256: *********************************************************/ +void +SHA256Init(SHA2_CTX *context) +{ + memcpy(context->state.st32, sha256_initial_hash_value, + sizeof(sha256_initial_hash_value)); + memset(context->buffer, 0, sizeof(context->buffer)); + context->bitcount[0] = 0; +} + +#ifdef SHA2_UNROLL_TRANSFORM + +/* Unrolled SHA-256 round macros: */ + +#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) do { \ + BE_8_TO_32(W256[j], data); \ + data += 4; \ + T1 = (h) + Sigma1_256((e)) + Ch((e), (f), (g)) + K256[j] + W256[j]; \ + (d) += T1; \ + (h) = T1 + Sigma0_256((a)) + Maj((a), (b), (c)); \ + j++; \ +} while(0) + +#define 
ROUND256(a,b,c,d,e,f,g,h) do { \ + s0 = W256[(j+1)&0x0f]; \ + s0 = sigma0_256(s0); \ + s1 = W256[(j+14)&0x0f]; \ + s1 = sigma1_256(s1); \ + T1 = (h) + Sigma1_256((e)) + Ch((e), (f), (g)) + K256[j] + \ + (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \ + (d) += T1; \ + (h) = T1 + Sigma0_256((a)) + Maj((a), (b), (c)); \ + j++; \ +} while(0) + +void +SHA256Transform(uint32_t state[8], const uint8_t data[SHA256_BLOCK_LENGTH]) +{ + uint32_t a, b, c, d, e, f, g, h, s0, s1; + uint32_t T1, W256[16]; + int j; + + /* Initialize registers with the prev. intermediate value */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + f = state[5]; + g = state[6]; + h = state[7]; + + j = 0; + do { + /* Rounds 0 to 15 (unrolled): */ + ROUND256_0_TO_15(a,b,c,d,e,f,g,h); + ROUND256_0_TO_15(h,a,b,c,d,e,f,g); + ROUND256_0_TO_15(g,h,a,b,c,d,e,f); + ROUND256_0_TO_15(f,g,h,a,b,c,d,e); + ROUND256_0_TO_15(e,f,g,h,a,b,c,d); + ROUND256_0_TO_15(d,e,f,g,h,a,b,c); + ROUND256_0_TO_15(c,d,e,f,g,h,a,b); + ROUND256_0_TO_15(b,c,d,e,f,g,h,a); + } while (j < 16); + + /* Now for the remaining rounds up to 63: */ + do { + ROUND256(a,b,c,d,e,f,g,h); + ROUND256(h,a,b,c,d,e,f,g); + ROUND256(g,h,a,b,c,d,e,f); + ROUND256(f,g,h,a,b,c,d,e); + ROUND256(e,f,g,h,a,b,c,d); + ROUND256(d,e,f,g,h,a,b,c); + ROUND256(c,d,e,f,g,h,a,b); + ROUND256(b,c,d,e,f,g,h,a); + } while (j < 64); + + /* Compute the current intermediate hash value */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + state[5] += f; + state[6] += g; + state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = 0; +} + +#else /* SHA2_UNROLL_TRANSFORM */ + +void +SHA256Transform(uint32_t state[8], const uint8_t data[SHA256_BLOCK_LENGTH]) +{ + uint32_t a, b, c, d, e, f, g, h, s0, s1; + uint32_t T1, T2, W256[16]; + int j; + + /* Initialize registers with the prev. 
intermediate value */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + f = state[5]; + g = state[6]; + h = state[7]; + + j = 0; + do { + BE_8_TO_32(W256[j], data); + data += 4; + /* Apply the SHA-256 compression function to update a..h */ + T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j]; + T2 = Sigma0_256(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 16); + + do { + /* Part of the message block expansion: */ + s0 = W256[(j+1)&0x0f]; + s0 = sigma0_256(s0); + s1 = W256[(j+14)&0x0f]; + s1 = sigma1_256(s1); + + /* Apply the SHA-256 compression function to update a..h */ + T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + + (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); + T2 = Sigma0_256(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 64); + + /* Compute the current intermediate hash value */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + state[5] += f; + state[6] += g; + state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = T2 = 0; +} + +#endif /* SHA2_UNROLL_TRANSFORM */ + +void +SHA256Update(SHA2_CTX *context, const uint8_t *data, size_t len) +{ + uint64_t freespace, usedspace; + + /* Calling with no data is valid (we do nothing) */ + if (len == 0) + return; + + usedspace = (context->bitcount[0] >> 3) % SHA256_BLOCK_LENGTH; + if (usedspace > 0) { + /* Calculate how much free space is available in the buffer */ + freespace = SHA256_BLOCK_LENGTH - usedspace; + + if (len >= freespace) { + /* Fill the buffer completely and process it */ + memcpy(&context->buffer[usedspace], data, freespace); + context->bitcount[0] += freespace << 3; + len -= freespace; + data += freespace; + SHA256Transform(context->state.st32, context->buffer); + } else { + /* The buffer is not yet full */ + memcpy(&context->buffer[usedspace], data, 
len); + context->bitcount[0] += (uint64_t)len << 3; + /* Clean up: */ + usedspace = freespace = 0; + return; + } + } + while (len >= SHA256_BLOCK_LENGTH) { + /* Process as many complete blocks as we can */ + SHA256Transform(context->state.st32, data); + context->bitcount[0] += SHA256_BLOCK_LENGTH << 3; + len -= SHA256_BLOCK_LENGTH; + data += SHA256_BLOCK_LENGTH; + } + if (len > 0) { + /* There's left-overs, so save 'em */ + memcpy(context->buffer, data, len); + context->bitcount[0] += len << 3; + } + /* Clean up: */ + usedspace = freespace = 0; +} + +void +SHA256Pad(SHA2_CTX *context) +{ + unsigned int usedspace; + + usedspace = (context->bitcount[0] >> 3) % SHA256_BLOCK_LENGTH; + if (usedspace > 0) { + /* Begin padding with a 1 bit: */ + context->buffer[usedspace++] = 0x80; + + if (usedspace <= SHA256_SHORT_BLOCK_LENGTH) { + /* Set-up for the last transform: */ + memset(&context->buffer[usedspace], 0, + SHA256_SHORT_BLOCK_LENGTH - usedspace); + } else { + if (usedspace < SHA256_BLOCK_LENGTH) { + memset(&context->buffer[usedspace], 0, + SHA256_BLOCK_LENGTH - usedspace); + } + /* Do second-to-last transform: */ + SHA256Transform(context->state.st32, context->buffer); + + /* Prepare for last transform: */ + memset(context->buffer, 0, SHA256_SHORT_BLOCK_LENGTH); + } + } else { + /* Set-up for the last transform: */ + memset(context->buffer, 0, SHA256_SHORT_BLOCK_LENGTH); + + /* Begin padding with a 1 bit: */ + *context->buffer = 0x80; + } + /* Store the length of input data (in bits) in big endian format: */ + BE_64_TO_8(&context->buffer[SHA256_SHORT_BLOCK_LENGTH], + context->bitcount[0]); + + /* Final transform: */ + SHA256Transform(context->state.st32, context->buffer); + + /* Clean up: */ + usedspace = 0; +} + +void +SHA256Final(uint8_t digest[SHA256_DIGEST_LENGTH], SHA2_CTX *context) +{ + SHA256Pad(context); + +#if BYTE_ORDER == LITTLE_ENDIAN + int i; + + /* Convert TO host byte order */ + for (i = 0; i < 8; i++) + BE_32_TO_8(digest + i * 4, 
context->state.st32[i]); +#else + memcpy(digest, context->state.st32, SHA256_DIGEST_LENGTH); +#endif + explicit_bzero(context, sizeof(*context)); +} + + +/*** SHA-512: *********************************************************/ +void +SHA512Init(SHA2_CTX *context) +{ + memcpy(context->state.st64, sha512_initial_hash_value, + sizeof(sha512_initial_hash_value)); + memset(context->buffer, 0, sizeof(context->buffer)); + context->bitcount[0] = context->bitcount[1] = 0; +} + +#ifdef SHA2_UNROLL_TRANSFORM + +/* Unrolled SHA-512 round macros: */ + +#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) do { \ + BE_8_TO_64(W512[j], data); \ + data += 8; \ + T1 = (h) + Sigma1_512((e)) + Ch((e), (f), (g)) + K512[j] + W512[j]; \ + (d) += T1; \ + (h) = T1 + Sigma0_512((a)) + Maj((a), (b), (c)); \ + j++; \ +} while(0) + + +#define ROUND512(a,b,c,d,e,f,g,h) do { \ + s0 = W512[(j+1)&0x0f]; \ + s0 = sigma0_512(s0); \ + s1 = W512[(j+14)&0x0f]; \ + s1 = sigma1_512(s1); \ + T1 = (h) + Sigma1_512((e)) + Ch((e), (f), (g)) + K512[j] + \ + (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); \ + (d) += T1; \ + (h) = T1 + Sigma0_512((a)) + Maj((a), (b), (c)); \ + j++; \ +} while(0) + +void +SHA512Transform(uint64_t state[8], const uint8_t data[SHA512_BLOCK_LENGTH]) +{ + uint64_t a, b, c, d, e, f, g, h, s0, s1; + uint64_t T1, W512[16]; + int j; + + /* Initialize registers with the prev. 
intermediate value */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + f = state[5]; + g = state[6]; + h = state[7]; + + j = 0; + do { + /* Rounds 0 to 15 (unrolled): */ + ROUND512_0_TO_15(a,b,c,d,e,f,g,h); + ROUND512_0_TO_15(h,a,b,c,d,e,f,g); + ROUND512_0_TO_15(g,h,a,b,c,d,e,f); + ROUND512_0_TO_15(f,g,h,a,b,c,d,e); + ROUND512_0_TO_15(e,f,g,h,a,b,c,d); + ROUND512_0_TO_15(d,e,f,g,h,a,b,c); + ROUND512_0_TO_15(c,d,e,f,g,h,a,b); + ROUND512_0_TO_15(b,c,d,e,f,g,h,a); + } while (j < 16); + + /* Now for the remaining rounds up to 79: */ + do { + ROUND512(a,b,c,d,e,f,g,h); + ROUND512(h,a,b,c,d,e,f,g); + ROUND512(g,h,a,b,c,d,e,f); + ROUND512(f,g,h,a,b,c,d,e); + ROUND512(e,f,g,h,a,b,c,d); + ROUND512(d,e,f,g,h,a,b,c); + ROUND512(c,d,e,f,g,h,a,b); + ROUND512(b,c,d,e,f,g,h,a); + } while (j < 80); + + /* Compute the current intermediate hash value */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + state[5] += f; + state[6] += g; + state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = 0; +} + +#else /* SHA2_UNROLL_TRANSFORM */ + +void +SHA512Transform(uint64_t state[8], const uint8_t data[SHA512_BLOCK_LENGTH]) +{ + uint64_t a, b, c, d, e, f, g, h, s0, s1; + uint64_t T1, T2, W512[16]; + int j; + + /* Initialize registers with the prev. 
intermediate value */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + f = state[5]; + g = state[6]; + h = state[7]; + + j = 0; + do { + BE_8_TO_64(W512[j], data); + data += 8; + /* Apply the SHA-512 compression function to update a..h */ + T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + W512[j]; + T2 = Sigma0_512(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 16); + + do { + /* Part of the message block expansion: */ + s0 = W512[(j+1)&0x0f]; + s0 = sigma0_512(s0); + s1 = W512[(j+14)&0x0f]; + s1 = sigma1_512(s1); + + /* Apply the SHA-512 compression function to update a..h */ + T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + + (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); + T2 = Sigma0_512(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 80); + + /* Compute the current intermediate hash value */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + state[5] += f; + state[6] += g; + state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = T2 = 0; +} + +#endif /* SHA2_UNROLL_TRANSFORM */ + +void +SHA512Update(SHA2_CTX *context, const uint8_t *data, size_t len) +{ + size_t freespace, usedspace; + + /* Calling with no data is valid (we do nothing) */ + if (len == 0) + return; + + usedspace = (context->bitcount[0] >> 3) % SHA512_BLOCK_LENGTH; + if (usedspace > 0) { + /* Calculate how much free space is available in the buffer */ + freespace = SHA512_BLOCK_LENGTH - usedspace; + + if (len >= freespace) { + /* Fill the buffer completely and process it */ + memcpy(&context->buffer[usedspace], data, freespace); + ADDINC128(context->bitcount, freespace << 3); + len -= freespace; + data += freespace; + SHA512Transform(context->state.st64, context->buffer); + } else { + /* The buffer is not yet full */ + memcpy(&context->buffer[usedspace], 
data, len); + ADDINC128(context->bitcount, len << 3); + /* Clean up: */ + usedspace = freespace = 0; + return; + } + } + while (len >= SHA512_BLOCK_LENGTH) { + /* Process as many complete blocks as we can */ + SHA512Transform(context->state.st64, data); + ADDINC128(context->bitcount, SHA512_BLOCK_LENGTH << 3); + len -= SHA512_BLOCK_LENGTH; + data += SHA512_BLOCK_LENGTH; + } + if (len > 0) { + /* There's left-overs, so save 'em */ + memcpy(context->buffer, data, len); + ADDINC128(context->bitcount, len << 3); + } + /* Clean up: */ + usedspace = freespace = 0; +} + +void +SHA512Pad(SHA2_CTX *context) +{ + unsigned int usedspace; + + usedspace = (context->bitcount[0] >> 3) % SHA512_BLOCK_LENGTH; + if (usedspace > 0) { + /* Begin padding with a 1 bit: */ + context->buffer[usedspace++] = 0x80; + + if (usedspace <= SHA512_SHORT_BLOCK_LENGTH) { + /* Set-up for the last transform: */ + memset(&context->buffer[usedspace], 0, SHA512_SHORT_BLOCK_LENGTH - usedspace); + } else { + if (usedspace < SHA512_BLOCK_LENGTH) { + memset(&context->buffer[usedspace], 0, SHA512_BLOCK_LENGTH - usedspace); + } + /* Do second-to-last transform: */ + SHA512Transform(context->state.st64, context->buffer); + + /* And set-up for the last transform: */ + memset(context->buffer, 0, SHA512_BLOCK_LENGTH - 2); + } + } else { + /* Prepare for final transform: */ + memset(context->buffer, 0, SHA512_SHORT_BLOCK_LENGTH); + + /* Begin padding with a 1 bit: */ + *context->buffer = 0x80; + } + /* Store the length of input data (in bits) in big endian format: */ + BE_64_TO_8(&context->buffer[SHA512_SHORT_BLOCK_LENGTH], + context->bitcount[1]); + BE_64_TO_8(&context->buffer[SHA512_SHORT_BLOCK_LENGTH + 8], + context->bitcount[0]); + + /* Final transform: */ + SHA512Transform(context->state.st64, context->buffer); + + /* Clean up: */ + usedspace = 0; +} + +void +SHA512Final(uint8_t digest[SHA512_DIGEST_LENGTH], SHA2_CTX *context) +{ + SHA512Pad(context); + +#if BYTE_ORDER == LITTLE_ENDIAN + int i; + + /* Convert 
TO host byte order */ + for (i = 0; i < 8; i++) + BE_64_TO_8(digest + i * 8, context->state.st64[i]); +#else + memcpy(digest, context->state.st64, SHA512_DIGEST_LENGTH); +#endif + explicit_bzero(context, sizeof(*context)); +} + +/*** SHA-384: *********************************************************/ +void +SHA384Init(SHA2_CTX *context) +{ + memcpy(context->state.st64, sha384_initial_hash_value, + sizeof(sha384_initial_hash_value)); + memset(context->buffer, 0, sizeof(context->buffer)); + context->bitcount[0] = context->bitcount[1] = 0; +} + +MAKE_CLONE(SHA384Transform, SHA512Transform); +MAKE_CLONE(SHA384Update, SHA512Update); +MAKE_CLONE(SHA384Pad, SHA512Pad); + +/* Equivalent of MAKE_CLONE (which is a no-op) for SHA384 funcs */ +void +SHA384Transform(uint64_t state[8], const uint8_t data[SHA512_BLOCK_LENGTH]) +{ + SHA512Transform(state, data); +} + +void +SHA384Update(SHA2_CTX *context, const uint8_t *data, size_t len) +{ + SHA512Update(context, data, len); +} + +void +SHA384Pad(SHA2_CTX *context) +{ + SHA512Pad(context); +} + +void +SHA384Final(uint8_t digest[SHA384_DIGEST_LENGTH], SHA2_CTX *context) +{ + SHA384Pad(context); + +#if BYTE_ORDER == LITTLE_ENDIAN + int i; + + /* Convert TO host byte order */ + for (i = 0; i < 6; i++) + BE_64_TO_8(digest + i * 8, context->state.st64[i]); +#else + memcpy(digest, context->state.st64, SHA384_DIGEST_LENGTH); +#endif + /* Zero out state data */ + explicit_bzero(context, sizeof(*context)); +} + +char * +SHA256End(SHA2_CTX *ctx, char *buf) +{ + int i; + uint8_t digest[SHA256_DIGEST_LENGTH]; + static const char hex[] = "0123456789abcdef"; + + if (buf == NULL && (buf = malloc(SHA256_DIGEST_STRING_LENGTH)) == NULL) + return (NULL); + + SHA256Final(digest, ctx); + for (i = 0; i < SHA256_DIGEST_LENGTH; i++) { + buf[i + i] = hex[digest[i] >> 4]; + buf[i + i + 1] = hex[digest[i] & 0x0f]; + } + buf[i + i] = '\0'; + explicit_bzero(digest, sizeof(digest)); + return (buf); +} + +char * +SHA384End(SHA2_CTX *ctx, char *buf) +{ + int i; 
+ uint8_t digest[SHA384_DIGEST_LENGTH]; + static const char hex[] = "0123456789abcdef"; + + if (buf == NULL && (buf = malloc(SHA384_DIGEST_STRING_LENGTH)) == NULL) + return (NULL); + + SHA384Final(digest, ctx); + for (i = 0; i < SHA384_DIGEST_LENGTH; i++) { + buf[i + i] = hex[digest[i] >> 4]; + buf[i + i + 1] = hex[digest[i] & 0x0f]; + } + buf[i + i] = '\0'; + explicit_bzero(digest, sizeof(digest)); + return (buf); +} + +char * +SHA512End(SHA2_CTX *ctx, char *buf) +{ + int i; + uint8_t digest[SHA512_DIGEST_LENGTH]; + static const char hex[] = "0123456789abcdef"; + + if (buf == NULL && (buf = malloc(SHA512_DIGEST_STRING_LENGTH)) == NULL) + return (NULL); + + SHA512Final(digest, ctx); + for (i = 0; i < SHA512_DIGEST_LENGTH; i++) { + buf[i + i] = hex[digest[i] >> 4]; + buf[i + i + 1] = hex[digest[i] & 0x0f]; + } + buf[i + i] = '\0'; + explicit_bzero(digest, sizeof(digest)); + return (buf); +} + +char * +SHA256FileChunk(const char *filename, char *buf, off_t off, off_t len) +{ + struct stat sb; + u_char buffer[BUFSIZ]; + SHA2_CTX ctx; + int fd, save_errno; + ssize_t nr; + + SHA256Init(&ctx); + + if ((fd = open(filename, O_RDONLY)) == -1) + return (NULL); + if (len == 0) { + if (fstat(fd, &sb) == -1) { + save_errno = errno; + close(fd); + errno = save_errno; + return (NULL); + } + len = sb.st_size; + } + if (off > 0 && lseek(fd, off, SEEK_SET) == -1) { + save_errno = errno; + close(fd); + errno = save_errno; + return (NULL); + } + + while ((nr = read(fd, buffer, MINIMUM(sizeof(buffer), (size_t)len))) > 0) { + SHA256Update(&ctx, buffer, nr); + if (len > 0 && (len -= nr) == 0) + break; + } + + save_errno = errno; + close(fd); + errno = save_errno; + return (nr == -1 ? 
NULL : SHA256End(&ctx, buf)); +} + +char * +SHA256File(const char *filename, char *buf) +{ + return (SHA256FileChunk(filename, buf, 0, 0)); +} + +char * +SHA384FileChunk(const char *filename, char *buf, off_t off, off_t len) +{ + struct stat sb; + u_char buffer[BUFSIZ]; + SHA2_CTX ctx; + int fd, save_errno; + ssize_t nr; + + SHA384Init(&ctx); + + if ((fd = open(filename, O_RDONLY)) == -1) + return (NULL); + if (len == 0) { + if (fstat(fd, &sb) == -1) { + save_errno = errno; + close(fd); + errno = save_errno; + return (NULL); + } + len = sb.st_size; + } + if (off > 0 && lseek(fd, off, SEEK_SET) == -1) { + save_errno = errno; + close(fd); + errno = save_errno; + return (NULL); + } + + while ((nr = read(fd, buffer, MINIMUM(sizeof(buffer), (size_t)len))) > 0) { + SHA384Update(&ctx, buffer, nr); + if (len > 0 && (len -= nr) == 0) + break; + } + + save_errno = errno; + close(fd); + errno = save_errno; + return (nr == -1 ? NULL : SHA384End(&ctx, buf)); +} + +char * +SHA384File(const char *filename, char *buf) +{ + return (SHA384FileChunk(filename, buf, 0, 0)); +} + +char * +SHA512FileChunk(const char *filename, char *buf, off_t off, off_t len) +{ + struct stat sb; + u_char buffer[BUFSIZ]; + SHA2_CTX ctx; + int fd, save_errno; + ssize_t nr; + + SHA512Init(&ctx); + + if ((fd = open(filename, O_RDONLY)) == -1) + return (NULL); + if (len == 0) { + if (fstat(fd, &sb) == -1) { + save_errno = errno; + close(fd); + errno = save_errno; + return (NULL); + } + len = sb.st_size; + } + if (off > 0 && lseek(fd, off, SEEK_SET) == -1) { + save_errno = errno; + close(fd); + errno = save_errno; + return (NULL); + } + + while ((nr = read(fd, buffer, MINIMUM(sizeof(buffer), (size_t)len))) > 0) { + SHA512Update(&ctx, buffer, nr); + if (len > 0 && (len -= nr) == 0) + break; + } + + save_errno = errno; + close(fd); + errno = save_errno; + return (nr == -1 ? 
NULL : SHA512End(&ctx, buf)); +} + +char * +SHA512File(const char *filename, char *buf) +{ + return (SHA512FileChunk(filename, buf, 0, 0)); +} + +char * +SHA256Data(const u_char *data, size_t len, char *buf) +{ + SHA2_CTX ctx; + + SHA256Init(&ctx); + SHA256Update(&ctx, data, len); + return (SHA256End(&ctx, buf)); +} + +char * +SHA384Data(const u_char *data, size_t len, char *buf) +{ + SHA2_CTX ctx; + + SHA384Init(&ctx); + SHA384Update(&ctx, data, len); + return (SHA384End(&ctx, buf)); +} + +char * +SHA512Data(const u_char *data, size_t len, char *buf) +{ + SHA2_CTX ctx; + + SHA512Init(&ctx); + SHA512Update(&ctx, data, len); + return (SHA512End(&ctx, buf)); +} +#endif /* !HAVE_SHA2 */ +#if !HAVE_STRLCAT +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <string.h> + +/* + * Appends src to string dst of size siz (unlike strncat, siz is the + * full size of dst, not space left). At most siz-1 characters + * will be copied. Always NUL terminates (unless siz <= strlen(dst)). + * Returns strlen(src) + MIN(siz, strlen(initial dst)). + * If retval >= siz, truncation occurred. 
+ */ +size_t +strlcat(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + size_t dlen; + + /* Find the end of dst and adjust bytes left but don't go past end */ + while (n-- != 0 && *d != '\0') + d++; + dlen = d - dst; + n = siz - dlen; + + if (n == 0) + return(dlen + strlen(s)); + while (*s != '\0') { + if (n != 1) { + *d++ = *s; + n--; + } + s++; + } + *d = '\0'; + + return(dlen + (s - src)); /* count does not include NUL */ +} +#endif /* !HAVE_STRLCAT */ +#if !HAVE_STRLCPY +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <string.h> + +/* + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred. 
+ */ +size_t +strlcpy(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + + /* Not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + + return(s - src - 1); /* count does not include NUL */ +} +#endif /* !HAVE_STRLCPY */ +#if !HAVE_STRNDUP +/* $OpenBSD$ */ +/* + * Copyright (c) 2010 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +char * +strndup(const char *str, size_t maxlen) +{ + char *copy; + size_t len; + + len = strnlen(str, maxlen); + copy = malloc(len + 1); + if (copy != NULL) { + (void)memcpy(copy, str, len); + copy[len] = '\0'; + } + + return copy; +} +#endif /* !HAVE_STRNDUP */ +#if !HAVE_STRNLEN +/* $OpenBSD$ */ + +/* + * Copyright (c) 2010 Todd C. 
Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <string.h> + +size_t +strnlen(const char *str, size_t maxlen) +{ + const char *cp; + + for (cp = str; maxlen != 0 && *cp != '\0'; cp++, maxlen--) + ; + + return (size_t)(cp - str); +} +#endif /* !HAVE_STRNLEN */ +#if !HAVE_STRTONUM +/* + * Copyright (c) 2004 Ted Unangst and Todd Miller + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <errno.h> +#include <limits.h> +#include <stdlib.h> + +#define INVALID 1 +#define TOOSMALL 2 +#define TOOLARGE 3 + +long long +strtonum(const char *numstr, long long minval, long long maxval, + const char **errstrp) +{ + long long ll = 0; + int error = 0; + char *ep; + struct errval { + const char *errstr; + int err; + } ev[4] = { + { NULL, 0 }, + { "invalid", EINVAL }, + { "too small", ERANGE }, + { "too large", ERANGE }, + }; + + ev[0].err = errno; + errno = 0; + if (minval > maxval) { + error = INVALID; + } else { + ll = strtoll(numstr, &ep, 10); + if (numstr == ep || *ep != '\0') + error = INVALID; + else if ((ll == LLONG_MIN && errno == ERANGE) || ll < minval) + error = TOOSMALL; + else if ((ll == LLONG_MAX && errno == ERANGE) || ll > maxval) + error = TOOLARGE; + } + if (errstrp != NULL) + *errstrp = ev[error].errstr; + errno = ev[error].err; + if (error) + ll = 0; + + return (ll); +} +#endif /* !HAVE_STRTONUM */ diff --git a/compats.o b/compats.o Binary files differ. diff --git a/config.h b/config.h @@ -0,0 +1,1621 @@ +#ifndef OCONFIGURE_CONFIG_H +#define OCONFIGURE_CONFIG_H + +#ifdef __cplusplus +# error "Do not use C++: this is a C application." +#endif +#if !defined(__GNUC__) || (__GNUC__ < 4) +# define __attribute__(x) +#endif +#if defined(__linux__) || defined(__MINT__) +# define _GNU_SOURCE /* memmem, memrchr, setresuid... */ +# define _DEFAULT_SOURCE /* le32toh, crypt, ... */ +#endif +#if defined(__NetBSD__) +# define _OPENBSD_SOURCE /* reallocarray, etc. */ +#endif +#if defined(__sun) +# ifndef _XOPEN_SOURCE /* SunOS already defines */ +# define _XOPEN_SOURCE /* XPGx */ +# endif +# define _XOPEN_SOURCE_EXTENDED 1 /* XPG4v2 */ +# ifndef __EXTENSIONS__ /* SunOS already defines */ +# define __EXTENSIONS__ /* reallocarray, etc. 
*/ +# endif +#endif +#if !defined(__BEGIN_DECLS) +# define __BEGIN_DECLS +#endif +#if !defined(__END_DECLS) +# define __END_DECLS +#endif + +#include <sys/types.h> /* size_t, mode_t, dev_t */ + +#include <stdint.h> /* C99 [u]int[nn]_t types */ + +#include <stdarg.h> /* err(3) */ + +#define INFTIM (-1) /* poll.h */ + +/* + * Results of configuration feature-testing. + */ +#define HAVE_ARC4RANDOM 0 +#define HAVE_B64_NTOP 1 +#define HAVE_CAPSICUM 0 +#define HAVE_CRYPT 1 +#define HAVE_ENDIAN_H 1 +#define HAVE_ERR 0 +#define HAVE_EXPLICIT_BZERO 1 +#define HAVE_FTS 1 +#define HAVE_GETEXECNAME 0 +#define HAVE_GETPROGNAME 0 +#define HAVE_INFTIM 0 +#define HAVE_MD5 0 +#define HAVE_MEMMEM 1 +#define HAVE_MEMRCHR 1 +#define HAVE_MEMSET_S 0 +#define HAVE_MKFIFOAT 1 +#define HAVE_MKNODAT 1 +#define HAVE_OSBYTEORDER_H 0 +#define HAVE_PATH_MAX 1 +#define HAVE_PLEDGE 0 +#define HAVE_PROGRAM_INVOCATION_SHORT_NAME 1 +#define HAVE_READPASSPHRASE 0 +#define HAVE_REALLOCARRAY 1 +#define HAVE_RECALLOCARRAY 0 +#define HAVE_SANDBOX_INIT 0 +#define HAVE_SECCOMP_FILTER 1 +#define HAVE_SETRESGID 1 +#define HAVE_SETRESUID 1 +#define HAVE_SHA2 0 +#define HAVE_SHA2_H 0 +#define HAVE_SOCK_NONBLOCK 1 +#define HAVE_STRLCAT 0 +#define HAVE_STRLCPY 0 +#define HAVE_STRNDUP 1 +#define HAVE_STRNLEN 1 +#define HAVE_STRTONUM 0 +#define HAVE_SYS_BYTEORDER_H 0 +#define HAVE_SYS_ENDIAN_H 0 +#define HAVE_SYS_MKDEV_H 0 +#define HAVE_SYS_QUEUE 0 +#define HAVE_SYS_SYSMACROS_H 1 +#define HAVE_SYS_TREE 0 +#define HAVE_SYSTRACE 0 +#define HAVE_UNVEIL 0 +#define HAVE_WAIT_ANY 1 +#define HAVE___PROGNAME 1 + +/* + * Handle the various major()/minor() header files. + * Use sys/mkdev.h before sys/sysmacros.h because SunOS + * has both, where only the former works properly. 
 + */ +#if HAVE_SYS_MKDEV_H +# define COMPAT_MAJOR_MINOR_H <sys/mkdev.h> +#elif HAVE_SYS_SYSMACROS_H +# define COMPAT_MAJOR_MINOR_H <sys/sysmacros.h> +#else +# define COMPAT_MAJOR_MINOR_H <sys/types.h> +#endif + +/* + * Make it easier to include endian.h forms. + */ +#if HAVE_ENDIAN_H +# define COMPAT_ENDIAN_H <endian.h> +#elif HAVE_SYS_ENDIAN_H +# define COMPAT_ENDIAN_H <sys/endian.h> +#elif HAVE_OSBYTEORDER_H +# define COMPAT_ENDIAN_H <libkern/OSByteOrder.h> +#elif HAVE_SYS_BYTEORDER_H +# define COMPAT_ENDIAN_H <sys/byteorder.h> +#else +# warning No suitable endian.h could be found. +# warning Please e-mail the maintainers with your OS. +# define COMPAT_ENDIAN_H <endian.h> +#endif + +/* + * Compatibility functions for err(3). + */ +extern void err(int, const char *, ...) __attribute__((noreturn)); +extern void errc(int, int, const char *, ...) __attribute__((noreturn)); +extern void errx(int, const char *, ...) __attribute__((noreturn)); +extern void verr(int, const char *, va_list) __attribute__((noreturn)); +extern void verrc(int, int, const char *, va_list) __attribute__((noreturn)); +extern void verrx(int, const char *, va_list) __attribute__((noreturn)); +extern void warn(const char *, ...); +extern void warnx(const char *, ...); +extern void warnc(int, const char *, ...); +extern void vwarn(const char *, va_list); +extern void vwarnc(int, const char *, va_list); +extern void vwarnx(const char *, va_list); +/* + * Compatibility for md5(3). 
+ */ +#define MD5_BLOCK_LENGTH 64 +#define MD5_DIGEST_LENGTH 16 +#define MD5_DIGEST_STRING_LENGTH (MD5_DIGEST_LENGTH * 2 + 1) + +typedef struct MD5Context { + uint32_t state[4]; + uint64_t count; + uint8_t buffer[MD5_BLOCK_LENGTH]; +} MD5_CTX; + +extern void MD5Init(MD5_CTX *); +extern void MD5Update(MD5_CTX *, const uint8_t *, size_t); +extern void MD5Pad(MD5_CTX *); +extern void MD5Transform(uint32_t [4], const uint8_t [MD5_BLOCK_LENGTH]); +extern char *MD5End(MD5_CTX *, char *); +extern void MD5Final(uint8_t [MD5_DIGEST_LENGTH], MD5_CTX *); + +/* + * Compatibility for sha2(3). + */ + +/*** SHA-256/384/512 Various Length Definitions ***********************/ +#define SHA256_BLOCK_LENGTH 64 +#define SHA256_DIGEST_LENGTH 32 +#define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1) +#define SHA384_BLOCK_LENGTH 128 +#define SHA384_DIGEST_LENGTH 48 +#define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1) +#define SHA512_BLOCK_LENGTH 128 +#define SHA512_DIGEST_LENGTH 64 +#define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1) +#define SHA512_256_BLOCK_LENGTH 128 +#define SHA512_256_DIGEST_LENGTH 32 +#define SHA512_256_DIGEST_STRING_LENGTH (SHA512_256_DIGEST_LENGTH * 2 + 1) + +/*** SHA-224/256/384/512 Context Structure *******************************/ +typedef struct _SHA2_CTX { + union { + uint32_t st32[8]; + uint64_t st64[8]; + } state; + uint64_t bitcount[2]; + uint8_t buffer[SHA512_BLOCK_LENGTH]; +} SHA2_CTX; + +void SHA256Init(SHA2_CTX *); +void SHA256Transform(uint32_t state[8], const uint8_t [SHA256_BLOCK_LENGTH]); +void SHA256Update(SHA2_CTX *, const uint8_t *, size_t); +void SHA256Pad(SHA2_CTX *); +void SHA256Final(uint8_t [SHA256_DIGEST_LENGTH], SHA2_CTX *); +char *SHA256End(SHA2_CTX *, char *); +char *SHA256File(const char *, char *); +char *SHA256FileChunk(const char *, char *, off_t, off_t); +char *SHA256Data(const uint8_t *, size_t, char *); + +void SHA384Init(SHA2_CTX *); +void SHA384Transform(uint64_t state[8], const 
uint8_t [SHA384_BLOCK_LENGTH]); +void SHA384Update(SHA2_CTX *, const uint8_t *, size_t); +void SHA384Pad(SHA2_CTX *); +void SHA384Final(uint8_t [SHA384_DIGEST_LENGTH], SHA2_CTX *); +char *SHA384End(SHA2_CTX *, char *); +char *SHA384File(const char *, char *); +char *SHA384FileChunk(const char *, char *, off_t, off_t); +char *SHA384Data(const uint8_t *, size_t, char *); + +void SHA512Init(SHA2_CTX *); +void SHA512Transform(uint64_t state[8], const uint8_t [SHA512_BLOCK_LENGTH]); +void SHA512Update(SHA2_CTX *, const uint8_t *, size_t); +void SHA512Pad(SHA2_CTX *); +void SHA512Final(uint8_t [SHA512_DIGEST_LENGTH], SHA2_CTX *); +char *SHA512End(SHA2_CTX *, char *); +char *SHA512File(const char *, char *); +char *SHA512FileChunk(const char *, char *, off_t, off_t); +char *SHA512Data(const uint8_t *, size_t, char *); + +#define SECCOMP_AUDIT_ARCH AUDIT_ARCH_X86_64 + +/* + * Compatibility for getprogname(3). + */ +extern const char *getprogname(void); + +/* + * Macros and function required for readpassphrase(3). + */ +#define RPP_ECHO_OFF 0x00 +#define RPP_ECHO_ON 0x01 +#define RPP_REQUIRE_TTY 0x02 +#define RPP_FORCELOWER 0x04 +#define RPP_FORCEUPPER 0x08 +#define RPP_SEVENBIT 0x10 +#define RPP_STDIN 0x20 +char *readpassphrase(const char *, char *, size_t, int); + +/* + * Compatibility for recallocarray(3). + */ +extern void *recallocarray(void *, size_t, size_t, size_t); + +/* + * Compatibility for strlcat(3). + */ +extern size_t strlcat(char *, const char *, size_t); + +/* + * Compatibility for strlcpy(3). + */ +extern size_t strlcpy(char *, const char *, size_t); + +/* + * Compatibility for strtonum(3). + */ +extern long long strtonum(const char *, long long, long long, const char **); + +/* + * A compatible version of OpenBSD <sys/queue.h>. + */ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + */ + +/* OPENBSD ORIGINAL: sys/sys/queue.h */ + +/* + * Require for OS/X and other platforms that have old/broken/incomplete + * <sys/queue.h>. 
+ */ + +#undef LIST_EMPTY +#undef LIST_END +#undef LIST_ENTRY +#undef LIST_FIRST +#undef LIST_FOREACH +#undef LIST_FOREACH_SAFE +#undef LIST_HEAD +#undef LIST_HEAD_INITIALIZER +#undef LIST_INIT +#undef LIST_INSERT_AFTER +#undef LIST_INSERT_BEFORE +#undef LIST_INSERT_HEAD +#undef LIST_NEXT +#undef LIST_REMOVE +#undef LIST_REPLACE +#undef SIMPLEQ_CONCAT +#undef SIMPLEQ_EMPTY +#undef SIMPLEQ_END +#undef SIMPLEQ_ENTRY +#undef SIMPLEQ_FIRST +#undef SIMPLEQ_FOREACH +#undef SIMPLEQ_FOREACH_SAFE +#undef SIMPLEQ_HEAD +#undef SIMPLEQ_HEAD_INITIALIZER +#undef SIMPLEQ_INIT +#undef SIMPLEQ_INSERT_AFTER +#undef SIMPLEQ_INSERT_HEAD +#undef SIMPLEQ_INSERT_TAIL +#undef SIMPLEQ_NEXT +#undef SIMPLEQ_REMOVE_AFTER +#undef SIMPLEQ_REMOVE_HEAD +#undef SLIST_EMPTY +#undef SLIST_END +#undef SLIST_ENTRY +#undef SLIST_FIRST +#undef SLIST_FOREACH +#undef SLIST_FOREACH_SAFE +#undef SLIST_HEAD +#undef SLIST_HEAD_INITIALIZER +#undef SLIST_INIT +#undef SLIST_INSERT_AFTER +#undef SLIST_INSERT_HEAD +#undef SLIST_NEXT +#undef SLIST_REMOVE +#undef SLIST_REMOVE_AFTER +#undef SLIST_REMOVE_HEAD +#undef TAILQ_CONCAT +#undef TAILQ_EMPTY +#undef TAILQ_END +#undef TAILQ_ENTRY +#undef TAILQ_FIRST +#undef TAILQ_FOREACH +#undef TAILQ_FOREACH_REVERSE +#undef TAILQ_FOREACH_REVERSE_SAFE +#undef TAILQ_FOREACH_SAFE +#undef TAILQ_HEAD +#undef TAILQ_HEAD_INITIALIZER +#undef TAILQ_INIT +#undef TAILQ_INSERT_AFTER +#undef TAILQ_INSERT_BEFORE +#undef TAILQ_INSERT_HEAD +#undef TAILQ_INSERT_TAIL +#undef TAILQ_LAST +#undef TAILQ_NEXT +#undef TAILQ_PREV +#undef TAILQ_REMOVE +#undef TAILQ_REPLACE +#undef XSIMPLEQ_EMPTY +#undef XSIMPLEQ_END +#undef XSIMPLEQ_ENTRY +#undef XSIMPLEQ_FIRST +#undef XSIMPLEQ_FOREACH +#undef XSIMPLEQ_FOREACH_SAFE +#undef XSIMPLEQ_HEAD +#undef XSIMPLEQ_INIT +#undef XSIMPLEQ_INSERT_AFTER +#undef XSIMPLEQ_INSERT_HEAD +#undef XSIMPLEQ_INSERT_TAIL +#undef XSIMPLEQ_NEXT +#undef XSIMPLEQ_REMOVE_AFTER +#undef XSIMPLEQ_REMOVE_HEAD +#undef XSIMPLEQ_XOR + +/* + * This file defines five types of data structures: 
singly-linked lists, + * lists, simple queues, tail queues and XOR simple queues. + * + * + * A singly-linked list is headed by a single forward pointer. The elements + * are singly linked for minimum space and pointer manipulation overhead at + * the expense of O(n) removal for arbitrary elements. New elements can be + * added to the list after an existing element or at the head of the list. + * Elements being removed from the head of the list should use the explicit + * macro for this purpose for optimum efficiency. A singly-linked list may + * only be traversed in the forward direction. Singly-linked lists are ideal + * for applications with large datasets and few or no removals or for + * implementing a LIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A simple queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are singly + * linked to save space, so elements can only be removed from the + * head of the list. New elements can be added to the list after + * an existing element, at the head of the list, or at the end of the + * list. A simple queue may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. 
+ * + * An XOR simple queue is used in the same way as a regular simple queue. + * The difference is that the head structure also includes a "cookie" that + * is XOR'd with the queue pointer (first, last or next) to generate the + * real pointer value. + * + * For details on the use of these macros, see the queue(3) manual page. + */ + +#if defined(QUEUE_MACRO_DEBUG) || (defined(_KERNEL) && defined(DIAGNOSTIC)) +#define _Q_INVALID ((void *)-1) +#define _Q_INVALIDATE(a) (a) = _Q_INVALID +#else +#define _Q_INVALIDATE(a) +#endif + +/* + * Singly-linked List definitions. + */ +#define SLIST_HEAD(name, type) \ +struct name { \ + struct type *slh_first; /* first element */ \ +} + +#define SLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define SLIST_ENTRY(type) \ +struct { \ + struct type *sle_next; /* next element */ \ +} + +/* + * Singly-linked List access methods. + */ +#define SLIST_FIRST(head) ((head)->slh_first) +#define SLIST_END(head) NULL +#define SLIST_EMPTY(head) (SLIST_FIRST(head) == SLIST_END(head)) +#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) + +#define SLIST_FOREACH(var, head, field) \ + for((var) = SLIST_FIRST(head); \ + (var) != SLIST_END(head); \ + (var) = SLIST_NEXT(var, field)) + +#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = SLIST_FIRST(head); \ + (var) && ((tvar) = SLIST_NEXT(var, field), 1); \ + (var) = (tvar)) + +/* + * Singly-linked List functions. 
+ */ +#define SLIST_INIT(head) { \ + SLIST_FIRST(head) = SLIST_END(head); \ +} + +#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ + (elm)->field.sle_next = (slistelm)->field.sle_next; \ + (slistelm)->field.sle_next = (elm); \ +} while (0) + +#define SLIST_INSERT_HEAD(head, elm, field) do { \ + (elm)->field.sle_next = (head)->slh_first; \ + (head)->slh_first = (elm); \ +} while (0) + +#define SLIST_REMOVE_AFTER(elm, field) do { \ + (elm)->field.sle_next = (elm)->field.sle_next->field.sle_next; \ +} while (0) + +#define SLIST_REMOVE_HEAD(head, field) do { \ + (head)->slh_first = (head)->slh_first->field.sle_next; \ +} while (0) + +#define SLIST_REMOVE(head, elm, type, field) do { \ + if ((head)->slh_first == (elm)) { \ + SLIST_REMOVE_HEAD((head), field); \ + } else { \ + struct type *curelm = (head)->slh_first; \ + \ + while (curelm->field.sle_next != (elm)) \ + curelm = curelm->field.sle_next; \ + curelm->field.sle_next = \ + curelm->field.sle_next->field.sle_next; \ + } \ + _Q_INVALIDATE((elm)->field.sle_next); \ +} while (0) + +/* + * List definitions. + */ +#define LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define LIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define LIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * List access methods. + */ +#define LIST_FIRST(head) ((head)->lh_first) +#define LIST_END(head) NULL +#define LIST_EMPTY(head) (LIST_FIRST(head) == LIST_END(head)) +#define LIST_NEXT(elm, field) ((elm)->field.le_next) + +#define LIST_FOREACH(var, head, field) \ + for((var) = LIST_FIRST(head); \ + (var)!= LIST_END(head); \ + (var) = LIST_NEXT(var, field)) + +#define LIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = LIST_FIRST(head); \ + (var) && ((tvar) = LIST_NEXT(var, field), 1); \ + (var) = (tvar)) + +/* + * List functions. 
+ */ +#define LIST_INIT(head) do { \ + LIST_FIRST(head) = LIST_END(head); \ +} while (0) + +#define LIST_INSERT_AFTER(listelm, elm, field) do { \ + if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \ + (listelm)->field.le_next->field.le_prev = \ + &(elm)->field.le_next; \ + (listelm)->field.le_next = (elm); \ + (elm)->field.le_prev = &(listelm)->field.le_next; \ +} while (0) + +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + (elm)->field.le_next = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &(elm)->field.le_next; \ +} while (0) + +#define LIST_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.le_next = (head)->lh_first) != NULL) \ + (head)->lh_first->field.le_prev = &(elm)->field.le_next;\ + (head)->lh_first = (elm); \ + (elm)->field.le_prev = &(head)->lh_first; \ +} while (0) + +#define LIST_REMOVE(elm, field) do { \ + if ((elm)->field.le_next != NULL) \ + (elm)->field.le_next->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = (elm)->field.le_next; \ + _Q_INVALIDATE((elm)->field.le_prev); \ + _Q_INVALIDATE((elm)->field.le_next); \ +} while (0) + +#define LIST_REPLACE(elm, elm2, field) do { \ + if (((elm2)->field.le_next = (elm)->field.le_next) != NULL) \ + (elm2)->field.le_next->field.le_prev = \ + &(elm2)->field.le_next; \ + (elm2)->field.le_prev = (elm)->field.le_prev; \ + *(elm2)->field.le_prev = (elm2); \ + _Q_INVALIDATE((elm)->field.le_prev); \ + _Q_INVALIDATE((elm)->field.le_next); \ +} while (0) + +/* + * Simple queue definitions. + */ +#define SIMPLEQ_HEAD(name, type) \ +struct name { \ + struct type *sqh_first; /* first element */ \ + struct type **sqh_last; /* addr of last next element */ \ +} + +#define SIMPLEQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).sqh_first } + +#define SIMPLEQ_ENTRY(type) \ +struct { \ + struct type *sqe_next; /* next element */ \ +} + +/* + * Simple queue access methods. 
+ */ +#define SIMPLEQ_FIRST(head) ((head)->sqh_first) +#define SIMPLEQ_END(head) NULL +#define SIMPLEQ_EMPTY(head) (SIMPLEQ_FIRST(head) == SIMPLEQ_END(head)) +#define SIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next) + +#define SIMPLEQ_FOREACH(var, head, field) \ + for((var) = SIMPLEQ_FIRST(head); \ + (var) != SIMPLEQ_END(head); \ + (var) = SIMPLEQ_NEXT(var, field)) + +#define SIMPLEQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = SIMPLEQ_FIRST(head); \ + (var) && ((tvar) = SIMPLEQ_NEXT(var, field), 1); \ + (var) = (tvar)) + +/* + * Simple queue functions. + */ +#define SIMPLEQ_INIT(head) do { \ + (head)->sqh_first = NULL; \ + (head)->sqh_last = &(head)->sqh_first; \ +} while (0) + +#define SIMPLEQ_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \ + (head)->sqh_last = &(elm)->field.sqe_next; \ + (head)->sqh_first = (elm); \ +} while (0) + +#define SIMPLEQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.sqe_next = NULL; \ + *(head)->sqh_last = (elm); \ + (head)->sqh_last = &(elm)->field.sqe_next; \ +} while (0) + +#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\ + (head)->sqh_last = &(elm)->field.sqe_next; \ + (listelm)->field.sqe_next = (elm); \ +} while (0) + +#define SIMPLEQ_REMOVE_HEAD(head, field) do { \ + if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL) \ + (head)->sqh_last = &(head)->sqh_first; \ +} while (0) + +#define SIMPLEQ_REMOVE_AFTER(head, elm, field) do { \ + if (((elm)->field.sqe_next = (elm)->field.sqe_next->field.sqe_next) \ + == NULL) \ + (head)->sqh_last = &(elm)->field.sqe_next; \ +} while (0) + +#define SIMPLEQ_CONCAT(head1, head2) do { \ + if (!SIMPLEQ_EMPTY((head2))) { \ + *(head1)->sqh_last = (head2)->sqh_first; \ + (head1)->sqh_last = (head2)->sqh_last; \ + SIMPLEQ_INIT((head2)); \ + } \ +} while (0) + +/* + * XOR Simple queue definitions. 
+ */ +#define XSIMPLEQ_HEAD(name, type) \ +struct name { \ + struct type *sqx_first; /* first element */ \ + struct type **sqx_last; /* addr of last next element */ \ + unsigned long sqx_cookie; \ +} + +#define XSIMPLEQ_ENTRY(type) \ +struct { \ + struct type *sqx_next; /* next element */ \ +} + +/* + * XOR Simple queue access methods. + */ +#define XSIMPLEQ_XOR(head, ptr) ((__typeof(ptr))((head)->sqx_cookie ^ \ + (unsigned long)(ptr))) +#define XSIMPLEQ_FIRST(head) XSIMPLEQ_XOR(head, ((head)->sqx_first)) +#define XSIMPLEQ_END(head) NULL +#define XSIMPLEQ_EMPTY(head) (XSIMPLEQ_FIRST(head) == XSIMPLEQ_END(head)) +#define XSIMPLEQ_NEXT(head, elm, field) XSIMPLEQ_XOR(head, ((elm)->field.sqx_next)) + + +#define XSIMPLEQ_FOREACH(var, head, field) \ + for ((var) = XSIMPLEQ_FIRST(head); \ + (var) != XSIMPLEQ_END(head); \ + (var) = XSIMPLEQ_NEXT(head, var, field)) + +#define XSIMPLEQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = XSIMPLEQ_FIRST(head); \ + (var) && ((tvar) = XSIMPLEQ_NEXT(head, var, field), 1); \ + (var) = (tvar)) + +/* + * XOR Simple queue functions. 
+ */ +#define XSIMPLEQ_INIT(head) do { \ + arc4random_buf(&(head)->sqx_cookie, sizeof((head)->sqx_cookie)); \ + (head)->sqx_first = XSIMPLEQ_XOR(head, NULL); \ + (head)->sqx_last = XSIMPLEQ_XOR(head, &(head)->sqx_first); \ +} while (0) + +#define XSIMPLEQ_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.sqx_next = (head)->sqx_first) == \ + XSIMPLEQ_XOR(head, NULL)) \ + (head)->sqx_last = XSIMPLEQ_XOR(head, &(elm)->field.sqx_next); \ + (head)->sqx_first = XSIMPLEQ_XOR(head, (elm)); \ +} while (0) + +#define XSIMPLEQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.sqx_next = XSIMPLEQ_XOR(head, NULL); \ + *(XSIMPLEQ_XOR(head, (head)->sqx_last)) = XSIMPLEQ_XOR(head, (elm)); \ + (head)->sqx_last = XSIMPLEQ_XOR(head, &(elm)->field.sqx_next); \ +} while (0) + +#define XSIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if (((elm)->field.sqx_next = (listelm)->field.sqx_next) == \ + XSIMPLEQ_XOR(head, NULL)) \ + (head)->sqx_last = XSIMPLEQ_XOR(head, &(elm)->field.sqx_next); \ + (listelm)->field.sqx_next = XSIMPLEQ_XOR(head, (elm)); \ +} while (0) + +#define XSIMPLEQ_REMOVE_HEAD(head, field) do { \ + if (((head)->sqx_first = XSIMPLEQ_XOR(head, \ + (head)->sqx_first)->field.sqx_next) == XSIMPLEQ_XOR(head, NULL)) \ + (head)->sqx_last = XSIMPLEQ_XOR(head, &(head)->sqx_first); \ +} while (0) + +#define XSIMPLEQ_REMOVE_AFTER(head, elm, field) do { \ + if (((elm)->field.sqx_next = XSIMPLEQ_XOR(head, \ + (elm)->field.sqx_next)->field.sqx_next) \ + == XSIMPLEQ_XOR(head, NULL)) \ + (head)->sqx_last = \ + XSIMPLEQ_XOR(head, &(elm)->field.sqx_next); \ +} while (0) + + +/* + * Tail queue definitions. 
+ */ +#define TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ +} + +#define TAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).tqh_first } + +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ +} + +/* + * Tail queue access methods. + */ +#define TAILQ_FIRST(head) ((head)->tqh_first) +#define TAILQ_END(head) NULL +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) +#define TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) +/* XXX */ +#define TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) +#define TAILQ_EMPTY(head) \ + (TAILQ_FIRST(head) == TAILQ_END(head)) + +#define TAILQ_FOREACH(var, head, field) \ + for((var) = TAILQ_FIRST(head); \ + (var) != TAILQ_END(head); \ + (var) = TAILQ_NEXT(var, field)) + +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = TAILQ_FIRST(head); \ + (var) != TAILQ_END(head) && \ + ((tvar) = TAILQ_NEXT(var, field), 1); \ + (var) = (tvar)) + + +#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ + for((var) = TAILQ_LAST(head, headname); \ + (var) != TAILQ_END(head); \ + (var) = TAILQ_PREV(var, headname, field)) + +#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ + for ((var) = TAILQ_LAST(head, headname); \ + (var) != TAILQ_END(head) && \ + ((tvar) = TAILQ_PREV(var, headname, field), 1); \ + (var) = (tvar)) + +/* + * Tail queue functions. 
+ */ +#define TAILQ_INIT(head) do { \ + (head)->tqh_first = NULL; \ + (head)->tqh_last = &(head)->tqh_first; \ +} while (0) + +#define TAILQ_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \ + (head)->tqh_first->field.tqe_prev = \ + &(elm)->field.tqe_next; \ + else \ + (head)->tqh_last = &(elm)->field.tqe_next; \ + (head)->tqh_first = (elm); \ + (elm)->field.tqe_prev = &(head)->tqh_first; \ +} while (0) + +#define TAILQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.tqe_next = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &(elm)->field.tqe_next; \ +} while (0) + +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\ + (elm)->field.tqe_next->field.tqe_prev = \ + &(elm)->field.tqe_next; \ + else \ + (head)->tqh_last = &(elm)->field.tqe_next; \ + (listelm)->field.tqe_next = (elm); \ + (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \ +} while (0) + +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + (elm)->field.tqe_next = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \ +} while (0) + +#define TAILQ_REMOVE(head, elm, field) do { \ + if (((elm)->field.tqe_next) != NULL) \ + (elm)->field.tqe_next->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + *(elm)->field.tqe_prev = (elm)->field.tqe_next; \ + _Q_INVALIDATE((elm)->field.tqe_prev); \ + _Q_INVALIDATE((elm)->field.tqe_next); \ +} while (0) + +#define TAILQ_REPLACE(head, elm, elm2, field) do { \ + if (((elm2)->field.tqe_next = (elm)->field.tqe_next) != NULL) \ + (elm2)->field.tqe_next->field.tqe_prev = \ + &(elm2)->field.tqe_next; \ + else \ + (head)->tqh_last = &(elm2)->field.tqe_next; \ + (elm2)->field.tqe_prev = (elm)->field.tqe_prev; \ + 
*(elm2)->field.tqe_prev = (elm2); \ + _Q_INVALIDATE((elm)->field.tqe_prev); \ + _Q_INVALIDATE((elm)->field.tqe_next); \ +} while (0) + +#define TAILQ_CONCAT(head1, head2, field) do { \ + if (!TAILQ_EMPTY(head2)) { \ + *(head1)->tqh_last = (head2)->tqh_first; \ + (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ + (head1)->tqh_last = (head2)->tqh_last; \ + TAILQ_INIT((head2)); \ + } \ +} while (0) + +/* + * A compatible version of OpenBSD <sys/tree.h>. + */ +/* + * Copyright 2002 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* OPENBSD ORIGINAL: sys/sys/tree.h */ + +/* + * This file defines data structures for different types of trees: + * splay trees and red-black trees. 
+ * + * A splay tree is a self-organizing data structure. Every operation + * on the tree causes a splay to happen. The splay moves the requested + * node to the root of the tree and partly rebalances it. + * + * This has the benefit that request locality causes faster lookups as + * the requested nodes move to the top of the tree. On the other hand, + * every lookup causes memory writes. + * + * The Balance Theorem bounds the total access time for m operations + * and n inserts on an initially empty tree as O((m + n)lg n). The + * amortized cost for a sequence of m accesses to a splay tree is O(lg n); + * + * A red-black tree is a binary search tree with the node color as an + * extra attribute. It fulfills a set of conditions: + * - every search path from the root to a leaf consists of the + * same number of black nodes, + * - each red node (except for the root) has a black parent, + * - each leaf node is black. + * + * Every operation on a red-black tree is bounded as O(lg n). + * The maximum height of a red-black tree is 2lg (n+1). 
+ */ + +#define SPLAY_HEAD(name, type) \ +struct name { \ + struct type *sph_root; /* root of the tree */ \ +} + +#define SPLAY_INITIALIZER(root) \ + { NULL } + +#define SPLAY_INIT(root) do { \ + (root)->sph_root = NULL; \ +} while (0) + +#define SPLAY_ENTRY(type) \ +struct { \ + struct type *spe_left; /* left element */ \ + struct type *spe_right; /* right element */ \ +} + +#define SPLAY_LEFT(elm, field) (elm)->field.spe_left +#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right +#define SPLAY_ROOT(head) (head)->sph_root +#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL) + +/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */ +#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \ + SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \ + SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ + (head)->sph_root = tmp; \ +} while (0) + +#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \ + SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \ + SPLAY_LEFT(tmp, field) = (head)->sph_root; \ + (head)->sph_root = tmp; \ +} while (0) + +#define SPLAY_LINKLEFT(head, tmp, field) do { \ + SPLAY_LEFT(tmp, field) = (head)->sph_root; \ + tmp = (head)->sph_root; \ + (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \ +} while (0) + +#define SPLAY_LINKRIGHT(head, tmp, field) do { \ + SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ + tmp = (head)->sph_root; \ + (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \ +} while (0) + +#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \ + SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \ + SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\ + SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \ + SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \ +} while (0) + +/* Generates prototypes and inline functions */ + +#define SPLAY_PROTOTYPE(name, type, field, cmp) \ +void name##_SPLAY(struct name *, struct type *); \ +void 
name##_SPLAY_MINMAX(struct name *, int); \ +struct type *name##_SPLAY_INSERT(struct name *, struct type *); \ +struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \ + \ +/* Finds the node with the same key as elm */ \ +static __inline struct type * \ +name##_SPLAY_FIND(struct name *head, struct type *elm) \ +{ \ + if (SPLAY_EMPTY(head)) \ + return(NULL); \ + name##_SPLAY(head, elm); \ + if ((cmp)(elm, (head)->sph_root) == 0) \ + return (head->sph_root); \ + return (NULL); \ +} \ + \ +static __inline struct type * \ +name##_SPLAY_NEXT(struct name *head, struct type *elm) \ +{ \ + name##_SPLAY(head, elm); \ + if (SPLAY_RIGHT(elm, field) != NULL) { \ + elm = SPLAY_RIGHT(elm, field); \ + while (SPLAY_LEFT(elm, field) != NULL) { \ + elm = SPLAY_LEFT(elm, field); \ + } \ + } else \ + elm = NULL; \ + return (elm); \ +} \ + \ +static __inline struct type * \ +name##_SPLAY_MIN_MAX(struct name *head, int val) \ +{ \ + name##_SPLAY_MINMAX(head, val); \ + return (SPLAY_ROOT(head)); \ +} + +/* Main splay operation. 
+ * Moves node close to the key of elm to top + */ +#define SPLAY_GENERATE(name, type, field, cmp) \ +struct type * \ +name##_SPLAY_INSERT(struct name *head, struct type *elm) \ +{ \ + if (SPLAY_EMPTY(head)) { \ + SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \ + } else { \ + int __comp; \ + name##_SPLAY(head, elm); \ + __comp = (cmp)(elm, (head)->sph_root); \ + if(__comp < 0) { \ + SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\ + SPLAY_RIGHT(elm, field) = (head)->sph_root; \ + SPLAY_LEFT((head)->sph_root, field) = NULL; \ + } else if (__comp > 0) { \ + SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\ + SPLAY_LEFT(elm, field) = (head)->sph_root; \ + SPLAY_RIGHT((head)->sph_root, field) = NULL; \ + } else \ + return ((head)->sph_root); \ + } \ + (head)->sph_root = (elm); \ + return (NULL); \ +} \ + \ +struct type * \ +name##_SPLAY_REMOVE(struct name *head, struct type *elm) \ +{ \ + struct type *__tmp; \ + if (SPLAY_EMPTY(head)) \ + return (NULL); \ + name##_SPLAY(head, elm); \ + if ((cmp)(elm, (head)->sph_root) == 0) { \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \ + (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\ + } else { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\ + name##_SPLAY(head, elm); \ + SPLAY_RIGHT((head)->sph_root, field) = __tmp; \ + } \ + return (elm); \ + } \ + return (NULL); \ +} \ + \ +void \ +name##_SPLAY(struct name *head, struct type *elm) \ +{ \ + struct type __node, *__left, *__right, *__tmp; \ + int __comp; \ +\ + SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ + __left = __right = &__node; \ +\ + while ((__comp = (cmp)(elm, (head)->sph_root))) { \ + if (__comp < 0) { \ + __tmp = SPLAY_LEFT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if ((cmp)(elm, __tmp) < 0){ \ + SPLAY_ROTATE_RIGHT(head, __tmp, field); \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ + break; \ + } \ + 
SPLAY_LINKLEFT(head, __right, field); \ + } else if (__comp > 0) { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if ((cmp)(elm, __tmp) > 0){ \ + SPLAY_ROTATE_LEFT(head, __tmp, field); \ + if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKRIGHT(head, __left, field); \ + } \ + } \ + SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ +} \ + \ +/* Splay with either the minimum or the maximum element \ + * Used to find minimum or maximum element in tree. \ + */ \ +void name##_SPLAY_MINMAX(struct name *head, int __comp) \ +{ \ + struct type __node, *__left, *__right, *__tmp; \ +\ + SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ + __left = __right = &__node; \ +\ + while (1) { \ + if (__comp < 0) { \ + __tmp = SPLAY_LEFT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if (__comp < 0){ \ + SPLAY_ROTATE_RIGHT(head, __tmp, field); \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKLEFT(head, __right, field); \ + } else if (__comp > 0) { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if (__comp > 0) { \ + SPLAY_ROTATE_LEFT(head, __tmp, field); \ + if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKRIGHT(head, __left, field); \ + } \ + } \ + SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ +} + +#define SPLAY_NEGINF -1 +#define SPLAY_INF 1 + +#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y) +#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y) +#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y) +#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y) +#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \ + : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF)) +#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? 
NULL \ + : name##_SPLAY_MIN_MAX(x, SPLAY_INF)) + +#define SPLAY_FOREACH(x, name, head) \ + for ((x) = SPLAY_MIN(name, head); \ + (x) != NULL; \ + (x) = SPLAY_NEXT(name, head, x)) + +/* Macros that define a red-black tree */ +#define RB_HEAD(name, type) \ +struct name { \ + struct type *rbh_root; /* root of the tree */ \ +} + +#define RB_INITIALIZER(root) \ + { NULL } + +#define RB_INIT(root) do { \ + (root)->rbh_root = NULL; \ +} while (0) + +#define RB_BLACK 0 +#define RB_RED 1 +#define RB_ENTRY(type) \ +struct { \ + struct type *rbe_left; /* left element */ \ + struct type *rbe_right; /* right element */ \ + struct type *rbe_parent; /* parent element */ \ + int rbe_color; /* node color */ \ +} + +#define RB_LEFT(elm, field) (elm)->field.rbe_left +#define RB_RIGHT(elm, field) (elm)->field.rbe_right +#define RB_PARENT(elm, field) (elm)->field.rbe_parent +#define RB_COLOR(elm, field) (elm)->field.rbe_color +#define RB_ROOT(head) (head)->rbh_root +#define RB_EMPTY(head) (RB_ROOT(head) == NULL) + +#define RB_SET(elm, parent, field) do { \ + RB_PARENT(elm, field) = parent; \ + RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \ + RB_COLOR(elm, field) = RB_RED; \ +} while (0) + +#define RB_SET_BLACKRED(black, red, field) do { \ + RB_COLOR(black, field) = RB_BLACK; \ + RB_COLOR(red, field) = RB_RED; \ +} while (0) + +#ifndef RB_AUGMENT +#define RB_AUGMENT(x) do {} while (0) +#endif + +#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \ + (tmp) = RB_RIGHT(elm, field); \ + if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) { \ + RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \ + } \ + RB_AUGMENT(elm); \ + if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \ + if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \ + RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \ + else \ + RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \ + } else \ + (head)->rbh_root = (tmp); \ + RB_LEFT(tmp, field) = (elm); \ + RB_PARENT(elm, field) = (tmp); \ + RB_AUGMENT(tmp); \ + if 
((RB_PARENT(tmp, field))) \ + RB_AUGMENT(RB_PARENT(tmp, field)); \ +} while (0) + +#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \ + (tmp) = RB_LEFT(elm, field); \ + if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) { \ + RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \ + } \ + RB_AUGMENT(elm); \ + if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \ + if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \ + RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \ + else \ + RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \ + } else \ + (head)->rbh_root = (tmp); \ + RB_RIGHT(tmp, field) = (elm); \ + RB_PARENT(elm, field) = (tmp); \ + RB_AUGMENT(tmp); \ + if ((RB_PARENT(tmp, field))) \ + RB_AUGMENT(RB_PARENT(tmp, field)); \ +} while (0) + +/* Generates prototypes and inline functions */ +#define RB_PROTOTYPE(name, type, field, cmp) \ + RB_PROTOTYPE_INTERNAL(name, type, field, cmp,) +#define RB_PROTOTYPE_STATIC(name, type, field, cmp) \ + RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __attribute__((__unused__)) static) +#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \ +attr void name##_RB_INSERT_COLOR(struct name *, struct type *); \ +attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\ +attr struct type *name##_RB_REMOVE(struct name *, struct type *); \ +attr struct type *name##_RB_INSERT(struct name *, struct type *); \ +attr struct type *name##_RB_FIND(struct name *, struct type *); \ +attr struct type *name##_RB_NFIND(struct name *, struct type *); \ +attr struct type *name##_RB_NEXT(struct type *); \ +attr struct type *name##_RB_PREV(struct type *); \ +attr struct type *name##_RB_MINMAX(struct name *, int); \ + \ + +/* Main rb operation. 
+ * Moves node close to the key of elm to top + */ +#define RB_GENERATE(name, type, field, cmp) \ + RB_GENERATE_INTERNAL(name, type, field, cmp,) +#define RB_GENERATE_STATIC(name, type, field, cmp) \ + RB_GENERATE_INTERNAL(name, type, field, cmp, __attribute__((__unused__)) static) +#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \ +attr void \ +name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \ +{ \ + struct type *parent, *gparent, *tmp; \ + while ((parent = RB_PARENT(elm, field)) && \ + RB_COLOR(parent, field) == RB_RED) { \ + gparent = RB_PARENT(parent, field); \ + if (parent == RB_LEFT(gparent, field)) { \ + tmp = RB_RIGHT(gparent, field); \ + if (tmp && RB_COLOR(tmp, field) == RB_RED) { \ + RB_COLOR(tmp, field) = RB_BLACK; \ + RB_SET_BLACKRED(parent, gparent, field);\ + elm = gparent; \ + continue; \ + } \ + if (RB_RIGHT(parent, field) == elm) { \ + RB_ROTATE_LEFT(head, parent, tmp, field);\ + tmp = parent; \ + parent = elm; \ + elm = tmp; \ + } \ + RB_SET_BLACKRED(parent, gparent, field); \ + RB_ROTATE_RIGHT(head, gparent, tmp, field); \ + } else { \ + tmp = RB_LEFT(gparent, field); \ + if (tmp && RB_COLOR(tmp, field) == RB_RED) { \ + RB_COLOR(tmp, field) = RB_BLACK; \ + RB_SET_BLACKRED(parent, gparent, field);\ + elm = gparent; \ + continue; \ + } \ + if (RB_LEFT(parent, field) == elm) { \ + RB_ROTATE_RIGHT(head, parent, tmp, field);\ + tmp = parent; \ + parent = elm; \ + elm = tmp; \ + } \ + RB_SET_BLACKRED(parent, gparent, field); \ + RB_ROTATE_LEFT(head, gparent, tmp, field); \ + } \ + } \ + RB_COLOR(head->rbh_root, field) = RB_BLACK; \ +} \ + \ +attr void \ +name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \ +{ \ + struct type *tmp; \ + while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && \ + elm != RB_ROOT(head)) { \ + if (RB_LEFT(parent, field) == elm) { \ + tmp = RB_RIGHT(parent, field); \ + if (RB_COLOR(tmp, field) == RB_RED) { \ + RB_SET_BLACKRED(tmp, parent, field); \ + 
RB_ROTATE_LEFT(head, parent, tmp, field);\ + tmp = RB_RIGHT(parent, field); \ + } \ + if ((RB_LEFT(tmp, field) == NULL || \ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\ + (RB_RIGHT(tmp, field) == NULL || \ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\ + RB_COLOR(tmp, field) = RB_RED; \ + elm = parent; \ + parent = RB_PARENT(elm, field); \ + } else { \ + if (RB_RIGHT(tmp, field) == NULL || \ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\ + struct type *oleft; \ + if ((oleft = RB_LEFT(tmp, field)))\ + RB_COLOR(oleft, field) = RB_BLACK;\ + RB_COLOR(tmp, field) = RB_RED; \ + RB_ROTATE_RIGHT(head, tmp, oleft, field);\ + tmp = RB_RIGHT(parent, field); \ + } \ + RB_COLOR(tmp, field) = RB_COLOR(parent, field);\ + RB_COLOR(parent, field) = RB_BLACK; \ + if (RB_RIGHT(tmp, field)) \ + RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\ + RB_ROTATE_LEFT(head, parent, tmp, field);\ + elm = RB_ROOT(head); \ + break; \ + } \ + } else { \ + tmp = RB_LEFT(parent, field); \ + if (RB_COLOR(tmp, field) == RB_RED) { \ + RB_SET_BLACKRED(tmp, parent, field); \ + RB_ROTATE_RIGHT(head, parent, tmp, field);\ + tmp = RB_LEFT(parent, field); \ + } \ + if ((RB_LEFT(tmp, field) == NULL || \ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\ + (RB_RIGHT(tmp, field) == NULL || \ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\ + RB_COLOR(tmp, field) = RB_RED; \ + elm = parent; \ + parent = RB_PARENT(elm, field); \ + } else { \ + if (RB_LEFT(tmp, field) == NULL || \ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\ + struct type *oright; \ + if ((oright = RB_RIGHT(tmp, field)))\ + RB_COLOR(oright, field) = RB_BLACK;\ + RB_COLOR(tmp, field) = RB_RED; \ + RB_ROTATE_LEFT(head, tmp, oright, field);\ + tmp = RB_LEFT(parent, field); \ + } \ + RB_COLOR(tmp, field) = RB_COLOR(parent, field);\ + RB_COLOR(parent, field) = RB_BLACK; \ + if (RB_LEFT(tmp, field)) \ + RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\ + RB_ROTATE_RIGHT(head, parent, tmp, field);\ + 
elm = RB_ROOT(head); \ + break; \ + } \ + } \ + } \ + if (elm) \ + RB_COLOR(elm, field) = RB_BLACK; \ +} \ + \ +attr struct type * \ +name##_RB_REMOVE(struct name *head, struct type *elm) \ +{ \ + struct type *child, *parent, *old = elm; \ + int color; \ + if (RB_LEFT(elm, field) == NULL) \ + child = RB_RIGHT(elm, field); \ + else if (RB_RIGHT(elm, field) == NULL) \ + child = RB_LEFT(elm, field); \ + else { \ + struct type *left; \ + elm = RB_RIGHT(elm, field); \ + while ((left = RB_LEFT(elm, field))) \ + elm = left; \ + child = RB_RIGHT(elm, field); \ + parent = RB_PARENT(elm, field); \ + color = RB_COLOR(elm, field); \ + if (child) \ + RB_PARENT(child, field) = parent; \ + if (parent) { \ + if (RB_LEFT(parent, field) == elm) \ + RB_LEFT(parent, field) = child; \ + else \ + RB_RIGHT(parent, field) = child; \ + RB_AUGMENT(parent); \ + } else \ + RB_ROOT(head) = child; \ + if (RB_PARENT(elm, field) == old) \ + parent = elm; \ + (elm)->field = (old)->field; \ + if (RB_PARENT(old, field)) { \ + if (RB_LEFT(RB_PARENT(old, field), field) == old)\ + RB_LEFT(RB_PARENT(old, field), field) = elm;\ + else \ + RB_RIGHT(RB_PARENT(old, field), field) = elm;\ + RB_AUGMENT(RB_PARENT(old, field)); \ + } else \ + RB_ROOT(head) = elm; \ + RB_PARENT(RB_LEFT(old, field), field) = elm; \ + if (RB_RIGHT(old, field)) \ + RB_PARENT(RB_RIGHT(old, field), field) = elm; \ + if (parent) { \ + left = parent; \ + do { \ + RB_AUGMENT(left); \ + } while ((left = RB_PARENT(left, field))); \ + } \ + goto color; \ + } \ + parent = RB_PARENT(elm, field); \ + color = RB_COLOR(elm, field); \ + if (child) \ + RB_PARENT(child, field) = parent; \ + if (parent) { \ + if (RB_LEFT(parent, field) == elm) \ + RB_LEFT(parent, field) = child; \ + else \ + RB_RIGHT(parent, field) = child; \ + RB_AUGMENT(parent); \ + } else \ + RB_ROOT(head) = child; \ +color: \ + if (color == RB_BLACK) \ + name##_RB_REMOVE_COLOR(head, parent, child); \ + return (old); \ +} \ + \ +/* Inserts a node into the RB tree */ \ +attr 
struct type * \ +name##_RB_INSERT(struct name *head, struct type *elm) \ +{ \ + struct type *tmp; \ + struct type *parent = NULL; \ + int comp = 0; \ + tmp = RB_ROOT(head); \ + while (tmp) { \ + parent = tmp; \ + comp = (cmp)(elm, parent); \ + if (comp < 0) \ + tmp = RB_LEFT(tmp, field); \ + else if (comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + RB_SET(elm, parent, field); \ + if (parent != NULL) { \ + if (comp < 0) \ + RB_LEFT(parent, field) = elm; \ + else \ + RB_RIGHT(parent, field) = elm; \ + RB_AUGMENT(parent); \ + } else \ + RB_ROOT(head) = elm; \ + name##_RB_INSERT_COLOR(head, elm); \ + return (NULL); \ +} \ + \ +/* Finds the node with the same key as elm */ \ +attr struct type * \ +name##_RB_FIND(struct name *head, struct type *elm) \ +{ \ + struct type *tmp = RB_ROOT(head); \ + int comp; \ + while (tmp) { \ + comp = cmp(elm, tmp); \ + if (comp < 0) \ + tmp = RB_LEFT(tmp, field); \ + else if (comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + return (NULL); \ +} \ + \ +/* Finds the first node greater than or equal to the search key */ \ +attr struct type * \ +name##_RB_NFIND(struct name *head, struct type *elm) \ +{ \ + struct type *tmp = RB_ROOT(head); \ + struct type *res = NULL; \ + int comp; \ + while (tmp) { \ + comp = cmp(elm, tmp); \ + if (comp < 0) { \ + res = tmp; \ + tmp = RB_LEFT(tmp, field); \ + } \ + else if (comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + return (res); \ +} \ + \ +/* ARGSUSED */ \ +attr struct type * \ +name##_RB_NEXT(struct type *elm) \ +{ \ + if (RB_RIGHT(elm, field)) { \ + elm = RB_RIGHT(elm, field); \ + while (RB_LEFT(elm, field)) \ + elm = RB_LEFT(elm, field); \ + } else { \ + if (RB_PARENT(elm, field) && \ + (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + else { \ + while (RB_PARENT(elm, field) && \ + (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\ + elm = RB_PARENT(elm, field); \ + elm = 
RB_PARENT(elm, field); \ + } \ + } \ + return (elm); \ +} \ + \ +/* ARGSUSED */ \ +attr struct type * \ +name##_RB_PREV(struct type *elm) \ +{ \ + if (RB_LEFT(elm, field)) { \ + elm = RB_LEFT(elm, field); \ + while (RB_RIGHT(elm, field)) \ + elm = RB_RIGHT(elm, field); \ + } else { \ + if (RB_PARENT(elm, field) && \ + (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + else { \ + while (RB_PARENT(elm, field) && \ + (elm == RB_LEFT(RB_PARENT(elm, field), field)))\ + elm = RB_PARENT(elm, field); \ + elm = RB_PARENT(elm, field); \ + } \ + } \ + return (elm); \ +} \ + \ +attr struct type * \ +name##_RB_MINMAX(struct name *head, int val) \ +{ \ + struct type *tmp = RB_ROOT(head); \ + struct type *parent = NULL; \ + while (tmp) { \ + parent = tmp; \ + if (val < 0) \ + tmp = RB_LEFT(tmp, field); \ + else \ + tmp = RB_RIGHT(tmp, field); \ + } \ + return (parent); \ +} +/* Direction arguments for name##_RB_MINMAX(): a negative value keeps descending left (minimum), any other value keeps descending right (maximum). */ +#define RB_NEGINF -1 +#define RB_INF 1 +/* Public entry points: each forwards to the function generated for tree type "name". RB_NEXT and RB_PREV accept the head argument x but do not use it. */ +#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y) +#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y) +#define RB_FIND(name, x, y) name##_RB_FIND(x, y) +#define RB_NFIND(name, x, y) name##_RB_NFIND(x, y) +#define RB_NEXT(name, x, y) name##_RB_NEXT(y) +#define RB_PREV(name, x, y) name##_RB_PREV(y) +#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF) +#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF) +/* Walk from RB_MIN() through successive name##_RB_NEXT() results until NULL. The step expression reads x after the body, so x must still be a valid tree node at that point. */ +#define RB_FOREACH(x, name, head) \ + for ((x) = RB_MIN(name, head); \ + (x) != NULL; \ + (x) = name##_RB_NEXT(x)) +/* As RB_FOREACH, but the successor is stored in y before the body runs, so the step never reads the old x again after the body. */ +#define RB_FOREACH_SAFE(x, name, head, y) \ + for ((x) = RB_MIN(name, head); \ + ((x) != NULL) && ((y) = name##_RB_NEXT(x), 1); \ + (x) = (y)) +/* Walk from RB_MAX() through successive name##_RB_PREV() results until NULL; x must still be a valid tree node when the step runs. */ +#define RB_FOREACH_REVERSE(x, name, head) \ + for ((x) = RB_MAX(name, head); \ + (x) != NULL; \ + (x) = name##_RB_PREV(x)) +/* As RB_FOREACH_REVERSE, but the predecessor is stored in y before the body runs. */ +#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \ + for ((x) = RB_MAX(name, head); \ + ((x) != NULL) && ((y) = name##_RB_PREV(x), 1); \ + (x) = (y)) + +#endif /*!OCONFIGURE_CONFIG_H*/ diff --git a/config.log b/config.log @@ -0,0 +1,370 @@ 
+configure.local: no (fully automatic configuration) + +arc4random: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_ARC4RANDOM -o test-arc4random tests.c +tests.c: In function ‘main’: +tests.c:16:10: error: implicit declaration of function ‘arc4random’; did you mean ‘srandom’? [-Werror=implicit-function-declaration] + 16 | return (arc4random() + 1) ? 0 : 1; + | ^~~~~~~~~~ + | srandom +cc1: all warnings being treated as errors +arc4random: cc failed with 1 + +b64_ntop: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_B64_NTOP -o test-b64_ntop tests.c +/usr/bin/ld: /tmp/ccHkNYSC.o: in function `main': +/root/lowdown-1.0.0/tests.c:29: undefined reference to `__b64_ntop' +collect2: error: ld returned 1 exit status +b64_ntop: cc failed with 0 (retrying) +b64_ntop: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_B64_NTOP -o test-b64_ntop tests.c -lresolv +b64_ntop: cc succeeded +b64_ntop: yes (with -lresolv) + +capsicum: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_CAPSICUM -o test-capsicum tests.c +tests.c:33:10: fatal error: sys/capsicum.h: No such file or directory + 33 | #include <sys/capsicum.h> + | ^~~~~~~~~~~~~~~~ +compilation terminated. +capsicum: cc failed with 1 + +crypt: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_CRYPT -o test-crypt tests.c +/usr/bin/ld: /tmp/ccXnRY3O.o: in function `main': +/root/lowdown-1.0.0/tests.c:62: undefined reference to `crypt' +collect2: error: ld returned 1 exit status +crypt: cc failed with 0 (retrying) +crypt: testing... 
+cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_CRYPT -o test-crypt tests.c -lcrypt +crypt: cc succeeded +crypt: yes (with -lcrypt) + +endian_h: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_ENDIAN_H -o test-endian_h tests.c +endian_h: cc succeeded +endian_h: yes + +err: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_ERR -o test-err tests.c +tests.c: In function ‘main’: +tests.c:102:2: error: implicit declaration of function ‘warnc’; did you mean ‘warnx’? [-Werror=implicit-function-declaration] + 102 | warnc(ENOENT, "%d. warn", ENOENT); + | ^~~~~ + | warnx +tests.c:106:2: error: implicit declaration of function ‘errc’; did you mean ‘errx’? [-Werror=implicit-function-declaration] + 106 | errc(0, ENOENT, "%d. err", 3); + | ^~~~ + | errx +cc1: all warnings being treated as errors +err: cc failed with 1 + +explicit_bzero: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_EXPLICIT_BZERO -o test-explicit_bzero tests.c +explicit_bzero: cc succeeded +explicit_bzero: yes + +fts: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_FTS -o test-fts tests.c +fts: cc succeeded +fts: yes + +getexecname: testing... 
+cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_GETEXECNAME -o test-getexecname tests.c +tests.c: In function ‘main’: +tests.c:167:13: error: implicit declaration of function ‘getexecname’ [-Werror=implicit-function-declaration] + 167 | progname = getexecname(); + | ^~~~~~~~~~~ +tests.c:167:11: error: assignment to ‘const char *’ from ‘int’ makes pointer from integer without a cast [-Werror=int-conversion] + 167 | progname = getexecname(); + | ^ +cc1: all warnings being treated as errors +getexecname: cc failed with 1 + +getprogname: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_GETPROGNAME -o test-getprogname tests.c +tests.c: In function ‘main’: +tests.c:179:13: error: implicit declaration of function ‘getprogname’ [-Werror=implicit-function-declaration] + 179 | progname = getprogname(); + | ^~~~~~~~~~~ +tests.c:179:11: error: assignment to ‘const char *’ from ‘int’ makes pointer from integer without a cast [-Werror=int-conversion] + 179 | progname = getprogname(); + | ^ +cc1: all warnings being treated as errors +getprogname: cc failed with 1 + +INFTIM: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_INFTIM -o test-INFTIM tests.c +tests.c: In function ‘main’: +tests.c:194:48: error: ‘INFTIM’ undeclared (first use in this function) + 194 | printf("INFTIM is defined to be %ld\n", (long)INFTIM); + | ^~~~~~ +tests.c:194:48: note: each undeclared identifier is reported only once for each function it appears in +INFTIM: cc failed with 1 + +lib_socket: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_LIB_SOCKET -o test-lib_socket tests.c +lib_socket: cc succeeded +lib_socket: yes + +md5: testing... 
+cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_MD5 -o test-md5 tests.c +tests.c:212:10: fatal error: md5.h: No such file or directory + 212 | #include <md5.h> + | ^~~~~~~ +compilation terminated. +md5: cc failed with 0 (retrying) +md5: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_MD5 -o test-md5 tests.c -lmd +tests.c:212:10: fatal error: md5.h: No such file or directory + 212 | #include <md5.h> + | ^~~~~~~ +compilation terminated. +md5: cc failed with 1 + +memmem: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_MEMMEM -o test-memmem tests.c +memmem: cc succeeded +memmem: yes + +memrchr: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_MEMRCHR -o test-memrchr tests.c +memrchr: cc succeeded +memrchr: yes + +memset_s: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_MEMSET_S -o test-memset_s tests.c +tests.c: In function ‘main’: +tests.c:259:2: error: implicit declaration of function ‘memset_s’; did you mean ‘memset’? [-Werror=implicit-function-declaration] + 259 | memset_s(buf, 0, 'c', sizeof(buf)); + | ^~~~~~~~ + | memset +cc1: all warnings being treated as errors +memset_s: cc failed with 1 + +mkfifoat: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_MKFIFOAT -o test-mkfifoat tests.c +mkfifoat: cc succeeded +mkfifoat: yes + +mknodat: testing... 
+cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_MKNODAT -o test-mknodat tests.c +mknodat: cc succeeded +mknodat: yes + +osbyteorder_h: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_OSBYTEORDER_H -o test-osbyteorder_h tests.c +tests.c:282:10: fatal error: libkern/OSByteOrder.h: No such file or directory + 282 | #include <libkern/OSByteOrder.h> + | ^~~~~~~~~~~~~~~~~~~~~~~ +compilation terminated. +osbyteorder_h: cc failed with 1 + +PATH_MAX: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_PATH_MAX -o test-PATH_MAX tests.c +PATH_MAX: cc succeeded +PATH_MAX: yes + +pledge: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_PLEDGE -o test-pledge tests.c +tests.c: In function ‘main’: +tests.c:328:11: error: implicit declaration of function ‘pledge’ [-Werror=implicit-function-declaration] + 328 | return !!pledge("stdio", NULL); + | ^~~~~~ +cc1: all warnings being treated as errors +pledge: cc failed with 1 + +program_invocation_short_name: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_PROGRAM_INVOCATION_SHORT_NAME -o test-program_invocation_short_name tests.c +program_invocation_short_name: cc succeeded +program_invocation_short_name: yes + +readpassphrase: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_READPASSPHRASE -o test-readpassphrase tests.c +tests.c:344:10: fatal error: readpassphrase.h: No such file or directory + 344 | #include <readpassphrase.h> + | ^~~~~~~~~~~~~~~~~~ +compilation terminated. 
+readpassphrase: cc failed with 1 + +reallocarray: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_REALLOCARRAY -o test-reallocarray tests.c +reallocarray: cc succeeded +reallocarray: yes + +recallocarray: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_RECALLOCARRAY -o test-recallocarray tests.c +tests.c: In function ‘main’: +tests.c:370:10: error: implicit declaration of function ‘recallocarray’; did you mean ‘reallocarray’? [-Werror=implicit-function-declaration] + 370 | return !recallocarray(NULL, 0, 2, 2); + | ^~~~~~~~~~~~~ + | reallocarray +cc1: all warnings being treated as errors +recallocarray: cc failed with 1 + +sandbox_init: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SANDBOX_INIT -Wno-deprecated -o test-sandbox_init tests.c +tests.c:374:10: fatal error: sandbox.h: No such file or directory + 374 | #include <sandbox.h> + | ^~~~~~~~~~~ +compilation terminated. +sandbox_init: cc failed with 1 + +seccomp-filter: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SECCOMP_FILTER -o test-seccomp-filter tests.c +seccomp-filter: cc succeeded +seccomp-filter: yes + +setresgid: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SETRESGID -o test-setresgid tests.c +setresgid: cc succeeded +setresgid: yes + +setresuid: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SETRESUID -o test-setresuid tests.c +setresuid: cc succeeded +setresuid: yes + +sha2: testing... 
+cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SHA2 -o test-sha2 tests.c +tests.c:425:10: fatal error: sha2.h: No such file or directory + 425 | #include <sha2.h> + | ^~~~~~~~ +compilation terminated. +sha2: cc failed with 0 (retrying) +sha2: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SHA2 -o test-sha2 tests.c -lmd +tests.c:425:10: fatal error: sha2.h: No such file or directory + 425 | #include <sha2.h> + | ^~~~~~~~ +compilation terminated. +sha2: cc failed with 1 + +SOCK_NONBLOCK: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SOCK_NONBLOCK -o test-SOCK_NONBLOCK tests.c +SOCK_NONBLOCK: cc succeeded +SOCK_NONBLOCK: yes + +static: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_STATIC -o test-static tests.c -static +static: cc succeeded +static: yes + +strlcat: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_STRLCAT -o test-strlcat tests.c +tests.c: In function ‘main’: +tests.c:468:12: error: implicit declaration of function ‘strlcat’; did you mean ‘strncat’? [-Werror=implicit-function-declaration] + 468 | return ! (strlcat(buf, "b", sizeof(buf)) == 2 && + | ^~~~~~~ + | strncat +cc1: all warnings being treated as errors +strlcat: cc failed with 1 + +strlcpy: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_STRLCPY -o test-strlcpy tests.c +tests.c: In function ‘main’: +tests.c:479:12: error: implicit declaration of function ‘strlcpy’; did you mean ‘strncpy’? [-Werror=implicit-function-declaration] + 479 | return ! 
(strlcpy(buf, "a", sizeof(buf)) == 1 && + | ^~~~~~~ + | strncpy +cc1: all warnings being treated as errors +strlcpy: cc failed with 1 + +strndup: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_STRNDUP -o test-strndup tests.c +strndup: cc succeeded +strndup: yes + +strnlen: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_STRNLEN -o test-strnlen tests.c +strnlen: cc succeeded +strnlen: yes + +strtonum: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_STRTONUM -o test-strtonum tests.c +tests.c: In function ‘main’: +tests.c:535:6: error: implicit declaration of function ‘strtonum’; did you mean ‘strtouq’? [-Werror=implicit-function-declaration] + 535 | if (strtonum("1", 0, 2, &errstr) != 1) + | ^~~~~~~~ + | strtouq +cc1: all warnings being treated as errors +strtonum: cc failed with 1 + +sys_byteorder_h: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SYS_BYTEORDER_H -o test-sys_byteorder_h tests.c +tests.c:555:10: fatal error: sys/byteorder.h: No such file or directory + 555 | #include <sys/byteorder.h> + | ^~~~~~~~~~~~~~~~~ +compilation terminated. +sys_byteorder_h: cc failed with 1 + +sys_endian_h: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SYS_ENDIAN_H -o test-sys_endian_h tests.c +tests.c:564:10: fatal error: sys/endian.h: No such file or directory + 564 | #include <sys/endian.h> + | ^~~~~~~~~~~~~~ +compilation terminated. +sys_endian_h: cc failed with 1 + +sys_mkdev_h: testing... 
+cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SYS_MKDEV_H -o test-sys_mkdev_h tests.c +tests.c:574:10: fatal error: sys/mkdev.h: No such file or directory + 574 | #include <sys/mkdev.h> + | ^~~~~~~~~~~~~ +compilation terminated. +sys_mkdev_h: cc failed with 1 + +sys_sysmacros_h: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SYS_SYSMACROS_H -o test-sys_sysmacros_h tests.c +sys_sysmacros_h: cc succeeded +sys_sysmacros_h: yes + +sys_queue: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SYS_QUEUE -o test-sys_queue tests.c +tests.c: In function ‘main’: +tests.c:608:2: error: implicit declaration of function ‘TAILQ_FOREACH_SAFE’; did you mean ‘TAILQ_FOREACH’? [-Werror=implicit-function-declaration] + 608 | TAILQ_FOREACH_SAFE(p, &foo_q, entries, tmp) + | ^~~~~~~~~~~~~~~~~~ + | TAILQ_FOREACH +tests.c:608:32: error: ‘entries’ undeclared (first use in this function) + 608 | TAILQ_FOREACH_SAFE(p, &foo_q, entries, tmp) + | ^~~~~~~ +tests.c:608:32: note: each undeclared identifier is reported only once for each function it appears in +tests.c:608:45: error: expected ‘;’ before ‘p’ + 608 | TAILQ_FOREACH_SAFE(p, &foo_q, entries, tmp) + | ^ + | ; + 609 | p->bar = i++; + | ~ +cc1: all warnings being treated as errors +sys_queue: cc failed with 1 + +sys_tree: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_SYS_TREE -o test-sys_tree tests.c +tests.c:627:10: fatal error: sys/tree.h: No such file or directory + 627 | #include <sys/tree.h> + | ^~~~~~~~~~~~ +compilation terminated. +sys_tree: cc failed with 1 + +unveil: testing... 
+cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_UNVEIL -o test-unveil tests.c +tests.c: In function ‘main’: +tests.c:672:15: error: implicit declaration of function ‘unveil’ [-Werror=implicit-function-declaration] + 672 | return -1 != unveil(NULL, NULL); + | ^~~~~~ +cc1: all warnings being treated as errors +unveil: cc failed with 1 + +WAIT_ANY: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST_WAIT_ANY -o test-WAIT_ANY tests.c +WAIT_ANY: cc succeeded +WAIT_ANY: yes + +__progname: testing... +cc -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -Wwrite-strings -Wno-unused-parameter -Wno-unused -Werror -DTEST___PROGNAME -o test-__progname tests.c +__progname: cc succeeded +__progname: yes + +config.h: written +Makefile.configure: written diff --git a/configure b/configure @@ -0,0 +1,2449 @@ +#! /bin/sh +# +# Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@openbsd.org> +# Copyright (c) 2017, 2018 Kristaps Dzonsons <kristaps@bsd.lv> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +OCONFIGURE_VERSION="0.3.5" + +# +# This script outputs two files: config.h and Makefile.configure. 
+# It tries to read from configure.local, which contains predefined +# values we won't autoconfigure. +# +# If you want to use configure with your project, have your GNUmakefile +# or BSDmakefile---whichever---try to import/include Makefile.configure +# at the beginning of the file. +# +# Like so (note no quotes, no period, etc.): +# +# include Makefile.configure +# +# If it exists, configure was run; otherwise, it wasn't. +# +# You'll probably want to change parts of this file. I've noted the +# parts that you'll probably change in the section documentation. +# +# See https://github.com/kristapsdz/oconfigure for more. + +# Abort on the first unguarded command failure. +set -e + +#---------------------------------------------------------------------- +# Prepare for running: move aside previous configure runs. +# Output file descriptor usage: +# 1 (stdout): config.h or Makefile.configure +# 2 (stderr): original stderr, usually to the console +# 3: config.log +# You DO NOT want to change this. +#---------------------------------------------------------------------- + +[ -w config.log ] && mv config.log config.log.old +[ -w config.h ] && mv config.h config.h.old + +# From here on, everything written to descriptor 3 lands in config.log. +exec 3> config.log +echo "config.log: writing..." + +# GNU submake prints different output if invoked recursively, which +# messes up CC and CFLAGS detection. Pass --no-print-directory if +# we have a MAKELEVEL (GNU and FreeBSD make) and the argument is +# allowed. + +MAKE_FLAGS="" + +if [ -n "${MAKELEVEL}" ]; then + if [ "${MAKELEVEL}" -gt 0 ] ; then + MAKE_FLAGS="--no-print-directory" + echo "all:" | make ${MAKE_FLAGS} -sf - 2>/dev/null || MAKE_FLAGS="" + fi +fi + +if [ -n "$MAKE_FLAGS" ]; then + echo "GNU submake detected: using --no-print-directory" 1>&2 + echo "GNU submake detected: using --no-print-directory" 1>&3 +fi + +#---------------------------------------------------------------------- +# Initialize all variables here such that nothing can leak in from the +# environment except for CC and CFLAGS, which we might have passed in. 
+#---------------------------------------------------------------------- + +# Ask make(1) for its CC and CFLAGS values by feeding it a one-rule +# makefile on standard input that just echoes the variable. +CC=`printf "all:\\n\\t@echo \\\$(CC)\\n" | make ${MAKE_FLAGS} -sf -` +CFLAGS=`printf "all:\\n\\t@echo \\\$(CFLAGS)\\n" | make ${MAKE_FLAGS} -sf -` +CFLAGS="${CFLAGS} -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes" +CFLAGS="${CFLAGS} -Wwrite-strings -Wno-unused-parameter" +LDADD= +LDADD_B64_NTOP= +LDADD_CRYPT= +LDADD_MD5= +LDADD_SHA2= +LDADD_LIB_SOCKET= +LDADD_STATIC= +CPPFLAGS= +LDFLAGS= +DESTDIR= +PREFIX="/usr/local" +BINDIR= +SBINDIR= +INCLUDEDIR= +LIBDIR= +MANDIR= +SHAREDIR= +INSTALL="install" +INSTALL_PROGRAM= +INSTALL_LIB= +INSTALL_MAN= +INSTALL_DATA= + +# SunOS sets "cc", but this doesn't exist. +# It does have gcc, so try that instead. +# Prefer clang, though. +# Fallback order: the CC reported by make, then clang, then gcc; exit 1 +# if none of them is found by "command -v". + +command -v ${CC} 2>/dev/null 1>&2 || { + echo "${CC} not found: trying clang" 1>&2 + echo "${CC} not found: trying clang" 1>&3 + CC=clang + command -v ${CC} 2>/dev/null 1>&2 || { + echo "${CC} not found: trying gcc" 1>&2 + echo "${CC} not found: trying gcc" 1>&3 + CC=gcc + command -v ${CC} 2>/dev/null 1>&2 || { + echo "gcc not found: giving up" 1>&2 + echo "gcc not found: giving up" 1>&3 + exit 1 + } + } +} + +#---------------------------------------------------------------------- +# Allow certain variables to be overridden on the command line. 
+#---------------------------------------------------------------------- + +# Each argument must look like KEY=value; anything else is rejected. +for keyvals in "$@" +do + # Quote the argument so that whitespace and glob characters in the + # value survive the split into key and value. + key=`echo "$keyvals" | cut -s -d '=' -f 1` + if [ -z "$key" ] + then + echo "$0: invalid key-value: $keyvals" 1>&2 + exit 1 + fi + val=`echo "$keyvals" | cut -d '=' -f 2-` + case "$key" in + LDADD) + LDADD="$val" ;; + LDFLAGS) + LDFLAGS="$val" ;; + CPPFLAGS) + CPPFLAGS="$val" ;; + DESTDIR) + DESTDIR="$val" ;; + PREFIX) + PREFIX="$val" ;; + MANDIR) + MANDIR="$val" ;; + LIBDIR) + LIBDIR="$val" ;; + BINDIR) + BINDIR="$val" ;; + SHAREDIR) + SHAREDIR="$val" ;; + SBINDIR) + SBINDIR="$val" ;; + INCLUDEDIR) + INCLUDEDIR="$val" ;; + *) + echo "$0: invalid key: $key" 1>&2 + exit 1 + esac +done + + +#---------------------------------------------------------------------- +# These are the values that will be pushed into config.h after we test +# for whether they're supported or not. +# Each of these must have a runtest(), below. +# Every macro name handed to runtest() must also be cleared here: +# runtest() treats a non-empty HAVE_xxx as a manual override, so a name +# missing from this list could be inherited from the environment. +# Please sort by alpha, for clarity. +# You WANT to change this. +#---------------------------------------------------------------------- + +HAVE_ARC4RANDOM= +HAVE_B64_NTOP= +HAVE_CAPSICUM= +HAVE_CRYPT= +HAVE_ENDIAN_H= +HAVE_ERR= +HAVE_EXPLICIT_BZERO= +HAVE_FTS= +HAVE_GETEXECNAME= +HAVE_GETPROGNAME= +HAVE_INFTIM= +HAVE_LIB_SOCKET= +HAVE_MD5= +HAVE_MEMMEM= +HAVE_MEMRCHR= +HAVE_MEMSET_S= +HAVE_MKFIFOAT= +HAVE_MKNODAT= +HAVE_OSBYTEORDER_H= +HAVE_PATH_MAX= +HAVE_PLEDGE= +HAVE_PROGRAM_INVOCATION_SHORT_NAME= +HAVE_READPASSPHRASE= +HAVE_REALLOCARRAY= +HAVE_RECALLOCARRAY= +HAVE_SANDBOX_INIT= +HAVE_SECCOMP_FILTER= +HAVE_SETRESGID= +HAVE_SETRESUID= +HAVE_SOCK_NONBLOCK= +HAVE_SHA2= +HAVE_SHA2_H= +HAVE_STATIC= +HAVE_STRLCAT= +HAVE_STRLCPY= +HAVE_STRNDUP= +HAVE_STRNLEN= +HAVE_STRTONUM= +HAVE_SYS_BYTEORDER_H= +HAVE_SYS_ENDIAN_H= +HAVE_SYS_MKDEV_H= +HAVE_SYS_QUEUE= +HAVE_SYS_SYSMACROS= +HAVE_SYS_SYSMACROS_H= +HAVE_SYS_TREE= +HAVE_SYSTRACE=0 +HAVE_UNVEIL= +HAVE_WAIT_ANY= +HAVE___PROGNAME= + +#---------------------------------------------------------------------- +# Allow configure.local to override all variables, default 
settings, +# command-line arguments, and tested features, above. +# You PROBABLY DO NOT want to change this. +#---------------------------------------------------------------------- + +if [ -r ./configure.local ]; then + echo "configure.local: reading..." 1>&2 + echo "configure.local: reading..." 1>&3 + cat ./configure.local 1>&3 + . ./configure.local +else + echo "configure.local: no (fully automatic configuration)" 1>&2 + echo "configure.local: no (fully automatic configuration)" 1>&3 +fi + +echo 1>&3 + +#---------------------------------------------------------------------- +# Infrastructure for running tests. +# This consists of a series of functions that will attempt to run the +# given test file and record its exit into a HAVE_xxx variable. +# You DO NOT want to change this. +#---------------------------------------------------------------------- + +COMP="${CC} ${CFLAGS} ${CPPFLAGS} -Wno-unused -Werror" + +# Check whether this HAVE_ setting is manually overridden. +# If yes, use the override, if no, do not decide anything yet. +# Arguments: lower-case test name, upper-case test name, manual value. +# Returns 0 (and logs the override) when the manual value is non-empty, +# 1 otherwise. + +ismanual() { + [ -z "${3}" ] && return 1 + echo "${1}: manual (HAVE_${2}=${3})" 1>&2 + echo "${1}: manual (HAVE_${2}=${3})" 1>&3 + echo 1>&3 + return 0 +} + +# Run a single autoconfiguration test. +# In case of success, enable the feature. +# In case of failure, do not decide anything yet. +# Arguments: lower-case test name, upper-case test name, additional +# CFLAGS, additional LIBS, alternative LIBS (tried only when the first +# attempt fails and this argument is non-empty). +# On success sets HAVE_<name>=1, records the LIBS that worked in +# LDADD_<name>, removes the test binary and returns 0; returns 1 when +# every attempt fails. + +singletest() { + extralib="" + cat 1>&3 << __HEREDOC__ +${1}: testing... +${COMP} -DTEST_${2} ${3} -o test-${1} tests.c ${LDFLAGS} ${4} +__HEREDOC__ + if ${COMP} -DTEST_${2} ${3} -o "test-${1}" tests.c ${LDFLAGS} ${4} 1>&3 2>&3; then + echo "${1}: ${CC} succeeded" 1>&3 + else + # Save the compiler's exit status before the [ ] test below + # replaces it; otherwise the log reports the status of [ ]. + _rc=$? + if [ -n "${5}" ] ; then + echo "${1}: ${CC} failed with ${_rc} (retrying)" 1>&3 + cat 1>&3 << __HEREDOC__ +${1}: testing... 
+${COMP} -DTEST_${2} ${3} -o test-${1} tests.c ${LDFLAGS} ${5} +__HEREDOC__ + if ${COMP} -DTEST_${2} ${3} -o "test-${1}" tests.c ${LDFLAGS} ${5} 1>&3 2>&3; then + echo "${1}: ${CC} succeeded" 1>&3 + extralib="(with ${5})" + else + # No command has run since the retry, so this is its status. + echo "${1}: ${CC} failed with $?" 1>&3 + echo 1>&3 + return 1 + fi + else + echo "${1}: ${CC} failed with ${_rc}" 1>&3 + echo 1>&3 + return 1 + fi + fi + + if [ -n "${extralib}" ] + then + eval "LDADD_${2}=\"${5}\"" + elif [ -n "${4}" ] + then + eval "LDADD_${2}=\"${4}\"" + fi + + echo "${1}: yes ${extralib}" 1>&2 + echo "${1}: yes ${extralib}" 1>&3 + echo 1>&3 + eval HAVE_${2}=1 + rm "test-${1}" + return 0 +} + +# Run a complete autoconfiguration test, including the check for +# a manual override and disabling the feature on failure. +# Arguments: lower case name, upper case name, additional CFLAGS, +# additional LDADD, alternative LDADD. +# Returns 0 when the feature is overridden manually or detected, and 1 +# (after setting HAVE_<name>=0) when it is not available. + +runtest() { + eval _manual=\${HAVE_${2}} + ismanual "${1}" "${2}" "${_manual}" && return 0 + singletest "${1}" "${2}" "${3}" "${4}" "${5}" && return 0 + echo "${1}: no" 1>&2 + eval HAVE_${2}=0 + return 1 +} + +#---------------------------------------------------------------------- +# Begin running the tests themselves. +# All of your tests must be defined here. +# Please sort as the HAVE_xxxx values were defined. +# You WANT to change this. 
+# It consists of the following columns: +# runtest +# (1) test file +# (2) macro to set +# (3) argument to cc *before* -o +# (4) argument to cc *after* +# (5) alternative argument to cc *after* +#---------------------------------------------------------------------- + +runtest arc4random ARC4RANDOM || true +runtest b64_ntop B64_NTOP "" "" "-lresolv" || true +runtest capsicum CAPSICUM || true +runtest crypt CRYPT "" "" "-lcrypt" || true +runtest endian_h ENDIAN_H || true +runtest err ERR || true +runtest explicit_bzero EXPLICIT_BZERO || true +runtest fts FTS || true +runtest getexecname GETEXECNAME || true +runtest getprogname GETPROGNAME || true +runtest INFTIM INFTIM || true +runtest lib_socket LIB_SOCKET "" "" "-lsocket -lnsl" || true +runtest md5 MD5 "" "" "-lmd" || true +runtest memmem MEMMEM || true +runtest memrchr MEMRCHR || true +runtest memset_s MEMSET_S || true +runtest mkfifoat MKFIFOAT || true +runtest mknodat MKNODAT || true +runtest osbyteorder_h OSBYTEORDER_H || true +runtest PATH_MAX PATH_MAX || true +runtest pledge PLEDGE || true +runtest program_invocation_short_name PROGRAM_INVOCATION_SHORT_NAME || true +runtest readpassphrase READPASSPHRASE || true +runtest reallocarray REALLOCARRAY || true +runtest recallocarray RECALLOCARRAY || true +runtest sandbox_init SANDBOX_INIT "-Wno-deprecated" || true +runtest seccomp-filter SECCOMP_FILTER || true +runtest setresgid SETRESGID || true +runtest setresuid SETRESUID || true +runtest sha2 SHA2 "" "" "-lmd" || true +runtest SOCK_NONBLOCK SOCK_NONBLOCK || true +runtest static STATIC "" "-static" || true +runtest strlcat STRLCAT || true +runtest strlcpy STRLCPY || true +runtest strndup STRNDUP || true +runtest strnlen STRNLEN || true +runtest strtonum STRTONUM || true +runtest sys_byteorder_h SYS_BYTEORDER_H || true +runtest sys_endian_h SYS_ENDIAN_H || true +runtest sys_mkdev_h SYS_MKDEV_H || true +runtest sys_sysmacros_h SYS_SYSMACROS_H || true +runtest sys_queue SYS_QUEUE || true +runtest sys_tree SYS_TREE 
|| true +runtest unveil UNVEIL || true +runtest WAIT_ANY WAIT_ANY || true +runtest __progname __PROGNAME || true + +#---------------------------------------------------------------------- +# Output writing: generate the config.h file. +# This file contains all of the HAVE_xxxx variables necessary for +# compiling your source. +# You must include "config.h" BEFORE any other headers. +# You WANT to change this. +#---------------------------------------------------------------------- + +# Redirect standard output: everything printed from here on is written to config.h. +exec > config.h + +# Start with prologue. + +cat << __HEREDOC__ +#ifndef OCONFIGURE_CONFIG_H +#define OCONFIGURE_CONFIG_H + +#ifdef __cplusplus +# error "Do not use C++: this is a C application." +#endif +#if !defined(__GNUC__) || (__GNUC__ < 4) +# define __attribute__(x) +#endif +#if defined(__linux__) || defined(__MINT__) +# define _GNU_SOURCE /* memmem, memrchr, setresuid... */ +# define _DEFAULT_SOURCE /* le32toh, crypt, ... */ +#endif +#if defined(__NetBSD__) +# define _OPENBSD_SOURCE /* reallocarray, etc. */ +#endif +#if defined(__sun) +# ifndef _XOPEN_SOURCE /* SunOS already defines */ +# define _XOPEN_SOURCE /* XPGx */ +# endif +# define _XOPEN_SOURCE_EXTENDED 1 /* XPG4v2 */ +# ifndef __EXTENSIONS__ /* SunOS already defines */ +# define __EXTENSIONS__ /* reallocarray, etc. */ +# endif +#endif +#if !defined(__BEGIN_DECLS) +# define __BEGIN_DECLS +#endif +#if !defined(__END_DECLS) +# define __END_DECLS +#endif + +__HEREDOC__ + +# This is just for size_t, mode_t, and dev_t. +# Most of these functions, in the real world, pull in <string.h> or +# something that pulls in support for size_t. +# Our function declarations are standalone, so specify them here. 
+ +if [ ${HAVE_FTS} -eq 0 -o \ + ${HAVE_MD5} -eq 0 -o \ + ${HAVE_MEMMEM} -eq 0 -o \ + ${HAVE_MEMRCHR} -eq 0 -o \ + ${HAVE_MKFIFOAT} -eq 0 -o \ + ${HAVE_MKNODAT} -eq 0 -o \ + ${HAVE_READPASSPHRASE} -eq 0 -o \ + ${HAVE_REALLOCARRAY} -eq 0 -o \ + ${HAVE_RECALLOCARRAY} -eq 0 -o \ + ${HAVE_SETRESGID} -eq 0 -o \ + ${HAVE_SETRESUID} -eq 0 -o \ + ${HAVE_SHA2} -eq 0 -o \ + ${HAVE_STRLCAT} -eq 0 -o \ + ${HAVE_STRLCPY} -eq 0 -o \ + ${HAVE_STRNDUP} -eq 0 -o \ + ${HAVE_STRNLEN} -eq 0 ] +then + echo "#include <sys/types.h> /* size_t, mode_t, dev_t */ " + echo +fi + +if [ ${HAVE_MD5} -eq 0 -o \ + ${HAVE_SHA2} -eq 0 ] +then + echo "#include <stdint.h> /* C99 [u]int[nn]_t types */" + echo +fi + +if [ ${HAVE_ERR} -eq 0 ] +then + echo "#include <stdarg.h> /* err(3) */" + echo +fi + +# Now we handle our HAVE_xxxx values. +# Most will just be defined as 0 or 1. + +if [ ${HAVE_PATH_MAX} -eq 0 ] +then + echo "#define PATH_MAX 4096" + echo +fi + +if [ ${HAVE_WAIT_ANY} -eq 0 ] +then + echo "#define WAIT_ANY (-1) /* sys/wait.h */" + echo "#define WAIT_MYPGRP 0" + echo +fi + + +if [ ${HAVE_INFTIM} -eq 0 ] +then + echo "#define INFTIM (-1) /* poll.h */" + echo +fi + +cat << __HEREDOC__ +/* + * Results of configuration feature-testing. 
+ */ +#define HAVE_ARC4RANDOM ${HAVE_ARC4RANDOM} +#define HAVE_B64_NTOP ${HAVE_B64_NTOP} +#define HAVE_CAPSICUM ${HAVE_CAPSICUM} +#define HAVE_CRYPT ${HAVE_CRYPT} +#define HAVE_ENDIAN_H ${HAVE_ENDIAN_H} +#define HAVE_ERR ${HAVE_ERR} +#define HAVE_EXPLICIT_BZERO ${HAVE_EXPLICIT_BZERO} +#define HAVE_FTS ${HAVE_FTS} +#define HAVE_GETEXECNAME ${HAVE_GETEXECNAME} +#define HAVE_GETPROGNAME ${HAVE_GETPROGNAME} +#define HAVE_INFTIM ${HAVE_INFTIM} +#define HAVE_MD5 ${HAVE_MD5} +#define HAVE_MEMMEM ${HAVE_MEMMEM} +#define HAVE_MEMRCHR ${HAVE_MEMRCHR} +#define HAVE_MEMSET_S ${HAVE_MEMSET_S} +#define HAVE_MKFIFOAT ${HAVE_MKFIFOAT} +#define HAVE_MKNODAT ${HAVE_MKNODAT} +#define HAVE_OSBYTEORDER_H ${HAVE_OSBYTEORDER_H} +#define HAVE_PATH_MAX ${HAVE_PATH_MAX} +#define HAVE_PLEDGE ${HAVE_PLEDGE} +#define HAVE_PROGRAM_INVOCATION_SHORT_NAME ${HAVE_PROGRAM_INVOCATION_SHORT_NAME} +#define HAVE_READPASSPHRASE ${HAVE_READPASSPHRASE} +#define HAVE_REALLOCARRAY ${HAVE_REALLOCARRAY} +#define HAVE_RECALLOCARRAY ${HAVE_RECALLOCARRAY} +#define HAVE_SANDBOX_INIT ${HAVE_SANDBOX_INIT} +#define HAVE_SECCOMP_FILTER ${HAVE_SECCOMP_FILTER} +#define HAVE_SETRESGID ${HAVE_SETRESGID} +#define HAVE_SETRESUID ${HAVE_SETRESUID} +#define HAVE_SHA2 ${HAVE_SHA2} +#define HAVE_SHA2_H ${HAVE_SHA2} +#define HAVE_SOCK_NONBLOCK ${HAVE_SOCK_NONBLOCK} +#define HAVE_STRLCAT ${HAVE_STRLCAT} +#define HAVE_STRLCPY ${HAVE_STRLCPY} +#define HAVE_STRNDUP ${HAVE_STRNDUP} +#define HAVE_STRNLEN ${HAVE_STRNLEN} +#define HAVE_STRTONUM ${HAVE_STRTONUM} +#define HAVE_SYS_BYTEORDER_H ${HAVE_SYS_BYTEORDER_H} +#define HAVE_SYS_ENDIAN_H ${HAVE_SYS_ENDIAN_H} +#define HAVE_SYS_MKDEV_H ${HAVE_SYS_MKDEV_H} +#define HAVE_SYS_QUEUE ${HAVE_SYS_QUEUE} +#define HAVE_SYS_SYSMACROS_H ${HAVE_SYS_SYSMACROS_H} +#define HAVE_SYS_TREE ${HAVE_SYS_TREE} +#define HAVE_SYSTRACE ${HAVE_SYSTRACE} +#define HAVE_UNVEIL ${HAVE_UNVEIL} +#define HAVE_WAIT_ANY ${HAVE_WAIT_ANY} +#define HAVE___PROGNAME ${HAVE___PROGNAME} + +__HEREDOC__ + +# Compat for 
libkern/OSByteOrder.h in place of endian.h. + +[ ${HAVE_OSBYTEORDER_H} -eq 1 -a \ + ${HAVE_ENDIAN_H} -eq 0 -a \ + ${HAVE_SYS_BYTEORDER_H} -eq 0 -a \ + ${HAVE_SYS_ENDIAN_H} -eq 0 ] \ + && cat << __HEREDOC__ +/* + * endian.h compatibility with libkern/OSByteOrder.h. + */ +#define htobe16(x) OSSwapHostToBigInt16(x) +#define htole16(x) OSSwapHostToLittleInt16(x) +#define be16toh(x) OSSwapBigToHostInt16(x) +#define le16toh(x) OSSwapLittleToHostInt16(x) +#define htobe32(x) OSSwapHostToBigInt32(x) +#define htole32(x) OSSwapHostToLittleInt32(x) +#define be32toh(x) OSSwapBigToHostInt32(x) +#define le32toh(x) OSSwapLittleToHostInt32(x) +#define htobe64(x) OSSwapHostToBigInt64(x) +#define htole64(x) OSSwapHostToLittleInt64(x) +#define be64toh(x) OSSwapBigToHostInt64(x) +#define le64toh(x) OSSwapLittleToHostInt64(x) + +__HEREDOC__ + +[ ${HAVE_SYS_BYTEORDER_H} -eq 1 -a \ + ${HAVE_ENDIAN_H} -eq 0 -a \ + ${HAVE_OSBYTEORDER_H} -eq 0 -a \ + ${HAVE_SYS_ENDIAN_H} -eq 0 ] \ + && cat << __HEREDOC__ +/* + * endian.h compatibility with sys/byteorder.h. + */ +#define htobe16(x) BE_16(x) +#define htole16(x) LE_16(x) +#define be16toh(x) BE_16(x) +#define le16toh(x) LE_16(x) +#define htobe32(x) BE_32(x) +#define htole32(x) LE_32(x) +#define be32toh(x) BE_32(x) +#define le32toh(x) LE_32(x) +#define htobe64(x) BE_64(x) +#define htole64(x) LE_64(x) +#define be64toh(x) BE_64(x) +#define le64toh(x) LE_64(x) + +__HEREDOC__ + +# Make minor()/major()/makedev() easier to use. + +cat << __HEREDOC__ +/* + * Handle the various major()/minor() header files. + * Use sys/mkdev.h before sys/sysmacros.h because SunOS + * has both, where only the former works properly. + */ +#if HAVE_SYS_MKDEV_H +# define COMPAT_MAJOR_MINOR_H <sys/mkdev.h> +#elif HAVE_SYS_SYSMACROS_H +# define COMPAT_MAJOR_MINOR_H <sys/sysmacros.h> +#else +# define COMPAT_MAJOR_MINOR_H <sys/types.h> +#endif + +__HEREDOC__ + +# Make endian.h easier by providing a COMPAT_ENDIAN_H. 
+ +cat << __HEREDOC__ +/* + * Make it easier to include endian.h forms. + */ +#if HAVE_ENDIAN_H +# define COMPAT_ENDIAN_H <endian.h> +#elif HAVE_SYS_ENDIAN_H +# define COMPAT_ENDIAN_H <sys/endian.h> +#elif HAVE_OSBYTEORDER_H +# define COMPAT_ENDIAN_H <libkern/OSByteOrder.h> +#elif HAVE_SYS_BYTEORDER_H +# define COMPAT_ENDIAN_H <sys/byteorder.h> +#else +# warning No suitable endian.h could be found. +# warning Please e-mail the maintainers with your OS. +# define COMPAT_ENDIAN_H <endian.h> +#endif + +__HEREDOC__ + +# Now we do our function declarations for missing functions. +# Each guard emits its declarations only when the matching HAVE_xxxx value is 0. + +[ ${HAVE_ERR} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility functions for err(3). + */ +extern void err(int, const char *, ...) __attribute__((noreturn)); +extern void errc(int, int, const char *, ...) __attribute__((noreturn)); +extern void errx(int, const char *, ...) __attribute__((noreturn)); +extern void verr(int, const char *, va_list) __attribute__((noreturn)); +extern void verrc(int, int, const char *, va_list) __attribute__((noreturn)); +extern void verrx(int, const char *, va_list) __attribute__((noreturn)); +extern void warn(const char *, ...); +extern void warnx(const char *, ...); +extern void warnc(int, const char *, ...); +extern void vwarn(const char *, va_list); +extern void vwarnc(int, const char *, va_list); +extern void vwarnx(const char *, va_list); +__HEREDOC__ + +# MD5 compat declarations (MD5_CTX and the MD5* functions). + +[ ${HAVE_MD5} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for md5(3). 
+ */ +#define MD5_BLOCK_LENGTH 64 +#define MD5_DIGEST_LENGTH 16 +#define MD5_DIGEST_STRING_LENGTH (MD5_DIGEST_LENGTH * 2 + 1) + +typedef struct MD5Context { + uint32_t state[4]; + uint64_t count; + uint8_t buffer[MD5_BLOCK_LENGTH]; +} MD5_CTX; + +extern void MD5Init(MD5_CTX *); +extern void MD5Update(MD5_CTX *, const uint8_t *, size_t); +extern void MD5Pad(MD5_CTX *); +extern void MD5Transform(uint32_t [4], const uint8_t [MD5_BLOCK_LENGTH]); +extern char *MD5End(MD5_CTX *, char *); +extern void MD5Final(uint8_t [MD5_DIGEST_LENGTH], MD5_CTX *); + +__HEREDOC__ + +[ ${HAVE_SHA2} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for sha2(3). + */ + +/*** SHA-256/384/512 Various Length Definitions ***********************/ +#define SHA256_BLOCK_LENGTH 64 +#define SHA256_DIGEST_LENGTH 32 +#define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1) +#define SHA384_BLOCK_LENGTH 128 +#define SHA384_DIGEST_LENGTH 48 +#define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1) +#define SHA512_BLOCK_LENGTH 128 +#define SHA512_DIGEST_LENGTH 64 +#define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1) +#define SHA512_256_BLOCK_LENGTH 128 +#define SHA512_256_DIGEST_LENGTH 32 +#define SHA512_256_DIGEST_STRING_LENGTH (SHA512_256_DIGEST_LENGTH * 2 + 1) + +/*** SHA-224/256/384/512 Context Structure *******************************/ +typedef struct _SHA2_CTX { + union { + uint32_t st32[8]; + uint64_t st64[8]; + } state; + uint64_t bitcount[2]; + uint8_t buffer[SHA512_BLOCK_LENGTH]; +} SHA2_CTX; + +void SHA256Init(SHA2_CTX *); +void SHA256Transform(uint32_t state[8], const uint8_t [SHA256_BLOCK_LENGTH]); +void SHA256Update(SHA2_CTX *, const uint8_t *, size_t); +void SHA256Pad(SHA2_CTX *); +void SHA256Final(uint8_t [SHA256_DIGEST_LENGTH], SHA2_CTX *); +char *SHA256End(SHA2_CTX *, char *); +char *SHA256File(const char *, char *); +char *SHA256FileChunk(const char *, char *, off_t, off_t); +char *SHA256Data(const uint8_t *, size_t, char *); + +void 
SHA384Init(SHA2_CTX *); +void SHA384Transform(uint64_t state[8], const uint8_t [SHA384_BLOCK_LENGTH]); +void SHA384Update(SHA2_CTX *, const uint8_t *, size_t); +void SHA384Pad(SHA2_CTX *); +void SHA384Final(uint8_t [SHA384_DIGEST_LENGTH], SHA2_CTX *); +char *SHA384End(SHA2_CTX *, char *); +char *SHA384File(const char *, char *); +char *SHA384FileChunk(const char *, char *, off_t, off_t); +char *SHA384Data(const uint8_t *, size_t, char *); + +void SHA512Init(SHA2_CTX *); +void SHA512Transform(uint64_t state[8], const uint8_t [SHA512_BLOCK_LENGTH]); +void SHA512Update(SHA2_CTX *, const uint8_t *, size_t); +void SHA512Pad(SHA2_CTX *); +void SHA512Final(uint8_t [SHA512_DIGEST_LENGTH], SHA2_CTX *); +char *SHA512End(SHA2_CTX *, char *); +char *SHA512File(const char *, char *); +char *SHA512FileChunk(const char *, char *, off_t, off_t); +char *SHA512Data(const uint8_t *, size_t, char *); + +__HEREDOC__ + +if [ ${HAVE_SECCOMP_FILTER} -eq 1 ]; then + arch=$(uname -m 2>/dev/null || echo unknown) + case "$arch" in + x86_64) + echo "#define SECCOMP_AUDIT_ARCH AUDIT_ARCH_X86_64" + ;; + i*86) + echo "#define SECCOMP_AUDIT_ARCH AUDIT_ARCH_I386" + ;; + arm*) + echo "#define SECCOMP_AUDIT_ARCH AUDIT_ARCH_ARM" + ;; + aarch64) + echo "#define SECCOMP_AUDIT_ARCH AUDIT_ARCH_AARCH64" + ;; + esac + echo +fi + +[ ${HAVE_B64_NTOP} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for b64_ntop(3). + */ +extern int b64_ntop(unsigned char const *, size_t, char *, size_t); +extern int b64_pton(char const *, unsigned char *, size_t); + +__HEREDOC__ + +[ ${HAVE_EXPLICIT_BZERO} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for explicit_bzero(3). + */ +extern void explicit_bzero(void *, size_t); + +__HEREDOC__ + +[ ${HAVE_MEMMEM} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for memmem(3). + */ +void *memmem(const void *, size_t, const void *, size_t); + +__HEREDOC__ + +[ ${HAVE_MEMRCHR} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for memrchr(3). 
+ */ +void *memrchr(const void *b, int, size_t); + +__HEREDOC__ + +# Each guard below emits a prototype only when the matching HAVE_xxxx value is 0. + +[ ${HAVE_GETPROGNAME} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for getprogname(3). + */ +extern const char *getprogname(void); + +__HEREDOC__ + +[ ${HAVE_READPASSPHRASE} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Macros and function required for readpassphrase(3). + */ +#define RPP_ECHO_OFF 0x00 +#define RPP_ECHO_ON 0x01 +#define RPP_REQUIRE_TTY 0x02 +#define RPP_FORCELOWER 0x04 +#define RPP_FORCEUPPER 0x08 +#define RPP_SEVENBIT 0x10 +#define RPP_STDIN 0x20 +char *readpassphrase(const char *, char *, size_t, int); + +__HEREDOC__ + +[ ${HAVE_REALLOCARRAY} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for reallocarray(3). + */ +extern void *reallocarray(void *, size_t, size_t); + +__HEREDOC__ + +[ ${HAVE_RECALLOCARRAY} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for recallocarray(3). + */ +extern void *recallocarray(void *, size_t, size_t, size_t); + +__HEREDOC__ + +[ ${HAVE_STRLCAT} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for strlcat(3). + */ +extern size_t strlcat(char *, const char *, size_t); + +__HEREDOC__ + +[ ${HAVE_STRLCPY} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for strlcpy(3). + */ +extern size_t strlcpy(char *, const char *, size_t); + +__HEREDOC__ + +[ ${HAVE_STRNDUP} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for strndup(3). + */ +extern char *strndup(const char *, size_t); + +__HEREDOC__ + +[ ${HAVE_STRNLEN} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for strnlen(3). + */ +extern size_t strnlen(const char *, size_t); + +__HEREDOC__ + +[ ${HAVE_STRTONUM} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for strtonum(3). + */ +extern long long strtonum(const char *, long long, long long, const char **); + +__HEREDOC__ + +[ ${HAVE_MKFIFOAT} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for mkfifoat(2). 
+ */ +int mkfifoat(int, const char *, mode_t); + +__HEREDOC__ + +[ ${HAVE_MKNODAT} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for mknodat(2). + */ +int mknodat(int, const char *, mode_t, dev_t); + +__HEREDOC__ + +[ ${HAVE_SETRESGID} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for setresgid(2). + */ +int setresgid(gid_t rgid, gid_t egid, gid_t sgid); + +__HEREDOC__ + +[ ${HAVE_SETRESUID} -eq 0 ] && \ + cat << __HEREDOC__ +/* + * Compatibility for setresuid(2). + */ +int setresuid(uid_t ruid, uid_t euid, uid_t suid); + +__HEREDOC__ + +if [ ${HAVE_SYS_QUEUE} -eq 0 ]; then + cat << __HEREDOC__ +/* + * A compatible version of OpenBSD <sys/queue.h>. + */ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + */ + +/* OPENBSD ORIGINAL: sys/sys/queue.h */ + +/* + * Require for OS/X and other platforms that have old/broken/incomplete + * <sys/queue.h>. + */ + +#undef LIST_EMPTY +#undef LIST_END +#undef LIST_ENTRY +#undef LIST_FIRST +#undef LIST_FOREACH +#undef LIST_FOREACH_SAFE +#undef LIST_HEAD +#undef LIST_HEAD_INITIALIZER +#undef LIST_INIT +#undef LIST_INSERT_AFTER +#undef LIST_INSERT_BEFORE +#undef LIST_INSERT_HEAD +#undef LIST_NEXT +#undef LIST_REMOVE +#undef LIST_REPLACE +#undef SIMPLEQ_CONCAT +#undef SIMPLEQ_EMPTY +#undef SIMPLEQ_END +#undef SIMPLEQ_ENTRY +#undef SIMPLEQ_FIRST +#undef SIMPLEQ_FOREACH +#undef SIMPLEQ_FOREACH_SAFE +#undef SIMPLEQ_HEAD +#undef SIMPLEQ_HEAD_INITIALIZER +#undef SIMPLEQ_INIT +#undef SIMPLEQ_INSERT_AFTER +#undef SIMPLEQ_INSERT_HEAD +#undef SIMPLEQ_INSERT_TAIL +#undef SIMPLEQ_NEXT +#undef SIMPLEQ_REMOVE_AFTER +#undef SIMPLEQ_REMOVE_HEAD +#undef SLIST_EMPTY +#undef SLIST_END +#undef SLIST_ENTRY +#undef SLIST_FIRST +#undef SLIST_FOREACH +#undef SLIST_FOREACH_SAFE +#undef SLIST_HEAD +#undef SLIST_HEAD_INITIALIZER +#undef SLIST_INIT +#undef SLIST_INSERT_AFTER +#undef SLIST_INSERT_HEAD +#undef SLIST_NEXT +#undef SLIST_REMOVE +#undef SLIST_REMOVE_AFTER +#undef SLIST_REMOVE_HEAD +#undef TAILQ_CONCAT +#undef TAILQ_EMPTY +#undef TAILQ_END +#undef TAILQ_ENTRY +#undef TAILQ_FIRST +#undef TAILQ_FOREACH +#undef TAILQ_FOREACH_REVERSE +#undef TAILQ_FOREACH_REVERSE_SAFE +#undef 
TAILQ_FOREACH_SAFE +#undef TAILQ_HEAD +#undef TAILQ_HEAD_INITIALIZER +#undef TAILQ_INIT +#undef TAILQ_INSERT_AFTER +#undef TAILQ_INSERT_BEFORE +#undef TAILQ_INSERT_HEAD +#undef TAILQ_INSERT_TAIL +#undef TAILQ_LAST +#undef TAILQ_NEXT +#undef TAILQ_PREV +#undef TAILQ_REMOVE +#undef TAILQ_REPLACE +#undef XSIMPLEQ_EMPTY +#undef XSIMPLEQ_END +#undef XSIMPLEQ_ENTRY +#undef XSIMPLEQ_FIRST +#undef XSIMPLEQ_FOREACH +#undef XSIMPLEQ_FOREACH_SAFE +#undef XSIMPLEQ_HEAD +#undef XSIMPLEQ_INIT +#undef XSIMPLEQ_INSERT_AFTER +#undef XSIMPLEQ_INSERT_HEAD +#undef XSIMPLEQ_INSERT_TAIL +#undef XSIMPLEQ_NEXT +#undef XSIMPLEQ_REMOVE_AFTER +#undef XSIMPLEQ_REMOVE_HEAD +#undef XSIMPLEQ_XOR + +/* + * This file defines five types of data structures: singly-linked lists, + * lists, simple queues, tail queues and XOR simple queues. + * + * + * A singly-linked list is headed by a single forward pointer. The elements + * are singly linked for minimum space and pointer manipulation overhead at + * the expense of O(n) removal for arbitrary elements. New elements can be + * added to the list after an existing element or at the head of the list. + * Elements being removed from the head of the list should use the explicit + * macro for this purpose for optimum efficiency. A singly-linked list may + * only be traversed in the forward direction. Singly-linked lists are ideal + * for applications with large datasets and few or no removals or for + * implementing a LIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A simple queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. 
The elements are singly + * linked to save space, so elements can only be removed from the + * head of the list. New elements can be added to the list before or after + * an existing element, at the head of the list, or at the end of the + * list. A simple queue may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * An XOR simple queue is used in the same way as a regular simple queue. + * The difference is that the head structure also includes a "cookie" that + * is XOR'd with the queue pointer (first, last or next) to generate the + * real pointer value. + * + * For details on the use of these macros, see the queue(3) manual page. + */ + +#if defined(QUEUE_MACRO_DEBUG) || (defined(_KERNEL) && defined(DIAGNOSTIC)) +#define _Q_INVALID ((void *)-1) +#define _Q_INVALIDATE(a) (a) = _Q_INVALID +#else +#define _Q_INVALIDATE(a) +#endif + +/* + * Singly-linked List definitions. + */ +#define SLIST_HEAD(name, type) \\ +struct name { \\ + struct type *slh_first; /* first element */ \\ +} + +#define SLIST_HEAD_INITIALIZER(head) \\ + { NULL } + +#define SLIST_ENTRY(type) \\ +struct { \\ + struct type *sle_next; /* next element */ \\ +} + +/* + * Singly-linked List access methods. 
+ */ +#define SLIST_FIRST(head) ((head)->slh_first) +#define SLIST_END(head) NULL +#define SLIST_EMPTY(head) (SLIST_FIRST(head) == SLIST_END(head)) +#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) + +#define SLIST_FOREACH(var, head, field) \\ + for((var) = SLIST_FIRST(head); \\ + (var) != SLIST_END(head); \\ + (var) = SLIST_NEXT(var, field)) + +#define SLIST_FOREACH_SAFE(var, head, field, tvar) \\ + for ((var) = SLIST_FIRST(head); \\ + (var) && ((tvar) = SLIST_NEXT(var, field), 1); \\ + (var) = (tvar)) + +/* + * Singly-linked List functions. + */ +#define SLIST_INIT(head) { \\ + SLIST_FIRST(head) = SLIST_END(head); \\ +} + +#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \\ + (elm)->field.sle_next = (slistelm)->field.sle_next; \\ + (slistelm)->field.sle_next = (elm); \\ +} while (0) + +#define SLIST_INSERT_HEAD(head, elm, field) do { \\ + (elm)->field.sle_next = (head)->slh_first; \\ + (head)->slh_first = (elm); \\ +} while (0) + +#define SLIST_REMOVE_AFTER(elm, field) do { \\ + (elm)->field.sle_next = (elm)->field.sle_next->field.sle_next; \\ +} while (0) + +#define SLIST_REMOVE_HEAD(head, field) do { \\ + (head)->slh_first = (head)->slh_first->field.sle_next; \\ +} while (0) + +#define SLIST_REMOVE(head, elm, type, field) do { \\ + if ((head)->slh_first == (elm)) { \\ + SLIST_REMOVE_HEAD((head), field); \\ + } else { \\ + struct type *curelm = (head)->slh_first; \\ + \\ + while (curelm->field.sle_next != (elm)) \\ + curelm = curelm->field.sle_next; \\ + curelm->field.sle_next = \\ + curelm->field.sle_next->field.sle_next; \\ + } \\ + _Q_INVALIDATE((elm)->field.sle_next); \\ +} while (0) + +/* + * List definitions. 
+ */ +#define LIST_HEAD(name, type) \\ +struct name { \\ + struct type *lh_first; /* first element */ \\ +} + +#define LIST_HEAD_INITIALIZER(head) \\ + { NULL } + +#define LIST_ENTRY(type) \\ +struct { \\ + struct type *le_next; /* next element */ \\ + struct type **le_prev; /* address of previous next element */ \\ +} + +/* + * List access methods. + */ +#define LIST_FIRST(head) ((head)->lh_first) +#define LIST_END(head) NULL +#define LIST_EMPTY(head) (LIST_FIRST(head) == LIST_END(head)) +#define LIST_NEXT(elm, field) ((elm)->field.le_next) + +#define LIST_FOREACH(var, head, field) \\ + for((var) = LIST_FIRST(head); \\ + (var)!= LIST_END(head); \\ + (var) = LIST_NEXT(var, field)) + +#define LIST_FOREACH_SAFE(var, head, field, tvar) \\ + for ((var) = LIST_FIRST(head); \\ + (var) && ((tvar) = LIST_NEXT(var, field), 1); \\ + (var) = (tvar)) + +/* + * List functions. + */ +#define LIST_INIT(head) do { \\ + LIST_FIRST(head) = LIST_END(head); \\ +} while (0) + +#define LIST_INSERT_AFTER(listelm, elm, field) do { \\ + if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \\ + (listelm)->field.le_next->field.le_prev = \\ + &(elm)->field.le_next; \\ + (listelm)->field.le_next = (elm); \\ + (elm)->field.le_prev = &(listelm)->field.le_next; \\ +} while (0) + +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \\ + (elm)->field.le_prev = (listelm)->field.le_prev; \\ + (elm)->field.le_next = (listelm); \\ + *(listelm)->field.le_prev = (elm); \\ + (listelm)->field.le_prev = &(elm)->field.le_next; \\ +} while (0) + +#define LIST_INSERT_HEAD(head, elm, field) do { \\ + if (((elm)->field.le_next = (head)->lh_first) != NULL) \\ + (head)->lh_first->field.le_prev = &(elm)->field.le_next;\\ + (head)->lh_first = (elm); \\ + (elm)->field.le_prev = &(head)->lh_first; \\ +} while (0) + +#define LIST_REMOVE(elm, field) do { \\ + if ((elm)->field.le_next != NULL) \\ + (elm)->field.le_next->field.le_prev = \\ + (elm)->field.le_prev; \\ + *(elm)->field.le_prev = 
(elm)->field.le_next; \\ + _Q_INVALIDATE((elm)->field.le_prev); \\ + _Q_INVALIDATE((elm)->field.le_next); \\ +} while (0) + +#define LIST_REPLACE(elm, elm2, field) do { \\ + if (((elm2)->field.le_next = (elm)->field.le_next) != NULL) \\ + (elm2)->field.le_next->field.le_prev = \\ + &(elm2)->field.le_next; \\ + (elm2)->field.le_prev = (elm)->field.le_prev; \\ + *(elm2)->field.le_prev = (elm2); \\ + _Q_INVALIDATE((elm)->field.le_prev); \\ + _Q_INVALIDATE((elm)->field.le_next); \\ +} while (0) + +/* + * Simple queue definitions. + */ +#define SIMPLEQ_HEAD(name, type) \\ +struct name { \\ + struct type *sqh_first; /* first element */ \\ + struct type **sqh_last; /* addr of last next element */ \\ +} + +#define SIMPLEQ_HEAD_INITIALIZER(head) \\ + { NULL, &(head).sqh_first } + +#define SIMPLEQ_ENTRY(type) \\ +struct { \\ + struct type *sqe_next; /* next element */ \\ +} + +/* + * Simple queue access methods. + */ +#define SIMPLEQ_FIRST(head) ((head)->sqh_first) +#define SIMPLEQ_END(head) NULL +#define SIMPLEQ_EMPTY(head) (SIMPLEQ_FIRST(head) == SIMPLEQ_END(head)) +#define SIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next) + +#define SIMPLEQ_FOREACH(var, head, field) \\ + for((var) = SIMPLEQ_FIRST(head); \\ + (var) != SIMPLEQ_END(head); \\ + (var) = SIMPLEQ_NEXT(var, field)) + +#define SIMPLEQ_FOREACH_SAFE(var, head, field, tvar) \\ + for ((var) = SIMPLEQ_FIRST(head); \\ + (var) && ((tvar) = SIMPLEQ_NEXT(var, field), 1); \\ + (var) = (tvar)) + +/* + * Simple queue functions. 
+ */ +#define SIMPLEQ_INIT(head) do { \\ + (head)->sqh_first = NULL; \\ + (head)->sqh_last = &(head)->sqh_first; \\ +} while (0) + +#define SIMPLEQ_INSERT_HEAD(head, elm, field) do { \\ + if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \\ + (head)->sqh_last = &(elm)->field.sqe_next; \\ + (head)->sqh_first = (elm); \\ +} while (0) + +#define SIMPLEQ_INSERT_TAIL(head, elm, field) do { \\ + (elm)->field.sqe_next = NULL; \\ + *(head)->sqh_last = (elm); \\ + (head)->sqh_last = &(elm)->field.sqe_next; \\ +} while (0) + +#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \\ + if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\\ + (head)->sqh_last = &(elm)->field.sqe_next; \\ + (listelm)->field.sqe_next = (elm); \\ +} while (0) + +#define SIMPLEQ_REMOVE_HEAD(head, field) do { \\ + if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL) \\ + (head)->sqh_last = &(head)->sqh_first; \\ +} while (0) + +#define SIMPLEQ_REMOVE_AFTER(head, elm, field) do { \\ + if (((elm)->field.sqe_next = (elm)->field.sqe_next->field.sqe_next) \\ + == NULL) \\ + (head)->sqh_last = &(elm)->field.sqe_next; \\ +} while (0) + +#define SIMPLEQ_CONCAT(head1, head2) do { \\ + if (!SIMPLEQ_EMPTY((head2))) { \\ + *(head1)->sqh_last = (head2)->sqh_first; \\ + (head1)->sqh_last = (head2)->sqh_last; \\ + SIMPLEQ_INIT((head2)); \\ + } \\ +} while (0) + +/* + * XOR Simple queue definitions. + */ +#define XSIMPLEQ_HEAD(name, type) \\ +struct name { \\ + struct type *sqx_first; /* first element */ \\ + struct type **sqx_last; /* addr of last next element */ \\ + unsigned long sqx_cookie; \\ +} + +#define XSIMPLEQ_ENTRY(type) \\ +struct { \\ + struct type *sqx_next; /* next element */ \\ +} + +/* + * XOR Simple queue access methods. 
+ */ +#define XSIMPLEQ_XOR(head, ptr) ((__typeof(ptr))((head)->sqx_cookie ^ \\ + (unsigned long)(ptr))) +#define XSIMPLEQ_FIRST(head) XSIMPLEQ_XOR(head, ((head)->sqx_first)) +#define XSIMPLEQ_END(head) NULL +#define XSIMPLEQ_EMPTY(head) (XSIMPLEQ_FIRST(head) == XSIMPLEQ_END(head)) +#define XSIMPLEQ_NEXT(head, elm, field) XSIMPLEQ_XOR(head, ((elm)->field.sqx_next)) + + +#define XSIMPLEQ_FOREACH(var, head, field) \\ + for ((var) = XSIMPLEQ_FIRST(head); \\ + (var) != XSIMPLEQ_END(head); \\ + (var) = XSIMPLEQ_NEXT(head, var, field)) + +#define XSIMPLEQ_FOREACH_SAFE(var, head, field, tvar) \\ + for ((var) = XSIMPLEQ_FIRST(head); \\ + (var) && ((tvar) = XSIMPLEQ_NEXT(head, var, field), 1); \\ + (var) = (tvar)) + +/* + * XOR Simple queue functions. + */ +#define XSIMPLEQ_INIT(head) do { \\ + arc4random_buf(&(head)->sqx_cookie, sizeof((head)->sqx_cookie)); \\ + (head)->sqx_first = XSIMPLEQ_XOR(head, NULL); \\ + (head)->sqx_last = XSIMPLEQ_XOR(head, &(head)->sqx_first); \\ +} while (0) + +#define XSIMPLEQ_INSERT_HEAD(head, elm, field) do { \\ + if (((elm)->field.sqx_next = (head)->sqx_first) == \\ + XSIMPLEQ_XOR(head, NULL)) \\ + (head)->sqx_last = XSIMPLEQ_XOR(head, &(elm)->field.sqx_next); \\ + (head)->sqx_first = XSIMPLEQ_XOR(head, (elm)); \\ +} while (0) + +#define XSIMPLEQ_INSERT_TAIL(head, elm, field) do { \\ + (elm)->field.sqx_next = XSIMPLEQ_XOR(head, NULL); \\ + *(XSIMPLEQ_XOR(head, (head)->sqx_last)) = XSIMPLEQ_XOR(head, (elm)); \\ + (head)->sqx_last = XSIMPLEQ_XOR(head, &(elm)->field.sqx_next); \\ +} while (0) + +#define XSIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \\ + if (((elm)->field.sqx_next = (listelm)->field.sqx_next) == \\ + XSIMPLEQ_XOR(head, NULL)) \\ + (head)->sqx_last = XSIMPLEQ_XOR(head, &(elm)->field.sqx_next); \\ + (listelm)->field.sqx_next = XSIMPLEQ_XOR(head, (elm)); \\ +} while (0) + +#define XSIMPLEQ_REMOVE_HEAD(head, field) do { \\ + if (((head)->sqx_first = XSIMPLEQ_XOR(head, \\ + (head)->sqx_first)->field.sqx_next) == 
XSIMPLEQ_XOR(head, NULL)) \\ + (head)->sqx_last = XSIMPLEQ_XOR(head, &(head)->sqx_first); \\ +} while (0) + +#define XSIMPLEQ_REMOVE_AFTER(head, elm, field) do { \\ + if (((elm)->field.sqx_next = XSIMPLEQ_XOR(head, \\ + (elm)->field.sqx_next)->field.sqx_next) \\ + == XSIMPLEQ_XOR(head, NULL)) \\ + (head)->sqx_last = \\ + XSIMPLEQ_XOR(head, &(elm)->field.sqx_next); \\ +} while (0) + + +/* + * Tail queue definitions. + */ +#define TAILQ_HEAD(name, type) \\ +struct name { \\ + struct type *tqh_first; /* first element */ \\ + struct type **tqh_last; /* addr of last next element */ \\ +} + +#define TAILQ_HEAD_INITIALIZER(head) \\ + { NULL, &(head).tqh_first } + +#define TAILQ_ENTRY(type) \\ +struct { \\ + struct type *tqe_next; /* next element */ \\ + struct type **tqe_prev; /* address of previous next element */ \\ +} + +/* + * Tail queue access methods. + */ +#define TAILQ_FIRST(head) ((head)->tqh_first) +#define TAILQ_END(head) NULL +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) +#define TAILQ_LAST(head, headname) \\ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) +/* XXX */ +#define TAILQ_PREV(elm, headname, field) \\ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) +#define TAILQ_EMPTY(head) \\ + (TAILQ_FIRST(head) == TAILQ_END(head)) + +#define TAILQ_FOREACH(var, head, field) \\ + for((var) = TAILQ_FIRST(head); \\ + (var) != TAILQ_END(head); \\ + (var) = TAILQ_NEXT(var, field)) + +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \\ + for ((var) = TAILQ_FIRST(head); \\ + (var) != TAILQ_END(head) && \\ + ((tvar) = TAILQ_NEXT(var, field), 1); \\ + (var) = (tvar)) + + +#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \\ + for((var) = TAILQ_LAST(head, headname); \\ + (var) != TAILQ_END(head); \\ + (var) = TAILQ_PREV(var, headname, field)) + +#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \\ + for ((var) = TAILQ_LAST(head, headname); \\ + (var) != TAILQ_END(head) && \\ + ((tvar) = TAILQ_PREV(var, headname, 
field), 1); \\ + (var) = (tvar)) + +/* + * Tail queue functions. + */ +#define TAILQ_INIT(head) do { \\ + (head)->tqh_first = NULL; \\ + (head)->tqh_last = &(head)->tqh_first; \\ +} while (0) + +#define TAILQ_INSERT_HEAD(head, elm, field) do { \\ + if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \\ + (head)->tqh_first->field.tqe_prev = \\ + &(elm)->field.tqe_next; \\ + else \\ + (head)->tqh_last = &(elm)->field.tqe_next; \\ + (head)->tqh_first = (elm); \\ + (elm)->field.tqe_prev = &(head)->tqh_first; \\ +} while (0) + +#define TAILQ_INSERT_TAIL(head, elm, field) do { \\ + (elm)->field.tqe_next = NULL; \\ + (elm)->field.tqe_prev = (head)->tqh_last; \\ + *(head)->tqh_last = (elm); \\ + (head)->tqh_last = &(elm)->field.tqe_next; \\ +} while (0) + +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \\ + if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\\ + (elm)->field.tqe_next->field.tqe_prev = \\ + &(elm)->field.tqe_next; \\ + else \\ + (head)->tqh_last = &(elm)->field.tqe_next; \\ + (listelm)->field.tqe_next = (elm); \\ + (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \\ +} while (0) + +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \\ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \\ + (elm)->field.tqe_next = (listelm); \\ + *(listelm)->field.tqe_prev = (elm); \\ + (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \\ +} while (0) + +#define TAILQ_REMOVE(head, elm, field) do { \\ + if (((elm)->field.tqe_next) != NULL) \\ + (elm)->field.tqe_next->field.tqe_prev = \\ + (elm)->field.tqe_prev; \\ + else \\ + (head)->tqh_last = (elm)->field.tqe_prev; \\ + *(elm)->field.tqe_prev = (elm)->field.tqe_next; \\ + _Q_INVALIDATE((elm)->field.tqe_prev); \\ + _Q_INVALIDATE((elm)->field.tqe_next); \\ +} while (0) + +#define TAILQ_REPLACE(head, elm, elm2, field) do { \\ + if (((elm2)->field.tqe_next = (elm)->field.tqe_next) != NULL) \\ + (elm2)->field.tqe_next->field.tqe_prev = \\ + &(elm2)->field.tqe_next; \\ + else \\ + 
(head)->tqh_last = &(elm2)->field.tqe_next; \\ + (elm2)->field.tqe_prev = (elm)->field.tqe_prev; \\ + *(elm2)->field.tqe_prev = (elm2); \\ + _Q_INVALIDATE((elm)->field.tqe_prev); \\ + _Q_INVALIDATE((elm)->field.tqe_next); \\ +} while (0) + +#define TAILQ_CONCAT(head1, head2, field) do { \\ + if (!TAILQ_EMPTY(head2)) { \\ + *(head1)->tqh_last = (head2)->tqh_first; \\ + (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \\ + (head1)->tqh_last = (head2)->tqh_last; \\ + TAILQ_INIT((head2)); \\ + } \\ +} while (0) + +__HEREDOC__ +fi + +if [ ${HAVE_SYS_TREE} -eq 0 ]; then + cat << __HEREDOC__ +/* + * A compatible version of OpenBSD <sys/tree.h>. + */ +/* + * Copyright 2002 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* OPENBSD ORIGINAL: sys/sys/tree.h */ + +/* + * This file defines data structures for different types of trees: + * splay trees and red-black trees. + * + * A splay tree is a self-organizing data structure. Every operation + * on the tree causes a splay to happen. The splay moves the requested + * node to the root of the tree and partly rebalances it. + * + * This has the benefit that request locality causes faster lookups as + * the requested nodes move to the top of the tree. On the other hand, + * every lookup causes memory writes. + * + * The Balance Theorem bounds the total access time for m operations + * and n inserts on an initially empty tree as O((m + n)lg n). The + * amortized cost for a sequence of m accesses to a splay tree is O(lg n); + * + * A red-black tree is a binary search tree with the node color as an + * extra attribute. It fulfills a set of conditions: + * - every search path from the root to a leaf consists of the + * same number of black nodes, + * - each red node (except for the root) has a black parent, + * - each leaf node is black. + * + * Every operation on a red-black tree is bounded as O(lg n). + * The maximum height of a red-black tree is 2lg (n+1). 
+ */ + +#define SPLAY_HEAD(name, type) \\ +struct name { \\ + struct type *sph_root; /* root of the tree */ \\ +} + +#define SPLAY_INITIALIZER(root) \\ + { NULL } + +#define SPLAY_INIT(root) do { \\ + (root)->sph_root = NULL; \\ +} while (0) + +#define SPLAY_ENTRY(type) \\ +struct { \\ + struct type *spe_left; /* left element */ \\ + struct type *spe_right; /* right element */ \\ +} + +#define SPLAY_LEFT(elm, field) (elm)->field.spe_left +#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right +#define SPLAY_ROOT(head) (head)->sph_root +#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL) + +/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */ +#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \\ + SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \\ + SPLAY_RIGHT(tmp, field) = (head)->sph_root; \\ + (head)->sph_root = tmp; \\ +} while (0) + +#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \\ + SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \\ + SPLAY_LEFT(tmp, field) = (head)->sph_root; \\ + (head)->sph_root = tmp; \\ +} while (0) + +#define SPLAY_LINKLEFT(head, tmp, field) do { \\ + SPLAY_LEFT(tmp, field) = (head)->sph_root; \\ + tmp = (head)->sph_root; \\ + (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \\ +} while (0) + +#define SPLAY_LINKRIGHT(head, tmp, field) do { \\ + SPLAY_RIGHT(tmp, field) = (head)->sph_root; \\ + tmp = (head)->sph_root; \\ + (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \\ +} while (0) + +#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \\ + SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \\ + SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\\ + SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \\ + SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \\ +} while (0) + +/* Generates prototypes and inline functions */ + +#define SPLAY_PROTOTYPE(name, type, field, cmp) \\ +void name##_SPLAY(struct name *, 
struct type *); \\ +void name##_SPLAY_MINMAX(struct name *, int); \\ +struct type *name##_SPLAY_INSERT(struct name *, struct type *); \\ +struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \\ + \\ +/* Finds the node with the same key as elm */ \\ +static __inline struct type * \\ +name##_SPLAY_FIND(struct name *head, struct type *elm) \\ +{ \\ + if (SPLAY_EMPTY(head)) \\ + return(NULL); \\ + name##_SPLAY(head, elm); \\ + if ((cmp)(elm, (head)->sph_root) == 0) \\ + return (head->sph_root); \\ + return (NULL); \\ +} \\ + \\ +static __inline struct type * \\ +name##_SPLAY_NEXT(struct name *head, struct type *elm) \\ +{ \\ + name##_SPLAY(head, elm); \\ + if (SPLAY_RIGHT(elm, field) != NULL) { \\ + elm = SPLAY_RIGHT(elm, field); \\ + while (SPLAY_LEFT(elm, field) != NULL) { \\ + elm = SPLAY_LEFT(elm, field); \\ + } \\ + } else \\ + elm = NULL; \\ + return (elm); \\ +} \\ + \\ +static __inline struct type * \\ +name##_SPLAY_MIN_MAX(struct name *head, int val) \\ +{ \\ + name##_SPLAY_MINMAX(head, val); \\ + return (SPLAY_ROOT(head)); \\ +} + +/* Main splay operation. 
+ * Moves node close to the key of elm to top + */ +#define SPLAY_GENERATE(name, type, field, cmp) \\ +struct type * \\ +name##_SPLAY_INSERT(struct name *head, struct type *elm) \\ +{ \\ + if (SPLAY_EMPTY(head)) { \\ + SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \\ + } else { \\ + int __comp; \\ + name##_SPLAY(head, elm); \\ + __comp = (cmp)(elm, (head)->sph_root); \\ + if(__comp < 0) { \\ + SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\\ + SPLAY_RIGHT(elm, field) = (head)->sph_root; \\ + SPLAY_LEFT((head)->sph_root, field) = NULL; \\ + } else if (__comp > 0) { \\ + SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\\ + SPLAY_LEFT(elm, field) = (head)->sph_root; \\ + SPLAY_RIGHT((head)->sph_root, field) = NULL; \\ + } else \\ + return ((head)->sph_root); \\ + } \\ + (head)->sph_root = (elm); \\ + return (NULL); \\ +} \\ + \\ +struct type * \\ +name##_SPLAY_REMOVE(struct name *head, struct type *elm) \\ +{ \\ + struct type *__tmp; \\ + if (SPLAY_EMPTY(head)) \\ + return (NULL); \\ + name##_SPLAY(head, elm); \\ + if ((cmp)(elm, (head)->sph_root) == 0) { \\ + if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \\ + (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\\ + } else { \\ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \\ + (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\\ + name##_SPLAY(head, elm); \\ + SPLAY_RIGHT((head)->sph_root, field) = __tmp; \\ + } \\ + return (elm); \\ + } \\ + return (NULL); \\ +} \\ + \\ +void \\ +name##_SPLAY(struct name *head, struct type *elm) \\ +{ \\ + struct type __node, *__left, *__right, *__tmp; \\ + int __comp; \\ +\\ + SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\\ + __left = __right = &__node; \\ +\\ + while ((__comp = (cmp)(elm, (head)->sph_root))) { \\ + if (__comp < 0) { \\ + __tmp = SPLAY_LEFT((head)->sph_root, field); \\ + if (__tmp == NULL) \\ + break; \\ + if ((cmp)(elm, __tmp) < 0){ \\ + SPLAY_ROTATE_RIGHT(head, __tmp, field); \\ + if 
(SPLAY_LEFT((head)->sph_root, field) == NULL)\\ + break; \\ + } \\ + SPLAY_LINKLEFT(head, __right, field); \\ + } else if (__comp > 0) { \\ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \\ + if (__tmp == NULL) \\ + break; \\ + if ((cmp)(elm, __tmp) > 0){ \\ + SPLAY_ROTATE_LEFT(head, __tmp, field); \\ + if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\\ + break; \\ + } \\ + SPLAY_LINKRIGHT(head, __left, field); \\ + } \\ + } \\ + SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \\ +} \\ + \\ +/* Splay with either the minimum or the maximum element \\ + * Used to find minimum or maximum element in tree. \\ + */ \\ +void name##_SPLAY_MINMAX(struct name *head, int __comp) \\ +{ \\ + struct type __node, *__left, *__right, *__tmp; \\ +\\ + SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\\ + __left = __right = &__node; \\ +\\ + while (1) { \\ + if (__comp < 0) { \\ + __tmp = SPLAY_LEFT((head)->sph_root, field); \\ + if (__tmp == NULL) \\ + break; \\ + if (__comp < 0){ \\ + SPLAY_ROTATE_RIGHT(head, __tmp, field); \\ + if (SPLAY_LEFT((head)->sph_root, field) == NULL)\\ + break; \\ + } \\ + SPLAY_LINKLEFT(head, __right, field); \\ + } else if (__comp > 0) { \\ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \\ + if (__tmp == NULL) \\ + break; \\ + if (__comp > 0) { \\ + SPLAY_ROTATE_LEFT(head, __tmp, field); \\ + if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\\ + break; \\ + } \\ + SPLAY_LINKRIGHT(head, __left, field); \\ + } \\ + } \\ + SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \\ +} + +#define SPLAY_NEGINF -1 +#define SPLAY_INF 1 + +#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y) +#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y) +#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y) +#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y) +#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \\ + : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF)) +#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? 
NULL \\ + : name##_SPLAY_MIN_MAX(x, SPLAY_INF)) + +#define SPLAY_FOREACH(x, name, head) \\ + for ((x) = SPLAY_MIN(name, head); \\ + (x) != NULL; \\ + (x) = SPLAY_NEXT(name, head, x)) + +/* Macros that define a red-black tree */ +#define RB_HEAD(name, type) \\ +struct name { \\ + struct type *rbh_root; /* root of the tree */ \\ +} + +#define RB_INITIALIZER(root) \\ + { NULL } + +#define RB_INIT(root) do { \\ + (root)->rbh_root = NULL; \\ +} while (0) + +#define RB_BLACK 0 +#define RB_RED 1 +#define RB_ENTRY(type) \\ +struct { \\ + struct type *rbe_left; /* left element */ \\ + struct type *rbe_right; /* right element */ \\ + struct type *rbe_parent; /* parent element */ \\ + int rbe_color; /* node color */ \\ +} + +#define RB_LEFT(elm, field) (elm)->field.rbe_left +#define RB_RIGHT(elm, field) (elm)->field.rbe_right +#define RB_PARENT(elm, field) (elm)->field.rbe_parent +#define RB_COLOR(elm, field) (elm)->field.rbe_color +#define RB_ROOT(head) (head)->rbh_root +#define RB_EMPTY(head) (RB_ROOT(head) == NULL) + +#define RB_SET(elm, parent, field) do { \\ + RB_PARENT(elm, field) = parent; \\ + RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \\ + RB_COLOR(elm, field) = RB_RED; \\ +} while (0) + +#define RB_SET_BLACKRED(black, red, field) do { \\ + RB_COLOR(black, field) = RB_BLACK; \\ + RB_COLOR(red, field) = RB_RED; \\ +} while (0) + +#ifndef RB_AUGMENT +#define RB_AUGMENT(x) do {} while (0) +#endif + +#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \\ + (tmp) = RB_RIGHT(elm, field); \\ + if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) { \\ + RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \\ + } \\ + RB_AUGMENT(elm); \\ + if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \\ + if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \\ + RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \\ + else \\ + RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \\ + } else \\ + (head)->rbh_root = (tmp); \\ + RB_LEFT(tmp, field) = (elm); \\ + RB_PARENT(elm, field) = (tmp); \\ + 
RB_AUGMENT(tmp); \\ + if ((RB_PARENT(tmp, field))) \\ + RB_AUGMENT(RB_PARENT(tmp, field)); \\ +} while (0) + +#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \\ + (tmp) = RB_LEFT(elm, field); \\ + if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) { \\ + RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \\ + } \\ + RB_AUGMENT(elm); \\ + if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \\ + if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \\ + RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \\ + else \\ + RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \\ + } else \\ + (head)->rbh_root = (tmp); \\ + RB_RIGHT(tmp, field) = (elm); \\ + RB_PARENT(elm, field) = (tmp); \\ + RB_AUGMENT(tmp); \\ + if ((RB_PARENT(tmp, field))) \\ + RB_AUGMENT(RB_PARENT(tmp, field)); \\ +} while (0) + +/* Generates prototypes and inline functions */ +#define RB_PROTOTYPE(name, type, field, cmp) \\ + RB_PROTOTYPE_INTERNAL(name, type, field, cmp,) +#define RB_PROTOTYPE_STATIC(name, type, field, cmp) \\ + RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __attribute__((__unused__)) static) +#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \\ +attr void name##_RB_INSERT_COLOR(struct name *, struct type *); \\ +attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\\ +attr struct type *name##_RB_REMOVE(struct name *, struct type *); \\ +attr struct type *name##_RB_INSERT(struct name *, struct type *); \\ +attr struct type *name##_RB_FIND(struct name *, struct type *); \\ +attr struct type *name##_RB_NFIND(struct name *, struct type *); \\ +attr struct type *name##_RB_NEXT(struct type *); \\ +attr struct type *name##_RB_PREV(struct type *); \\ +attr struct type *name##_RB_MINMAX(struct name *, int); \\ + \\ + +/* Main rb operation. 
+ * Moves node close to the key of elm to top + */ +#define RB_GENERATE(name, type, field, cmp) \\ + RB_GENERATE_INTERNAL(name, type, field, cmp,) +#define RB_GENERATE_STATIC(name, type, field, cmp) \\ + RB_GENERATE_INTERNAL(name, type, field, cmp, __attribute__((__unused__)) static) +#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \\ +attr void \\ +name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \\ +{ \\ + struct type *parent, *gparent, *tmp; \\ + while ((parent = RB_PARENT(elm, field)) && \\ + RB_COLOR(parent, field) == RB_RED) { \\ + gparent = RB_PARENT(parent, field); \\ + if (parent == RB_LEFT(gparent, field)) { \\ + tmp = RB_RIGHT(gparent, field); \\ + if (tmp && RB_COLOR(tmp, field) == RB_RED) { \\ + RB_COLOR(tmp, field) = RB_BLACK; \\ + RB_SET_BLACKRED(parent, gparent, field);\\ + elm = gparent; \\ + continue; \\ + } \\ + if (RB_RIGHT(parent, field) == elm) { \\ + RB_ROTATE_LEFT(head, parent, tmp, field);\\ + tmp = parent; \\ + parent = elm; \\ + elm = tmp; \\ + } \\ + RB_SET_BLACKRED(parent, gparent, field); \\ + RB_ROTATE_RIGHT(head, gparent, tmp, field); \\ + } else { \\ + tmp = RB_LEFT(gparent, field); \\ + if (tmp && RB_COLOR(tmp, field) == RB_RED) { \\ + RB_COLOR(tmp, field) = RB_BLACK; \\ + RB_SET_BLACKRED(parent, gparent, field);\\ + elm = gparent; \\ + continue; \\ + } \\ + if (RB_LEFT(parent, field) == elm) { \\ + RB_ROTATE_RIGHT(head, parent, tmp, field);\\ + tmp = parent; \\ + parent = elm; \\ + elm = tmp; \\ + } \\ + RB_SET_BLACKRED(parent, gparent, field); \\ + RB_ROTATE_LEFT(head, gparent, tmp, field); \\ + } \\ + } \\ + RB_COLOR(head->rbh_root, field) = RB_BLACK; \\ +} \\ + \\ +attr void \\ +name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \\ +{ \\ + struct type *tmp; \\ + while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && \\ + elm != RB_ROOT(head)) { \\ + if (RB_LEFT(parent, field) == elm) { \\ + tmp = RB_RIGHT(parent, field); \\ + if (RB_COLOR(tmp, field) == RB_RED) { \\ + 
RB_SET_BLACKRED(tmp, parent, field); \\ + RB_ROTATE_LEFT(head, parent, tmp, field);\\ + tmp = RB_RIGHT(parent, field); \\ + } \\ + if ((RB_LEFT(tmp, field) == NULL || \\ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\\ + (RB_RIGHT(tmp, field) == NULL || \\ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\\ + RB_COLOR(tmp, field) = RB_RED; \\ + elm = parent; \\ + parent = RB_PARENT(elm, field); \\ + } else { \\ + if (RB_RIGHT(tmp, field) == NULL || \\ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\\ + struct type *oleft; \\ + if ((oleft = RB_LEFT(tmp, field)))\\ + RB_COLOR(oleft, field) = RB_BLACK;\\ + RB_COLOR(tmp, field) = RB_RED; \\ + RB_ROTATE_RIGHT(head, tmp, oleft, field);\\ + tmp = RB_RIGHT(parent, field); \\ + } \\ + RB_COLOR(tmp, field) = RB_COLOR(parent, field);\\ + RB_COLOR(parent, field) = RB_BLACK; \\ + if (RB_RIGHT(tmp, field)) \\ + RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\\ + RB_ROTATE_LEFT(head, parent, tmp, field);\\ + elm = RB_ROOT(head); \\ + break; \\ + } \\ + } else { \\ + tmp = RB_LEFT(parent, field); \\ + if (RB_COLOR(tmp, field) == RB_RED) { \\ + RB_SET_BLACKRED(tmp, parent, field); \\ + RB_ROTATE_RIGHT(head, parent, tmp, field);\\ + tmp = RB_LEFT(parent, field); \\ + } \\ + if ((RB_LEFT(tmp, field) == NULL || \\ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\\ + (RB_RIGHT(tmp, field) == NULL || \\ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\\ + RB_COLOR(tmp, field) = RB_RED; \\ + elm = parent; \\ + parent = RB_PARENT(elm, field); \\ + } else { \\ + if (RB_LEFT(tmp, field) == NULL || \\ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\\ + struct type *oright; \\ + if ((oright = RB_RIGHT(tmp, field)))\\ + RB_COLOR(oright, field) = RB_BLACK;\\ + RB_COLOR(tmp, field) = RB_RED; \\ + RB_ROTATE_LEFT(head, tmp, oright, field);\\ + tmp = RB_LEFT(parent, field); \\ + } \\ + RB_COLOR(tmp, field) = RB_COLOR(parent, field);\\ + RB_COLOR(parent, field) = RB_BLACK; \\ + if (RB_LEFT(tmp, field)) \\ + 
RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\\ + RB_ROTATE_RIGHT(head, parent, tmp, field);\\ + elm = RB_ROOT(head); \\ + break; \\ + } \\ + } \\ + } \\ + if (elm) \\ + RB_COLOR(elm, field) = RB_BLACK; \\ +} \\ + \\ +attr struct type * \\ +name##_RB_REMOVE(struct name *head, struct type *elm) \\ +{ \\ + struct type *child, *parent, *old = elm; \\ + int color; \\ + if (RB_LEFT(elm, field) == NULL) \\ + child = RB_RIGHT(elm, field); \\ + else if (RB_RIGHT(elm, field) == NULL) \\ + child = RB_LEFT(elm, field); \\ + else { \\ + struct type *left; \\ + elm = RB_RIGHT(elm, field); \\ + while ((left = RB_LEFT(elm, field))) \\ + elm = left; \\ + child = RB_RIGHT(elm, field); \\ + parent = RB_PARENT(elm, field); \\ + color = RB_COLOR(elm, field); \\ + if (child) \\ + RB_PARENT(child, field) = parent; \\ + if (parent) { \\ + if (RB_LEFT(parent, field) == elm) \\ + RB_LEFT(parent, field) = child; \\ + else \\ + RB_RIGHT(parent, field) = child; \\ + RB_AUGMENT(parent); \\ + } else \\ + RB_ROOT(head) = child; \\ + if (RB_PARENT(elm, field) == old) \\ + parent = elm; \\ + (elm)->field = (old)->field; \\ + if (RB_PARENT(old, field)) { \\ + if (RB_LEFT(RB_PARENT(old, field), field) == old)\\ + RB_LEFT(RB_PARENT(old, field), field) = elm;\\ + else \\ + RB_RIGHT(RB_PARENT(old, field), field) = elm;\\ + RB_AUGMENT(RB_PARENT(old, field)); \\ + } else \\ + RB_ROOT(head) = elm; \\ + RB_PARENT(RB_LEFT(old, field), field) = elm; \\ + if (RB_RIGHT(old, field)) \\ + RB_PARENT(RB_RIGHT(old, field), field) = elm; \\ + if (parent) { \\ + left = parent; \\ + do { \\ + RB_AUGMENT(left); \\ + } while ((left = RB_PARENT(left, field))); \\ + } \\ + goto color; \\ + } \\ + parent = RB_PARENT(elm, field); \\ + color = RB_COLOR(elm, field); \\ + if (child) \\ + RB_PARENT(child, field) = parent; \\ + if (parent) { \\ + if (RB_LEFT(parent, field) == elm) \\ + RB_LEFT(parent, field) = child; \\ + else \\ + RB_RIGHT(parent, field) = child; \\ + RB_AUGMENT(parent); \\ + } else \\ + RB_ROOT(head) = 
child; \\ +color: \\ + if (color == RB_BLACK) \\ + name##_RB_REMOVE_COLOR(head, parent, child); \\ + return (old); \\ +} \\ + \\ +/* Inserts a node into the RB tree */ \\ +attr struct type * \\ +name##_RB_INSERT(struct name *head, struct type *elm) \\ +{ \\ + struct type *tmp; \\ + struct type *parent = NULL; \\ + int comp = 0; \\ + tmp = RB_ROOT(head); \\ + while (tmp) { \\ + parent = tmp; \\ + comp = (cmp)(elm, parent); \\ + if (comp < 0) \\ + tmp = RB_LEFT(tmp, field); \\ + else if (comp > 0) \\ + tmp = RB_RIGHT(tmp, field); \\ + else \\ + return (tmp); \\ + } \\ + RB_SET(elm, parent, field); \\ + if (parent != NULL) { \\ + if (comp < 0) \\ + RB_LEFT(parent, field) = elm; \\ + else \\ + RB_RIGHT(parent, field) = elm; \\ + RB_AUGMENT(parent); \\ + } else \\ + RB_ROOT(head) = elm; \\ + name##_RB_INSERT_COLOR(head, elm); \\ + return (NULL); \\ +} \\ + \\ +/* Finds the node with the same key as elm */ \\ +attr struct type * \\ +name##_RB_FIND(struct name *head, struct type *elm) \\ +{ \\ + struct type *tmp = RB_ROOT(head); \\ + int comp; \\ + while (tmp) { \\ + comp = cmp(elm, tmp); \\ + if (comp < 0) \\ + tmp = RB_LEFT(tmp, field); \\ + else if (comp > 0) \\ + tmp = RB_RIGHT(tmp, field); \\ + else \\ + return (tmp); \\ + } \\ + return (NULL); \\ +} \\ + \\ +/* Finds the first node greater than or equal to the search key */ \\ +attr struct type * \\ +name##_RB_NFIND(struct name *head, struct type *elm) \\ +{ \\ + struct type *tmp = RB_ROOT(head); \\ + struct type *res = NULL; \\ + int comp; \\ + while (tmp) { \\ + comp = cmp(elm, tmp); \\ + if (comp < 0) { \\ + res = tmp; \\ + tmp = RB_LEFT(tmp, field); \\ + } \\ + else if (comp > 0) \\ + tmp = RB_RIGHT(tmp, field); \\ + else \\ + return (tmp); \\ + } \\ + return (res); \\ +} \\ + \\ +/* ARGSUSED */ \\ +attr struct type * \\ +name##_RB_NEXT(struct type *elm) \\ +{ \\ + if (RB_RIGHT(elm, field)) { \\ + elm = RB_RIGHT(elm, field); \\ + while (RB_LEFT(elm, field)) \\ + elm = RB_LEFT(elm, field); \\ + } else { \\ + if 
(RB_PARENT(elm, field) && \\ + (elm == RB_LEFT(RB_PARENT(elm, field), field))) \\ + elm = RB_PARENT(elm, field); \\ + else { \\ + while (RB_PARENT(elm, field) && \\ + (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\\ + elm = RB_PARENT(elm, field); \\ + elm = RB_PARENT(elm, field); \\ + } \\ + } \\ + return (elm); \\ +} \\ + \\ +/* ARGSUSED */ \\ +attr struct type * \\ +name##_RB_PREV(struct type *elm) \\ +{ \\ + if (RB_LEFT(elm, field)) { \\ + elm = RB_LEFT(elm, field); \\ + while (RB_RIGHT(elm, field)) \\ + elm = RB_RIGHT(elm, field); \\ + } else { \\ + if (RB_PARENT(elm, field) && \\ + (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \\ + elm = RB_PARENT(elm, field); \\ + else { \\ + while (RB_PARENT(elm, field) && \\ + (elm == RB_LEFT(RB_PARENT(elm, field), field)))\\ + elm = RB_PARENT(elm, field); \\ + elm = RB_PARENT(elm, field); \\ + } \\ + } \\ + return (elm); \\ +} \\ + \\ +attr struct type * \\ +name##_RB_MINMAX(struct name *head, int val) \\ +{ \\ + struct type *tmp = RB_ROOT(head); \\ + struct type *parent = NULL; \\ + while (tmp) { \\ + parent = tmp; \\ + if (val < 0) \\ + tmp = RB_LEFT(tmp, field); \\ + else \\ + tmp = RB_RIGHT(tmp, field); \\ + } \\ + return (parent); \\ +} + +#define RB_NEGINF -1 +#define RB_INF 1 + +#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y) +#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y) +#define RB_FIND(name, x, y) name##_RB_FIND(x, y) +#define RB_NFIND(name, x, y) name##_RB_NFIND(x, y) +#define RB_NEXT(name, x, y) name##_RB_NEXT(y) +#define RB_PREV(name, x, y) name##_RB_PREV(y) +#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF) +#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF) + +#define RB_FOREACH(x, name, head) \\ + for ((x) = RB_MIN(name, head); \\ + (x) != NULL; \\ + (x) = name##_RB_NEXT(x)) + +#define RB_FOREACH_SAFE(x, name, head, y) \\ + for ((x) = RB_MIN(name, head); \\ + ((x) != NULL) && ((y) = name##_RB_NEXT(x), 1); \\ + (x) = (y)) + +#define RB_FOREACH_REVERSE(x, name, head) \\ + for ((x) = 
RB_MAX(name, head); \\ + (x) != NULL; \\ + (x) = name##_RB_PREV(x)) + +#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \\ + for ((x) = RB_MAX(name, head); \\ + ((x) != NULL) && ((y) = name##_RB_PREV(x), 1); \\ + (x) = (y)) + +__HEREDOC__ +fi + +cat << __HEREDOC__ +#endif /*!OCONFIGURE_CONFIG_H*/ +__HEREDOC__ + +if [ ${HAVE_FTS} -eq 0 ]; then + cat << __HEREDOC__ +/* + * Compatibility for fts(3) functions. + */ +typedef struct { + struct _ftsent *fts_cur; /* current node */ + struct _ftsent *fts_child; /* linked list of children */ + struct _ftsent **fts_array; /* sort array */ + dev_t fts_dev; /* starting device # */ + char *fts_path; /* path for this descent */ + int fts_rfd; /* fd for root */ + size_t fts_pathlen; /* sizeof(path) */ + int fts_nitems; /* elements in the sort array */ + int (*fts_compar)(const struct _ftsent **, const struct _ftsent **); /* compare function */ +#define FTS_COMFOLLOW 0x0001 /* follow command line symlinks */ +#define FTS_LOGICAL 0x0002 /* logical walk */ +#define FTS_NOCHDIR 0x0004 /* don't change directories */ +#define FTS_NOSTAT 0x0008 /* don't get stat info */ +#define FTS_PHYSICAL 0x0010 /* physical walk */ +#define FTS_SEEDOT 0x0020 /* return dot and dot-dot */ +#define FTS_XDEV 0x0040 /* don't cross devices */ +#define FTS_OPTIONMASK 0x00ff /* valid user option mask */ +#define FTS_NAMEONLY 0x1000 /* (private) child names only */ +#define FTS_STOP 0x2000 /* (private) unrecoverable error */ + int fts_options; /* fts_open options, global flags */ +} FTS; + +typedef struct _ftsent { + struct _ftsent *fts_cycle; /* cycle node */ + struct _ftsent *fts_parent; /* parent directory */ + struct _ftsent *fts_link; /* next file in directory */ + long fts_number; /* local numeric value */ + void *fts_pointer; /* local address value */ + char *fts_accpath; /* access path */ + char *fts_path; /* root path */ + int fts_errno; /* errno for this node */ + int fts_symfd; /* fd for symlink */ + size_t fts_pathlen; /* strlen(fts_path) */ + 
size_t fts_namelen; /* strlen(fts_name) */ + ino_t fts_ino; /* inode */ + dev_t fts_dev; /* device */ + nlink_t fts_nlink; /* link count */ +#define FTS_ROOTPARENTLEVEL -1 +#define FTS_ROOTLEVEL 0 +#define FTS_MAXLEVEL 0x7fffffff + int fts_level; /* depth (-1 to N) */ +#define FTS_D 1 /* preorder directory */ +#define FTS_DC 2 /* directory that causes cycles */ +#define FTS_DEFAULT 3 /* none of the above */ +#define FTS_DNR 4 /* unreadable directory */ +#define FTS_DOT 5 /* dot or dot-dot */ +#define FTS_DP 6 /* postorder directory */ +#define FTS_ERR 7 /* error; errno is set */ +#define FTS_F 8 /* regular file */ +#define FTS_INIT 9 /* initialized only */ +#define FTS_NS 10 /* stat(2) failed */ +#define FTS_NSOK 11 /* no stat(2) requested */ +#define FTS_SL 12 /* symbolic link */ +#define FTS_SLNONE 13 /* symbolic link without target */ + unsigned short fts_info; /* user flags for FTSENT structure */ +#define FTS_DONTCHDIR 0x01 /* don't chdir .. to the parent */ +#define FTS_SYMFOLLOW 0x02 /* followed a symlink to get here */ + unsigned short fts_flags; /* private flags for FTSENT structure */ +#define FTS_AGAIN 1 /* read node again */ +#define FTS_FOLLOW 2 /* follow symbolic link */ +#define FTS_NOINSTR 3 /* no instructions */ +#define FTS_SKIP 4 /* discard node */ + unsigned short fts_instr; /* fts_set() instructions */ + unsigned short fts_spare; /* unused */ + struct stat *fts_statp; /* stat(2) information */ + char fts_name[1]; /* file name */ +} FTSENT; + +FTSENT *fts_children(FTS *, int); +int fts_close(FTS *); +FTS *fts_open(char * const *, int, + int (*)(const FTSENT **, const FTSENT **)); +FTSENT *fts_read(FTS *); +int fts_set(FTS *, FTSENT *, int); + +__HEREDOC__ +fi + +echo "config.h: written" 1>&2 +echo "config.h: written" 1>&3 + +#---------------------------------------------------------------------- +# Now we go to generate our Makefile.configure. +# This file is simply a bunch of Makefile variables. 
+# They'll work in both GNUmakefile and BSDmakefile. +# You MIGHT want to change this. +#---------------------------------------------------------------------- + +exec > Makefile.configure + +[ -z "${BINDIR}" ] && BINDIR="${PREFIX}/bin" +[ -z "${SBINDIR}" ] && SBINDIR="${PREFIX}/sbin" +[ -z "${INCLUDEDIR}" ] && INCLUDEDIR="${PREFIX}/include" +[ -z "${LIBDIR}" ] && LIBDIR="${PREFIX}/lib" +[ -z "${MANDIR}" ] && MANDIR="${PREFIX}/man" +[ -z "${SHAREDIR}" ] && SHAREDIR="${PREFIX}/share" + +[ -z "${INSTALL_PROGRAM}" ] && INSTALL_PROGRAM="${INSTALL} -m 0555" +[ -z "${INSTALL_LIB}" ] && INSTALL_LIB="${INSTALL} -m 0444" +[ -z "${INSTALL_MAN}" ] && INSTALL_MAN="${INSTALL} -m 0444" +[ -z "${INSTALL_DATA}" ] && INSTALL_DATA="${INSTALL} -m 0444" + +cat << __HEREDOC__ +CC = ${CC} +CFLAGS = ${CFLAGS} +CPPFLAGS = ${CPPFLAGS} +LDADD = ${LDADD} +LDADD_B64_NTOP = ${LDADD_B64_NTOP} +LDADD_CRYPT = ${LDADD_CRYPT} +LDADD_LIB_SOCKET = ${LDADD_LIB_SOCKET} +LDADD_MD5 = ${LDADD_MD5} +LDADD_SHA2 = ${LDADD_SHA2} +LDADD_STATIC = ${LDADD_STATIC} +LDFLAGS = ${LDFLAGS} +STATIC = ${STATIC} +PREFIX = ${PREFIX} +BINDIR = ${BINDIR} +SHAREDIR = ${SHAREDIR} +SBINDIR = ${SBINDIR} +INCLUDEDIR = ${INCLUDEDIR} +LIBDIR = ${LIBDIR} +MANDIR = ${MANDIR} +INSTALL = ${INSTALL} +INSTALL_PROGRAM = ${INSTALL_PROGRAM} +INSTALL_LIB = ${INSTALL_LIB} +INSTALL_MAN = ${INSTALL_MAN} +INSTALL_DATA = ${INSTALL_DATA} +__HEREDOC__ + +echo "Makefile.configure: written" 1>&2 +echo "Makefile.configure: written" 1>&3 + +exit 0 diff --git a/diff.c b/diff.c @@ -0,0 +1,1554 @@ +/* $Id$ */ +/* + * Copyright (c) 2017--2021 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include "config.h"
+
+#if HAVE_SYS_QUEUE
+# include <sys/queue.h>
+#endif /* NOTE(review): without it, TAILQ_* presumably come from config.h -- confirm */
+#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <float.h>
+#include <math.h>
+#if HAVE_MD5
+# include <md5.h>
+#endif /* NOTE(review): without it, MD5* presumably come from config.h -- confirm */
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lowdown.h"
+#include "libdiff.h"
+#include "extern.h"
+
+/*
+ * If "node" is not NULL, this represents our match attempts for a
+ * single node in a node tree. We basically use "optmatch" and "opt" to
+ * keep trying to find the most optimal candidate in the other tree,
+ * which ends up being "match".
+ * The signature ("sig") and "weight" are the per-node values that
+ * assign_sigs() is documented to assign; "sig" is sized to hold an MD5
+ * digest in string form.
+ */
+struct xnode {
+	char sig[MD5_DIGEST_STRING_LENGTH]; /* MD5 signature (digest string) */
+	double weight; /* queue weight */
+	const struct lowdown_node *node; /* basis node */
+	const struct lowdown_node *match; /* matching node */
+	size_t opt; /* match optimality */
+	const struct lowdown_node *optmatch; /* current optimal */
+};
+
+/*
+ * A map of all nodes in the current tree by their ID. A map can have
+ * holes (in which case the xnode's "node" is NULL) since we collapse
+ * adjacent text nodes as a preprocess.
+ * The "nodes" table is indexed directly by node ID and is grown on
+ * demand by assign_sigs(), which asserts that "maxid" stays strictly
+ * below "maxsize".
+ */
+struct xmap {
+	struct xnode *nodes; /* holey table */
+	size_t maxsize; /* size of "nodes" */
+	size_t maxid; /* max node id */
+	size_t maxnodes; /* non-NULL count */
+	double maxweight; /* node weight */
+};
+
+/*
+ * Queue of nodes. This is used in creating the priority queue of next
+ */ +struct pnode { + const struct lowdown_node *node; /* priority node */ + TAILQ_ENTRY(pnode) entries; +}; + +/* + * Convenience structure to hold maps we use when merging together the + * trees. + */ +struct merger { + const struct xmap *xoldmap; /* source xnodes */ + const struct xmap *xnewmap; /* destination xnodes */ + size_t id; /* maxid in new tree */ +}; + +TAILQ_HEAD(pnodeq, pnode); + +/* + * A node used in computing the shortest edit script. + */ +struct sesnode { + char *buf; /* buffer */ + size_t bufsz; /* length of buffer (less NUL) */ + int tailsp; /* whether there's trailing space */ + int headsp; /* whether there's leading space */ +}; + +static void +MD5Updatebuf(MD5_CTX *ctx, const struct lowdown_buf *v) +{ + + assert(v != NULL); + MD5Update(ctx, (const uint8_t *)v->data, v->size); +} + +static void +MD5Updatev(MD5_CTX *ctx, const void *v, size_t sz) +{ + + assert(v != NULL); + MD5Update(ctx, (const unsigned char *)v, sz); +} + +/* + * If this returns non-zero, the node should be considered opaque and + * we will not do any difference processing within it. It will still be + * marked with weight and signature from child nodes and interior data. + */ +static int +is_opaque(const struct lowdown_node *n) +{ + + assert(n != NULL); + return n->type == LOWDOWN_TABLE_BLOCK || + n->type == LOWDOWN_META; +} + +/* + * Assign signatures and weights. + * This is defined by "Phase 2" in sec. 5.2., along with the specific + * heuristics given in the "Tuning" section. + * We use the MD5 algorithm for computing hashes. + * Returns the weight of the node rooted at "n". + * If "parent" is not NULL, its hash is updated with the hash computed + * for the current "n" and its children. + * Return <0 on failure. 
+ */ +static double +assign_sigs(MD5_CTX *parent, struct xmap *map, + const struct lowdown_node *n, int ign) +{ + const struct lowdown_node *nn; + ssize_t weight = -1; + MD5_CTX ctx; + double v = 0.0, vv; + struct xnode *xn; + struct xnode xntmp; + void *pp; + int ign_chld = ign; + + /* + * Get our node slot unless we're ignoring the node. + * Ignoring comes when a parent in our chain is opaque. + */ + + if (!ign) { + if (n->id >= map->maxsize) { + pp = recallocarray(map->nodes, map->maxsize, + n->id + 64, sizeof(struct xnode)); + if (pp == NULL) + return -1.0; + map->nodes = pp; + map->maxsize = n->id + 64; + } + xn = &map->nodes[n->id]; + assert(xn->node == NULL); + assert(xn->weight == 0.0); + xn->node = n; + if (n->id > map->maxid) + map->maxid = n->id; + assert(map->maxid < map->maxsize); + map->maxnodes++; + ign_chld = is_opaque(n); + } + + /* Recursive step. */ + + MD5Init(&ctx); + MD5Updatev(&ctx, &n->type, sizeof(enum lowdown_rndrt)); + + TAILQ_FOREACH(nn, &n->children, entries) { + if ((vv = assign_sigs(&ctx, map, nn, ign_chld)) < 0.0) + return vv; + v += vv; + } + + /* Re-assign "xn": child might have reallocated. */ + + memset(&xntmp, 0, sizeof(struct xnode)); + xn = ign ? &xntmp : &map->nodes[n->id]; + xn->weight = v; + + /* + * Compute our weight. + * The weight is either the log of the contained text length for + * leaf nodes or the accumulated sub-element weight for + * non-terminal nodes plus one. 
+ */ + + switch (n->type) { + case LOWDOWN_BLOCKCODE: + weight = n->rndr_blockcode.text.size; + break; + case LOWDOWN_BLOCKHTML: + weight = n->rndr_blockhtml.text.size; + break; + case LOWDOWN_LINK_AUTO: + weight = n->rndr_autolink.link.size; + break; + case LOWDOWN_CODESPAN: + weight = n->rndr_codespan.text.size; + break; + case LOWDOWN_META: + weight = n->rndr_meta.key.size; + break; + case LOWDOWN_IMAGE: + weight = n->rndr_image.link.size + + n->rndr_image.title.size + + n->rndr_image.dims.size + + n->rndr_image.alt.size; + break; + case LOWDOWN_RAW_HTML: + weight = n->rndr_raw_html.text.size; + break; + case LOWDOWN_NORMAL_TEXT: + weight = n->rndr_normal_text.text.size; + break; + case LOWDOWN_ENTITY: + weight = n->rndr_entity.text.size; + break; + default: + break; + } + + /* Weight can be zero if text size is zero. */ + + if (weight >= 0) + xn->weight = 1.0 + (weight == 0 ? 0.0 : log(weight)); + else + xn->weight += 1.0; + + /* + * Augment our signature from our attributes. + * This depends upon the node. + * Avoid using attributes that are "mutable" relative to the + * generated output, e.g., list display numbers. 
+ */ + + switch (n->type) { + case LOWDOWN_LIST: + MD5Updatev(&ctx, &n->rndr_list.flags, + sizeof(enum hlist_fl)); + break; + case LOWDOWN_LISTITEM: + MD5Updatev(&ctx, &n->rndr_listitem.flags, + sizeof(enum hlist_fl)); + MD5Updatev(&ctx, &n->rndr_listitem.num, + sizeof(size_t)); + break; + case LOWDOWN_HEADER: + MD5Updatev(&ctx, &n->rndr_header.level, + sizeof(size_t)); + break; + case LOWDOWN_NORMAL_TEXT: + MD5Updatebuf(&ctx, &n->rndr_normal_text.text); + break; + case LOWDOWN_META: + MD5Updatebuf(&ctx, &n->rndr_meta.key); + break; + case LOWDOWN_ENTITY: + MD5Updatebuf(&ctx, &n->rndr_entity.text); + break; + case LOWDOWN_LINK_AUTO: + MD5Updatebuf(&ctx, &n->rndr_autolink.link); + MD5Updatev(&ctx, &n->rndr_autolink.type, + sizeof(enum halink_type)); + break; + case LOWDOWN_RAW_HTML: + MD5Updatebuf(&ctx, &n->rndr_raw_html.text); + break; + case LOWDOWN_LINK: + MD5Updatebuf(&ctx, &n->rndr_link.link); + MD5Updatebuf(&ctx, &n->rndr_link.title); + break; + case LOWDOWN_BLOCKCODE: + MD5Updatebuf(&ctx, &n->rndr_blockcode.text); + MD5Updatebuf(&ctx, &n->rndr_blockcode.lang); + break; + case LOWDOWN_CODESPAN: + MD5Updatebuf(&ctx, &n->rndr_codespan.text); + break; + case LOWDOWN_TABLE_HEADER: + MD5Updatev(&ctx, &n->rndr_table_header.columns, + sizeof(size_t)); + break; + case LOWDOWN_TABLE_CELL: + MD5Updatev(&ctx, &n->rndr_table_cell.flags, + sizeof(enum htbl_flags)); + MD5Updatev(&ctx, &n->rndr_table_cell.col, + sizeof(size_t)); + break; + case LOWDOWN_IMAGE: + MD5Updatebuf(&ctx, &n->rndr_image.link); + MD5Updatebuf(&ctx, &n->rndr_image.title); + MD5Updatebuf(&ctx, &n->rndr_image.dims); + MD5Updatebuf(&ctx, &n->rndr_image.alt); + break; + case LOWDOWN_MATH_BLOCK: + MD5Updatev(&ctx, &n->rndr_math.blockmode, + sizeof(int)); + break; + case LOWDOWN_BLOCKHTML: + MD5Updatebuf(&ctx, &n->rndr_blockhtml.text); + break; + default: + break; + } + + MD5End(&ctx, xn->sig); + + if (parent != NULL) + MD5Update(parent, (uint8_t *)xn->sig, + MD5_DIGEST_STRING_LENGTH - 1); + + if (xn->weight 
> map->maxweight) + map->maxweight = xn->weight; + + assert(isfinite(xn->weight)); + assert(isnormal(xn->weight)); + assert(xn->weight > 0.0); + return xn->weight; +} + +/* + * Enqueue "n" into a priority queue "pq". + * Priority is given to weights; and if weights are equal, then + * proximity to the parse root given by a pre-order identity. + * FIXME: use a priority heap. + * Return zero on failure, non-zero on success. + */ +static int +pqueue(const struct lowdown_node *n, + struct xmap *map, struct pnodeq *pq) +{ + struct pnode *p, *pp; + struct xnode *xnew, *xold; + + if ((p = malloc(sizeof(struct pnode))) == NULL) + return 0; + p->node = n; + + xnew = &map->nodes[n->id]; + assert(xnew != NULL); + assert(xnew->node != NULL); + + TAILQ_FOREACH(pp, pq, entries) { + xold = &map->nodes[pp->node->id]; + assert(xold->node != NULL); + if (xnew->weight >= xold->weight) + break; + } + + if (pp == NULL) { + TAILQ_INSERT_TAIL(pq, p, entries); + return 1; + } else if (xnew->weight > xold->weight) { + TAILQ_INSERT_BEFORE(pp, p, entries); + return 1; + } + + for (; pp != NULL; pp = TAILQ_NEXT(pp, entries)) { + assert(p->node->id != pp->node->id); + if (p->node->id < pp->node->id) + break; + } + + if (pp == NULL) + TAILQ_INSERT_TAIL(pq, p, entries); + else + TAILQ_INSERT_BEFORE(pp, p, entries); + return 1; +} + +/* + * Candidate optimality between "xnew" and "xold" as described in "Phase + * 3" of sec. 5.2. + * This also uses the heuristic described in "Tuning" for how many + * levels to search upward. + */ +static size_t +optimality(struct xnode *xnew, struct xmap *xnewmap, + struct xnode *xold, struct xmap *xoldmap) +{ + size_t opt = 1, d, i = 0; + + /* Height: log(n) * W/W_0 or at least 1. */ + + d = ceil(log(xnewmap->maxnodes) * + xnew->weight / xnewmap->maxweight); + + if (d == 0) + d = 1; + + /* FIXME: are we supposed to bound to "d"? 
*/ + + while (xnew->node->parent != NULL && + xold->node->parent != NULL && i < d) { + xnew = &xnewmap->nodes[xnew->node->parent->id]; + xold = &xoldmap->nodes[xold->node->parent->id]; + if (xnew->match != NULL && xnew->match == xold->node) + opt++; + i++; + } + + return opt; +} + +/* + * Compute the candidacy of "xnew" to "xold" as described in "Phase 3" + * of sec. 5.2 and using the optimality() function as a basis. + * If "xnew" does not have a match assigned (no prior candidacy), assign + * it immediately to "xold". + * If it does, then compute the optimality and select the greater of the + * two optimalities. + * As an extension to the paper, if the optimalities are equal, use the + * "closer" node to the current identifier. + */ +static void +candidate(struct xnode *xnew, struct xmap *xnewmap, + struct xnode *xold, struct xmap *xoldmap) +{ + size_t opt; + long long dnew, dold; + + assert(xnew->node != NULL); + assert(xold->node != NULL); + + if (xnew->optmatch == NULL) { + xnew->optmatch = xold->node; + xnew->opt = optimality(xnew, xnewmap, xold, xoldmap); + return; + } + + opt = optimality(xnew, xnewmap, xold, xoldmap); + + if (opt == xnew->opt) { + /* + * Use a simple norm over the identifier space. + * Choose the lesser of the norms. + */ + dold = llabs((long long) + (xnew->optmatch->id - xnew->node->id)); + dnew = llabs((long long) + (xold->node->id - xnew->node->id)); + if (dold > dnew) { + xnew->optmatch = xold->node; + xnew->opt = opt; + } + } else if (opt > xnew->opt) { + xnew->optmatch = xold->node; + xnew->opt = opt; + } +} + +/* + * Do the two internal nodes equal each other? + * This depends upon the node type. + * By default, all similarly-labelled (typed) nodes are equal. 
+ */ +static int +match_eq(const struct lowdown_node *n1, + const struct lowdown_node *n2) +{ + + if (n1->type != n2->type) + return 0; + + switch (n1->type) { + case LOWDOWN_LINK: + if (!hbuf_eq + (&n1->rndr_link.link, &n2->rndr_link.link)) + return 0; + if (!hbuf_eq + (&n1->rndr_link.title, &n2->rndr_link.title)) + return 0; + break; + case LOWDOWN_HEADER: + if (n1->rndr_header.level != n2->rndr_header.level) + return 0; + break; + case LOWDOWN_META: + if (!hbuf_eq + (&n1->rndr_meta.key, &n2->rndr_meta.key)) + return 0; + break; + case LOWDOWN_LISTITEM: + if (n1->rndr_listitem.num != n2->rndr_listitem.num) + return 0; + if (n1->rndr_listitem.flags != n2->rndr_listitem.flags) + return 0; + break; + default: + break; + } + + return 1; +} + +/* + * Return non-zero if this node is the only child. + */ +static int +match_singleton(const struct lowdown_node *n) +{ + + if (n->parent == NULL) + return 1; + return TAILQ_NEXT(n, entries) == + TAILQ_PREV(n, lowdown_nodeq, entries); +} + +/* + * Algorithm to "propogate up" according to "Phase 3" of sec. 5.2. + * This also uses the heuristic described in "Tuning" for how many + * levels to search upward. + * I augment this by making singleton children pass upward. + * FIXME: right now, this doesn't clobber existing upward matches. Is + * that correct behaviour? + */ +static void +match_up(struct xnode *xnew, struct xmap *xnewmap, + struct xnode *xold, struct xmap *xoldmap) +{ + size_t d, i = 0; + + /* Height: log(n) * W/W_0 or at least 1. */ + + d = ceil(log(xnewmap->maxnodes) * + xnew->weight / xnewmap->maxweight); + if (d == 0) + d = 1; + + while (xnew->node->parent != NULL && + xold->node->parent != NULL && i < d) { + /* Are the "labels" the same? 
*/ + if (!match_eq(xnew->node->parent, xold->node->parent)) + break; + xnew = &xnewmap->nodes[xnew->node->parent->id]; + xold = &xoldmap->nodes[xold->node->parent->id]; + if (xold->match != NULL || xnew->match != NULL) + break; + xnew->match = xold->node; + xold->match = xnew->node; + i++; + } + + if (i != d) + return; + + /* + * Pass up singletons. + * This is an extension of the algorithm. + */ + + while (xnew->node->parent != NULL && + xold->node->parent != NULL) { + if (!match_singleton(xnew->node) || + !match_singleton(xold->node)) + break; + if (!match_eq(xnew->node->parent, xold->node->parent)) + break; + xnew = &xnewmap->nodes[xnew->node->parent->id]; + xold = &xoldmap->nodes[xold->node->parent->id]; + if (xold->match != NULL || xnew->match != NULL) + break; + xnew->match = xold->node; + xold->match = xnew->node; + } +} + +/* + * Algorithm that "propogates down" according to "Phase 3" of sec. 5.2. + * This (recursively) makes sure that a matched tree has all of the + * subtree nodes also matched. + */ +static void +match_down(struct xnode *xnew, struct xmap *xnewmap, + struct xnode *xold, struct xmap *xoldmap) +{ + struct lowdown_node *nnew, *nold; + + /* + * If we're matching into a component that has already been + * matched, we're in the subtree proper (the subtree root is + * checked that it's not already matched) and the fact that this + * is within a match indicates we're more the "larger" of the + * matches, so unset its match status. 
+ */ + + if (xold->match != NULL) { + assert(xold->node == + xnewmap->nodes[xold->match->id].match); + xnewmap->nodes[xold->match->id].match = NULL; + xold->match = NULL; + } + + assert(xnew->match == NULL); + assert(xold->match == NULL); + + xnew->match = xold->node; + xold->match = xnew->node; + + if (is_opaque(xnew->node)) { + assert(is_opaque(xold->node)); + return; + } + + nnew = TAILQ_FIRST(&xnew->node->children); + nold = TAILQ_FIRST(&xold->node->children); + + while (nnew != NULL) { + assert(NULL != nold); + xnew = &xnewmap->nodes[nnew->id]; + xold = &xoldmap->nodes[nold->id]; + match_down(xnew, xnewmap, xold, xoldmap); + nnew = TAILQ_NEXT(nnew, entries); + nold = TAILQ_NEXT(nold, entries); + } + assert(nold == NULL); +} + +/* + * Clone a single node and all of its "attributes". + * That is, its type and "leaf node" data. + * Assign the identifier as given. + * Note that some attributes, such as the table column array, aren't + * copied. + * We'll re-create those later. + */ +static struct lowdown_node * +node_clone(const struct lowdown_node *v, size_t id) +{ + struct lowdown_node *n; + int rc = 1; + size_t i; + + if ((n = calloc(1, sizeof(struct lowdown_node))) == NULL) + return NULL; + + TAILQ_INIT(&n->children); + n->type = v->type; + n->id = id; + + switch (n->type) { + case LOWDOWN_DEFINITION: + n->rndr_definition.flags = + v->rndr_definition.flags; + break; + case LOWDOWN_META: + rc = hbuf_clone(&v->rndr_meta.key, + &n->rndr_meta.key); + break; + case LOWDOWN_LIST: + n->rndr_list.flags = v->rndr_list.flags; + break; + case LOWDOWN_LISTITEM: + n->rndr_listitem.flags = v->rndr_listitem.flags; + n->rndr_listitem.num = v->rndr_listitem.num; + break; + case LOWDOWN_HEADER: + n->rndr_header.level = v->rndr_header.level; + break; + case LOWDOWN_NORMAL_TEXT: + rc = hbuf_clone(&v->rndr_normal_text.text, + &n->rndr_normal_text.text); + break; + case LOWDOWN_ENTITY: + rc = hbuf_clone(&v->rndr_entity.text, + &n->rndr_entity.text); + break; + case 
LOWDOWN_LINK_AUTO: + rc = hbuf_clone(&v->rndr_autolink.link, + &n->rndr_autolink.link); + n->rndr_autolink.type = v->rndr_autolink.type; + break; + case LOWDOWN_RAW_HTML: + rc = hbuf_clone(&v->rndr_raw_html.text, + &n->rndr_raw_html.text); + break; + case LOWDOWN_LINK: + rc = hbuf_clone(&v->rndr_link.link, + &n->rndr_link.link) && + hbuf_clone(&v->rndr_link.title, + &n->rndr_link.title); + break; + case LOWDOWN_BLOCKCODE: + rc = hbuf_clone(&v->rndr_blockcode.text, + &n->rndr_blockcode.text) && + hbuf_clone(&v->rndr_blockcode.lang, + &n->rndr_blockcode.lang); + break; + case LOWDOWN_CODESPAN: + rc = hbuf_clone(&v->rndr_codespan.text, + &n->rndr_codespan.text); + break; + case LOWDOWN_TABLE_BLOCK: + n->rndr_table.columns = v->rndr_table.columns; + break; + case LOWDOWN_TABLE_HEADER: + n->rndr_table_header.columns = + v->rndr_table_header.columns; + n->rndr_table_header.flags = calloc + (n->rndr_table_header.columns, + sizeof(enum htbl_flags)); + if (n->rndr_table_header.flags == NULL) + return NULL; + for (i = 0; i < n->rndr_table_header.columns; i++) + n->rndr_table_header.flags[i] = + v->rndr_table_header.flags[i]; + break; + case LOWDOWN_TABLE_CELL: + n->rndr_table_cell.flags = + v->rndr_table_cell.flags; + n->rndr_table_cell.col = + v->rndr_table_cell.col; + n->rndr_table_cell.columns = + v->rndr_table_cell.columns; + break; + case LOWDOWN_IMAGE: + rc = hbuf_clone(&v->rndr_image.link, + &n->rndr_image.link) && + hbuf_clone(&v->rndr_image.title, + &n->rndr_image.title) && + hbuf_clone(&v->rndr_image.dims, + &n->rndr_image.dims) && + hbuf_clone(&v->rndr_image.alt, + &n->rndr_image.alt); + break; + case LOWDOWN_MATH_BLOCK: + n->rndr_math.blockmode = + v->rndr_math.blockmode; + break; + case LOWDOWN_BLOCKHTML: + rc = hbuf_clone(&v->rndr_blockhtml.text, + &n->rndr_blockhtml.text); + break; + default: + break; + } + + if (!rc) { + lowdown_node_free(n); + n = NULL; + } + + return n; +} + +/* + * Take the sub-tree "v" and clone it and all of the nodes beneath it, + * 
returning the cloned node. + * This starts using identifiers at "id". + */ +static struct lowdown_node * +node_clonetree(const struct lowdown_node *v, size_t *id) +{ + struct lowdown_node *n, *nn; + const struct lowdown_node *vv; + + if ((n = node_clone(v, *id++)) == NULL) + return NULL; + + TAILQ_FOREACH(vv, &v->children, entries) { + if ((nn = node_clonetree(vv, id)) == NULL) + goto out; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + nn->parent = n; + } + + return n; +out: + lowdown_node_free(n); + return NULL; +} + +/* + * Count the number of words in a normal-text node. + */ +static size_t +node_countwords(const struct lowdown_node *n) +{ + const char *cp; + size_t i = 0, sz, words = 0; + + cp = n->rndr_normal_text.text.data; + sz = n->rndr_normal_text.text.size; + + /* Skip leading space. */ + + while (i < sz && + isspace((unsigned char)cp[i])) + i++; + + /* First go through word, then trailing space. */ + + while (i < sz) { + assert(!isspace((unsigned char)cp[i])); + words++; + while (i < sz && + !isspace((unsigned char)cp[i])) + i++; + while (i < sz && + isspace((unsigned char)cp[i])) + i++; + } + + return words; +} + +/* + * Like node_countwords(), except dupping individual words into a + * structure. + * Return zero on failure (memory), non-zero on success. + */ +static int +node_tokenise(const struct lowdown_node *n, + struct sesnode *toks, size_t toksz, char **savep) +{ + char *cp; + size_t i = 0, sz, words = 0; + + *savep = NULL; + + if (toksz == 0) + return 1; + + sz = n->rndr_normal_text.text.size; + *savep = cp = malloc(sz + 1); + if (cp == NULL) + return 0; + memcpy(cp, n->rndr_normal_text.text.data, sz); + cp[sz] = '\0'; + + *savep = cp; + + /* Skip leading space. 
*/ + + if (i < sz) + toks[0].headsp = isspace((unsigned char)cp[0]); + + while (i < sz && + isspace((unsigned char)cp[i])) + i++; + + while (i < sz) { + assert(words < toksz); + assert(!isspace((unsigned char)cp[i])); + toks[words].buf = &cp[i]; + toks[words].bufsz = 0; + while (i < sz && + !isspace((unsigned char)cp[i])) { + toks[words].bufsz++; + i++; + } + words++; + if (i == sz) + break; + toks[words - 1].tailsp = 1; + assert(isspace((unsigned char)cp[i])); + cp[i++] = '\0'; + while (i < sz && + isspace((unsigned char)cp[i])) + i++; + } + return 1; +} + +static int +node_word_cmp(const void *p1, const void *p2) +{ + const struct sesnode *l1 = p1, *l2 = p2; + + if (l1->bufsz != l2->bufsz) + return 0; + return 0 == strncmp(l1->buf, l2->buf, l1->bufsz); +} + +/* + * Return zero on failure (memory), non-zero on success. + */ +static int +node_lcs(const struct lowdown_node *nold, + const struct lowdown_node *nnew, + struct lowdown_node *n, size_t *id) +{ + const struct sesnode *tmp; + struct lowdown_node *nn; + struct sesnode *newtok = NULL, *oldtok = NULL; + char *newtokbuf = NULL, *oldtokbuf = NULL; + size_t i, newtoksz, oldtoksz; + struct diff d; + int rc = 0; + + memset(&d, 0, sizeof(struct diff)); + + newtoksz = node_countwords(nnew); + oldtoksz = node_countwords(nold); + + newtok = calloc(newtoksz, sizeof(struct sesnode)); + if (newtok == NULL) + goto out; + oldtok = calloc(oldtoksz, sizeof(struct sesnode)); + if (oldtok == NULL) + goto out; + + if (!node_tokenise(nnew, newtok, newtoksz, &newtokbuf)) + goto out; + if (!node_tokenise(nold, oldtok, oldtoksz, &oldtokbuf)) + goto out; + + if (!diff(&d, node_word_cmp, sizeof(struct sesnode), + oldtok, oldtoksz, newtok, newtoksz)) + goto out; + + for (i = 0; i < d.sessz; i++) { + tmp = d.ses[i].e; + + if (tmp->headsp) { + nn = calloc(1, sizeof(struct lowdown_node)); + if (nn == NULL) + goto out; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + TAILQ_INIT(&nn->children); + + nn->type = LOWDOWN_NORMAL_TEXT; + nn->id 
= (*id)++; + nn->parent = n; + nn->rndr_normal_text.text.size = 1; + nn->rndr_normal_text.text.data = strdup(" "); + if (nn->rndr_normal_text.text.data == NULL) + goto out; + } + + nn = calloc(1, sizeof(struct lowdown_node)); + if (nn == NULL) + goto out; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + TAILQ_INIT(&nn->children); + + nn->type = LOWDOWN_NORMAL_TEXT; + nn->id = (*id)++; + nn->parent = n; + nn->rndr_normal_text.text.size = tmp->bufsz; + nn->rndr_normal_text.text.data = + calloc(1, tmp->bufsz + 1); + if (nn->rndr_normal_text.text.data == NULL) + goto out; + + memcpy(nn->rndr_normal_text.text.data, + tmp->buf, tmp->bufsz); + nn->chng = DIFF_DELETE == d.ses[i].type ? + LOWDOWN_CHNG_DELETE : + DIFF_ADD == d.ses[i].type ? + LOWDOWN_CHNG_INSERT : + LOWDOWN_CHNG_NONE; + + if (tmp->tailsp) { + nn = calloc(1, sizeof(struct lowdown_node)); + if (nn == NULL) + goto out; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + TAILQ_INIT(&nn->children); + nn->type = LOWDOWN_NORMAL_TEXT; + nn->id = (*id)++; + nn->parent = n; + nn->rndr_normal_text.text.size = 1; + nn->rndr_normal_text.text.data = strdup(" "); + if (nn->rndr_normal_text.text.data == NULL) + goto out; + } + } + + rc = 1; +out: + free(d.ses); + free(d.lcs); + free(newtok); + free(oldtok); + free(newtokbuf); + free(oldtokbuf); + return rc; +} + +/* + * Merge the new tree "nnew" with the old "nold" using a depth-first + * algorithm. + * The produced tree will show the new tree with deleted nodes from the + * old and inserted ones. + * It will also show moved nodes by delete/add pairs. + * This uses "Phase 5" semantics, but implements the merge algorithm + * without notes from the paper. 
+ */ +static struct lowdown_node * +node_merge(const struct lowdown_node *nold, + const struct lowdown_node *nnew, struct merger *parms) +{ + const struct xnode *xnew, *xold; + struct lowdown_node *n, *nn; + const struct lowdown_node *nnold; + const struct xmap *xoldmap = parms->xoldmap, + *xnewmap = parms->xnewmap; + + /* + * Invariant: the current nodes are matched. + * Start by putting that node into the current output. + */ + + assert(nnew != NULL && nold != NULL ); + xnew = &xnewmap->nodes[nnew->id]; + xold = &xoldmap->nodes[nold->id]; + assert(xnew->match != NULL); + assert(xold->match != NULL); + assert(xnew->match == xold->node); + + if ((n = node_clone(nnew, parms->id++)) == NULL) + goto err; + + /* Now walk through the children on both sides. */ + + nold = TAILQ_FIRST(&nold->children); + nnew = TAILQ_FIRST(&nnew->children); + + while (nnew != NULL) { + /* + * Begin by flushing out all of the nodes that have been + * deleted from the old tree at this level. + * According to the paper, deleted nodes have no match. + * These will leave us with old nodes that are in the + * new tree (not necessarily at this level, though). + */ + + while (nold != NULL) { + xold = &xoldmap->nodes[nold->id]; + if (xold->match != NULL || + LOWDOWN_NORMAL_TEXT == nold->type) + break; + if ((nn = node_clonetree + (nold, &parms->id)) == NULL) + goto err; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + nn->parent = n; + nn->chng = LOWDOWN_CHNG_DELETE; + nold = TAILQ_NEXT(nold, entries); + } + + /* + * Now flush inserted nodes. + * According to the paper, these have no match. + * This leaves us with nodes that are matched somewhere + * (not necessarily at this level) with the old. 
+ */ + + while (nnew != NULL) { + xnew = &xnewmap->nodes[nnew->id]; + if (xnew->match != NULL || + LOWDOWN_NORMAL_TEXT == nnew->type) + break; + if ((nn = node_clonetree + (nnew, &parms->id)) == NULL) + goto err; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + nn->parent = n; + nn->chng = LOWDOWN_CHNG_INSERT; + nnew = TAILQ_NEXT(nnew, entries); + } + + /* + * If both nodes are text nodes, then we want to run the + * LCS algorithm on them. + * This is an extension of the BULD algorithm. + */ + + if (nold != NULL && nnew != NULL && + nold->type == LOWDOWN_NORMAL_TEXT && + xold->match == NULL && + nnew->type == LOWDOWN_NORMAL_TEXT && + xnew->match == NULL) { + if (!node_lcs(nold, nnew, n, &parms->id)) + goto err; + nold = TAILQ_NEXT(nold, entries); + nnew = TAILQ_NEXT(nnew, entries); + } + + while (nold != NULL) { + xold = &xoldmap->nodes[nold->id]; + if (xold->match != NULL) + break; + if ((nn = node_clonetree + (nold, &parms->id)) == NULL) + goto err; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + nn->parent = n; + nn->chng = LOWDOWN_CHNG_DELETE; + nold = TAILQ_NEXT(nold, entries); + } + + while (nnew != NULL) { + xnew = &xnewmap->nodes[nnew->id]; + if (xnew->match != NULL) + break; + if ((nn = node_clonetree + (nnew, &parms->id)) == NULL) + goto err; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + nn->parent = n; + nn->chng = LOWDOWN_CHNG_INSERT; + nnew = TAILQ_NEXT(nnew, entries); + } + + /* Nothing more to do at this level? */ + + if (nnew == NULL) + break; + + /* + * Now we take the current new node and see if it's a + * match with a node in the current level. + * If it is, then we can flush out old nodes (moved, + * which we call deleted and re-inserted) until we get + * to the matching one. + * Then we'll be in lock-step with the old tree. + */ + + xnew = &xnewmap->nodes[nnew->id]; + assert(xnew->match != NULL); + + /* Scan ahead to find a matching old. 
*/ + + for (nnold = nold; nnold != NULL ; ) { + xold = &xoldmap->nodes[nnold->id]; + if (xnew->node == xold->match) + break; + nnold = TAILQ_NEXT(nnold, entries); + } + + /* + * We did not find a match. + * This means that the new node has been moved from + * somewhere else in the tree. + */ + + if (nnold == NULL) { + if ((nn = node_clonetree + (nnew, &parms->id)) == NULL) + goto err; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + nn->parent = n; + nn->chng = LOWDOWN_CHNG_INSERT; + nnew = TAILQ_NEXT(nnew, entries); + continue; + } + + /* Match found: flush old nodes til the match. */ + + while (nold != NULL) { + xold = &xoldmap->nodes[nold->id]; + if (xnew->node == xold->match) + break; + if ((nn = node_clonetree + (nold, &parms->id)) == NULL) + goto err; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + nn->parent = n; + nn->chng = LOWDOWN_CHNG_DELETE; + nold = TAILQ_NEXT(nold, entries); + } + + assert(nold != NULL); + + /* + * Now we're in lock-step. + * Do the recursive step between the matched pair. + * Then continue on to the next nodes. + */ + + if (is_opaque(nnew)) { + assert(is_opaque(nold)); + if ((nn = node_clonetree + (nnew, &parms->id)) == NULL) + goto err; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + nn->parent = n; + } else { + assert(!is_opaque(nold)); + nn = node_merge(nold, nnew, parms); + if (nn == NULL) + goto err; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + nn->parent = n; + } + + nold = TAILQ_NEXT(nold, entries); + nnew = TAILQ_NEXT(nnew, entries); + } + + /* Flush remaining old nodes. */ + + while (nold != NULL) { + if ((nn = node_clonetree (nold, &parms->id)) == NULL) + goto err; + TAILQ_INSERT_TAIL(&n->children, nn, entries); + nn->parent = n; + nn->chng = LOWDOWN_CHNG_DELETE; + nold = TAILQ_NEXT(nold, entries); + } + + return n; +err: + lowdown_node_free(n); + return NULL; +} + +/* + * Optimise from top down. 
+ * This works by selecting matching non-terminal nodes, both adjacent + * (i.e., children of the same adjacent nodes), and seeing if their + * immediate siblings may be matched by label. + * This works well when looking at pure-paragraph changes. + */ +static void +node_optimise_topdown(const struct lowdown_node *n, + struct xmap *newmap, struct xmap *oldmap) +{ + struct xnode *xn, *xmatch, *xnchild, + *xmchild, *xnnext, *xmnext; + const struct lowdown_node *match, *nchild, *mchild, + *nnext, *mnext; + + if (is_opaque(n) || TAILQ_EMPTY(&n->children)) + return; + + xn = &newmap->nodes[n->id]; + assert(xn != NULL); + + if ((match = xn->match) == NULL) + return; + + xmatch = &oldmap->nodes[match->id]; + assert(xmatch != NULL); + + TAILQ_FOREACH(nchild, &n->children, entries) { + if (is_opaque(nchild) || TAILQ_EMPTY(&nchild->children)) + continue; + xnchild = &newmap->nodes[nchild->id]; + assert(xnchild != NULL); + if ((mchild = xnchild->match) == NULL) + continue; + if (mchild->parent->id != match->id) + continue; + xmchild = &oldmap->nodes[mchild->id]; + assert(xmchild != NULL); + + /* + * Do we have a non-terminal sibling after us without a + * match? 
+ */ + + if ((nnext = TAILQ_NEXT(nchild, entries)) == NULL) + continue; + if (is_opaque(nnext) || TAILQ_EMPTY(&nnext->children)) + continue; + xnnext = &newmap->nodes[nnext->id]; + assert(xnnext != NULL); + if (xnnext->match != NULL) + continue; + + if ((mnext = TAILQ_NEXT(mchild, entries)) == NULL) + continue; + if (is_opaque(mnext) || TAILQ_EMPTY(&mnext->children)) + continue; + xmnext = &oldmap->nodes[mnext->id]; + assert(xmnext != NULL); + if (xmnext->match != NULL) + continue; + + if (!match_eq(nnext, mnext)) + continue; + + xnnext->match = mnext; + xmnext->match = nnext; + } + + TAILQ_FOREACH(nchild, &n->children, entries) + node_optimise_topdown(nchild, newmap, oldmap); +} + +/* + * Optimise bottom-up over all un-matched nodes: examine all the + * children of the un-matched nodes and see which of their matches, if + * found, are under a root that's the same node as we are. + * This lets us compute the largest fraction of un-matched nodes' + * children that are in the same tree. + * If that fraction is >50%, then we consider that the subtrees are + * matched. + */ +static void +node_optimise_bottomup(const struct lowdown_node *n, + struct xmap *newmap, struct xmap *oldmap) +{ + const struct lowdown_node *nn, *on, *nnn, *maxn = NULL; + double w, maxw = 0.0, tw = 0.0; + + /* Ignore opaque nodes. */ + + if (is_opaque(n) || TAILQ_EMPTY(&n->children)) + return; + + /* Do a depth-first pre-order search. */ + + TAILQ_FOREACH(nn, &n->children, entries) { + tw += newmap->nodes[nn->id].weight; + node_optimise_bottomup(nn, newmap, oldmap); + } + + /* + * We're now at a non-leaf node. + * If we're already matched, then move on. 
+ */ + + if (newmap->nodes[n->id].match != NULL) + return; + + TAILQ_FOREACH(nn, &n->children, entries) { + if (newmap->nodes[nn->id].match == NULL) + continue; + if ((on = newmap->nodes[nn->id].match->parent) == NULL) + continue; + if (on == maxn) + continue; + if (!match_eq(n, on)) + continue; + + /* + * We've now established "on" as the parent of the + * matched node, and that "on" is equivalent. + * See what fraction of on's children are matched to our + * children. + * FIXME: this will harmlessly (except in time) look at + * the same parent multiple times. + */ + + w = 0.0; + TAILQ_FOREACH(nnn, &n->children, entries) { + if (newmap->nodes[nnn->id].match == NULL) + continue; + if (on != newmap->nodes[nnn->id].match->parent) + continue; + w += newmap->nodes[nnn->id].weight; + } + + /* Is this the highest fraction? */ + + if (w > maxw) { + maxw = w; + maxn = on; + } + } + + /* See if we found any similar sub-trees. */ + + if (maxn == NULL) + return; + + /* + * Our magic breakpoint is 50%. + * If the matched sub-tree has a greater than 50% match by + * weight, then set us as a match! + */ + + if (maxw / tw >= 0.5) { + newmap->nodes[n->id].match = maxn; + oldmap->nodes[maxn->id].match = n; + } +} + +struct lowdown_node * +lowdown_diff(const struct lowdown_node *nold, + const struct lowdown_node *nnew, size_t *maxn) +{ + struct xmap xoldmap, xnewmap; + struct xnode *xnew, *xold; + struct pnodeq pq; + struct pnode *p; + const struct lowdown_node *n, *nn; + struct lowdown_node *comp = NULL; + size_t i; + struct merger parms; + + memset(&xoldmap, 0, sizeof(struct xmap)); + memset(&xnewmap, 0, sizeof(struct xmap)); + + TAILQ_INIT(&pq); + + /* + * First, assign signatures and weights. + * See "Phase 2", sec 5.2. + */ + + if (assign_sigs(NULL, &xoldmap, nold, 0) < 0.0) + goto out; + if (assign_sigs(NULL, &xnewmap, nnew, 0) < 0.0) + goto out; + + /* Prime the priority queue with the root. 
*/ + + if (!pqueue(nnew, &xnewmap, &pq)) + goto out; + + /* + * Match-make while we have nodes in the priority queue. + * This is guaranteed to be finite. + * See "Phase 3", sec 5.2. + */ + + while ((p = TAILQ_FIRST(&pq)) != NULL) { + TAILQ_REMOVE(&pq, p, entries); + n = p->node; + free(p); + + xnew = &xnewmap.nodes[n->id]; + assert(xnew->match == NULL); + assert(xnew->optmatch == NULL); + assert(xnew->opt == 0); + + /* + * Look for candidates: if we have a matching signature, + * test for optimality. + * Highest optimality gets to be matched. + * See "Phase 3", sec. 5.2. + */ + + for (i = 0; i < xoldmap.maxid + 1; i++) { + xold = &xoldmap.nodes[i]; + if (xold->node == NULL) + continue; + if (xold->match != NULL) + continue; + if (strcmp(xnew->sig, xold->sig)) + continue; + + assert(xold->match == NULL); + candidate(xnew, &xnewmap, xold, &xoldmap); + } + + /* + * No match: enqueue children ("Phase 3" cont.). + * Ignore opaque nodes. + */ + + if (xnew->optmatch == NULL) { + if (is_opaque(n)) + continue; + TAILQ_FOREACH(nn, &n->children, entries) + if (!pqueue(nn, &xnewmap, &pq)) + goto out; + continue; + } + + /* + * Match found and is optimal. + * Now use the bottom-up and top-down (doesn't matter + * which order) algorithms. + * See "Phase 3", sec. 5.2. + */ + + assert(xnew->match == NULL); + assert(xoldmap.nodes[xnew->optmatch->id].match == NULL); + + match_down(xnew, &xnewmap, + &xoldmap.nodes[xnew->optmatch->id], &xoldmap); + match_up(xnew, &xnewmap, + &xoldmap.nodes[xnew->optmatch->id], &xoldmap); + } + + /* + * If our trees are *totally* different, we may end up in the + * situation where our root nodes are never matched. This will + * violate an invariant in node_merge() where the entry nodes + * are assumed to be matched. 
+ */ + + if (xnewmap.nodes[nnew->id].match == NULL) { + assert(nnew->type == LOWDOWN_ROOT); + assert(nold->type == LOWDOWN_ROOT); + xnew = &xnewmap.nodes[nnew->id]; + xold = &xoldmap.nodes[nold->id]; + assert(xold->match == NULL); + xnew->match = xold->node; + xold->match = xnew->node; + } + + /* + * Following the above, make sure that our LOWDOWN_DOC_HEADER + * nodes are also matched, because they are fixed in the tree. + */ + + n = TAILQ_FIRST(&nnew->children); + nn = TAILQ_FIRST(&nold->children); + if (n != NULL && nn != NULL && + n->type == LOWDOWN_DOC_HEADER && + nn->type == LOWDOWN_DOC_HEADER) { + xnew = &xnewmap.nodes[n->id]; + xold = &xoldmap.nodes[nn->id]; + if (xnew->match == NULL) { + xnew->match = xold->node; + xold->match = xnew->node; + } + } + + /* + * All nodes have been processed. + * Now we need to optimise, so run a "Phase 4", sec. 5.2. + * Our optimisation is nothing like the paper's. + */ + + node_optimise_topdown(nnew, &xnewmap, &xoldmap); + node_optimise_bottomup(nnew, &xnewmap, &xoldmap); + + /* + * The tree is optimal. + * Now we need to compute the delta and merge the trees. + * See "Phase 5", sec. 5.2. + */ + + memset(&parms, 0, sizeof(struct merger)); + parms.xoldmap = &xoldmap; + parms.xnewmap = &xnewmap; + comp = node_merge(nold, nnew, &parms); + + *maxn = xnewmap.maxid > xoldmap.maxid ? + xnewmap.maxid + 1 : + xoldmap.maxid + 1; + +out: + assert(comp != NULL); + while ((p = TAILQ_FIRST(&pq)) != NULL) { + TAILQ_REMOVE(&pq, p, entries); + free(p); + } + free(xoldmap.nodes); + free(xnewmap.nodes); + return comp; +} diff --git a/diff.o b/diff.o Binary files differ. 
diff --git a/document.c b/document.c @@ -0,0 +1,4738 @@ +/* $Id$ */ +/* + * Copyright (c) 2008, Natacha Porté + * Copyright (c) 2011, Vicent Martí + * Copyright (c) 2014, Xavier Mendez, Devin Torres and the Hoedown authors + * Copyright (c) 2016--2021 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdint.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "lowdown.h" +#include "extern.h" + +/* + * Extra list-item flag defined locally rather than in enum hlist_fl. + * Make sure these are larger than enum hlist_fl (presumably because + * both end up in the same flag word; confirm against parse_listitem()). + */ +#define HLIST_LI_END (1 << 6) /* End of list item. */ + +/* + * Mask of all list item types. + */ +#define HLIST_FL_MASK (HLIST_FL_DEF | \ + HLIST_FL_ORDERED | \ + HLIST_FL_UNORDERED) + +/* + * Reference to a link, kept as one entry of a link_refq queue. + * All four buffers are released by free_link_refs(). + * An entry with a NULL name is the one find_link_ref() returns for a + * zero-length lookup key. + */ +struct link_ref { + struct lowdown_buf *name; /* id of link (or NULL) */ + struct lowdown_buf *link; /* link address */ + struct lowdown_buf *title; /* optional title */ + struct lowdown_buf *attrs; /* optional attributes */ + TAILQ_ENTRY(link_ref) entries; +}; + +TAILQ_HEAD(link_refq, link_ref); + +/* + * Reference to a footnote. 
This keeps track of all footnote + * definitions and whether there's both a definition and reference. + * Both embedded buffers are released by free_foot_refq(). + */ +struct foot_ref { + size_t num; /* if used, the order */ + struct lowdown_node *ref; /* if used, the reference */ + struct lowdown_buf name; /* identifier */ + struct lowdown_buf contents; /* definition */ + TAILQ_ENTRY(foot_ref) entries; +}; + +TAILQ_HEAD(foot_refq, foot_ref); + +/* + * Parser state threaded through the parsing functions in this file. + */ +struct lowdown_doc { + struct link_refq refq; /* all internal references */ + struct foot_refq footq; /* all footnotes */ + size_t foots; /* # of used footnotes */ + int active_char[256]; /* jump table: byte to handler index, 0 if none */ + unsigned int ext_flags; /* options */ + int in_link_body; /* parsing link body */ + int in_footnote; /* prevent nested */ + size_t nodes; /* number of nodes (also the next node id) */ + struct lowdown_node *current; /* current node */ + struct lowdown_metaq *metaq; /* raw metadata key/values */ + size_t depth; /* current parse tree depth */ + size_t maxdepth; /* max parse tree depth (zero disables the check) */ + char **meta; /* primer metadata */ + size_t metasz; /* size of meta */ + char **metaovr; /* override metadata */ + size_t metaovrsz; /* size of metaovr */ +}; + +/* + * Function pointer to render active chars, where "data" is the pointer + * of the beginning of the span and "offset" is the number of valid + * chars before data. + * The last argument is the number of chars available from data onward. + * Returns the number of chars taken care of, or <0 on failure. + * A return of zero means no action was taken: parse_inline() then + * treats the active char as ordinary text. 
+ */ +typedef ssize_t (*char_trigger)(struct lowdown_doc *, char *, size_t, size_t); + +static ssize_t char_emphasis(struct lowdown_doc *, char *, size_t, size_t); +static ssize_t char_linebreak(struct lowdown_doc *, char *, size_t, size_t); +static ssize_t char_codespan(struct lowdown_doc *, char *, size_t, size_t); +static ssize_t char_escape(struct lowdown_doc *, char *, size_t, size_t); +static ssize_t char_entity(struct lowdown_doc *, char *, size_t, size_t); +static ssize_t char_langle_tag(struct lowdown_doc *, char *, size_t, size_t); +static ssize_t char_autolink_url(struct lowdown_doc *, char *, size_t, size_t); +static ssize_t char_autolink_email(struct lowdown_doc *, char *, size_t, size_t); +static ssize_t char_autolink_www(struct lowdown_doc *, char *, size_t, size_t); +static ssize_t char_link(struct lowdown_doc *, char *, size_t, size_t); +static ssize_t char_image(struct lowdown_doc *, char *, size_t, size_t); +static ssize_t char_superscript(struct lowdown_doc *, char *, size_t, size_t); +static ssize_t char_math(struct lowdown_doc *, char *, size_t, size_t); + +/* + * Indices into markdown_char_ptrs, as looked up through the + * active_char jump table in parse_inline(). + * The order here must stay in step with markdown_char_ptrs. + */ +enum markdown_char_t { + MD_CHAR_NONE = 0, + MD_CHAR_EMPHASIS, + MD_CHAR_CODESPAN, + MD_CHAR_LINEBREAK, + MD_CHAR_LINK, + MD_CHAR_IMAGE, + MD_CHAR_LANGLE, + MD_CHAR_ESCAPE, + MD_CHAR_ENTITY, + MD_CHAR_AUTOLINK_URL, + MD_CHAR_AUTOLINK_EMAIL, + MD_CHAR_AUTOLINK_WWW, + MD_CHAR_SUPERSCRIPT, + MD_CHAR_QUOTE, + MD_CHAR_MATH +}; + +/* + * Handlers indexed by enum markdown_char_t. + * The MD_CHAR_NONE and MD_CHAR_QUOTE slots hold NULL, i.e., there is + * no handler to call for them. + */ +static const char_trigger markdown_char_ptrs[] = { + NULL, + &char_emphasis, + &char_codespan, + &char_linebreak, + &char_link, + &char_image, + &char_langle_tag, + &char_escape, + &char_entity, + &char_autolink_url, + &char_autolink_email, + &char_autolink_www, + &char_superscript, + NULL, + &char_math +}; + +static int +parse_block(struct lowdown_doc *, char *, size_t); + +static ssize_t +parse_listitem(struct lowdown_buf *, struct lowdown_doc *, + char *, size_t, enum hlist_fl *, size_t); + +/* + * Add a node to the parse stack, or retrieve a current node if + * 
requesting multiple LOWDOWN_NORMAL_TEXTs in sequence. Returns the + * node, initialised to the given type, after adjusting the parse + * position. Returns NULL on memory allocation failure. + */ +static struct lowdown_node * +pushnode(struct lowdown_doc *doc, enum lowdown_rndrt t) +{ + struct lowdown_node *n; + + /* + * Special case: if we're pushing a NORMAL_TEXT node, see if one + * already exists and return that. This means that each push + * for text nodes should be careful to use hbuf_push() instead + * of hbuf_create() when adding text content. + */ + + if (t == LOWDOWN_NORMAL_TEXT && doc->current != NULL) { + n = TAILQ_LAST(&doc->current->children, lowdown_nodeq); + if (n != NULL && n->type == t) { + doc->depth++; + doc->current = n; + return n; + } + } + + /* New node. */ + + if ((doc->depth++ > doc->maxdepth) && doc->maxdepth) + return NULL; + if ((n = calloc(1, sizeof(struct lowdown_node))) == NULL) + return NULL; + + n->id = doc->nodes++; + n->type = t; + n->parent = doc->current; + TAILQ_INIT(&n->children); + if (n->parent != NULL) + TAILQ_INSERT_TAIL(&n->parent->children, n, entries); + doc->current = n; + return n; +} + +/* + * Sets a buffer with the contents of "data" of size "datasz". The + * buffer must be empty. Return FALSE on failure, TRUE on success. + */ +static int +hbuf_create(struct lowdown_buf *buf, const char *data, size_t datasz) +{ + + assert(buf->size == 0); + assert(buf->data == NULL); + memset(buf, 0, sizeof(struct lowdown_buf)); + if (datasz) { + if ((buf->data = malloc(datasz)) == NULL) + return 0; + buf->unit = 1; + buf->size = buf->maxsize = datasz; + memcpy(buf->data, data, datasz); + } + return 1; +} + +/* + * See hbuf_create(). + */ +static int +hbuf_createb(struct lowdown_buf *buf, const struct lowdown_buf *nbuf) +{ + + return hbuf_create(buf, nbuf->data, nbuf->size); +} + +/* + * Pushes data into the buffer, which is initialised if empty. Return + * FALSE on failure, TRUE on success. 
+ */ +static int +hbuf_push(struct lowdown_buf *buf, const char *data, size_t datasz) +{ + + if (buf->size == 0 || buf->data == NULL) + return hbuf_create(buf, data, datasz); + return hbuf_put(buf, data, datasz); +} + + +/* + * See pushnode(). + * Pops the current node on the stack, replacing it with the parent. + */ +static void +popnode(struct lowdown_doc *doc, const struct lowdown_node *n) +{ + + assert(doc->depth > 0); + doc->depth--; + assert(doc->current == n); + doc->current = doc->current->parent; +} + +/* + * Remove the backslash from a text sequence. + * Return zero on failure (memory), non-zero on success. + */ +static int +unscape_text(struct lowdown_buf *ob, struct lowdown_buf *src) +{ + size_t i, org; + + for (i = 0; i < src->size; i += 2) { + org = i; + while (i < src->size && src->data[i] != '\\') + i++; + if (i > org && + !hbuf_put(ob, src->data + org, i - org)) + return 0; + if (i + 1 >= src->size) + break; + if (!hbuf_putc(ob, src->data[i + 1])) + return 0; + } + + return 1; +} + +static struct link_ref * +find_link_ref(struct link_refq *q, char *name, size_t length) +{ + struct link_ref *ref; + + TAILQ_FOREACH(ref, q, entries) + if ((ref->name == NULL && length == 0) || + (ref->name != NULL && + ref->name->size == length && + memcmp(ref->name->data, name, length) == 0)) + return ref; + + return NULL; +} + +static void +free_link_refs(struct link_refq *q) +{ + struct link_ref *r; + + while ((r = TAILQ_FIRST(q)) != NULL) { + TAILQ_REMOVE(q, r, entries); + hbuf_free(r->link); + hbuf_free(r->name); + hbuf_free(r->title); + hbuf_free(r->attrs); + free(r); + } +} + +static void +free_foot_refq(struct foot_refq *q) +{ + struct foot_ref *ref; + + while ((ref = TAILQ_FIRST(q)) != NULL) { + TAILQ_REMOVE(q, ref, entries); + hbuf_free(&ref->contents); + hbuf_free(&ref->name); + free(ref); + } +} + +/* + * Check whether a char is a Markdown spacing char. 
+ * Right now we only consider spaces the actual space and a newline: + * tabs and carriage returns are filtered out during the preprocessing + * phase. + * If we wanted to actually be UTF-8 compliant, we should instead + * extract an Unicode codepoint from this character and check for space + * properties. + */ +static int +xisspace(int c) +{ + + return c == ' ' || c == '\n'; +} + +/* + * Returns the number of leading spaces from data starting from offset + * to size. + * If maxlen is greater than zero, only at most maxlen number of leading + * spaces will be counted. + * Otherwise, all leading spaces will be counted. + */ +static size_t +countspaces(const char *data, size_t offset, size_t size, size_t maxlen) +{ + size_t i; + + for (i = offset; i < size; i++) { + if (maxlen > 0 && i - offset == maxlen) + break; + if (data[i] != ' ') + break; + } + + return i; +} + +/* + * Replace all spacing characters in data with spaces. + * As a special case, this collapses a newline with the previous space, + * if possible. + * Return zero on failure (memory), non-zero on success. + */ +static int +replace_spacing(struct lowdown_buf *ob, const char *data, size_t size) +{ + size_t i, mark; + + if (!hbuf_grow(ob, size)) + return 0; + + for (i = 0; ; i++) { + mark = i; + while (i < size && data[i] != '\n') + i++; + if (!hbuf_put(ob, data + mark, i - mark)) + return 0; + if (i >= size) + break; + if (!(i > 0 && data[i - 1] == ' ')) + if (!hbuf_putc(ob, ' ')) + return 0; + } + + return 1; +} + +/* + * Looks for the address part of a mail autolink and '>'. + * This is less strict than the original markdown e-mail address + * matching. + */ +static size_t +is_mail_autolink(const char *data, size_t size) +{ + size_t i, nb = 0; + + /* Assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'. 
*/ + + for (i = 0; i < size; ++i) { + if (isalnum((unsigned char)data[i])) + continue; + + switch (data[i]) { + case '@': + nb++; + case '-': + case '.': + case '_': + break; + case '>': + return (nb == 1) ? i + 1 : 0; + default: + return 0; + } + } + + return 0; +} + +/* + * Returns the length of the given tag, or 0 is it's not valid. + */ +static size_t +tag_length(const char *data, size_t size, enum halink_type *ltype) +{ + size_t i, j; + + /* A valid tag can't be shorter than 3 chars. */ + + if (size < 3) + return 0; + + if (data[0] != '<') + return 0; + + /* HTML comment, laxist form. */ + + if (size > 5 && data[1] == '!' && + data[2] == '-' && data[3] == '-') { + i = 5; + while (i < size && !(data[i - 2] == '-' && + data[i - 1] == '-' && data[i] == '>')) + i++; + i++; + if (i <= size) + return i; + } + + /* + * Begins with a '<' optionally followed by '/', followed by letter or + * number. + */ + + i = (data[1] == '/') ? 2 : 1; + + if (!isalnum((unsigned char)data[i])) + return 0; + + /* Scheme test. */ + + *ltype = HALINK_NONE; + + /* Try to find the beginning of an URI. */ + + while (i < size && (isalnum((unsigned char)data[i]) || + data[i] == '.' || data[i] == '+' || data[i] == '-')) + i++; + + if (i > 1 && data[i] == '@') + if ((j = is_mail_autolink(data + i, size - i)) != 0) { + *ltype = HALINK_EMAIL; + return i + j; + } + + if (i > 2 && data[i] == ':') { + *ltype = HALINK_NORMAL; + i++; + } + + /* Completing autolink test: no spacing or ' or ". */ + + if (i >= size) + *ltype = HALINK_NONE; + else if (*ltype) { + j = i; + while (i < size) { + if (data[i] == '\\') + i += 2; + else if (data[i] == '>' || data[i] == '\'' || + data[i] == '"' || data[i] == ' ' || + data[i] == '\n') + break; + else + i++; + } + + if (i >= size) + return 0; + if (i > j && data[i] == '>') + return i + 1; + + /* One of the forbidden chars has been found. */ + + *ltype = HALINK_NONE; + } + + /* Looking for something looking like a tag end. 
*/ + + while (i < size && data[i] != '>') + i++; + if (i >= size) + return 0; + return i + 1; +} + +/* + * Parses inline markdown elements. + * This function is important because it handles raw input that we pass + * directly to the output formatter ("normal_text"). + * Return zero on failure, non-zero on success. + */ +static int +parse_inline(struct lowdown_doc *doc, char *data, size_t size) +{ + size_t i = 0, end = 0, consumed = 0; + ssize_t rc; + struct lowdown_buf work; + const int *active_char = doc->active_char; + struct lowdown_node *n; + + memset(&work, 0, sizeof(struct lowdown_buf)); + + while (i < size) { + /* Copying non-macro chars into the output. */ + + while (end < size && + active_char[(unsigned char)data[end]] == 0) + end++; + + /* Only allocate if non-empty... */ + + if (end - i > 0) { + n = pushnode(doc, LOWDOWN_NORMAL_TEXT); + if (n == NULL) + return 0; + if (!hbuf_push(&n->rndr_normal_text.text, + data + i, end - i)) + return 0; + popnode(doc, n); + } + + /* End of file? */ + + if (end >= size) + break; + + i = end; + rc = markdown_char_ptrs[ + active_char[(unsigned char)data[end]]] + (doc, data + i, i - consumed, size - i); + if (rc < 0) + return 0; + end = rc; + + /* Check if no action from the callback. */ + + if (end == 0) { + end = i + 1; + continue; + } + + i += end; + end = consumed = i; + } + + return 1; +} + +/* + * Returns whether special char at data[loc] is escaped by '\\'. + */ +static int +is_escaped(const char *data, size_t loc) +{ + size_t i = loc; + + while (i >= 1 && data[i - 1] == '\\') + i--; + + /* Odd numbers of backslashes escapes data[loc]. */ + + return (loc - i) % 2; +} + +/* + * Looks for the next emph char, skipping other constructs. 
+ */ +static size_t +find_emph_char(const char *data, size_t size, char c) +{ + size_t i = 0, span_nb, bt, tmp_i; + char cc; + + while (i < size) { + while (i < size && data[i] != c && + data[i] != '[' && data[i] != '`') + i++; + + if (i == size) + return 0; + + /* Not counting escaped chars. */ + + if (is_escaped(data, i)) { + i++; + continue; + } + + if (data[i] == c) + return i; + + /* Skipping a codespan. */ + + if (data[i] == '`') { + span_nb = 0; + tmp_i = 0; + + /* Counting the number of opening backticks. */ + + while (i < size && data[i] == '`') { + i++; + span_nb++; + } + + if (i >= size) + return 0; + + /* Finding the matching closing sequence. */ + + bt = 0; + while (i < size && bt < span_nb) { + if (!tmp_i && data[i] == c) + tmp_i = i; + + if (data[i] == '`') + bt++; + else + bt = 0; + i++; + } + + /* + * Not a well-formed codespan; use found + * matching emph char. + */ + if (bt < span_nb && i >= size) + return tmp_i; + } else if (data[i] == '[') { + tmp_i = 0; + + /* Skipping a link. */ + + i++; + while (i < size && data[i] != ']') { + if (!tmp_i && data[i] == c) + tmp_i = i; + i++; + } + + i++; + while (i < size && xisspace(data[i])) + i++; + + if (i >= size) + return tmp_i; + + switch (data[i]) { + case '[': + cc = ']'; + break; + case '(': + cc = ')'; + break; + default: + if (tmp_i) + return tmp_i; + else + continue; + } + + i++; + while (i < size && data[i] != cc) { + if (!tmp_i && data[i] == c) + tmp_i = i; + i++; + } + + if (i >= size) + return tmp_i; + + i++; + } + } + + return 0; +} + +/* + * Parsing single emphase. + * Closed by a symbol not preceded by spacing and not followed by + * symbol. + * Return 0 if not an emphasis, <0 on failure, >0 on success. + */ +static ssize_t +parse_emph1(struct lowdown_doc *doc, char *data, size_t size, char c) +{ + size_t i = 0, len; + struct lowdown_node *n; + + /* Skipping one symbol if coming from emph3. 
*/ + + if (size > 1 && data[0] == c && data[1] == c) + i = 1; + + while (i < size) { + len = find_emph_char(data + i, size - i, c); + if (!len) + return 0; + i += len; + if (i >= size) + return 0; + + if (data[i] == c && !xisspace(data[i - 1])) { + if ((doc->ext_flags & LOWDOWN_NOINTEM) && + i + 1 < size && + isalnum((unsigned char)data[i + 1])) + continue; + + n = pushnode(doc, LOWDOWN_EMPHASIS); + if (n == NULL) + return -1; + if (!parse_inline(doc, data, i)) + return -1; + popnode(doc, n); + return i + 1; + } + } + + return 0; +} + +/* + * Parsing single emphase. + * Return 0 if not an emphasis, <0 on failure, >0 on success. + */ +static ssize_t +parse_emph2(struct lowdown_doc *doc, char *data, size_t size, char c) +{ + size_t i = 0, len; + struct lowdown_node *n; + enum lowdown_rndrt t; + + while (i < size) { + len = find_emph_char(data + i, size - i, c); + if (len == 0) + return 0; + i += len; + + if (i + 1 < size && data[i] == c && + data[i + 1] == c && i && + !xisspace(data[i - 1])) { + if (c == '~') + t = LOWDOWN_STRIKETHROUGH; + else if (c == '=') + t = LOWDOWN_HIGHLIGHT; + else + t = LOWDOWN_DOUBLE_EMPHASIS; + + if ((n = pushnode(doc, t)) == NULL) + return -1; + if (!parse_inline(doc, data, i)) + return -1; + popnode(doc, n); + return i + 2; + } + i++; + } + + return 0; +} + +/* + * Parsing single emphase + * Finds the first closing tag, and delegates to the other emph. + * Return 0 if not an emphasis, <0 on failure, >0 on success. + */ +static size_t +parse_emph3(struct lowdown_doc *doc, char *data, size_t size, char c) +{ + size_t i = 0, len; + ssize_t rc; + struct lowdown_node *n; + + while (i < size) { + len = find_emph_char(data + i, size - i, c); + if (len == 0) + return 0; + i += len; + + /* Skip spacing preceded symbols. */ + + if (data[i] != c || xisspace(data[i - 1])) + continue; + + /* Case for triple, double, and single asterisk. 
*/ + + if (i + 2 < size && data[i + 1] == c && + data[i + 2] == c) { + n = pushnode(doc, LOWDOWN_TRIPLE_EMPHASIS); + if (n == NULL) + return -1; + if (!parse_inline(doc, data, i)) + return -1; + popnode(doc, n); + return i + 3; + } else if (i + 1 < size && data[i + 1] == c) { + rc = parse_emph1(doc, data - 2, size + 2, c); + if (rc < 0) + return -1; + assert(rc == 0 || rc >= 2); + return rc == 0 ? 0 : rc - 2; + } else { + rc = parse_emph2(doc, data - 1, size + 1, c); + if (rc < 0) + return -1; + return rc == 0 ? 0 : rc - 1; + } + } + + return 0; +} + +/* + * Parses a math span until the given ending delimiter. + * Return 0 if not math, <0 on failure, >0 on success. + */ +static ssize_t +parse_math(struct lowdown_doc *doc, char *data, size_t offset, + size_t size, const char *end, size_t delimsz, int blockmode) +{ + size_t i; + struct lowdown_node *n; + + /* + * Find ending delimiter. + * All text within the equation is opaque, so we don't need to + * care about embedded macros. + */ + + for (i = delimsz; ; i++) { + while (i < size && data[i] != end[0]) + i++; + if (i >= size) + return 0; + if (!is_escaped(data, i) && !(i + delimsz > size) && + memcmp(data + i, end, delimsz) == 0) + break; + } + + i += delimsz; + + if (!(doc->ext_flags & LOWDOWN_MATH)) { + n = pushnode(doc, LOWDOWN_NORMAL_TEXT); + if (n == NULL) + return -1; + if (!hbuf_push(&n->rndr_normal_text.text, data, i)) + return -1; + popnode(doc, n); + return i; + } + + n = pushnode(doc, LOWDOWN_MATH_BLOCK); + if (n == NULL) + return -1; + if (!hbuf_create(&n->rndr_math.text, + data + delimsz, i - 2 * delimsz)) + return -1; + n->rndr_math.blockmode = blockmode; + popnode(doc, n); + return i; +} + +/* + * Single and double emphasis parsing. 
+ */ +static ssize_t +char_emphasis(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + char c = data[0]; + ssize_t ret; + + if (doc->ext_flags & LOWDOWN_NOINTEM) + if (offset > 0 && !xisspace(data[-1]) && + data[-1] != '>' && data[-1] != '(') + return 0; + + /* + * Spacing cannot follow an opening emphasis: strikethrough and + * highlight only takes '~~'. + * FIXME: don't depend upon the "ret =" as the last part of an + * "or" chain---it's hard to read. + */ + + if (size > 2 && data[1] != c) { + if (c == '~' || c == '=' || xisspace(data[1]) || + (ret = parse_emph1 + (doc, data + 1, size - 1, c)) == 0) + return 0; + return ret > 0 ? ret + 1 : ret; + } + + if (size > 3 && data[1] == c && data[2] != c) { + if (xisspace(data[2]) || + (ret = parse_emph2 + (doc, data + 2, size - 2, c)) == 0) + return 0; + return ret > 0 ? ret + 2 : ret; + } + + if (size > 4 && data[1] == c && data[2] == c && data[3] != c) { + if (c == '~' || c == '=' || xisspace(data[3]) || + (ret = parse_emph3 + (doc, data + 3, size - 3, c)) == 0) + return 0; + return ret > 0 ? ret + 3 : ret; + } + + return 0; +} + +/* + * '\n' preceded by two spaces (assuming linebreak != 0) + */ +static ssize_t +char_linebreak(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + struct lowdown_node *n; + size_t w; + struct lowdown_buf *b; + + if (offset < 2 || data[-1] != ' ' || data[-2] != ' ') + return 0; + + /* Removing the last space from nodes. */ + + assert(doc->current != NULL); + n = TAILQ_LAST(&doc->current->children, lowdown_nodeq); + assert(n != NULL && LOWDOWN_NORMAL_TEXT == n->type); + b = &n->rndr_normal_text.text; + + while (b->size && b->data[b->size - 1] == ' ') + b->size--; + + /* + * Swallow leading white-space of next line. + * XXX: is this just CommonMark? 
+ */ + + for (w = 1; w < size; w++) + if (data[w] != ' ') + break; + + if ((n = pushnode(doc, LOWDOWN_LINEBREAK)) == NULL) + return -1; + popnode(doc, n); + return w; +} + +/* + * '`' parsing a code span (assuming codespan != 0) + */ +static ssize_t +char_codespan(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + struct lowdown_buf work; + struct lowdown_node *n; + size_t end, nb = 0, i, f_begin, f_end; + + memset(&work, 0, sizeof(struct lowdown_buf)); + + /* Counting the number of backticks in the delimiter. */ + + while (nb < size && data[nb] == '`') + nb++; + + /* Finding the next delimiter. */ + + i = 0; + for (end = nb; end < size && i < nb; end++) { + if (data[end] == '`') + i++; + else + i = 0; + } + + if (i < nb && end >= size) + return 0; /* no matching delimiter */ + + /* Trimming outside spaces. */ + + f_begin = countspaces(data, nb, end, 0); + + f_end = end - nb; + while (f_end > nb && data[f_end-1] == ' ') + f_end--; + + /* Real code span. */ + + if ((n = pushnode(doc, LOWDOWN_CODESPAN)) == NULL) + return -1; + + if (f_begin < f_end) { + work.data = data + f_begin; + work.size = f_end - f_begin; + if (!hbuf_createb(&n->rndr_codespan.text, &work)) + return -1; + } + + popnode(doc, n); + + return end; +} + +/* + * '\\' backslash escape + */ +static ssize_t +char_escape(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + static const char *escape_chars = + "\\`*_{}[]()#+-.!:|&<>^~=\"$"; + struct lowdown_buf work; + size_t w; + ssize_t ret; + const char *end; + struct lowdown_node *n; + + memset(&work, 0, sizeof(struct lowdown_buf)); + + if (size > 1) { + if (data[1] == '\\' && + (doc->ext_flags & LOWDOWN_MATH) && + size > 2 && + (data[2] == '(' || data[2] == '[')) { + end = (data[2] == '[') ? "\\\\]" : "\\\\)"; + ret = parse_math(doc, data, offset, + size, end, 3, data[2] == '['); + if (ret != 0) + return ret; + } + + /* Swallow leading white-space of next line. 
*/ + + if (LOWDOWN_COMMONMARK & doc->ext_flags && + data[1] == '\n') { + for (w = 2; w < size; w++) + if (data[w] != ' ') + break; + n = pushnode(doc, LOWDOWN_LINEBREAK); + if (n == NULL) + return -1; + popnode(doc, n); + return w; + } + + if (strchr(escape_chars, data[1]) == NULL) + return 0; + if ((n = pushnode(doc, LOWDOWN_NORMAL_TEXT)) == NULL) + return -1; + if (!hbuf_push(&n->rndr_normal_text.text, data + 1, 1)) + return -1; + popnode(doc, n); + } else if (size == 1) { + if ((n = pushnode(doc, LOWDOWN_NORMAL_TEXT)) == NULL) + return -1; + if (!hbuf_push(&n->rndr_normal_text.text, data, 1)) + return -1; + popnode(doc, n); + } + + return 2; +} + +/* + * '&': parse entity, or escape if it's not an entity. + * Valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; + */ +static ssize_t +char_entity(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + size_t end = 1; + struct lowdown_node *n; + + if (end < size && data[end] == '#') + end++; + + while (end < size && isalnum((unsigned char)data[end])) + end++; + + if (end < size && data[end] == ';') + end++; /* real entity */ + else + return 0; /* lone '&' */ + + if ((n = pushnode(doc, LOWDOWN_ENTITY)) == NULL) + return -1; + if (!hbuf_create(&n->rndr_entity.text, data, end)) + return -1; + popnode(doc, n); + return end; +} + +/* + * '<': parse link when tags or autolinks are allowed. 
+ */ +static ssize_t +char_langle_tag(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + struct lowdown_buf work; + struct lowdown_buf *u_link = NULL; + enum halink_type altype = HALINK_NONE; + size_t end = tag_length(data, size, &altype); + int ret = 0; + struct lowdown_node *n; + + memset(&work, 0, sizeof(struct lowdown_buf)); + + work.data = data; + work.size = end; + + if (end > 2) { + if (altype != HALINK_NONE) { + if ((u_link = hbuf_new(64)) == NULL) + goto err; + work.data = data + 1; + work.size = end - 2; + if (!unscape_text(u_link, &work)) + goto err; + + n = pushnode(doc, LOWDOWN_LINK_AUTO); + if (n == NULL) + goto err; + n->rndr_autolink.type = altype; + if (!hbuf_createb(&n->rndr_autolink.link, u_link)) + goto err; + popnode(doc, n); + } else { + n = pushnode(doc, LOWDOWN_RAW_HTML); + if (n == NULL) + goto err; + if (!hbuf_create + (&n->rndr_raw_html.text, data, end)) + goto err; + popnode(doc, n); + } + ret = 1; + } + + hbuf_free(u_link); + return !ret ? 0 : end; +err: + hbuf_free(u_link); + return -1; +} + +/* + * 'w': parse URL when autolinking is allowed (from "www"). 
+ */ +static ssize_t +char_autolink_www(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + struct lowdown_buf *link = NULL, *link_url = NULL; + size_t link_len, rewind; + struct lowdown_node *n; + ssize_t ret; + + if (doc->in_link_body) + return 0; + if ((link = hbuf_new(64)) == NULL) + goto err; + ret = halink_www(&rewind, link, data, offset, size); + if (ret < 0) + goto err; + link_len = ret; + + if (link_len > 0) { + if ((link_url = hbuf_new(64)) == NULL) + goto err; + if (!HBUF_PUTSL(link_url, "http://")) + goto err; + if (!hbuf_put(link_url, link->data, link->size)) + goto err; + + if (doc->current && + (n = TAILQ_LAST(&doc->current->children, + lowdown_nodeq)) != NULL && + n->type == LOWDOWN_NORMAL_TEXT) { + if (n->rndr_normal_text.text.size > rewind) + n->rndr_normal_text.text.size -= rewind; + else + n->rndr_normal_text.text.size = 0; + } + + if ((n = pushnode(doc, LOWDOWN_LINK_AUTO)) == NULL) + goto err; + n->rndr_autolink.type = HALINK_NORMAL; + if (!hbuf_createb(&n->rndr_autolink.link, link_url)) + goto err; + popnode(doc, n); + } + + hbuf_free(link); + hbuf_free(link_url); + return link_len; +err: + hbuf_free(link); + hbuf_free(link_url); + return -1; +} + +/* + * '@': parse email when autolinking is allowed (from the at sign). 
+ */ +static ssize_t +char_autolink_email(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + struct lowdown_buf *link = NULL; + size_t link_len, rewind; + ssize_t ret; + struct lowdown_node *n; + + if (doc->in_link_body) + return 0; + if ((link = hbuf_new(64)) == NULL) + goto err; + ret = halink_email(&rewind, link, data, offset, size); + if (ret < 0) + goto err; + link_len = ret; + + if (link_len > 0) { + if (doc->current && + (n = TAILQ_LAST(&doc->current->children, + lowdown_nodeq)) != NULL && + n->type == LOWDOWN_NORMAL_TEXT) { + if (n->rndr_normal_text.text.size > rewind) + n->rndr_normal_text.text.size -= rewind; + else + n->rndr_normal_text.text.size = 0; + } + + if ((n = pushnode(doc, LOWDOWN_LINK_AUTO)) == NULL) + goto err; + n->rndr_autolink.type = HALINK_EMAIL; + if (!hbuf_createb(&n->rndr_autolink.link, link)) + goto err; + popnode(doc, n); + } + + hbuf_free(link); + return link_len; +err: + hbuf_free(link); + return -1; +} + +/* + * ':': parse URL when autolinking is allowed (from the schema). 
+ */ +static ssize_t +char_autolink_url(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + struct lowdown_buf *link = NULL; + size_t link_len, rewind; + struct lowdown_node *n; + ssize_t ret; + + if (doc->in_link_body) + return 0; + if ((link = hbuf_new(64)) == NULL) + goto err; + ret = halink_url(&rewind, link, data, offset, size); + if (ret < 0) + goto err; + link_len = ret; + + if (link_len > 0) { + if (doc->current && + (n = TAILQ_LAST(&doc->current->children, + lowdown_nodeq)) != NULL && + n->type == LOWDOWN_NORMAL_TEXT) { + if (n->rndr_normal_text.text.size > rewind) + n->rndr_normal_text.text.size -= rewind; + else + n->rndr_normal_text.text.size = 0; + } + + if ((n = pushnode(doc, LOWDOWN_LINK_AUTO)) == NULL) + goto err; + n->rndr_autolink.type = HALINK_NORMAL; + if (!hbuf_createb(&n->rndr_autolink.link, link)) + goto err; + popnode(doc, n); + } + + hbuf_free(link); + return link_len; +err: + hbuf_free(link); + return -1; +} + +/* + * '!': parse an image. + */ +static ssize_t +char_image(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + ssize_t ret; + + if (size < 2 || data[1] != '[') + return 0; + + ret = char_link(doc, data + 1, offset + 1, size - 1); + return ret <= 0 ? ret : ret + 1; +} + +/* + * Parse extended attributes from the buffer "data". The buffer should + * not have any enclosing characters, e.g., { foo }. Return 0 on + * failure or position of *next* word. + */ +static size_t +parse_ext_attrs(const char *data, size_t size, + struct lowdown_buf **attrid, + struct lowdown_buf **attrcls, + struct lowdown_buf **attrwidth, + struct lowdown_buf **attrheight) +{ + size_t word_b, word_e; + + word_b = 0; + + while (word_b < size) { + while (word_b < size && data[word_b] == ' ') + word_b++; + word_e = word_b; + while (word_e < size && data[word_e] != ' ') + word_e++; + + /* Classes. 
*/ + + if (attrid != NULL && + word_e > word_b + 1 && + data[word_b] == '#') { + if (*attrid == NULL && + (*attrid = hbuf_new(64)) == NULL) + return 0; + hbuf_truncate(*attrid); + if (!hbuf_put(*attrid, + data + word_b + 1, word_e - word_b - 1)) + return 0; + } + + if (attrwidth != NULL && + word_e > word_b + 7 && + strncasecmp(&data[word_b], "width=", 6) == 0) { + if (*attrwidth == NULL && + (*attrwidth = hbuf_new(64)) == NULL) + return 0; + hbuf_truncate(*attrwidth); + if (!hbuf_put(*attrwidth, + data + word_b + 6, word_e - word_b - 6)) + return 0; + } + if (attrheight != NULL && + word_e > word_b + 8 && + strncasecmp(&data[word_b], "height=", 7) == 0) { + if (*attrheight == NULL && + (*attrheight = hbuf_new(64)) == NULL) + return 0; + hbuf_truncate(*attrheight); + if (!hbuf_put(*attrheight, + data + word_b + 7, word_e - word_b - 7)) + return 0; + } + + if (attrcls != NULL && + word_e > word_b + 1 && + data[word_b] == '.') { + if (*attrcls != NULL && + !hbuf_putc(*attrcls, ' ')) + return 0; + if (*attrcls == NULL && + (*attrcls = hbuf_new(64)) == NULL) + return 0; + if (!hbuf_put(*attrcls, + data + word_b + 1, word_e - word_b - 1)) + return 0; + } + word_b = word_e + 1; + } + + return word_b; +} + +/* + * Parse a header's extended attributes. Return FALSE on failure, TRUE + * on success. + */ +static int +parse_header_ext_attrs(struct lowdown_node *n) +{ + struct lowdown_node *nn; + struct lowdown_buf *b, *attrid = NULL, *attrcls = NULL; + size_t i; + int rc = 0; + + /* + * The last node on the line must be non-empty normal text and + * must end with a '}'. + */ + + nn = TAILQ_LAST(&n->children, lowdown_nodeq); + if (nn == NULL || + nn->type != LOWDOWN_NORMAL_TEXT || + nn->rndr_normal_text.text.size == 0 || + nn->rndr_normal_text.text.data + [nn->rndr_normal_text.text.size - 1] != '}') + return 1; + + /* Scan from the trailing '}' to the opening '{'. 
*/ + + b = &nn->rndr_normal_text.text; + assert(b->size && b->data[b->size - 1] == '}'); + for (i = b->size - 1; i > 0; i--) + if (b->data[i] == '{') + break; + if (b->data[i] != '{') + return 1; + + /* Parse the extended attributes. */ + + if (!parse_ext_attrs(&b->data[i + 1], b->size - i - 2, + &attrid, &attrcls, NULL, NULL)) + goto out; + + if (attrid != NULL && + !hbuf_createb(&n->rndr_header.attr_id, attrid)) + goto out; + if (attrcls != NULL && + !hbuf_createb(&n->rndr_header.attr_cls, attrcls)) + goto out; + + b->size = i; + while (b->size && b->data[b->size - 1] == ' ') + b->size--; + + /* Is there nothing left? */ + + if (b->size == 0) { + TAILQ_REMOVE(&n->children, nn, entries); + lowdown_node_free(nn); + } + + rc = 1; +out: + hbuf_free(attrid); + hbuf_free(attrcls); + return rc; +} + +/* + * '[': parsing a link, footnote, metadata, or image. + */ +static ssize_t +char_link(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + struct lowdown_buf *content = NULL, *link = NULL, + *title = NULL, *u_link = NULL, + *dims = NULL, *idp = NULL, + *linkp = NULL, *titlep = NULL, + *attrcls = NULL, *attrid = NULL, + *attrwidth = NULL, *attrheight = NULL; + size_t i = 1, j, txt_e, link_b = 0, link_e = 0, + title_b = 0, title_e = 0, nb_p, + dims_b = 0, dims_e = 0; + int ret = 0, in_title = 0, qtype = 0, + is_img, is_footnote, is_metadata; + struct lowdown_buf id; + struct link_ref *lr = NULL; + struct foot_ref *fr; + struct lowdown_node *n; + struct lowdown_meta *m; + + is_img = offset && data[-1] == '!' && + !is_escaped(data - offset, offset - 1); + is_footnote = (doc->ext_flags & LOWDOWN_FOOTNOTES) && + data[1] == '^'; + is_metadata = (doc->ext_flags & LOWDOWN_METADATA) && + data[1] == '%'; + + /* Looking for the matching closing bracket. 
*/ + + i += find_emph_char(data + i, size - i, ']'); + txt_e = i; + + if (i < size && data[i] == ']') + i++; + else + goto cleanup; + + /* + * If we start as an image then change into metadata or a + * footnote, make sure to emit the exclamation mark. + */ + + if (is_img && (is_footnote || is_metadata)) { + n = pushnode(doc, LOWDOWN_NORMAL_TEXT); + if (n == NULL) + goto err; + if (!hbuf_push(&n->rndr_normal_text.text, &data[-1], 1)) + goto err; + popnode(doc, n); + } + + /* + * Footnote (in footer): look up footnote by its key in our + * queue of footnotes. This queue was created in the first pass + * of the compiler. If we've already listed the footnote, don't + * render it twice. Don't allow embedded footnotes as well. + */ + + if (is_footnote) { + memset(&id, 0, sizeof(struct lowdown_buf)); + if (txt_e < 3) + goto cleanup; + id.data = data + 2; + id.size = txt_e - 2; + + TAILQ_FOREACH(fr, &doc->footq, entries) + if (hbuf_eq(&fr->name, &id)) + break; + + /* Override. */ + + if (doc->in_footnote) + fr = NULL; + + /* + * Mark footnote used. If it's NULL, then there was no + * footnote found. If it is NULL and the reference is + * defined, then we've already registered the footnote. + * XXX: Markdown, as is, can only use one footnote + * reference per definition. This is stupid. + */ + + if (fr != NULL && fr->ref == NULL) { + n = pushnode(doc, LOWDOWN_FOOTNOTE); + if (n == NULL) + goto err; + fr->num = ++doc->foots; + fr->ref = n; + assert(doc->in_footnote == 0); + doc->in_footnote = 1; + if (!parse_block(doc, + fr->contents.data, fr->contents.size)) + goto err; + assert(doc->in_footnote); + doc->in_footnote = 0; + } else { + n = pushnode(doc, LOWDOWN_NORMAL_TEXT); + if (n == NULL) + goto err; + if (!hbuf_push(&n->rndr_normal_text.text, + data, txt_e + 1)) + goto err; + } + + popnode(doc, n); + ret = 1; + goto cleanup; + } + + /* + * Metadata: simply copy the variable (if found) into our + * stream. 
It's raw text, so we need to pass it into our + * "normal text" formatter. + */ + + if (is_metadata) { + memset(&id, 0, sizeof(struct lowdown_buf)); + if (txt_e < 3) + goto cleanup; + id.data = data + 2; + id.size = txt_e - 2; + + /* FIXME: slow O(n). */ + + TAILQ_FOREACH(m, doc->metaq, entries) { + if (!hbuf_streq(&id, m->key)) + continue; + assert(m->value != NULL); + n = pushnode(doc, LOWDOWN_NORMAL_TEXT); + if (n == NULL) + goto err; + if (!hbuf_push(&n->rndr_normal_text.text, + m->value, strlen(m->value))) + goto err; + popnode(doc, n); + break; + } + + ret = 1; + goto cleanup; + } + + /* + * Skip any amount of spacing. (This is much more laxist than + * original markdown syntax.) + */ + + while (i < size && xisspace(data[i])) + i++; + + /* Different style of links (regular, reference, shortcut. */ + + if (i < size && data[i] == '(') { + i++; + while (i < size && xisspace(data[i])) + i++; + + link_b = i; + + /* + * Looking for link end: ' " ) + * Count the number of open parenthesis. + */ + + nb_p = 0; + + while (i < size) { + if (data[i] == '\\') { + i += 2; + } else if (data[i] == '(' && i != 0) { + nb_p++; + i++; + } else if (data[i] == ')') { + if (nb_p == 0) + break; + else + nb_p--; + i++; + } else if (i >= 1 && xisspace(data[i-1]) && + (data[i] == '\'' || + data[i] == '=' || + data[i] == '"')) + break; + else + i++; + } + + if (i >= size) + goto cleanup; + + link_e = i; + + /* + * We might be at the end of the link, or we might be at + * the title of the link. + * In the latter case, progress til link-end. + */ +again: + if (data[i] == '\'' || data[i] == '"') { + /* + * Looking for title end if present. + * This is a quoted part after the image. 
+ */ + + qtype = data[i]; + in_title = 1; + i++; + title_b = i; + + for ( ; i < size; i++) + if (data[i] == '\\') + i++; + else if (data[i] == qtype) + in_title = 0; + else if ((data[i] == '=') && !in_title) + break; + else if ((data[i] == ')') && !in_title) + break; + + if (i >= size) + goto cleanup; + + /* Skipping spacing after title. */ + + title_e = i - 1; + while (title_e > title_b && + xisspace(data[title_e])) + title_e--; + + /* Checking for closing quote presence. */ + + if (data[title_e] != '\'' && + data[title_e] != '"') { + title_b = title_e = 0; + link_e = i; + } + + /* + * If we're followed by a dimension string, then + * jump back into the parsing engine for it. + */ + + if (data[i] == '=') + goto again; + } else if (data[i] == '=') { + dims_b = ++i; + for ( ; i < size; i++) + if (data[i] == '\\') + i++; + else if ('\'' == data[i] || '"' == data[i]) + break; + else if (data[i] == ')') + break; + + if (i >= size) + goto cleanup; + + /* Skipping spacing after dimensions. */ + + dims_e = i; + while (dims_e > dims_b && + xisspace(data[dims_e])) + dims_e--; + + /* + * If we're followed by a title string, then + * jump back into the parsing engine for it. + */ + + if (data[i] == '"' || data[i] == '\'') + goto again; + } + + /* Remove spacing at the end of the link. */ + + while (link_e > link_b && xisspace(data[link_e - 1])) + link_e--; + + /* Remove optional angle brackets around the link. 
*/ + + if (data[link_b] == '<' && data[link_e - 1] == '>') { + link_b++; + link_e--; + } + + /* building escaped link and title */ + if (link_e > link_b) { + link = linkp = hbuf_new(64); + if (linkp == NULL) + goto err; + if (!hbuf_put(link, + data + link_b, link_e - link_b)) + goto err; + } + + if (title_e > title_b) { + title = titlep = hbuf_new(64); + if (titlep == NULL) + goto err; + if (!hbuf_put(title, + data + title_b, title_e - title_b)) + goto err; + } + + if (dims_e > dims_b) { + if ((dims = hbuf_new(64)) == NULL) + goto err; + if (!hbuf_put(dims, + data + dims_b, dims_e - dims_b)) + goto err; + } + + i++; + } else if (i < size && data[i] == '[') { + if ((idp = hbuf_new(64)) == NULL) + goto err; + + /* Looking for the id. */ + + i++; + link_b = i; + while (i < size && data[i] != ']') + i++; + if (i >= size) + goto cleanup; + link_e = i; + + /* Finding the link_ref. */ + + if (link_b == link_e) { + if (!replace_spacing + (idp, data + 1, txt_e - 1)) + goto err; + } else + if (!hbuf_put(idp, + data + link_b, link_e - link_b)) + goto err; + + lr = find_link_ref(&doc->refq, idp->data, idp->size); + if (lr == NULL) + goto cleanup; + + /* Keeping link and title from link_ref. */ + + link = lr->link; + title = lr->title; + if (lr->attrs != NULL && parse_ext_attrs + (lr->attrs->data, lr->attrs->size, + &attrid, &attrcls, &attrwidth, &attrheight) == 0) + goto err; + i++; + } else { + /* + * Shortcut reference style link. + */ + if ((idp = hbuf_new(64)) == NULL) + goto err; + + /* Crafting the id. */ + + if (!replace_spacing(idp, data + 1, txt_e - 1)) + goto err; + + /* Finding the link_ref. */ + + lr = find_link_ref(&doc->refq, idp->data, idp->size); + if (lr == NULL) + goto cleanup; + + /* Keeping link and title from link_ref. */ + + link = lr->link; + title = lr->title; + if (lr->attrs != NULL && parse_ext_attrs + (lr->attrs->data, lr->attrs->size, + &attrid, &attrcls, &attrwidth, &attrheight) == 0) + goto err; + + /* Rewinding the spacing. 
*/ + + i = txt_e + 1; + } + + /* PHP markdown extra attributes (if not ref link). */ + + if ((doc->ext_flags & LOWDOWN_ATTRS) && lr == NULL && + i + 2 < size && data[i] == '{') { + i++; + + /* Find trailing marker. */ + + for (j = i; j < size && data[j] != '}'; j++) + continue; + j = parse_ext_attrs(&data[i], j - i, + &attrid, &attrcls, &attrwidth, &attrheight); + if (j == 0) + goto err; + i += j; + if (i < size && data[i] == '}') + i++; + } + + n = pushnode(doc, is_img ? LOWDOWN_IMAGE : LOWDOWN_LINK); + if (n == NULL) + goto err; + + /* + * Building content: img alt is kept, only link content is + * parsed. + */ + + if (txt_e > 1) { + if ( ! is_img) { + /* + * Disable autolinking when parsing inline the + * content of a link. + */ + doc->in_link_body = 1; + if (!parse_inline(doc, data + 1, txt_e - 1)) + goto err; + doc->in_link_body = 0; + } else { + if ((content = hbuf_new(64)) == NULL) + goto err; + if (!hbuf_put(content, data + 1, txt_e - 1)) + goto err; + } + } + + if (link) { + if ((u_link = hbuf_new(64)) == NULL) + goto err; + if (!unscape_text(u_link, link)) + goto err; + } + + /* Calling the relevant rendering function. 
*/ + + if (is_img) { + if (u_link != NULL && + !hbuf_createb(&n->rndr_image.link, u_link)) + goto err; + if (title != NULL && + !hbuf_createb(&n->rndr_image.title, title)) + goto err; + if (dims != NULL && + !hbuf_createb(&n->rndr_image.dims, dims)) + goto err; + if (content != NULL && + !hbuf_createb(&n->rndr_image.alt, content)) + goto err; + if (attrcls != NULL && + !hbuf_createb(&n->rndr_image.attr_cls, attrcls)) + goto err; + if (attrid != NULL && + !hbuf_createb(&n->rndr_image.attr_id, attrid)) + goto err; + if (attrwidth != NULL && + !hbuf_createb(&n->rndr_image.attr_width, attrwidth)) + goto err; + if (attrheight != NULL && + !hbuf_createb(&n->rndr_image.attr_height, attrheight)) + goto err; + ret = 1; + } else { + if (u_link != NULL && + !hbuf_createb(&n->rndr_link.link, u_link)) + goto err; + if (title != NULL && + !hbuf_createb(&n->rndr_link.title, title)) + goto err; + if (attrcls != NULL && + !hbuf_createb(&n->rndr_link.attr_cls, attrcls)) + goto err; + if (attrid != NULL && + !hbuf_createb(&n->rndr_link.attr_id, attrid)) + goto err; + ret = 1; + } + + popnode(doc, n); + goto cleanup; +err: + ret = -1; +cleanup: + hbuf_free(attrid); + hbuf_free(attrcls); + hbuf_free(attrheight); + hbuf_free(attrwidth); + hbuf_free(linkp); + hbuf_free(titlep); + hbuf_free(dims); + hbuf_free(idp); + hbuf_free(content); + hbuf_free(u_link); + return ret > 0 ? (ssize_t)i : ret; +} + +static ssize_t +char_superscript(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + size_t sup_start, sup_len; + struct lowdown_node *n; + + if (size < 2) + return 0; + + if (data[1] == '(') { + sup_start = 2; + sup_len = find_emph_char(data + 2, size - 2, ')') + 2; + if (sup_len == size) + return 0; + } else { + sup_start = sup_len = 1; + while (sup_len < size && !xisspace(data[sup_len])) + sup_len++; + } + + if (sup_len - sup_start == 0) + return (sup_start == 2) ? 
3 : 0; + + if ((n = pushnode(doc, LOWDOWN_SUPERSCRIPT)) == NULL) + return -1; + if (!parse_inline(doc, data + sup_start, sup_len - sup_start)) + return -1; + popnode(doc, n); + return (sup_start == 2) ? sup_len + 1 : sup_len; +} + +static ssize_t +char_math(struct lowdown_doc *doc, + char *data, size_t offset, size_t size) +{ + + return size > 1 && data[1] == '$' ? + parse_math(doc, data, offset, size, "$$", 2, 1) : + parse_math(doc, data, offset, size, "$", 1, 0); +} + +/* + * Returns the line length when it is empty, 0 otherwise. + */ +static size_t +is_empty(const char *data, size_t size) +{ + size_t i; + + for (i = 0; i < size && data[i] != '\n'; i++) + if (data[i] != ' ') + return 0; + + return i + 1; +} + +/* + * Returns whether a line is a horizontal rule. + */ +static int +is_hrule(const char *data, size_t size) +{ + size_t i = 0, n = 0; + char c; + + /* Skipping initial spaces. */ + + if (size < 3) + return 0; + + i = countspaces(data, 0, size, 3); + + /* Looking at the hrule char. */ + + if (i + 2 >= size || + (data[i] != '*' && data[i] != '-' && data[i] != '_')) + return 0; + + c = data[i]; + + /* The whole line must be the char or space. */ + + while (i < size && data[i] != '\n') { + if (data[i] == c) + n++; + else if (data[i] != ' ') + return 0; + i++; + } + + return n >= 3; +} + +/* + * Check if a line is a code fence; return the end of the code fence. + * If passed, width of the fence rule and character will be returned. + */ +static size_t +is_codefence(const char *data, size_t size, size_t *width, char *chr) +{ + size_t i = 0, n = 1; + char c; + + /* Skipping initial spaces. */ + + if (size < 3) + return 0; + + i = countspaces(data, 0, size, 3); + + /* Looking at the hrule char. */ + + c = data[i]; + + if (i + 2 >= size || !(c == '~' || c == '`')) + return 0; + + /* The fence must be that same character. 
*/ + + while (++i < size && data[i] == c) + ++n; + + if (n < 3) + return 0; + + if (width) + *width = n; + if (chr) + *chr = c; + + return i; +} + +/* + * Expects single line, checks if it's a codefence and extracts + * language. + * Return zero if not a code-fence, >0 offset otherwise. + */ +static size_t +parse_codefence(char *data, size_t size, + struct lowdown_buf *lang, size_t *width, char *chr) +{ + size_t i, w, lang_start; + + i = w = is_codefence(data, size, width, chr); + + if (i == 0) + return 0; + + while (i < size && xisspace(data[i])) + i++; + + lang_start = i; + + while (i < size && !xisspace(data[i])) + i++; + + lang->data = data + lang_start; + lang->size = i - lang_start; + + /* Avoid parsing a codespan as a fence */ + + i = lang_start + 2; + + while (i < size && + !(data[i] == *chr && + data[i-1] == *chr && + data[i-2] == *chr)) + i++; + + return i < size ? 0 : w; +} + +/* + * Returns whether the line is a hash-prefixed header. + * Return zero if not an at-header, non-zero otherwise. + */ +static int +is_atxheader(const struct lowdown_doc *doc, + const char *data, size_t size) +{ + size_t level; + + if (data[0] != '#') + return 0; + + /* + * CommonMark requires a space. + * Classical Markdown does not. + */ + + if (doc->ext_flags & LOWDOWN_COMMONMARK) { + level = 0; + while (level < size && level < 6 && data[level] == '#') + level++; + if (level < size && data[level] != ' ') + return 0; + } + + return 1; +} + +/* + * Tests for level 1 setext-style header ("=") or level 2 ("-"). + * Returns zero if it's not, non-zero otherwise. + */ +static int +is_headerline(const char *data, size_t size) +{ + size_t i; + char hchr; + int level; + + if ('=' == *data || '-' == *data) { + level = '=' == *data ? 1 : 2; + hchr = *data; + } else + return 0; + + for (i = 1; i < size && data[i] == hchr; i++) + continue; + i = countspaces(data, i, size, 0); + + return (i >= size || data[i] == '\n') ? 
level : 0; +} + +static int +is_next_headerline(const char *data, size_t size) +{ + size_t i = 0; + + while (i < size && data[i] != '\n') + i++; + + if (++i >= size) + return 0; + return is_headerline(data + i, size - i); +} + +/* + * Returns unordered list item prefix. + * This does nothing if LOWDOWN_DEFLIST is not set. + */ +static size_t +prefix_dli(const struct lowdown_doc *doc, const char *data, size_t size) +{ + size_t i; + + if (!(doc->ext_flags & LOWDOWN_DEFLIST)) + return 0; + + i = countspaces(data, 0, size, 3); + + if (i + 1 >= size || data[i] != ':' || data[i + 1] != ' ') + return 0; + if (is_next_headerline(data + i, size - i)) + return 0; + + return i + 2; +} + +/* + * Returns blockquote prefix length. + */ +static size_t +prefix_quote(const char *data, size_t size) +{ + size_t i; + + i = countspaces(data, 0, size, 3); + + if (i < size && data[i] == '>') + return countspaces(data, i + 1, size, 1); + + return 0; +} + +/* + * Returns prefix length for block code. + */ +static size_t +prefix_code(const char *data, size_t size) +{ + + if (countspaces(data, 0, size, 4) == 4) + return 4; + + return 0; +} + +/* + * Returns ordered list item prefix. + * On success (return value >0) and if "value" is not NULL *and* we're + * also commonmark processing, copy and NUL-terminate the value into it. + * If all of those except for commonmark, simply NUL-terminate the + * string. + */ +static size_t +prefix_oli(const struct lowdown_doc *doc, + const char *data, size_t size, char *value) +{ + size_t i, st, vsize; + const char *vdata; + + i = countspaces(data, 0, size, 3); + + if (i >= size || !isdigit((unsigned char)data[i])) + return 0; + + st = i; + vdata = &data[i]; + + while (i < size && isdigit((unsigned char)data[i])) + i++; + + /* Commonmark limits us to nine characters. */ + + vsize = i - st; + if ((doc->ext_flags & LOWDOWN_COMMONMARK) && vsize > 9) + return 0; + + /* + * Commonmark accepts ')' and '.' 
following the numeric prefix, + * while regular markdown only has '.'. + */ + + if (doc->ext_flags & LOWDOWN_COMMONMARK) { + if (i + 1 >= size || + (data[i] != '.' && data[i] != ')') || + data[i + 1] != ' ') + return 0; + } else if (i + 1 >= size || data[i] != '.' || data[i + 1] != ' ') + return 0; + + if (is_next_headerline(data + i, size - i)) + return 0; + + if (value != NULL) { + if (doc->ext_flags & LOWDOWN_COMMONMARK) { + assert(vsize > 0); + assert(vsize < 10); + memcpy(value, vdata, vsize); + value[vsize] = '\0'; + } else + value[0] = '\0'; + } + + return i + 2; +} + +/* + * Returns unordered list item prefix, including a GFM checkbox. The + * "checked" pointer, if not NULL, is set to whether the check is set + * (>0), unset (=0), or not there (<0). + */ +static size_t +prefix_uli(const struct lowdown_doc *doc, + const char *data, size_t size, int *checked) +{ + size_t i; + + if (checked != NULL) + *checked = -1; + + i = countspaces(data, 0, size, 3); + + if (i + 1 >= size || + (data[i] != '*' && data[i] != '+' && + data[i] != '-') || + data[i + 1] != ' ') + return 0; + + if (is_next_headerline(data + i, size - i)) + return 0; + + if (!(doc->ext_flags & LOWDOWN_TASKLIST) || i + 5 >= size) + return i + 2; + + if (data[i + 2] == '[' && + (data[i + 3] == ' ' || + data[i + 3] == 'x' || + data[i + 3] == 'X') && + data[i + 4] == ']' && + data[i + 5] == ' ') { + if (checked != NULL) + *checked = data[i + 3] != ' '; + return i + 6; + } + + return i + 2; +} + +/* + * Handles parsing of a blockquote fragment. + * Return <0 on failure, otherwise the end offset. + */ +static ssize_t +parse_blockquote(struct lowdown_doc *doc, char *data, size_t size) +{ + size_t beg = 0, end = 0, pre, work_size = 0; + char *work_data = NULL; + struct lowdown_node *n; + + while (beg < size) { + for (end = beg + 1; + end < size && data[end - 1] != '\n'; + end++) + continue; + + pre = prefix_quote(data + beg, end - beg); + + /* Skip prefix or empty line followed by non-quote. 
*/ + + if (pre) + beg += pre; + else if (is_empty(data + beg, end - beg) && + (end >= size || + (prefix_quote(data + end, size - end) == 0 && + !is_empty(data + end, size - end)))) + break; + + if (beg < end) { + if (!work_data) + work_data = data + beg; + else if (data + beg != work_data + work_size) + memmove(work_data + work_size, + data + beg, end - beg); + work_size += end - beg; + } + beg = end; + } + + n = pushnode(doc, LOWDOWN_BLOCKQUOTE); + if (n == NULL) + return -1; + if (!parse_block(doc, work_data, work_size)) + return -1; + popnode(doc, n); + return end; +} + +/* + * Handles parsing of a regular paragraph, which terminates at sections + * or blank lines. + * Returns <0 on failure or the number of characters parsed from the + * paragraph input. + */ +static ssize_t +parse_paragraph(struct lowdown_doc *doc, char *data, size_t size) +{ + struct lowdown_buf work; + struct lowdown_node *n; + size_t i = 0, end = 0, beg, lines = 0; + int level = 0, beoln = 0; + + memset(&work, 0, sizeof(struct lowdown_buf)); + work.data = data; + + while (i < size) { + /* Parse ahead to the next newline. */ + + for (end = i + 1; + end < size && data[end - 1] != '\n'; end++) + continue; + + /* + * Empty line: end of paragraph. + * However, check if we have a dli prefix following + * that, which means that we're a block-mode dli. + */ + + if (is_empty(data + i, size - i)) { + beoln = 1; + break; + } + + /* Header line: end of paragraph. */ + + if ((level = is_headerline(data + i, size - i)) != 0) + break; + + /* Other ways of ending a paragraph. */ + + if (is_atxheader(doc, data + i, size - i) || + is_hrule(data + i, size - i) || + (lines == 1 && + prefix_dli(doc, data + i, size - i)) || + prefix_quote(data + i, size - i)) { + end = i; + break; + } + + lines++; + i = end; + } + + work.size = i; + + while (work.size && data[work.size - 1] == '\n') + work.size--; + + /* + * The paragraph isn't ending on a header line. + * So it's a regular paragraph. 
+ */ + + if (!level) { + n = pushnode(doc, LOWDOWN_PARAGRAPH); + if (n == NULL) + return -1; + n->rndr_paragraph.lines = lines; + n->rndr_paragraph.beoln = beoln; + if (!parse_inline(doc, work.data, work.size)) + return -1; + popnode(doc, n); + return end; + } + + /* Paragraph material prior to header break. */ + + if (work.size) { + i = work.size; + work.size -= 1; + while (work.size && data[work.size] != '\n') + work.size -= 1; + beg = work.size + 1; + while (work.size && data[work.size - 1] == '\n') + work.size -= 1; + + if (work.size > 0) { + n = pushnode(doc, LOWDOWN_PARAGRAPH); + if (n == NULL) + return -1; + n->rndr_paragraph.lines = lines - 1; + n->rndr_paragraph.beoln = beoln; + if (!parse_inline(doc, work.data, work.size)) + return -1; + popnode(doc, n); + work.data += beg; + work.size = i - beg; + } else + work.size = i; + } + + /* Definition data parts. */ + + if ((n = pushnode(doc, LOWDOWN_HEADER)) == NULL) + return -1; + assert(level > 0); + n->rndr_header.level = level - 1; + if (!parse_inline(doc, work.data, work.size)) + return -1; + popnode(doc, n); + + if ((doc->ext_flags & LOWDOWN_ATTRS) && + !parse_header_ext_attrs(n)) + return -1; + + return end; +} + +/* + * Handles parsing of a block-level code fragment. + * Return <0 on failure, 0 if not a fragment, >0 on success. + */ +static ssize_t +parse_fencedcode(struct lowdown_doc *doc, char *data, size_t size) +{ + struct lowdown_buf text, lang; + size_t i = 0, text_start, line_start, + w, w2, width, width2; + char chr, chr2; + struct lowdown_node *n; + + memset(&text, 0, sizeof(struct lowdown_buf)); + memset(&lang, 0, sizeof(struct lowdown_buf)); + + /* Parse codefence line. */ + + while (i < size && data[i] != '\n') + i++; + if ((w = parse_codefence(data, i, &lang, &width, &chr)) == 0) + return 0; + + /* Search for end. 
*/ + + i++; + text_start = i; + while ((line_start = i) < size) { + while (i < size && data[i] != '\n') + i++; + w2 = is_codefence(data + line_start, + i - line_start, &width2, &chr2); + if (w == w2 && + width == width2 && + chr == chr2 && + is_empty(data + + (line_start+w), i - (line_start+w))) + break; + i++; + } + + text.data = data + text_start; + text.size = line_start - text_start; + + if ((n = pushnode(doc, LOWDOWN_BLOCKCODE)) == NULL) + return -1; + + if (!hbuf_create(&n->rndr_blockcode.text, + data + text_start, line_start - text_start)) + return -1; + if (!hbuf_createb(&n->rndr_blockcode.lang, &lang)) + return -1; + popnode(doc, n); + return i; +} + +static ssize_t +parse_blockcode(struct lowdown_doc *doc, char *data, size_t size) +{ + size_t beg = 0, end, pre; + struct lowdown_buf *work = NULL; + struct lowdown_node *n; + + if ((work = hbuf_new(256)) == NULL) + goto err; + + while (beg < size) { + for (end = beg + 1; + end < size && data[end - 1] != '\n'; + end++) + continue; + + pre = prefix_code(data + beg, end - beg); + + /* + * Skip prefix or non-empty non-prefixed line breaking + * the pre. + */ + + if (pre) + beg += pre; + else if (!is_empty(data + beg, end - beg)) + break; + + /* + * Verbatim copy to the working buffer, escaping + * entities. + */ + + if (beg < end) { + if (is_empty(data + beg, end - beg)) { + if (!hbuf_putc(work, '\n')) + goto err; + } else { + if (!hbuf_put(work, + data + beg, end - beg)) + goto err; + } + } + beg = end; + } + + while (work->size && work->data[work->size - 1] == '\n') + work->size -= 1; + + if (!hbuf_putc(work, '\n')) + goto err; + + if ((n = pushnode(doc, LOWDOWN_BLOCKCODE)) == NULL) + goto err; + if (!hbuf_createb(&n->rndr_blockcode.text, work)) + goto err; + popnode(doc, n); + hbuf_free(work); + return beg; +err: + hbuf_free(work); + return -1; +} + +/* + * Parsing of a single list item assuming initial prefix is already + * removed. 
+ */ +static ssize_t +parse_listitem(struct lowdown_buf *ob, struct lowdown_doc *doc, + char *data, size_t size, enum hlist_fl *flags, size_t num) +{ + struct lowdown_buf *work = NULL; + size_t beg = 0, end, pre, sublist = 0, + orgpre, i, has_next_uli = 0, dli_lines, + has_next_oli = 0, has_next_dli = 0; + int in_empty = 0, has_inside_empty = 0, + in_fence = 0, ff, checked = -1; + struct lowdown_node *n; + + /* Keeping track of the first indentation prefix. */ + + orgpre = countspaces(data, 0, size, 3); + + beg = prefix_uli(doc, data, size, &checked); + + if (!beg) + beg = prefix_oli(doc, data, size, NULL); + if (!beg) + beg = prefix_dli(doc, data, size); + if (!beg) + return 0; + + /* Skipping to the beginning of the following line. */ + + end = beg; + while (end < size && data[end - 1] != '\n') + end++; + + /* Getting working buffers. */ + + if ((work = hbuf_new(64)) == NULL) + goto err; + + /* Putting the first line into the working buffer. */ + + if (!hbuf_put(work, data + beg, end - beg)) + goto err; + beg = end; + dli_lines = 1; + + /* + * Process the following lines. + * Use the "dli_lines" variable to see if we should consider an + * opening dli prefix to be a valid dli token. + */ + + while (beg < size) { + has_next_uli = has_next_oli = has_next_dli = 0; + end++; + + while (end < size && data[end - 1] != '\n') + end++; + + /* Process an empty line. */ + + if (is_empty(data + beg, end - beg)) { + in_empty = 1; + beg = end; + dli_lines = 0; + continue; + } + + dli_lines++; + + /* Calculating the indentation. */ + + pre = i = countspaces(data, beg, end, 4) - beg; + + if (doc->ext_flags & LOWDOWN_FENCED) + if (is_codefence(data + beg + i, + end - beg - i, NULL, NULL)) + in_fence = !in_fence; + + /* + * Only check for new list items if we are **not** + * inside a fenced code block. + * We only allow dli if we've had a single line of + * content beforehand. 
+ */ + + if (!in_fence) { + has_next_uli = prefix_uli(doc, + data + beg + i, end - beg - i, NULL); + has_next_dli = dli_lines <= 2 && prefix_dli + (doc, data + beg + i, end - beg - i); + has_next_oli = prefix_oli + (doc, data + beg + i, end - beg - i, NULL); + if (has_next_uli || has_next_dli || has_next_oli) + dli_lines = 0; + } + + /* Checking for a new item. */ + + if ((has_next_uli && + !is_hrule(data + beg + i, end - beg - i)) || + has_next_oli || has_next_dli) { + if (in_empty) + has_inside_empty = 1; + + /* + * The following item must have the same (or + * less) indentation. + */ + + if (pre <= orgpre) { + /* + * If the following item has different + * list type, we end this list. + */ + + ff = *flags & HLIST_FL_MASK; + assert(ff == HLIST_FL_ORDERED || + ff == HLIST_FL_UNORDERED || + ff == HLIST_FL_DEF); + + if (in_empty && + (((ff == HLIST_FL_ORDERED) && + (has_next_uli || has_next_dli)) || + ((ff == HLIST_FL_UNORDERED) && + (has_next_oli || has_next_dli)) || + ((ff == HLIST_FL_DEF) && + (has_next_oli || has_next_uli)))) { + *flags |= HLIST_LI_END; + } + + break; + } + + if (!sublist) + sublist = work->size; + } else if (in_empty && pre == 0) { + /* + * Joining only indented stuff after empty + * lines; note that now we only require 1 space + * of indentation to continue a list. + */ + + *flags |= HLIST_LI_END; + break; + } + + if (in_empty) { + if (!hbuf_putc(work, '\n')) + goto err; + has_inside_empty = 1; + in_empty = 0; + } + + /* + * Adding the line without prefix into the working + * buffer. + */ + + if (!hbuf_put(work, data + beg + i, end - beg - i)) + goto err; + beg = end; + } + + /* Render of li contents. 
*/ + + if (has_inside_empty) + *flags |= HLIST_FL_BLOCK; + + if ((n = pushnode(doc, LOWDOWN_LISTITEM)) == NULL) + goto err; + n->rndr_listitem.flags = *flags; + n->rndr_listitem.num = num; + + if (checked > 0) + n->rndr_listitem.flags |= HLIST_FL_CHECKED; + else if (checked == 0) + n->rndr_listitem.flags |= HLIST_FL_UNCHECKED; + + if (*flags & HLIST_FL_BLOCK) { + /* Intermediate render of block li. */ + + if (sublist && sublist < work->size) { + if (!parse_block(doc, + work->data, sublist)) + goto err; + if (!parse_block(doc, + work->data + sublist, + work->size - sublist)) + goto err; + } else { + if (!parse_block(doc, + work->data, work->size)) + goto err; + } + } else { + /* Intermediate render of inline li. */ + + if (sublist && sublist < work->size) { + if (!parse_inline(doc, + work->data, sublist)) + goto err; + if (!parse_block(doc, + work->data + sublist, + work->size - sublist)) + goto err; + } else { + if (!parse_inline(doc, + work->data, work->size)) + goto err; + } + } + + popnode(doc, n); + hbuf_free(work); + return beg; +err: + hbuf_free(work); + return -1; +} + +/* + * Parse definition list. + * This must follow a single-line paragraph, which it integrates as the + * title of the list. + * (The paragraph can contain arbitrary styling.) + */ +static ssize_t +parse_definition(struct lowdown_doc *doc, char *data, size_t size) +{ + struct lowdown_buf *work = NULL; + size_t i = 0, k = 1; + ssize_t ret; + enum hlist_fl flags = HLIST_FL_DEF; + struct lowdown_node *n, *nn, *cur, *prev; + + if ((work = hbuf_new(256)) == NULL) + goto err; + + /* Record whether we want to start in block mode. */ + + cur = TAILQ_LAST(&doc->current->children, lowdown_nodeq); + if (cur->rndr_paragraph.beoln) + flags |= HLIST_FL_BLOCK; + + /* Do we need to merge into a previous definition list? 
*/ + + prev = TAILQ_PREV(cur, lowdown_nodeq, entries); + + if (prev != NULL && prev->type == LOWDOWN_DEFINITION) { + n = doc->current = prev; + flags |= n->rndr_definition.flags; + doc->depth++; + } else { + n = pushnode(doc, LOWDOWN_DEFINITION); + if (n == NULL) + goto err; + n->rndr_definition.flags = flags; + } + + TAILQ_REMOVE(&cur->parent->children, cur, entries); + TAILQ_INSERT_TAIL(&n->children, cur, entries); + cur->type = LOWDOWN_DEFINITION_TITLE; + cur->parent = n; + + while (i < size) { + nn = pushnode(doc, LOWDOWN_DEFINITION_DATA); + if (nn == NULL) + goto err; + ret = parse_listitem(work, doc, + data + i, size - i, &flags, k++); + if (ret < 0) + goto err; + i += ret; + popnode(doc, nn); + if (ret == 0 || (flags & HLIST_LI_END)) + break; + } + + if (flags & HLIST_FL_BLOCK) + n->rndr_definition.flags |= HLIST_FL_BLOCK; + + popnode(doc, n); + hbuf_free(work); + return i; +err: + hbuf_free(work); + return -1; +} + +/* + * Parsing ordered or unordered list block. + * If "oli_data" is not NULL, it's the numeric string prefix of the + * ordered entry. It's either zero-length or well-formed. + */ +static ssize_t +parse_list(struct lowdown_doc *doc, + char *data, size_t size, const char *oli_data) +{ + struct lowdown_buf *work = NULL; + size_t i = 0, pos; + ssize_t ret; + enum hlist_fl flags; + struct lowdown_node *n; + + flags = oli_data != NULL ? 
+ HLIST_FL_ORDERED : HLIST_FL_UNORDERED; + if ((work = hbuf_new(256)) == NULL) + goto err; + if ((n = pushnode(doc, LOWDOWN_LIST)) == NULL) + goto err; + n->rndr_list.start = 1; + n->rndr_list.flags = flags; + + if (oli_data != NULL && oli_data[0] != '\0') { + n->rndr_list.start = strtonum + (oli_data, 0, UINT32_MAX, NULL); + if (n->rndr_list.start == 0) + n->rndr_list.start = 1; + } + + pos = n->rndr_list.start; + while (i < size) { + ret = parse_listitem(work, doc, + data + i, size - i, &flags, pos++); + if (ret < 0) + goto err; + i += ret; + if (ret == 0 || (flags & HLIST_LI_END)) + break; + } + + if (flags & HLIST_FL_BLOCK) + n->rndr_list.flags |= HLIST_FL_BLOCK; + + popnode(doc, n); + hbuf_free(work); + return i; +err: + hbuf_free(work); + return -1; +} + +/* + * Parsing of atx-style headers. + */ +static ssize_t +parse_atxheader(struct lowdown_doc *doc, char *data, size_t size) +{ + size_t level = 0, i, end, skip; + struct lowdown_node *n; + + while (level < size && level < 6 && data[level] == '#') + level++; + + i = countspaces(data, level, size, 0); + + for (end = i; end < size && data[end] != '\n'; end++) + continue; + + skip = end; + + while (end && data[end - 1] == '#') + end--; + + while (end && data[end - 1] == ' ') + end--; + + if (end > i) { + if ((n = pushnode(doc, LOWDOWN_HEADER)) == NULL) + return -1; + assert(level > 0); + n->rndr_header.level = level - 1; + if (!parse_inline(doc, data + i, end - i)) + return -1; + popnode(doc, n); + if ((doc->ext_flags & LOWDOWN_ATTRS) && + !parse_header_ext_attrs(n)) + return -1; + } + + return skip; +} + +/* + * Check for end of HTML block : </tag>( *)\n + * Returns tag length on match, 0 otherwise. + * Assumes data starts with "<". + */ +static size_t +htmlblock_is_end(const char *tag, size_t tag_len, + struct lowdown_doc *doc, const char *data, size_t size) +{ + size_t i = tag_len + 3, w; + + /* + * Try to match the end tag + * Note: we're not considering tags like "</tag >" which are + * still valid. 
+ */ + + if (i > size || + data[1] != '/' || + strncasecmp(data + 2, tag, tag_len) != 0 || + data[tag_len + 2] != '>') + return 0; + + /* Rest of the line must be empty. */ + + if ((w = is_empty(data + i, size - i)) == 0 && i < size) + return 0; + + return i + w; +} + +/* + * Try to find HTML block ending tag. + * Returns the length on match, 0 otherwise. + */ +static size_t +htmlblock_find_end(const char *tag, size_t tag_len, + struct lowdown_doc *doc, const char *data, size_t size) +{ + size_t i, w = 0; + + for (i = 0; ; i++) { + while (i < size && data[i] != '<') + i++; + if (i >= size) + return 0; + w = htmlblock_is_end(tag, + tag_len, doc, data + i, size - i); + if (w) + break; + } + + return i + w; +} + +/* + * Try to find end of HTML block in strict mode (it must be an + * unindented line, and have a blank line afterwards). + * Returns the length on match, 0 otherwise. + */ +static size_t +htmlblock_find_end_strict(const char *tag, size_t tag_len, + struct lowdown_doc *doc, const char *data, size_t size) +{ + size_t i = 0, mark; + + while (1) { + mark = i; + while (i < size && data[i] != '\n') + i++; + if (i < size) + i++; + if (i == mark) + return 0; + + if (data[mark] == ' ' && mark > 0) + continue; + mark += htmlblock_find_end(tag, tag_len, + doc, data + mark, i - mark); + if (mark == i && + (is_empty(data + i, size - i) || i >= size)) + break; + } + + return i; +} + +/* + * Canonicalise a sequence of length "len" bytes in "str". + * This returns NULL if the sequence is not recognised, or a + * nil-terminated string of the sequence otherwise. 
+ */ +static const char * +hhtml_find_block(const char *str, size_t len) +{ + size_t i; + static const char *tags[] = { + "address", + "article", + "aside", + "blockquote", + "del", + "details", + "dialog", + "dd", + "div", + "dl", + "dt", + "fieldset", + "figcaption", + "figure", + "footer", + "form", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "header", + "hgroup", + "iframe", + "ins", + "li", + "main", + "math", + "nav", + "noscript", + "ol", + "p", + "pre", + "section", + "script", + "style", + "table", + "ul", + NULL, + }; + + for (i = 0; tags[i] != NULL; i++) + if (strncasecmp(tags[i], str, len) == 0) + return tags[i]; + + return NULL; +} + +/* + * Parsing of inline HTML block. + * Return <0 on failure, >0 on success, 0 if not a block. + */ +static ssize_t +parse_htmlblock(struct lowdown_doc *doc, char *data, size_t size) +{ + struct lowdown_buf work; + size_t i, j = 0, tag_len, tag_end; + const char *curtag = NULL; + struct lowdown_node *n; + + memset(&work, 0, sizeof(struct lowdown_buf)); + + work.data = data; + + /* Identification of the opening tag. */ + + if (size < 2 || data[0] != '<') + return 0; + + i = 1; + while (i < size && data[i] != '>' && data[i] != ' ') + i++; + if (i < size) + curtag = hhtml_find_block(data + 1, i - 1); + + /* Handling of special cases. */ + + if (!curtag) { + /* HTML comment, laxist form. */ + + if (size > 5 && data[1] == '!' && + data[2] == '-' && data[3] == '-') { + i = 5; + while (i < size && !(data[i - 2] == '-' && + data[i - 1] == '-' && data[i] == '>')) + i++; + i++; + + if (i < size) + j = is_empty(data + i, size - i); + + if (j) { + n = pushnode(doc, LOWDOWN_BLOCKHTML); + if (n == NULL) + return -1; + work.size = i + j; + if (!hbuf_createb + (&n->rndr_blockhtml.text, &work)) + return -1; + popnode(doc, n); + return work.size; + } + } + + /* + * HR, which is the only self-closing block tag + * considered. + * FIXME: we should also do <br />. 
+ */ + + if (size > 4 && + (data[1] == 'h' || data[1] == 'H') && + (data[2] == 'r' || data[2] == 'R')) { + i = 3; + while (i < size && data[i] != '>') + i++; + if (i + 1 < size) { + i++; + j = is_empty(data + i, size - i); + if (j) { + n = pushnode(doc, + LOWDOWN_BLOCKHTML); + if (n == NULL) + return -1; + work.size = i + j; + if (!hbuf_createb + (&n->rndr_blockhtml.text, + &work)) + return -1; + popnode(doc, n); + return work.size; + } + } + } + + /* No special case recognised. */ + + return 0; + } + + /* Looking for a matching closing tag in strict mode. */ + + tag_len = strlen(curtag); + tag_end = htmlblock_find_end_strict + (curtag, tag_len, doc, data, size); + + /* + * If not found, trying a second pass looking for indented match + * but not if tag is "ins" or "del" (following original + * Markdown.pl). + */ + + if (!tag_end && + strcmp(curtag, "ins") != 0 && + strcmp(curtag, "del") != 0) + tag_end = htmlblock_find_end(curtag, + tag_len, doc, data, size); + + if (!tag_end) + return 0; + + /* The end of the block has been found. */ + + n = pushnode(doc, LOWDOWN_BLOCKHTML); + if (n == NULL) + return -1; + + work.size = tag_end; + if (!hbuf_createb(&n->rndr_blockhtml.text, &work)) + return -1; + popnode(doc, n); + return tag_end; +} + +/* + * Parse a table row. + * Return zero on failure, non-zero on success. 
+ */ +static int +parse_table_row(struct lowdown_buf *ob, struct lowdown_doc *doc, + char *data, size_t size, size_t columns, + const enum htbl_flags *col_data, enum htbl_flags header_flag) +{ + size_t i = 0, col, len, cell_start, cell_end; + struct lowdown_buf empty_cell; + struct lowdown_node *n, *nn; + + if (i < size && data[i] == '|') + i++; + + if ((n = pushnode(doc, LOWDOWN_TABLE_ROW)) == NULL) + return 0; + + for (col = 0; col < columns && i < size; ++col) { + while (i < size && xisspace(data[i])) + i++; + + cell_start = i; + + len = find_emph_char(data + i, size - i, '|'); + + /* + * Two possibilities for len == 0: + * (1) No more pipe char found in the current line. + * (2) The next pipe is right after the current one, + * i.e. empty cell. + * For case 1, we skip to the end of line; for case 2 we + * just continue. + */ + + if (len == 0 && i < size && data[i] != '|') + len = size - i; + i += len; + + cell_end = i - 1; + + while (cell_end > cell_start && + xisspace(data[cell_end])) + cell_end--; + + nn = pushnode(doc, LOWDOWN_TABLE_CELL); + if (nn == NULL) + return 0; + + nn->rndr_table_cell.flags = col_data[col] | header_flag; + nn->rndr_table_cell.col = col; + nn->rndr_table_cell.columns = columns; + + if (!parse_inline(doc, + data + cell_start, 1 + cell_end - cell_start)) + return 0; + popnode(doc, nn); + i++; + } + + for ( ; col < columns; ++col) { + memset(&empty_cell, 0, sizeof(struct lowdown_buf)); + nn = pushnode(doc, LOWDOWN_TABLE_CELL); + if (nn == NULL) + return 0; + nn->rndr_table_cell.flags = col_data[col] | header_flag; + nn->rndr_table_cell.col = col; + nn->rndr_table_cell.columns = columns; + popnode(doc, nn); + } + + popnode(doc, n); + return 1; +} + +/* + * Parse the initial line of a table. + * Return <0 on failure, 0 if not a table row, >0 for the offset. 
+ */ +static ssize_t +parse_table_header(struct lowdown_node **np, + struct lowdown_buf *ob, struct lowdown_doc *doc, + char *data, size_t size, size_t *columns, + enum htbl_flags **column_data) +{ + size_t i = 0, col, header_end, under_end, + dashes; + ssize_t pipes = 0; + struct lowdown_node *n; + + while (i < size && data[i] != '\n') + if (data[i++] == '|') + pipes++; + + if (i == size || pipes == 0) + return 0; + + header_end = i; + + while (header_end > 0 && xisspace(data[header_end - 1])) + header_end--; + + if (data[0] == '|') + pipes--; + + if (header_end && data[header_end - 1] == '|') + pipes--; + + if (pipes < 0) + return 0; + + *columns = pipes + 1; + *column_data = calloc(*columns, sizeof(enum htbl_flags)); + if (*column_data == NULL) + return -1; + + /* Parse the header underline */ + + i++; + if (i < size && data[i] == '|') + i++; + + under_end = i; + while (under_end < size && data[under_end] != '\n') + under_end++; + + for (col = 0; col < *columns && i < under_end; ++col) { + dashes = 0; + i = countspaces(data, i, under_end, 0); + + if (data[i] == ':') { + i++; + (*column_data)[col] |= HTBL_FL_ALIGN_LEFT; + dashes++; + } + + while (i < under_end && data[i] == '-') { + i++; + dashes++; + } + + if (i < under_end && data[i] == ':') { + i++; + (*column_data)[col] |= HTBL_FL_ALIGN_RIGHT; + dashes++; + } + + i = countspaces(data, i, under_end, 0); + + if (i < under_end && data[i] != '|' && data[i] != '+') + break; + + if (dashes < 3) + break; + + i++; + } + + if (col < *columns) + return 0; + + /* (This calls pushnode for the table row.) 
*/ + + *np = pushnode(doc, LOWDOWN_TABLE_BLOCK); + if (*np == NULL) + return -1; + + (*np)->rndr_table.columns = *columns; + + n = pushnode(doc, LOWDOWN_TABLE_HEADER); + if (n == NULL) + return -1; + + n->rndr_table_header.flags = calloc + (*columns, sizeof(enum htbl_flags)); + if (n->rndr_table_header.flags == NULL) + return -1; + + for (i = 0; i < *columns; i++) + n->rndr_table_header.flags[i] = (*column_data)[i]; + n->rndr_table_header.columns = *columns; + + if (!parse_table_row(ob, doc, data, header_end, + *columns, *column_data, HTBL_FL_HEADER)) + return -1; + + popnode(doc, n); + return under_end + 1; +} + +/* + * Parse a table block. + * Return <0 on failure, zero if not a table, >0 offset otherwise. + */ +static ssize_t +parse_table(struct lowdown_doc *doc, char *data, size_t size) +{ + size_t i, columns, row_start, pipes; + ssize_t ret; + struct lowdown_buf *header_work = NULL, *body_work = NULL; + enum htbl_flags *col_data = NULL; + struct lowdown_node *n = NULL, *nn; + + if ((header_work = hbuf_new(64)) == NULL || + (body_work = hbuf_new(256)) == NULL) + goto err; + + ret = parse_table_header(&n, header_work, + doc, data, size, &columns, &col_data); + if (ret < 0) + goto err; + + if ((i = ret) > 0) { + nn = pushnode(doc, LOWDOWN_TABLE_BODY); + if (nn == NULL) + goto err; + while (i < size) { + pipes = 0; + row_start = i; + + while (i < size && data[i] != '\n') + if (data[i++] == '|') + pipes++; + + if (pipes == 0 || i == size) { + i = row_start; + break; + } + + if (!parse_table_row(body_work, + doc, data + row_start, i - row_start, + columns, col_data, 0)) + goto err; + i++; + } + + popnode(doc, nn); + popnode(doc, n); + } + + free(col_data); + hbuf_free(header_work); + hbuf_free(body_work); + return i; +err: + free(col_data); + hbuf_free(header_work); + hbuf_free(body_work); + return -1; +} + +/* + * Parsing of one block, returning next char to parse. + * We can assume, entering the block, that our output is newline + * aligned. 
+ * Return zero on failure, non-zero on success. + */ +static int +parse_block(struct lowdown_doc *doc, char *data, size_t size) +{ + size_t beg = 0, end, i; + char *txt_data; + char oli_data[10]; + struct lowdown_node *n; + ssize_t rc; + + /* + * What kind of block are we? + * Go through all types of blocks, one by one. + */ + + while (beg < size) { + txt_data = data + beg; + end = size - beg; + + /* We are at a #header. */ + + if (is_atxheader(doc, txt_data, end)) { + rc = parse_atxheader(doc, txt_data, end); + if (rc < 0) + return 0; + assert(rc > 0); + beg += rc; + continue; + } + + /* We have some <HTML>. */ + + if (data[beg] == '<') { + rc = parse_htmlblock(doc, txt_data, end); + if (rc > 0) { + beg += rc; + continue; + } else if (rc < 0) + return 0; + } + + /* Empty line. */ + + if ((i = is_empty(txt_data, end)) != 0) { + beg += i; + continue; + } + + /* Horizontal rule. */ + + if (is_hrule(txt_data, end)) { + n = pushnode(doc, LOWDOWN_HRULE); + if (n == NULL) + return 0; + while (beg < size && data[beg] != '\n') + beg++; + beg++; + popnode(doc, n); + continue; + } + + /* Fenced code. */ + + if (doc->ext_flags & LOWDOWN_FENCED) { + rc = parse_fencedcode(doc, txt_data, end); + if (rc > 0) { + beg += rc; + continue; + } else if (rc < 0) + return 0; + + } + + /* Table parsing. */ + + if (doc->ext_flags & LOWDOWN_TABLES) { + rc = parse_table(doc, txt_data, end); + if (rc > 0) { + beg += rc; + continue; + } else if (rc < 0) + return 0; + + } + + /* We're a > block quote. */ + + if (prefix_quote(txt_data, end)) { + rc = parse_blockquote(doc, txt_data, end); + if (rc < 0) + return 0; + beg += rc; + continue; + } + + /* Prefixed code (like block-quotes). */ + + if (!(doc->ext_flags & LOWDOWN_NOCODEIND) && + prefix_code(txt_data, end)) { + rc = parse_blockcode(doc, txt_data, end); + if (rc < 0) + return 0; + beg += rc; + continue; + } + + /* Some sort of unordered list. 
*/ + + if (prefix_uli(doc, txt_data, end, NULL)) { + rc = parse_list(doc, txt_data, end, NULL); + if (rc < 0) + return 0; + beg += rc; + continue; + } + + /* + * A definition list. + * Only use this is preceded by a one-line paragraph. + */ + + if (doc->current != NULL && + prefix_dli(doc, txt_data, end)) { + n = TAILQ_LAST(&doc->current->children, + lowdown_nodeq); + if (n != NULL && + n->type == LOWDOWN_PARAGRAPH && + n->rndr_paragraph.lines == 1) { + rc = parse_definition(doc, txt_data, end); + if (rc < 0) + return 0; + beg += rc; + continue; + } + } + + /* An ordered list. */ + + if (prefix_oli(doc, txt_data, end, oli_data)) { + rc = parse_list(doc, txt_data, end, oli_data); + if (rc < 0) + return 0; + beg += rc; + continue; + } + + /* No match: just a regular paragraph. */ + + if ((rc = parse_paragraph(doc, txt_data, end)) < 0) + return 0; + beg += rc; + } + + return 1; +} + +/* + * Returns >0 if a line is a footnote definition, 0 if not, <0 on + * failure. This gathers any footnote content into the footq footnote + * queue. + */ +static int +is_footnote(struct lowdown_doc *doc, const char *data, + size_t beg, size_t end, size_t *last) +{ + size_t i = 0, ind = 0, start = 0, + id_offs, id_end; + struct lowdown_buf *contents = NULL; + int in_empty = 0; + struct foot_ref *ref = NULL; + + /* up to 3 optional leading spaces */ + + if (beg + 3 >= end) + return 0; + i = countspaces(data, beg, end, 3); + + /* id part: caret followed by anything between brackets */ + + if (data[i] != '[') + return 0; + i++; + if (i >= end || data[i] != '^') + return 0; + i++; + id_offs = i; + while (i < end && data[i] != '\n' && + data[i] != '\r' && data[i] != ']') + i++; + if (i >= end || data[i] != ']') + return 0; + id_end = i; + + /* spacer: colon (space | tab)* newline? 
(space | tab)* */ + + i++; + if (i >= end || data[i] != ':') + return 0; + i++; + + /* getting content buffer */ + + if ((contents = hbuf_new(64)) == NULL) + return -1; + + start = i; + + /* process lines similar to a list item */ + + while (i < end) { + while (i < end && data[i] != '\n' && data[i] != '\r') + i++; + + /* process an empty line */ + + if (is_empty(data + start, i - start)) { + in_empty = 1; + if (i < end && + (data[i] == '\n' || data[i] == '\r')) { + i++; + if (i < end && data[i] == '\n' && + data[i - 1] == '\r') + i++; + } + start = i; + continue; + } + + /* calculating the indentation */ + + ind = countspaces(data, start, end, 4) - start; + + /* joining only indented stuff after empty lines; + * note that now we only require 1 space of indentation + * to continue, just like lists */ + + if (ind == 0) { + if (start == id_end + 2 && + data[start] == '\t') { + /* XXX: wtf? */ + } else + break; + } else if (in_empty) + if (!hbuf_putc(contents, '\n')) + goto err; + + in_empty = 0; + + /* adding the line into the content buffer */ + + if (!hbuf_put(contents, + data + start + ind, i - start - ind)) + goto err; + + /* add carriage return */ + + if (i < end) { + if (!hbuf_putc(contents, '\n')) + goto err; + if (i < end && + (data[i] == '\n' || data[i] == '\r')) { + i++; + if (i < end && data[i] == '\n' && + data[i - 1] == '\r') + i++; + } + } + start = i; + } + + if (last) + *last = start; + + if ((ref = calloc(1, sizeof(struct foot_ref))) == NULL) + goto err; + + TAILQ_INSERT_TAIL(&doc->footq, ref, entries); + if (!hbuf_createb(&ref->contents, contents)) + return -1; + if (!hbuf_create(&ref->name, data + id_offs, id_end - id_offs)) + return -1; + hbuf_free(contents); + return 1; +err: + hbuf_free(contents); + return -1; +} + +/* + * Returns >0 if the line is a reference, 0 if not, <0 on failure. 
+ */ +static int +is_ref(struct lowdown_doc *doc, const char *data, + size_t beg, size_t end, size_t *last) +{ + size_t i, id_offset, id_end, link_offset, link_end, + title_offset = 0, title_end = 0, line_end, + garbage, attr_offset = 0, attr_end = 0; + struct link_ref *ref; + + /* Up to 3 optional leading spaces. */ + + if (beg + 3 >= end) + return 0; + i = countspaces(data, beg, end, 3); + + /* Id part: anything but a newline between brackets. */ + + if (data[i] != '[') + return 0; + i++; + id_offset = i; + while (i < end && data[i] != '\n' && + data[i] != '\r' && data[i] != ']') + i++; + if (i >= end || data[i] != ']') + return 0; + id_end = i; + + /* Spacer: colon (space | tab)* newline? (space | tab)* */ + + i++; + if (i >= end || data[i] != ':') + return 0; + i++; + i = countspaces(data, i, end, 0); + if (i < end && (data[i] == '\n' || data[i] == '\r')) { + i++; + if (i < end && data[i] == '\r' && data[i - 1] == '\n') + i++; + } + i = countspaces(data, i, end, 0); + if (i >= end) + return 0; + + /* + * Link: spacing-free sequence, optionally between angle + * brackets. + */ + + if (data[i] == '<') + i++; + + link_offset = i; + + while (i < end && data[i] != ' ' && + data[i] != '\n' && data[i] != '\r') + i++; + + if (data[i - 1] == '>') + link_end = i - 1; + else + link_end = i; + + /* + * Space: (space | tab)* (newline | '\'' | '"' | '(' ) + * Optionally '{' for attributes. 
+ */ + + i = countspaces(data, i, end, 0); + + if (doc->ext_flags & LOWDOWN_ATTRS) { + if (i < end && data[i] != '\n' && data[i] != '\r' && + data[i] != '\'' && data[i] != '"' && + data[i] != '(' && data[i] != '{') + return 0; + } else { + if (i < end && data[i] != '\n' && data[i] != '\r' && + data[i] != '\'' && data[i] != '"' && + data[i] != '(') + return 0; + } + + line_end = 0; + + /* computing end-of-line */ + + if (i >= end || data[i] == '\r' || data[i] == '\n') + line_end = i; + if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') + line_end = i + 1; + + /* optional (space|tab)* spacer after a newline */ + + if (line_end) + i = countspaces(data, line_end + 1, end, 0); + + /* + * Optional title: any non-newline sequence enclosed in '"() + * alone on its line. This is... confusing, because we can have + * any number of embedded delimiters in the text and only the + * last one is valid. + * + * [link1]: moo.com "hello "world" (hi) there + * ^last + * + * The rule is that there must be only spaces between the last + * delimiter, whatever it is, and the newline OR opening curly + * brace (if parsing), which signifies extended attributes. + */ + + if (i + 1 < end && + (data[i] == '\'' || data[i] == '"' || data[i] == '(')) { + title_offset = ++i; + for (garbage = 0; i < end; i++) { + if (data[i] == '\'' || data[i] == '"' || + data[i] == ')') { + title_end = i; + garbage = 0; + continue; + } + if (data[i] == '\n' || data[i] == '\r' || + ((doc->ext_flags & LOWDOWN_ATTRS) && + data[i] == '{')) + break; + if (data[i] != ' ') + garbage = 1; + } + if (garbage) + return 0; + } + + /* + * Now optionally the attributes. These use similar semantics + * where there can be any number of embedded delimiters: only + * the last one is recorded, and there may be no garbage between + * it and the newline. 
+ */ + + if ((doc->ext_flags & LOWDOWN_ATTRS) && + i + 1 < end && data[i] == '{') { + attr_offset = ++i; + for (garbage = 0; i < end; i++) { + if (data[i] == '}') { + attr_end = i; + garbage = 0; + continue; + } + if (data[i] == '\n' || data[i] == '\r') + break; + if (data[i] != ' ') + garbage = 1; + } + if (garbage) + return 0; + } + + if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') + line_end = i + 1; + else + line_end = i; + + /* Garbage after the link or empty link. */ + + if (!line_end || link_end == link_offset) + return 0; + + /* A valid ref has been found, filling-in return structures. */ + + if (last) + *last = line_end; + + if ((ref = calloc(1, sizeof(struct link_ref))) == NULL) + return -1; + TAILQ_INSERT_TAIL(&doc->refq, ref, entries); + + if (id_end - id_offset) { + ref->name = hbuf_new(id_end - id_offset); + if (ref->name == NULL) + return -1; + if (!hbuf_put(ref->name, + data + id_offset, id_end - id_offset)) + return -1; + } + + ref->link = hbuf_new(link_end - link_offset); + if (ref->link == NULL) + return -1; + + if (!hbuf_put(ref->link, + data + link_offset, link_end - link_offset)) + return -1; + + if (title_end > title_offset) { + ref->title = hbuf_new(title_end - title_offset); + if (ref->title == NULL) + return -1; + if (!hbuf_put(ref->title, + data + title_offset, title_end - title_offset)) + return -1; + } + + if (attr_end > attr_offset) { + ref->attrs = hbuf_new(attr_end - attr_offset); + if (ref->attrs == NULL) + return -1; + if (!hbuf_put(ref->attrs, + data + attr_offset, attr_end - attr_offset)) + return -1; + } + + return 1; +} + +/* + * Replace tabs with 4 spaces. + * Return zero on failure (memory), non-zero on success. + */ +static int +expand_tabs(struct lowdown_buf *ob, const char *line, size_t size) +{ + size_t i, tab = 0, org; + + /* + * This code makes two assumptions: + * + * (1) Input is valid UTF-8. (Any byte with top two bits 10 is + * skipped, whether or not it is a valid UTF-8 continuation + * byte.) 
+ * (2) Input contains no combining characters. (Combining + * characters should be skipped but are not.) + */ + + for (i = 0; i < size; i++) { + org = i; + while (i < size && line[i] != '\t') { + /* ignore UTF-8 continuation bytes */ + if ((line[i] & 0xc0) != 0x80) + tab++; + i++; + } + if (i > org && !hbuf_put(ob, line + org, i - org)) + return 0; + if (i >= size) + break; + + do { + if (!hbuf_putc(ob, ' ')) + return 0; + tab++; + } while (tab % 4); + } + + return 1; +} + +struct lowdown_doc * +lowdown_doc_new(const struct lowdown_opts *opts) +{ + struct lowdown_doc *doc; + unsigned int extensions = opts ? opts->feat : 0; + size_t i; + + doc = calloc(1, sizeof(struct lowdown_doc)); + if (doc == NULL) + return NULL; + + doc->maxdepth = opts == NULL ? 128 : opts->maxdepth; + doc->active_char['*'] = MD_CHAR_EMPHASIS; + doc->active_char['_'] = MD_CHAR_EMPHASIS; + if (extensions & LOWDOWN_STRIKE) + doc->active_char['~'] = MD_CHAR_EMPHASIS; + if (extensions & LOWDOWN_HILITE) + doc->active_char['='] = MD_CHAR_EMPHASIS; + doc->active_char['`'] = MD_CHAR_CODESPAN; + doc->active_char['\n'] = MD_CHAR_LINEBREAK; + doc->active_char['['] = MD_CHAR_LINK; + doc->active_char['!'] = MD_CHAR_IMAGE; + doc->active_char['<'] = MD_CHAR_LANGLE; + doc->active_char['\\'] = MD_CHAR_ESCAPE; + doc->active_char['&'] = MD_CHAR_ENTITY; + if (extensions & LOWDOWN_AUTOLINK) { + doc->active_char[':'] = MD_CHAR_AUTOLINK_URL; + doc->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL; + doc->active_char['w'] = MD_CHAR_AUTOLINK_WWW; + } + if (extensions & LOWDOWN_SUPER) + doc->active_char['^'] = MD_CHAR_SUPERSCRIPT; + if (extensions & LOWDOWN_MATH) + doc->active_char['$'] = MD_CHAR_MATH; + + doc->ext_flags = extensions; + + if (opts != NULL && opts->metasz > 0) { + doc->meta = calloc(opts->metasz, sizeof(char *)); + if (doc->meta == NULL) + goto err; + doc->metasz = opts->metasz; + for (i = 0; i < doc->metasz; i++) { + doc->meta[i] = strdup(opts->meta[i]); + if (doc->meta[i] == NULL) + goto err; + } + } + if 
(opts != NULL && opts->metaovrsz > 0) { + doc->metaovr = calloc(opts->metaovrsz, sizeof(char *)); + if (doc->metaovr == NULL) + goto err; + doc->metaovrsz = opts->metaovrsz; + for (i = 0; i < doc->metaovrsz; i++) { + doc->metaovr[i] = strdup(opts->metaovr[i]); + if (doc->metaovr[i] == NULL) + goto err; + } + } + + return doc; +err: + lowdown_doc_free(doc); + return NULL; +} + +/* + * Parse a MMD meta-data value. + * If the value is a single line, both leading and trailing whitespace + * will be stripped. + * If the value spans multiple lines, leading whitespace from the first + * line will be stripped and any following lines will be taken as is. + * Returns a pointer to the value and the length of the value will be + * written to "len"; + */ +static const char * +parse_metadata_val(const char *data, size_t sz, size_t *len) +{ + const char *val; + size_t i, nlines = 0, nspaces, peek = 0; + int startws; + + /* Skip leading whitespace. */ + + i = countspaces(data, 0, sz, 0); + + val = data; + sz -= i; + + /* Find end of line and count trailing whitespace. */ + + for (i = nspaces = 0; i < sz && data[i] != '\n'; i++) + if (data[i] == ' ') + nspaces++; + else + nspaces = 0; + *len = i; + + /* + * Iterate through zero or more following multilines. + * Multilines are terminated by a line containing a colon (that + * is not offset by whitespace) or a blank line. + */ + + startws = i + 1 < sz && + (data[i + 1] == ' ' || + data[i + 1] == '\t'); + + for (i++; i < sz; i++) { + /* + * This block is executed within the line. + * We use "peek" to see how far into the line we are; + * thus, if we encounter a colon without leading + * whitespace, we know that we're in the next metadata + * and should stop. + */ + + if (startws == 0 && data[i] == ':') + break; + + peek++; + if (data[i] != '\n') + continue; + + /* + * We're at a newline: start the loop again by seeing if + * the next line starts with whitespace. 
+		 */
+
+		nlines++;
+		*len += peek;
+		peek = 0;
+
+		/* (Filtered out prior to calling parse_metadata().) */
+
+		assert(!(i + 1 < sz && data[i + 1] == '\n'));
+
+		/* Check if the next line has leading whitespace. */
+
+		startws = i + 1 < sz &&
+			(data[i + 1] == ' ' ||
+			 data[i + 1] == '\t');
+	}
+
+	/* Last metadata in section. */
+
+	if (i == sz && peek)
+		*len += peek + 1;
+
+	/* Only remove trailing whitespace from a single line. */
+
+	if (nlines == 0)
+		*len -= nspaces;
+
+	return val;
+}
+
+/*
+ * Parse MMD key-value meta-data pairs.
+ * Store the output in the doc's "metaq", as we might be using the
+ * values for variable replacement elsewhere in this document.
+ * A key that was already seen replaces the earlier entry, both in
+ * "metaq" and among the children of the current header node.
+ * Returns 0 if this is not metadata (empty input, no trailing newline,
+ * or no colon on the first line), >0 if it is, <0 on failure (memory).
+ */
+static int
+parse_metadata(struct lowdown_doc *doc, const char *data, size_t sz)
+{
+	size_t			 i, j, pos = 0, vsz, keysz;
+	struct lowdown_meta	*m;
+	struct lowdown_node	*n, *nn;
+	const char		*val, *key;
+	char			*cp, *buf;
+
+	if (sz == 0 || data[sz - 1] != '\n')
+		return 0;
+
+	/*
+	 * Check the first line for a colon to see if we should do
+	 * metadata parsing at all.
+	 * This is a convenience for regular markdown so that initial
+	 * lines (not headers) don't get sucked into metadata.
+	 */
+
+	for (pos = 0; pos < sz; pos++)
+		if (data[pos] == '\n' || data[pos] == ':')
+			break;
+
+	if (pos == sz || data[pos] == '\n')
+		return 0;
+
+	/*
+	 * Put the metadata into the document's metaq because we might
+	 * set variables.
+	 */
+
+	for (pos = 0; pos < sz; ) {
+		key = &data[pos];
+		for (i = pos; i < sz; i++)
+			if (data[i] == ':')
+				break;
+
+		keysz = i - pos;
+		if ((cp = buf = malloc(keysz + 1)) == NULL)
+			return -1;
+
+		/*
+		 * Normalise the key to lowercase alphanumerics, "-",
+		 * and "_", discard whitespace, replace other characters
+		 * with a question mark.
+		 */
+
+		for (j = 0; j < keysz; j++) {
+			if (isalnum((unsigned char)key[j]) ||
+			    '-' == key[j] || '_' == key[j]) {
+				*cp++ = tolower((unsigned char)key[j]);
+				continue;
+			} else if (isspace((unsigned char)key[j]))
+				continue;
+			*cp++ = '?';
+		}
+		*cp = '\0';
+
+		/*
+		 * If we've already encountered this key, remove it from
+		 * both the local queue and the meta nodes.
+		 */
+
+		TAILQ_FOREACH(m, doc->metaq, entries)
+			if (strcmp(m->key, buf) == 0) {
+				TAILQ_REMOVE(doc->metaq, m, entries);
+				free(m->key);
+				free(m->value);
+				free(m);
+				break;
+			}
+
+		assert(doc->current->type == LOWDOWN_DOC_HEADER);
+		TAILQ_FOREACH(n, &doc->current->children, entries) {
+			assert(n->type == LOWDOWN_META);
+			if (hbuf_streq(&n->rndr_meta.key, buf)) {
+				TAILQ_REMOVE(&doc->current->children, n, entries);
+				lowdown_node_free(n);
+				break;
+			}
+		}
+
+		if ((n = pushnode(doc, LOWDOWN_META)) == NULL) {
+			free(buf);
+			return -1;
+		}
+		if (!hbuf_create(&n->rndr_meta.key, buf, cp - buf)) {
+			free(buf);
+			return -1;
+		}
+		free(buf);
+
+		/*
+		 * Queue the entry before filling it in: if a later
+		 * allocation fails, it is still reachable from "metaq".
+		 */
+
+		m = calloc(1, sizeof(struct lowdown_meta));
+		if (m == NULL)
+			return -1;
+		TAILQ_INSERT_TAIL(doc->metaq, m, entries);
+
+		m->key = strndup
+			(n->rndr_meta.key.data,
+			 n->rndr_meta.key.size);
+		if (m->key == NULL)
+			return -1;
+
+		/* Key without a colon runs to the end: empty value. */
+
+		if (i == sz) {
+			if ((m->value = strdup("")) == NULL)
+				return -1;
+			popnode(doc, n);
+			break;
+		}
+
+		/*
+		 * Parse the value, creating a node if nonempty.  Make
+		 * sure that the metadata has an empty value if there's
+		 * no value to be parsed.
+		 */
+
+		assert(data[i] == ':');
+		i++;
+		while (i < sz && isspace((unsigned char)data[i]))
+			i++;
+		if (i == sz) {
+			if ((m->value = strdup("")) == NULL)
+				return -1;
+			popnode(doc, n);
+			break;
+		}
+
+		val = parse_metadata_val(&data[i], sz - i, &vsz);
+
+		if ((m->value = strndup(val, vsz)) == NULL)
+			return -1;
+		if ((nn = pushnode(doc, LOWDOWN_NORMAL_TEXT)) == NULL)
+			return -1;
+		if (!hbuf_push(&nn->rndr_normal_text.text, val, vsz))
+			return -1;
+
+		popnode(doc, nn);
+		popnode(doc, n);
+
+		pos = i + vsz + 1;
+	}
+
+	return 1;
+}
+
+/*
+ * Parse the buffer in data of length size into a node tree.
+ * If metaq is not NULL, it is (re-)initialised and filled with the
+ * metadata encountered while parsing and is never freed here;
+ * otherwise a temporary queue is used and freed before returning.
+ * Metadata given in the doc's "meta" and "metaovr" arrays is always
+ * processed; metadata embedded in the document only if
+ * LOWDOWN_METADATA has been set.
+ * On success, returns the root node and, if maxn is not NULL, stores
+ * the doc's node counter in it.
+ * Returns NULL on failure.
+ */
+struct lowdown_node *
+lowdown_doc_parse(struct lowdown_doc *doc, size_t *maxn,
+	const char *data, size_t size, struct lowdown_metaq *metaq)
+{
+	static const unsigned char UTF8_BOM[] = { 0xEF, 0xBB, 0xBF };
+	struct lowdown_buf	*text;
+	size_t			 beg, end, i;
+	const char		*sv;
+	struct lowdown_node	*n, *root = NULL;
+	struct lowdown_metaq	 mq;
+	int			 c, rc = 0;
+
+	/*
+	 * Have a temporary "mq" if "metaq" is not set.  We clear this
+	 * automatically at the tail of the function.
+	 */
+
+	TAILQ_INIT(&mq);
+
+	if (metaq == NULL)
+		metaq = &mq;
+
+	/* Initialise the parser. */
+
+	doc->nodes = 0;
+	doc->depth = 0;
+	doc->current = NULL;
+	doc->in_link_body = 0;
+	doc->foots = 0;
+	doc->metaq = metaq;
+
+	TAILQ_INIT(doc->metaq);
+	TAILQ_INIT(&doc->refq);
+	TAILQ_INIT(&doc->footq);
+
+	if ((text = hbuf_new(64)) == NULL)
+		goto out;
+	if (!hbuf_grow(text, size))
+		goto out;
+	if ((root = pushnode(doc, LOWDOWN_ROOT)) == NULL)
+		goto out;
+
+	/*
+	 * Skip a possible UTF-8 BOM, even though the Unicode standard
+	 * discourages having these in UTF-8 documents.
+	 */
+
+	beg = 0;
+	if (size >= 3 && memcmp(data, UTF8_BOM, 3) == 0)
+		beg += 3;
+
+	/*
+	 * Zeroth pass: metadata.  First process given metadata, then
+	 * in-document metadata, then overriding metadata.  The
+	 * in-document metadata is conditionally processed.
+	 */
+
+	if ((n = pushnode(doc, LOWDOWN_DOC_HEADER)) == NULL)
+		goto out;
+
+	for (i = 0; i < doc->metasz; i++)
+		if (parse_metadata(doc,
+		    doc->meta[i], strlen(doc->meta[i])) < 0)
+			goto out;
+
+	/* FIXME: CRLF EOLNs. */
+
+	/*
+	 * Require at least two bytes at "beg".  This is written as
+	 * "beg + 1 < size" because "size - 1" wraps around for an
+	 * empty document (size_t is unsigned), which would let
+	 * data[beg] be read out of bounds.
+	 */
+
+	if ((doc->ext_flags & LOWDOWN_METADATA) &&
+	    beg + 1 < size &&
+	    isalnum((unsigned char)data[beg])) {
+		sv = &data[beg];
+		/* The metadata block ends at the first blank line. */
+		for (end = beg + 1; end < size; end++) {
+			if (data[end] == '\n' &&
+			    data[end - 1] == '\n')
+				break;
+		}
+		if ((c = parse_metadata(doc, sv, end - beg)) > 0)
+			beg = end + 1;
+		else if (c < 0)
+			goto out;
+	}
+
+	for (i = 0; i < doc->metaovrsz; i++)
+		if (parse_metadata(doc,
+		    doc->metaovr[i], strlen(doc->metaovr[i])) < 0)
+			goto out;
+
+	popnode(doc, n);
+
+	/*
+	 * First pass: looking for references and footnotes, copying
+	 * everything else.
+	 */
+
+	while (beg < size) {
+		if (doc->ext_flags & LOWDOWN_FOOTNOTES) {
+			c = is_footnote(doc, data, beg, size, &end);
+			if (c > 0) {
+				beg = end;
+				continue;
+			} else if (c < 0)
+				goto out;
+		}
+
+		if ((c = is_ref(doc, data, beg, size, &end)) > 0) {
+			beg = end;
+			continue;
+		} else if (c < 0)
+			goto out;
+
+		/* Skipping to the next line. */
+
+		end = beg;
+		while (end < size && data[end] != '\n' &&
+		    data[end] != '\r')
+			end++;
+
+		/* Adding the line body if present. */
+
+		if (end > beg &&
+		    !expand_tabs(text, data + beg, end - beg))
+			goto out;
+
+		/* Add one \n per newline. */
+
+		while (end < size && (data[end] == '\n' ||
+		    data[end] == '\r')) {
+			if (data[end] == '\n' ||
+			    (end + 1 < size && data[end + 1] != '\n'))
+				if (!hbuf_putc(text, '\n'))
+					goto out;
+			end++;
+		}
+
+		beg = end;
+	}
+
+	/*
+	 * Second pass (after header): rendering the document body and
+	 * footnotes.
+	 */
+
+	if (text->size) {
+		/* Adding a final newline if not already present. */
+		if (text->data[text->size - 1] != '\n' &&
+		    text->data[text->size - 1] != '\r')
+			if (!hbuf_putc(text, '\n'))
+				goto out;
+		if (!parse_block(doc, text->data, text->size))
+			goto out;
+	}
+
+	rc = 1;
+out:
+	hbuf_free(text);
+	free_link_refs(&doc->refq);
+	free_foot_refq(&doc->footq);
+	/* Only the temporary queue is released, never the caller's. */
+	lowdown_metaq_free(&mq);
+
+	if (rc) {
+		if (maxn != NULL)
+			*maxn = doc->nodes;
+		popnode(doc, root);
+		assert(doc->depth == 0);
+	} else {
+		lowdown_node_free(root);
+		root = NULL;
+	}
+	return root;
+}
+
+/*
+ * Free a node: its type-specific buffers, all of its children
+ * (recursively), and the node itself.
+ * Does nothing if p is NULL.
+ */
+void
+lowdown_node_free(struct lowdown_node *p)
+{
+	struct lowdown_node *n;
+
+	if (p == NULL)
+		return;
+
+	switch (p->type) {
+	case LOWDOWN_BLOCKCODE:
+		hbuf_free(&p->rndr_blockcode.text);
+		hbuf_free(&p->rndr_blockcode.lang);
+		break;
+	case LOWDOWN_BLOCKHTML:
+		hbuf_free(&p->rndr_blockhtml.text);
+		break;
+	case LOWDOWN_CODESPAN:
+		hbuf_free(&p->rndr_codespan.text);
+		break;
+	case LOWDOWN_ENTITY:
+		hbuf_free(&p->rndr_entity.text);
+		break;
+	case LOWDOWN_HEADER:
+		hbuf_free(&p->rndr_header.attr_cls);
+		hbuf_free(&p->rndr_header.attr_id);
+		break;
+	case LOWDOWN_IMAGE:
+		hbuf_free(&p->rndr_image.link);
+		hbuf_free(&p->rndr_image.title);
+		hbuf_free(&p->rndr_image.dims);
+		hbuf_free(&p->rndr_image.alt);
+		hbuf_free(&p->rndr_image.attr_width);
+		hbuf_free(&p->rndr_image.attr_height);
+		hbuf_free(&p->rndr_image.attr_cls);
+		hbuf_free(&p->rndr_image.attr_id);
+		break;
+	case LOWDOWN_LINK:
+		hbuf_free(&p->rndr_link.link);
+		hbuf_free(&p->rndr_link.title);
+		hbuf_free(&p->rndr_link.attr_cls);
+		hbuf_free(&p->rndr_link.attr_id);
+		break;
+	case LOWDOWN_LINK_AUTO:
+		hbuf_free(&p->rndr_autolink.link);
+		break;
+	case LOWDOWN_MATH_BLOCK:
+		hbuf_free(&p->rndr_math.text);
+		break;
+	case LOWDOWN_META:
+		hbuf_free(&p->rndr_meta.key);
+		break;
+	case LOWDOWN_NORMAL_TEXT:
+		hbuf_free(&p->rndr_normal_text.text);
+		break;
+	case LOWDOWN_RAW_HTML:
+		hbuf_free(&p->rndr_raw_html.text);
+		break;
+	case LOWDOWN_TABLE_HEADER:
+		free(p->rndr_table_header.flags);
+		break;
+	default:
+		break;
+	}
+
+	while ((n = TAILQ_FIRST(&p->children)) != NULL) {
+		TAILQ_REMOVE(&p->children, n, entries);
+		lowdown_node_free(n);
+	}
+
+	free(p);
+}
+
+/*
+ * Remove and free every entry of the queue (key, value, and the entry
+ * itself).  The queue head is not freed.
+ * Does nothing if q is NULL.
+ */
+void
+lowdown_metaq_free(struct lowdown_metaq *q)
+{
+	struct lowdown_meta *m;
+
+	if (q == NULL)
+		return;
+
+	while ((m = TAILQ_FIRST(q)) != NULL) {
+		TAILQ_REMOVE(q, m, entries);
+		free(m->key);
+		free(m->value);
+		free(m);
+	}
+}
+
+/*
+ * Free the doc's "meta" and "metaovr" string arrays and the doc itself.
+ * Does nothing if doc is NULL.
+ */
+void
+lowdown_doc_free(struct lowdown_doc *doc)
+{
+	size_t	 i;
+
+	if (doc == NULL)
+		return;
+
+	for (i = 0; i < doc->metasz; i++)
+		free(doc->meta[i]);
+	for (i = 0; i < doc->metaovrsz; i++)
+		free(doc->metaovr[i]);
+
+	free(doc->meta);
+	free(doc->metaovr);
+	free(doc);
+}
diff --git a/document.o b/document.o
Binary files differ.
diff --git a/entity.c b/entity.c
@@ -0,0 +1,482 @@
+/*	$Id$ */
+/*
+ * Copyright (c) 2020, Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "lowdown.h" +#include "extern.h" + +struct ent { + const char *iso; /* html entity */ + uint32_t unicode; /* decimal unicode */ + const char *nroff; /* -ms/-man */ + const char *tex; /* latex */ + /** + * For latex: if zero, escape as-is. If just TEX_ENT_ASCII, + * don't escape at all. If just TEX_ENT_MATH, pass as math mode + * escaped. If both TEX_ENT_ASCII and TEX_ENT_MATH, pass as + * math mode and don't escape. + */ + unsigned char texflags; +}; + +static const struct ent ents[] = { + { "AElig", 198, "AE", "AE{}", 0 }, + { "Aacute", 193, "'A", "'{A}", 0 }, + { "Acirc", 194, "^A", "^{A}", 0 }, + { "Agrave", 192, "`A", "`{A}", 0 }, + { "Alpha", 913, "*A", "A", TEX_ENT_ASCII }, + { "Aring", 197, "oA", "AA{}", 0 }, + { "Atilde", 195, "~A", "~{A}", 0 }, + { "Auml", 196, ":A", "\"{A}", 0 }, + { "Beta", 914, "*B", "B", TEX_ENT_ASCII }, + { "Ccedil", 199, ",C", "c{C}", 0 }, + { "Chi", 935, "*X", "X", TEX_ENT_ASCII }, + { "Dagger", 8225, "dg", "ddag{}", 0 }, + { "Delta", 916, "*D", "Delta", TEX_ENT_MATH }, + { "ETH", 208, "-D", "DH{}", 0 }, + { "Eacute", 201, "'E", "'{E}", 0 }, + { "Ecirc", 202, "^E", "^{E}", 0 }, + { "Egrave", 200, "`E", "`{E}", 0 }, + { "Epsilon", 917, "*E", "E", TEX_ENT_ASCII }, + { "Eta", 919, "*Y", "E", TEX_ENT_ASCII }, + { "Euml", 203, ":E", "\"{E}", 0 }, + { "Gamma", 915, "*G", "Gamma", TEX_ENT_MATH }, + { "Iacute", 205, "'I", "'{I}", 0 }, + { "Icirc", 206, "^I", "^{I}", 0 }, + { "Igrave", 204, "`I", "`{I}", 0 }, + { "Iota", 921, "*I", "I", TEX_ENT_ASCII }, + { "Iuml", 207, ":I", "\"{I}", 0 }, + { "Kappa", 922, "*K", "K", TEX_ENT_ASCII }, + { "Lambda", 923, "*L", "Lambda", TEX_ENT_MATH }, + { "Mu", 924, "*M", "M", TEX_ENT_ASCII }, + { "Ntilde", 209, "~N", "~{N}", 0 }, + { "Nu", 925, "*N", "N", TEX_ENT_ASCII }, 
+ { "OElig", 338, "OE", "OE{}", 0 }, + { "Oacute", 211, "'O", "'{O}", 0 }, + { "Ocirc", 212, "^O", "^{O}", 0 }, + { "Ograve", 210, "`O", "`{O}", 0 }, + { "Omega", 937, "*W", "Omega", TEX_ENT_MATH }, + { "Omicron", 927, "*O", "O", TEX_ENT_ASCII }, + { "Oslash", 216, "/O", "O{}", 0 }, + { "Otilde", 213, "~O", "~{O}", 0 }, + { "Ouml", 214, ":O", "\"{O}", 0 }, + { "Phi", 934, "*F", "Phi", TEX_ENT_MATH }, + { "Pi", 928, "*P", "Pi", TEX_ENT_MATH }, + { "Prime", 8243, NULL, "^{\\prime\\prime}", TEX_ENT_MATH | TEX_ENT_ASCII }, + { "Psi", 936, "*Q", "Psi", TEX_ENT_MATH }, + { "Rho", 929, "*R", "R", TEX_ENT_ASCII }, + { "Scaron", 352, "vS", "v{S}", 0 }, + { "Sigma", 931, "*S", "Sigma", TEX_ENT_MATH }, + { "THORN", 222, "TP", "TH{}", 0 }, + { "Tau", 932, "*T", "T", TEX_ENT_ASCII }, + { "Theta", 920, "*H", "Theta", TEX_ENT_MATH }, + { "Uacute", 218, "'U", "'{U}", 0 }, + { "Ucirc", 219, "^U", "^{U}", 0 }, + { "Ugrave", 217, "`U", "`{U}", 0 }, + { "Upsilon", 933, "*U", "Upsilon", TEX_ENT_MATH }, + { "Uuml", 220, ":U", "\"{U}", 0 }, + { "Xi", 926, "*C", "Xi", TEX_ENT_MATH }, + { "Yacute", 221, "'Y", "'{Y}", 0 }, + { "Yuml", 376, ":Y", "\"{Y}", 0 }, + { "Zeta", 918, "*Z", "Z", TEX_ENT_ASCII }, + { "aacute", 225, "'a", "'{a}", 0 }, + { "acirc", 226, "^a", "^{a}", 0 }, + { "acute", 180, "'", "'{}", 0 }, + { "aelig", 230, "ae", "ae{}", 0 }, + { "agrave", 224, "`a", "`{a}", 0 }, + { "alefsym", 8501, "Ah", "aleph", TEX_ENT_MATH }, + { "alpha", 945, "*a", "alpha", TEX_ENT_MATH }, + { "amp", 38, NULL, "&{}", 0 }, + { "and", 8743, "AN", "wedge", TEX_ENT_MATH }, + { "ang", 8736, "/_", "angle", TEX_ENT_MATH }, + { "aring", 229, "oa", "aa{}", 0 }, + { "asymp", 8776, "|=", "asymp", TEX_ENT_MATH }, + { "atilde", 227, "~a", "~{a}", 0 }, + { "auml", 228, ":a", "\"{a}", 0 }, + { "bdquo", 8222, NULL, NULL, 0 }, /* XXX */ + { "beta", 946, "*b", "beta", TEX_ENT_MATH }, + { "brvbar", 166, NULL, "textbrokenbar{}", 0 }, + { "bull", 8226, "bu", "textbullet{}", 0 }, + { "cap", 8745, "ca", "cap", 
TEX_ENT_MATH }, + { "ccedil", 231, ",c", "c{c}", 0 }, + { "cedil", 184, "ac", "c{}", 0 }, + { "cent", 162, "ct", "textcent{}", 0 }, + { "chi", 967, "*x", "chi", TEX_ENT_MATH }, + { "circ", 710, "a^", "^{}", 0 }, + { "cong", 8773, "=~", "cong", TEX_ENT_MATH }, + { "copy", 169, "co", "copyright{}", 0 }, + { "crarr", 8629, NULL, NULL, 0 }, /* XXX */ + { "cup", 8746, "cu", "cup", TEX_ENT_MATH }, + { "curren", 164, NULL, "textcurrency{}", 0 }, + { "dArr", 8659, NULL, "Downarrow", TEX_ENT_MATH }, + { "dagger", 8224, "dg", "dag{}", 0 }, + { "darr", 8595, "da", "downarrow", TEX_ENT_MATH }, + { "deg", 176, "de", "textdegree{}", 0 }, + { "delta", 948, "*d", "delta", TEX_ENT_MATH }, + { "divide", 247, "tdi", "div", TEX_ENT_MATH }, + { "eacute", 233, "'e", "'{e}", 0 }, + { "ecirc", 234, "^e", "^{e}", 0 }, + { "egrave", 232, "`e", "`{e}", 0 }, + { "empty", 8709, "es", "emptyset", TEX_ENT_MATH }, + { "emsp", 8195, NULL, "hspace{1em}", 0 }, + { "ensp", 8194, NULL, "hspace{0.5em}", 0 }, + { "epsilon", 949, "*e", "epsilon", TEX_ENT_MATH }, + { "equiv", 8801, "==", "equiv", TEX_ENT_MATH }, + { "eta", 951, "*y", "eta", TEX_ENT_MATH }, + { "eth", 240, "Sd", "dh{}", 0 }, + { "euml", 235, ":e", "\"{e}", 0 }, + { "euro", 8364, "Eu", "texteuro{}", 0 }, + { "exist", 8707, "te", "exists", TEX_ENT_MATH }, + { "fnof", 402, NULL, "f", TEX_ENT_MATH }, + { "forall", 8704, NULL, "forall", TEX_ENT_MATH }, + { "frac12", 189, "12", "sfrac{1}{2}", TEX_ENT_MATH }, + { "frac14", 188, "14", "sfrac{1}{4}", TEX_ENT_MATH }, + { "frac34", 190, "34", "sfrac{3}{4}", TEX_ENT_MATH }, + { "frasl", 8260, NULL, NULL, 0 }, /* XXX */ + { "gamma", 947, "*g", "gamma", TEX_ENT_MATH }, + { "ge", 8805, ">=", "geq", TEX_ENT_MATH }, + { "gt", 62, NULL, "textgreater{}", 0 }, + { "hArr", 8660, NULL, "Leftrightarrow", TEX_ENT_MATH }, + { "harr", 8596, "<>", "leftrightarrow", TEX_ENT_MATH }, + { "hellip", 8230, NULL, "ldots{}", 0 }, + { "iacute", 237, "'i", "'{i}", 0 }, + { "icirc", 238, "^i", "^{i}", 0 }, + { "iexcl", 161, 
"r!", "textexclamdown{}", 0 }, + { "igrave", 236, "`i", "`{i}", 0 }, + { "image", 8465, NULL, "Im", TEX_ENT_MATH }, + { "infin", 8734, "if", "infty", TEX_ENT_MATH }, + { "int", 8747, "integral", "int", TEX_ENT_MATH }, + { "iota", 953, "*i", "iota", TEX_ENT_MATH }, + { "iquest", 191, "r?", "textquestiondown{}", 0 }, + { "isin", 8712, NULL, "in", TEX_ENT_MATH }, + { "iuml", 239, ":i", "\"{i}", 0 }, + { "kappa", 954, "*k", "kappa", TEX_ENT_MATH }, + { "lArr", 8656, NULL, "Leftarrow", TEX_ENT_MATH }, + { "lambda", 955, "*l", "lambda", TEX_ENT_MATH }, + { "lang", 9001, "la", "langle", TEX_ENT_MATH }, + { "laquo", 171, "Fo", "guillemetleft{}", 0 }, + { "larr", 8592, "<-", "leftarrow", TEX_ENT_MATH }, + { "lceil", 8968, NULL, "lceil", TEX_ENT_MATH }, + { "ldquo", 8220, "lq", "``", TEX_ENT_ASCII }, + { "le", 8804, NULL, "leq", TEX_ENT_MATH }, + { "lfloor", 8970, "lf", "lfloor", TEX_ENT_MATH }, + { "lowast", 8727, NULL, "_\\ast", TEX_ENT_MATH }, + { "lrm", 8206, NULL, NULL, 0 }, /* XXX */ + { "lsaquo", 8249, NULL, NULL, 0 }, + { "lsquo", 8216, "oq", "`", TEX_ENT_ASCII }, + { "lt", 60, NULL, "textless{}", 0 }, + { "macr", 175, NULL, "={}", 0 }, + { "mdash", 8212, "em", "---", TEX_ENT_ASCII }, + { "micro", 181, NULL, "textmu{}", 0 }, + { "middot", 183, NULL, "textperiodcentered{}", 0 }, + { "minus", 8722, "mi", "-{}", 0 }, + { "mu", 956, "*m", "mu", TEX_ENT_MATH }, + { "nabla", 8711, NULL, "nabla", TEX_ENT_MATH }, + { "nbsp", 160, "~", "~", TEX_ENT_ASCII }, + { "ndash", 8211, "en", "--", TEX_ENT_ASCII }, + { "ne", 8800, "!=", "not=", TEX_ENT_MATH }, + { "ni", 8715, NULL, "ni", TEX_ENT_MATH }, + { "not", 172, "no", "lnot", TEX_ENT_MATH }, + { "notin", 8713, NULL, "not\\in", TEX_ENT_MATH }, + { "nsub", 8836, NULL, "not\\subset", TEX_ENT_MATH }, + { "ntilde", 241, "~n", "~{n}", 0 }, + { "nu", 957, "*n", "nu", TEX_ENT_MATH }, + { "oacute", 243, "'o", "'{o}", 0 }, + { "ocirc", 244, "^o", "^{o}", 0 }, + { "oelig", 339, "oe", "oe{}", 0 }, + { "ograve", 242, "`o", "`{o}", 0 }, + { 
"oline", 8254, NULL, "ominus", TEX_ENT_MATH }, + { "omega", 969, "*w", "omega", TEX_ENT_MATH }, + { "omicron", 959, "*o", "omicron", TEX_ENT_MATH }, + { "oplus", 8853, NULL, "oplus", TEX_ENT_MATH }, + { "or", 8744, "OR", "vee", TEX_ENT_MATH }, + { "ordf", 170, NULL, "textordfeminine{}", 0 }, + { "ordm", 186, NULL, "textordmasculine{}", 0 }, + { "oslash", 248, "/o", "oslash", TEX_ENT_MATH }, + { "otilde", 245, "~o", "~{o}", 0 }, + { "otimes", 8855, NULL, "otimes", TEX_ENT_MATH }, + { "ouml", 246, ":o", "\"{o}", 0 }, + { "para", 182, NULL, "P{}", 0 }, + { "part", 8706, "pd", "partial", TEX_ENT_MATH }, + { "permil", 8240, NULL, "textperthousand{}", 0 }, + { "perp", 8869, NULL, "perp", TEX_ENT_MATH }, + { "phi", 966, "*f", "phi", TEX_ENT_MATH }, + { "pi", 960, "*p", "pi", TEX_ENT_MATH }, + { "piv", 982, "+p", "varpi", TEX_ENT_MATH }, + { "plusmn", 177, "+-", "pm", TEX_ENT_MATH }, + { "pound", 163, NULL, "pounds{}", 0 }, + { "prime", 8242, NULL, "^\\prime{}", TEX_ENT_MATH | TEX_ENT_ASCII }, + { "prod", 8719, "poduct", "prod", TEX_ENT_MATH }, + { "prop", 8733, NULL, "propto", TEX_ENT_MATH }, + { "psi", 968, "*q", "psi", TEX_ENT_MATH }, + { "quot", 34, NULL, "\"", TEX_ENT_ASCII }, + { "rArr", 8658, NULL, "Rightarrow", TEX_ENT_MATH }, + { "radic", 8730, NULL, "surd", TEX_ENT_MATH }, + { "rang", 9002, "ra", "rangle", TEX_ENT_MATH }, + { "raquo", 187, "Fc", "guillemotright{}", 0 }, + { "rarr", 8594, "->", "rightarrow", TEX_ENT_MATH }, + { "rceil", 8969, NULL, "rceil", TEX_ENT_MATH }, + { "rdquo", 8221, "rq", "''", TEX_ENT_ASCII }, + { "real", 8476, NULL, "Re", TEX_ENT_MATH }, + { "reg", 174, "rg", "textregistered{}", 0 }, + { "rfloor", 8971, "rf", "rfloor", TEX_ENT_MATH }, + { "rho", 961, "*r", "rho", TEX_ENT_MATH }, + { "rlm", 8207, NULL, NULL, 0 }, /* XXX */ + { "rsaquo", 8250, NULL, NULL, 0 }, /* XXX */ + { "rsquo", 8217, "cq", "'", TEX_ENT_ASCII }, + { "sbquo", 8218, NULL, NULL, 0 }, /* XXX */ + { "scaron", 353, "vs", "v{s}", 0 }, + { "sdot", 8901, NULL, "cdot", 
TEX_ENT_MATH }, + { "sect", 167, "sc", "S{}", 0 }, + { "shy", 173, NULL, "-{}", 0 }, + { "sigma", 963, "*s", "sigma", TEX_ENT_MATH }, + { "sigmaf", 962, "ts", "sigmav", TEX_ENT_MATH }, /* XXX?? */ + { "sim", 8764, "ap", "sim", TEX_ENT_MATH }, + { "sub", 8834, "sb", "subset", TEX_ENT_MATH }, + { "sube", 8838, "ib", "subseteq", TEX_ENT_MATH }, + { "sum", 8721, "sum", "sum", TEX_ENT_MATH }, + { "sup", 8835, "sp", "supset", TEX_ENT_MATH }, + { "sup1", 185, "S1", "$^1$", TEX_ENT_ASCII }, + { "sup2", 178, "S2", "$^2$", TEX_ENT_ASCII }, + { "sup3", 179, "S3", "$^3$", TEX_ENT_ASCII }, + { "supe", 8839, "ip", "supseteq", TEX_ENT_MATH }, + { "szlig", 223, "ss", "ss{}", 0 }, + { "tau", 964, "*t", "tau", TEX_ENT_MATH }, + { "there4", 8756, "3d", "therefore", TEX_ENT_MATH }, + { "theta", 952, "*h", "theta", TEX_ENT_MATH }, + { "thetasym", 977, "+h", "vartheta", TEX_ENT_MATH }, /* XXX?? */ + { "thinsp", 8201, NULL, "hspace{0.167em}", 0 }, + { "thorn", 254, "Tp", "th{}", 0 }, + { "tilde", 732, "ti", "~{}", 0 }, + { "times", 215, "mu", "times", TEX_ENT_MATH }, + { "trade", 8482, "tm", "texttrademark{}", 0 }, + { "uArr", 8657, NULL, "Uparrow", TEX_ENT_MATH }, + { "uacute", 250, "'u", "'{u}", 0 }, + { "uarr", 8593, "ua", "uparrow", TEX_ENT_MATH }, + { "ucirc", 251, "^u", "^{u}", 0 }, + { "ugrave", 249, "`u", "`{u}", 0 }, + { "uml", 168, "ad", "\"{}", 0 }, + { "upsih", 978, NULL, NULL, 0 }, /* XXX */ + { "upsilon", 965, "*u", "upsilon", TEX_ENT_MATH }, + { "uuml", 252, ":u", "\"{u}", 0 }, + { "weierp", 8472, "wp", "wp", TEX_ENT_MATH }, + { "xi", 958, "*c", "xi", TEX_ENT_MATH }, + { "yacute", 253, "'y", "'{y}", 0 }, + { "yen", 165, "Ye", "textyen{}", 0 }, + { "yuml", 255, ":y", "\"{y}", 0 }, + { "zeta", 950, "*z", "zeta", TEX_ENT_MATH }, + { "zwj", 8205, NULL, NULL, 0 }, /* XXX */ + { "zwnj", 8204, NULL, NULL, 0 }, /* XXX */ + { NULL, 0, NULL, NULL, 0 } +}; + +static int32_t +entity_find_num(const struct lowdown_buf *buf) +{ + char b[32]; + char *ep; + unsigned long long ulval; + int 
base; + + if (buf->size < 4) + return -1; + + /* Copy a hex or decimal value. */ + + if (buf->data[2] == 'x' || buf->data[2] == 'X') { + if (buf->size < 5) + return -1; + if (buf->size - 4 > sizeof(b) - 1) + return -1; + memcpy(b, buf->data + 3, buf->size - 4); + b[buf->size - 4] = '\0'; + base = 16; + } else { + if (buf->size - 3 > sizeof(b) - 1) + return -1; + memcpy(b, buf->data + 2, buf->size - 3); + b[buf->size - 3] = '\0'; + base = 10; + } + + /* + * Convert within the given base. + * This calling syntax is from OpenBSD's strtoull(3). + */ + + errno = 0; + ulval = strtoull(b, &ep, base); + if (b[0] == '\0' || *ep != '\0') + return -1; + if (errno == ERANGE && ulval == ULLONG_MAX) + return -1; + if (ulval > INT32_MAX) + return -1; + + return (int32_t)ulval; +} + +/* + * Convert a named entity to a unicode codepoint. + * Return -1 on failure. + */ +static const struct ent * +entity_find_named(const struct lowdown_buf *buf) +{ + char b[32]; + size_t i; + + /* + * Copy into NUL-terminated buffer for easy strcmp(). + * We omit the leading '&' and trailing ';'. + */ + + if (buf->size - 2 > sizeof(b) - 1) + return NULL; + memcpy(b, buf->data + 1, buf->size - 2); + b[buf->size - 2] = '\0'; + + /* TODO: can be trivially sped up by using a binary search. */ + + for (i = 0; ents[i].iso != NULL; i++) + if (strcmp(b, ents[i].iso) == 0) + return &ents[i]; + + return NULL; +} + +/* + * Basic sanity of HTML entity. + * Needs to be &xyz; + * Return zero on failure, non-zero on success. + */ +static int +entity_sane(const struct lowdown_buf *buf) +{ + + if (buf->size < 3 || + buf->data[0] != '&' || + buf->data[buf->size - 1] != ';') + return 0; + return 1; +} + +/* + * Look up an entity and return its decimal value or -1 on failure (bad + * formatting or couldn't find entity). + * Handles both numeric (decimal and hex) and common named ones. 
+ */
+int32_t
+entity_find_iso(const struct lowdown_buf *buf)
+{
+	const struct ent	*ent;
+
+	if (!entity_sane(buf))
+		return -1;
+
+	/* "&#..." is a numeric reference; anything else is a name. */
+
+	if (buf->data[1] == '#')
+		return entity_find_num(buf);
+
+	ent = entity_find_named(buf);
+	if (ent == NULL)
+		return -1;
+
+	assert(ent->unicode < INT32_MAX);
+	return ent->unicode;
+}
+
+/**
+ * Look for the roff entity corresponding to "buf".  It will either
+ * return a special character name (which must be escaped using the
+ * usual \(xx or whatever) or NULL.  If NULL and "iso" is -1, the
+ * entity was malformed or couldn't be found.  If NULL and "iso" is
+ * >= 0, "iso" is a unicode character number that must be further
+ * escaped.
+ */
+const char *
+entity_find_nroff(const struct lowdown_buf *buf, int32_t *iso)
+{
+	const struct ent	*ent;
+
+	*iso = -1;
+
+	if (!entity_sane(buf))
+		return NULL;
+
+	/* Named entity: the table row gives both codepoint and name. */
+
+	if (buf->data[1] != '#') {
+		ent = entity_find_named(buf);
+		if (ent == NULL)
+			return NULL;
+		assert(ent->unicode < INT32_MAX);
+		*iso = ent->unicode;
+		return ent->nroff;
+	}
+
+	/*
+	 * Numeric entity: report the codepoint in "iso" and use the
+	 * first table row carrying that codepoint, if any.
+	 */
+
+	*iso = entity_find_num(buf);
+	if (*iso == -1)
+		return NULL;
+
+	for (ent = ents; ent->iso != NULL; ent++)
+		if ((int32_t)ent->unicode == *iso)
+			return ent->nroff;
+
+	return NULL;
+}
+
+/*
+ * Looks for the TeX entity corresponding to "buf".
+ * If "buf" is a numerical code, looks it up by number; if an HTML (ISO)
+ * code, looks it up by that.
+ * Returns the entity or NULL on failure.
+ * On success, sets the TeX flags.
+ */ +const char * +entity_find_tex(const struct lowdown_buf *buf, unsigned char *fl) +{ + const struct ent *e; + int32_t unicode; + size_t i; + + if (!entity_sane(buf)) + return NULL; + + if (buf->data[1] == '#') { + if ((unicode = entity_find_num(buf)) == -1) + return NULL; + for (i = 0; ents[i].iso != NULL; i++) + if ((int32_t)ents[i].unicode == unicode) { + *fl = ents[i].texflags; + return ents[i].tex; + } + return NULL; + } + + if ((e = entity_find_named(buf)) == NULL) + return NULL; + + assert(e->unicode < INT32_MAX); + *fl = e->texflags; + return e->tex; +} diff --git a/entity.o b/entity.o Binary files differ. diff --git a/extern.h b/extern.h @@ -0,0 +1,77 @@ +/* $Id$ */ +/* + * Copyright (c) 2016--2020 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifndef EXTERN_H +#define EXTERN_H + +struct hentry { + struct lowdown_buf *buf; + TAILQ_ENTRY(hentry) entries; +}; + +TAILQ_HEAD(hentryq, hentry); + +int smarty(struct lowdown_node *, size_t, enum lowdown_type); + +const char *entity_find_nroff(const struct lowdown_buf *, int32_t *); +int32_t entity_find_iso(const struct lowdown_buf *); +const char *entity_find_tex(const struct lowdown_buf *, unsigned char *); +#define TEX_ENT_MATH 0x01 +#define TEX_ENT_ASCII 0x02 + +int hbuf_eq(const struct lowdown_buf *, const struct lowdown_buf *); +int hbuf_streq(const struct lowdown_buf *, const char *); +int hbuf_strprefix(const struct lowdown_buf *, const char *); +void hbuf_free(struct lowdown_buf *); +int hbuf_grow(struct lowdown_buf *, size_t); +int hbuf_clone(const struct lowdown_buf *, struct lowdown_buf *); +struct lowdown_buf + *hbuf_dup(const struct lowdown_buf *); +int hbuf_extract_text(struct lowdown_buf *, const struct lowdown_node *); +const struct lowdown_buf + *hbuf_id(const struct lowdown_buf *, const struct lowdown_node *, + struct hentryq *); +struct lowdown_buf + *hbuf_new(size_t) __attribute__((malloc)); +struct lowdown_buf + *hbuf_dupname(const struct lowdown_buf *); +int hbuf_printf(struct lowdown_buf *, const char *, ...) 
+ __attribute__((format (printf, 2, 3))); +int hbuf_put(struct lowdown_buf *, const char *, size_t); +int hbuf_putb(struct lowdown_buf *, const struct lowdown_buf *); +int hbuf_putc(struct lowdown_buf *, char); +int hbuf_putf(struct lowdown_buf *, FILE *); +int hbuf_puts(struct lowdown_buf *, const char *); +void hbuf_truncate(struct lowdown_buf *); +int hbuf_shortlink(struct lowdown_buf *, const struct lowdown_buf *); +void hentryq_clear(struct hentryq *); + +#define HBUF_PUTSL(output, literal) \ + hbuf_put(output, literal, sizeof(literal) - 1) + +ssize_t halink_email(size_t *, struct lowdown_buf *, char *, size_t, size_t); +ssize_t halink_url(size_t *, struct lowdown_buf *, char *, size_t, size_t); +ssize_t halink_www(size_t *, struct lowdown_buf *, char *, size_t, size_t); + +int hesc_attr(struct lowdown_buf *, const char *, size_t); +int hesc_href(struct lowdown_buf *, const char *, size_t); +int hesc_html(struct lowdown_buf *, const char *, size_t, int, int, int); + +char *rcsdate2str(const char *); +char *date2str(const char *); +char *rcsauthor2str(const char *); + +#endif /* !EXTERN_H */ diff --git a/gemini.c b/gemini.c @@ -0,0 +1,1076 @@ +/* $Id$ */ +/* + * Copyright (c) 2020--2021 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <wchar.h> + +#include "lowdown.h" +#include "extern.h" + +/* + * A standalone link is one that lives in its own paragraph. + */ +#define IS_STANDALONE_LINK(_n, _prev) \ + ((_n)->parent != NULL && \ + (_n)->parent->type == LOWDOWN_PARAGRAPH && \ + (_n)->parent->parent != NULL && \ + (_n)->parent->parent->type == LOWDOWN_ROOT && \ + (_prev) == NULL && \ + TAILQ_NEXT((_n), entries) == NULL) + +/* + * A link queued for display. + * This only happens when using footnote or endnote links. + */ +struct link { + const struct lowdown_node *n; /* node needing link */ + size_t id; /* link-%zu */ + TAILQ_ENTRY(link) entries; +}; + +TAILQ_HEAD(linkq, link); + +struct gemini { + unsigned int flags; /* output flags */ + ssize_t last_blank; /* line breaks or -1 (start) */ + struct lowdown_buf *tmp; /* for temporary allocations */ + size_t nolinkqsz; /* if >0, don't record links */ + int nolinkflush; /* if TRUE, don't flush links */ + struct linkq linkq; /* link queue */ + size_t linkqsz; /* position in link queue */ + wchar_t *buf; /* buffer for counting wchar */ + size_t bufsz; /* size of buf */ + ssize_t headers_offs; /* header offset */ + struct lowdown_buf **foots; /* footnotes */ + size_t footsz; /* footnotes size */ +}; + +/* + * Forward declaration. 
+ */
+static int
+rndr(struct lowdown_buf *, struct lowdown_metaq *,
+	struct gemini *, const struct lowdown_node *);
+
+/*
+ * Unlink and free every entry of the link queue.
+ * The nodes the entries point at are not freed.
+ */
+static void
+link_freeq(struct linkq *q)
+{
+	struct link	*lnk;
+
+	for (lnk = TAILQ_FIRST(q); lnk != NULL; lnk = TAILQ_FIRST(q)) {
+		TAILQ_REMOVE(q, lnk, entries);
+		free(lnk);
+	}
+}
+
+/*
+ * Emit the bracketed label for link number "ref", which must be
+ * non-zero.
+ * With LOWDOWN_GEMINI_LINK_NOREF, no label is printed at all (only the
+ * newline, if "nl").  With LOWDOWN_GEMINI_LINK_ROMAN, the label is in
+ * lowercase roman numerals; otherwise it is in letters (a, b, ..., z,
+ * aa, ab, ...).
+ * If "nl" is non-zero, the label is preceded by a space and followed
+ * by a newline.
+ * Returns the result of hbuf_printf().
+ */
+static int
+rndr_link_ref(const struct gemini *st,
+	struct lowdown_buf *out, size_t ref, int nl)
+{
+	/* Numerals in descending order, subtractive forms included. */
+	static const struct {
+		size_t		 val;
+		const char	*sym;
+	} romans[] = {
+		{ 1000, "m" }, { 900, "cm" }, { 500, "d" }, { 400, "cd" },
+		{ 100, "c" }, { 90, "xc" }, { 50, "l" }, { 40, "xl" },
+		{ 10, "x" }, { 9, "ix" }, { 5, "v" }, { 4, "iv" },
+		{ 1, "i" }
+	};
+	char	 label[32], swap;
+	size_t	 len = 0, lo, hi, r;
+
+	assert(ref);
+
+	if (st->flags & LOWDOWN_GEMINI_LINK_NOREF)
+		return hbuf_printf(out, "%s", nl ? "\n" : "");
+
+	label[0] = '\0';
+
+	if (st->flags & LOWDOWN_GEMINI_LINK_ROMAN) {
+		/*
+		 * Greedy: always append the largest numeral that still
+		 * fits.  strlcat() truncates over-long labels.
+		 */
+		for (r = 0; r < sizeof(romans) / sizeof(romans[0]); r++)
+			while (ref >= romans[r].val) {
+				strlcat(label, romans[r].sym,
+				    sizeof(label));
+				ref -= romans[r].val;
+			}
+	} else {
+		/* Letters come out least significant first... */
+		while (ref != 0 && len < sizeof(label) - 1) {
+			label[len++] = 'a' + (ref - 1) % 26;
+			ref = (ref - 1) / 26;
+		}
+		label[len] = '\0';
+
+		/* ...so reverse them in place. */
+		for (lo = 0, hi = len; lo < hi; lo++, hi--) {
+			swap = label[lo];
+			label[lo] = label[hi - 1];
+			label[hi - 1] = swap;
+		}
+	}
+
+	return hbuf_printf(out, "%s[%s]%s",
+	    nl ? " " : "", label, nl ? "\n" : "");
+}
+
+/*
+ * Convert newlines to spaces, elide control characters.
+ * If a newline follows a period, it's converted to two spaces.
+ * Return zero on failure (memory), non-zero on success.
+ */
+static int
+rndr_escape(struct lowdown_buf *out, const char *buf, size_t sz)
+{
+	size_t	 pos, from = 0;
+
+	for (pos = 0; pos < sz; pos++) {
+		if (buf[pos] != '\n' &&
+		    !iscntrl((unsigned char)buf[pos]))
+			continue;
+
+		/* Flush the run of ordinary characters before this one. */
+
+		if (!hbuf_put(out, buf + from, pos - from))
+			return 0;
+		from = pos + 1;
+
+		/* Other control characters are simply dropped. */
+
+		if (buf[pos] != '\n')
+			continue;
+
+		/* An extra space if the output so far ends a sentence. */
+
+		if (out->size > 0 &&
+		    out->data[out->size - 1] == '.' &&
+		    !hbuf_putc(out, ' '))
+			return 0;
+		if (!hbuf_putc(out, ' '))
+			return 0;
+	}
+
+	if (from >= sz)
+		return 1;
+
+	return hbuf_put(out, buf + from, sz - from) ? 1 : 0;
+}
+
+/*
+ * Top up the output with newlines until "sz" line breaks have been
+ * emitted since the last content.
+ * Nothing is emitted at the start of output (negative "last_blank").
+ * Return zero on failure (memory), non-zero on success.
+ */
+static int
+rndr_buf_vspace(struct gemini *st, struct lowdown_buf *out, size_t sz)
+{
+
+	if (st->last_blank < 0)
+		return 1;
+
+	for ( ; (size_t)st->last_blank < sz; st->last_blank++)
+		if (!HBUF_PUTSL(out, "\n"))
+			return 0;
+
+	return 1;
+}
+
+/*
+ * Append the text in "in", belonging to node "n", to the output "out".
+ * Text inside a code or HTML block is copied verbatim; otherwise it is
+ * passed through rndr_escape().
+ * Return zero on failure (memory), non-zero on success.
+ */
+static int
+rndr_buf(struct gemini *st, struct lowdown_buf *out,
+	const struct lowdown_node *n, const struct lowdown_buf *in)
+{
+	const struct lowdown_node	*anc;
+	size_t				 skip = 0;
+
+	/* Literal mode if the node or any ancestor is a literal block. */
+
+	anc = n;
+	while (anc != NULL) {
+		if (anc->type == LOWDOWN_BLOCKCODE ||
+		    anc->type == LOWDOWN_BLOCKHTML) {
+			st->last_blank = 1;
+			return hbuf_putb(out, in);
+		}
+		anc = anc->parent;
+	}
+
+	/*
+	 * Not literal: unless the last thing printed was content,
+	 * drop leading blanks.
+	 * This is only likely to happen around links.
+	 */
+
+	assert(in != NULL);
+	if (st->last_blank != 0)
+		while (skip < in->size &&
+		    isspace((unsigned char)in->data[skip]))
+			skip++;
+
+	if (!rndr_escape(out, in->data + skip, in->size - skip))
+		return 0;
+
+	/* Any non-empty input counts as content on the line. */
+
+	if (in->size > 0)
+		st->last_blank = 0;
+	return 1;
+}
+
+/*
+ * Output the unicode entry "val", which must be strictly greater than
+ * zero, as a UTF-8 sequence.
+ * Surrogates and values past U+10FFFF produce no output but still
+ * count as success; there is no other error checking.
+ * Return zero on failure (memory), non-zero on success.
+ */
+static int
+rndr_entity(struct lowdown_buf *buf, int32_t val)
+{
+
+	assert(val > 0);
+
+	/* One byte: ASCII. */
+
+	if (val < 0x80)
+		return hbuf_putc(buf, val);
+
+	/* Two bytes. */
+
+	if (val < 0x800)
+		return hbuf_putc(buf, 0xc0 | (val >> 6)) &&
+		    hbuf_putc(buf, 0x80 | (val & 0x3f));
+
+	/* Surrogate halves: silently skipped. */
+
+	if (val >= 0xd800 && val < 0xe000)
+		return 1;
+
+	/* Three bytes. */
+
+	if (val < 0x10000)
+		return hbuf_putc(buf, 0xe0 | (val >> 12)) &&
+		    hbuf_putc(buf, 0x80 | ((val >> 6) & 0x3f)) &&
+		    hbuf_putc(buf, 0x80 | (val & 0x3f));
+
+	/* Four bytes. */
+
+	if (val < 0x110000)
+		return hbuf_putc(buf, 0xf0 | (val >> 18)) &&
+		    hbuf_putc(buf, 0x80 | ((val >> 12) & 0x3f)) &&
+		    hbuf_putc(buf, 0x80 | ((val >> 6) & 0x3f)) &&
+		    hbuf_putc(buf, 0x80 | (val & 0x3f));
+
+	/* Out of range: silently skipped. */
+
+	return 1;
+}
+
+/*
+ * If LOWDOWN_GEMINI_METADATA is set, print each metadata pair in "mq"
+ * as an escaped "key: value" followed by a line break.
+ * Return zero on failure (memory), non-zero on success.
+ */
+static int
+rndr_doc_header(struct gemini *st, struct lowdown_buf *out,
+	const struct lowdown_metaq *mq)
+{
+	const struct lowdown_meta	*meta;
+
+	if ((st->flags & LOWDOWN_GEMINI_METADATA) == 0)
+		return 1;
+
+	TAILQ_FOREACH(meta, mq, entries) {
+		if (!rndr_escape(out, meta->key, strlen(meta->key)) ||
+		    !HBUF_PUTSL(out, ": ") ||
+		    !rndr_escape(out, meta->value, strlen(meta->value)))
+			return 0;
+
+		/* Mark content as printed so that one newline follows. */
+
+		st->last_blank = 0;
+		if (!rndr_buf_vspace(st, out, 1))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Render the key and value, then store the results in our "mq"
+ * conditional to it existing.
+ * Return zero on failure (memory), non-zero on success.
+ */ +static int +rndr_meta(struct gemini *st, + const struct lowdown_node *n, struct lowdown_metaq *mq) +{ + ssize_t last_blank; + struct lowdown_buf *tmp = NULL; + struct lowdown_meta *m; + const struct lowdown_node *child; + ssize_t val; + const char *ep; + + /* + * Manually render the children of the meta into a + * buffer and use that as our value. Start by zeroing + * our terminal position and using another output buffer + * (st->tmp would be clobbered by children). + */ + + last_blank = st->last_blank; + st->last_blank = -1; + + if ((tmp = hbuf_new(128)) == NULL) + goto err; + if ((m = calloc(1, sizeof(struct lowdown_meta))) == NULL) + goto err; + TAILQ_INSERT_TAIL(mq, m, entries); + + m->key = strndup(n->rndr_meta.key.data, + n->rndr_meta.key.size); + if (m->key == NULL) + goto err; + + TAILQ_FOREACH(child, &n->children, entries) + if (!rndr(tmp, mq, st, child)) + goto err; + + m->value = strndup(tmp->data, tmp->size); + if (m->value == NULL) + goto err; + + if (strcmp(m->key, "shiftheadinglevelby") == 0) { + val = (ssize_t)strtonum + (m->value, -100, 100, &ep); + if (ep == NULL) + st->headers_offs = val + 1; + } else if (strcmp(m->key, "baseheaderlevel") == 0) { + val = (ssize_t)strtonum + (m->value, 1, 100, &ep); + if (ep == NULL) + st->headers_offs = val; + } + + hbuf_free(tmp); + st->last_blank = last_blank; + return 1; +err: + hbuf_free(tmp); + return 0; +} + +/* + * Return zero on failure (memory), non-zero on success. 
+ */ +static int +rndr_flush_linkq(struct gemini *st, struct lowdown_buf *out) +{ + struct link *l; + int rc; + + assert(st->nolinkqsz == 0); + + while ((l = TAILQ_FIRST(&st->linkq)) != NULL) { + TAILQ_REMOVE(&st->linkq, l, entries); + if (!HBUF_PUTSL(out, "=> ")) + return 0; + if (l->n->type == LOWDOWN_LINK) + rc = hbuf_putb(out, &l->n->rndr_link.link); + else if (l->n->type == LOWDOWN_LINK_AUTO) + rc = hbuf_putb(out, &l->n->rndr_autolink.link); + else if (l->n->type == LOWDOWN_IMAGE) + rc = hbuf_putb(out, &l->n->rndr_image.link); + else + rc = 1; + if (!rc) + return 0; + if (!rndr_link_ref(st, out, l->id, 1)) + return 0; + st->last_blank = 1; + free(l); + } + + st->linkqsz = 0; + return 1; +} + +/* + * Get the column width of a multi-byte sequence. + * If the sequence is bad, return the number of raw bytes to print. + * Return <0 on failure (memory), >=0 otherwise. + */ +static ssize_t +rndr_mbswidth(struct gemini *st, const struct lowdown_buf *in) +{ + size_t wsz, csz; + const char *cp; + void *pp; + mbstate_t mbs; + + memset(&mbs, 0, sizeof(mbstate_t)); + cp = in->data; + wsz = mbsnrtowcs(NULL, &cp, in->size, 0, &mbs); + if (wsz == (size_t)-1) + return in->size; + + if (st->bufsz < wsz) { + st->bufsz = wsz; + pp = reallocarray(st->buf, wsz, sizeof(wchar_t)); + if (pp == NULL) + return -1; + st->buf = pp; + } + + memset(&mbs, 0, sizeof(mbstate_t)); + cp = in->data; + mbsnrtowcs(st->buf, &cp, in->size, wsz, &mbs); + csz = wcswidth(st->buf, wsz); + return csz == (size_t)-1 ? in->size : csz; +} + +/* + * Return zero on failure (memory), non-zero on success. 
+ */ +static int +rndr_table(struct lowdown_buf *ob, struct gemini *st, + const struct lowdown_node *n) +{ + size_t *widths = NULL; + const struct lowdown_node *row, *top, *cell; + struct lowdown_buf *celltmp = NULL, + *rowtmp = NULL; + size_t i, j, sz; + ssize_t last_blank, ssz; + unsigned int flags, oflags; + int rc = 0; + + assert(n->type == LOWDOWN_TABLE_BLOCK); + + /* + * Temporarily make us not use in-line links. + * This is obviously because tables and inline links don't work + * well together. + */ + + oflags = st->flags; + if (st->flags & LOWDOWN_GEMINI_LINK_IN) + st->flags &= ~LOWDOWN_GEMINI_LINK_IN; + + widths = calloc(n->rndr_table.columns, sizeof(size_t)); + if (widths == NULL) + goto out; + + if ((rowtmp = hbuf_new(128)) == NULL || + (celltmp = hbuf_new(128)) == NULL) + goto out; + + /* + * Begin by counting the number of printable columns in each + * column in each row. Don't let us accumulate any links, as + * we're going to re-run this after. + */ + + st->nolinkqsz = st->linkqsz + 1; + TAILQ_FOREACH(top, &n->children, entries) { + assert(top->type == LOWDOWN_TABLE_HEADER || + top->type == LOWDOWN_TABLE_BODY); + TAILQ_FOREACH(row, &top->children, entries) + TAILQ_FOREACH(cell, &row->children, entries) { + i = cell->rndr_table_cell.col; + assert(i < n->rndr_table.columns); + hbuf_truncate(celltmp); + last_blank = st->last_blank; + st->last_blank = 0; + if (!rndr(celltmp, NULL, st, cell)) + goto out; + ssz = rndr_mbswidth(st, celltmp); + if (ssz < 0) + goto out; + if (widths[i] < (size_t)ssz) + widths[i] = (size_t)ssz; + st->last_blank = last_blank; + } + } + st->nolinkqsz = 0; + + /* Now actually print, row-by-row into the output. 
*/ + + TAILQ_FOREACH(top, &n->children, entries) { + assert(top->type == LOWDOWN_TABLE_HEADER || + top->type == LOWDOWN_TABLE_BODY); + TAILQ_FOREACH(row, &top->children, entries) { + hbuf_truncate(rowtmp); + TAILQ_FOREACH(cell, &row->children, entries) { + i = cell->rndr_table_cell.col; + hbuf_truncate(celltmp); + last_blank = st->last_blank; + st->last_blank = 0; + if (!rndr(celltmp, NULL, st, cell)) + goto out; + ssz = rndr_mbswidth(st, celltmp); + if (ssz < 0) + goto out; + assert(widths[i] >= (size_t)ssz); + sz = widths[i] - (size_t)ssz; + + /* + * Alignment is either beginning, + * ending, or splitting the remaining + * spaces around the word. + * Be careful about uneven splitting in + * the case of centre. + */ + + flags = cell->rndr_table_cell.flags & + HTBL_FL_ALIGNMASK; + if (flags == HTBL_FL_ALIGN_RIGHT) + for (j = 0; j < sz; j++) + if (!HBUF_PUTSL(rowtmp, " ")) + goto out; + if (flags == HTBL_FL_ALIGN_CENTER) + for (j = 0; j < sz / 2; j++) + if (!HBUF_PUTSL(rowtmp, " ")) + goto out; + if (!hbuf_putb(rowtmp, celltmp)) + goto out; + if (flags == 0 || + flags == HTBL_FL_ALIGN_LEFT) + for (j = 0; j < sz; j++) + if (!HBUF_PUTSL(rowtmp, " ")) + goto out; + if (flags == HTBL_FL_ALIGN_CENTER) { + sz = (sz % 2) ? + (sz / 2) + 1 : (sz / 2); + for (j = 0; j < sz; j++) + if (!HBUF_PUTSL(rowtmp, " ")) + goto out; + } + + st->last_blank = last_blank; + if (TAILQ_NEXT(cell, entries) != NULL && + !HBUF_PUTSL(rowtmp, " | ")) + goto out; + } + + /* + * Some magic here. + * First, emulate rndr() by setting the + * stackpos to the table, which is required for + * checking the line start. + * Then directly print, as we've already escaped + * all characters, and have embedded escapes of + * our own. Then end the line. 
+ */ + + if (!hbuf_putb(ob, rowtmp)) + goto out; + st->last_blank = 0; + if (!rndr_buf_vspace(st, ob, 1)) + goto out; + } + + if (top->type == LOWDOWN_TABLE_HEADER) { + for (i = 0; i < n->rndr_table.columns; i++) { + for (j = 0; j <= widths[i]; j++) + if (!HBUF_PUTSL(ob, "-")) + goto out; + if (i < n->rndr_table.columns - 1 && + !HBUF_PUTSL(ob, "|-")) + goto out; + } + st->last_blank = 0; + if (!rndr_buf_vspace(st, ob, 1)) + goto out; + } + } + + rc = 1; +out: + hbuf_free(celltmp); + hbuf_free(rowtmp); + free(widths); + st->flags = oflags; + return rc; +} + +/* + * Return zero on failure (memory), non-zero on success. + */ +static int +rndr(struct lowdown_buf *ob, struct lowdown_metaq *mq, + struct gemini *st, const struct lowdown_node *n) +{ + const struct lowdown_node *child, *prev; + struct link *l; + void *pp; + struct lowdown_buf *tmpbuf; + size_t i; + ssize_t level; + int32_t entity; + int rc; + + prev = n->parent == NULL ? NULL : + TAILQ_PREV(n, lowdown_nodeq, entries); + + /* Vertical space before content. */ + + switch (n->type) { + case LOWDOWN_ROOT: + st->last_blank = -1; + break; + case LOWDOWN_BLOCKCODE: + case LOWDOWN_BLOCKHTML: + case LOWDOWN_BLOCKQUOTE: + case LOWDOWN_DEFINITION: + case LOWDOWN_HEADER: + case LOWDOWN_LIST: + case LOWDOWN_PARAGRAPH: + case LOWDOWN_TABLE_BLOCK: + /* + * Blocks in a definition list get special treatment + * because we only put one newline between the title and + * the data regardless of its contents. + */ + + if (n->parent != NULL && + n->parent->type == LOWDOWN_LISTITEM && + n->parent->parent != NULL && + n->parent->parent->type == + LOWDOWN_DEFINITION_DATA && + prev == NULL) { + if (!rndr_buf_vspace(st, ob, 1)) + return 0; + } else { + if (!rndr_buf_vspace(st, ob, 2)) + return 0; + } + break; + case LOWDOWN_MATH_BLOCK: + if (n->rndr_math.blockmode && + !rndr_buf_vspace(st, ob, 1)) + return 0; + break; + case LOWDOWN_DEFINITION_DATA: + /* + * Vertical space if previous block-mode data. 
+ */ + + if (n->parent != NULL && + n->parent->type == LOWDOWN_DEFINITION && + (n->parent->rndr_definition.flags & + HLIST_FL_BLOCK) && + prev != NULL && + prev->type == LOWDOWN_DEFINITION_DATA) { + if (!rndr_buf_vspace(st, ob, 2)) + return 0; + } else { + if (!rndr_buf_vspace(st, ob, 1)) + return 0; + } + break; + case LOWDOWN_DEFINITION_TITLE: + case LOWDOWN_HRULE: + case LOWDOWN_LINEBREAK: + case LOWDOWN_LISTITEM: + case LOWDOWN_META: + case LOWDOWN_TABLE_ROW: + if (!rndr_buf_vspace(st, ob, 1)) + return 0; + break; + case LOWDOWN_IMAGE: + case LOWDOWN_LINK: + case LOWDOWN_LINK_AUTO: + if ((st->flags & LOWDOWN_GEMINI_LINK_IN) && + !rndr_buf_vspace(st, ob, 1)) + return 0; + break; + default: + break; + } + + /* Output leading content. */ + + hbuf_truncate(st->tmp); + + switch (n->type) { + case LOWDOWN_TABLE_BLOCK: + case LOWDOWN_BLOCKCODE: + case LOWDOWN_BLOCKHTML: + if (!HBUF_PUTSL(st->tmp, "```")) + return 0; + if (!rndr_buf(st, ob, n, st->tmp)) + return 0; + if (!rndr_buf_vspace(st, ob, 1)) + return 0; + break; + case LOWDOWN_BLOCKQUOTE: + if (!HBUF_PUTSL(st->tmp, "> ")) + return 0; + if (!rndr_buf(st, ob, n, st->tmp)) + return 0; + st->last_blank = -1; + break; + case LOWDOWN_HEADER: + level = (ssize_t)n->rndr_header.level + + st->headers_offs; + if (level < 1) + level = 1; + for (i = 0; i < (size_t)level; i++) + if (!HBUF_PUTSL(st->tmp, "#")) + return 0; + if (!HBUF_PUTSL(st->tmp, " ")) + return 0; + if (!rndr_buf(st, ob, n, st->tmp)) + return 0; + st->last_blank = -1; + break; + case LOWDOWN_IMAGE: + case LOWDOWN_LINK: + case LOWDOWN_LINK_AUTO: + if (!(IS_STANDALONE_LINK(n, prev) || + (st->flags & LOWDOWN_GEMINI_LINK_IN))) + break; + if (!HBUF_PUTSL(st->tmp, "=> ")) + return 0; + rc = 1; + if (n->type == LOWDOWN_LINK_AUTO) + rc = hbuf_putb(st->tmp, &n->rndr_autolink.link); + else if (n->type == LOWDOWN_LINK) + rc = hbuf_putb(st->tmp, &n->rndr_link.link); + else if (n->type == LOWDOWN_IMAGE) + rc = hbuf_putb(st->tmp, &n->rndr_image.link); + if (!rc) + return 
0; + if (!HBUF_PUTSL(st->tmp, " ")) + return 0; + if (!rndr_buf(st, ob, n, st->tmp)) + return 0; + st->last_blank = -1; + break; + case LOWDOWN_LISTITEM: + rc = 1; + if (n->rndr_listitem.flags & HLIST_FL_DEF) + rc = HBUF_PUTSL(st->tmp, ": "); + else if (n->rndr_listitem.flags & HLIST_FL_CHECKED) + rc = HBUF_PUTSL(st->tmp, "☑ "); + else if (n->rndr_listitem.flags & HLIST_FL_UNCHECKED) + rc = HBUF_PUTSL(st->tmp, "☐ "); + else if (n->rndr_listitem.flags & HLIST_FL_UNORDERED) + rc = HBUF_PUTSL(st->tmp, "* "); + else + rc = hbuf_printf(st->tmp, "%zu. ", + n->rndr_listitem.num); + if (!rc) + return 0; + if (!rndr_buf(st, ob, n, st->tmp)) + return 0; + st->last_blank = -1; + break; + case LOWDOWN_SUPERSCRIPT: + if (!HBUF_PUTSL(st->tmp, "^")) + return 0; + if (!rndr_buf(st, ob, n, st->tmp)) + return 0; + break; + default: + break; + } + + /* Descend into children. */ + + switch (n->type) { + case LOWDOWN_TABLE_BLOCK: + if (!rndr_table(ob, st, n)) + return 0; + break; + case LOWDOWN_META: + if (n->chng != LOWDOWN_CHNG_DELETE && + !rndr_meta(st, n, mq)) + return 0; + break; + case LOWDOWN_FOOTNOTE: + if ((tmpbuf = hbuf_new(32)) == NULL) + return 0; + if (!hbuf_printf(tmpbuf, "[%zu] ", st->footsz + 1)) + return 0; + st->last_blank = -1; + st->nolinkflush = 1; + TAILQ_FOREACH(child, &n->children, entries) + if (!rndr(tmpbuf, mq, st, child)) + return 0; + st->nolinkflush = 0; + pp = reallocarray(st->foots, + st->footsz + 1, + sizeof(struct lowdown_buf *)); + if (pp == NULL) + return 0; + st->foots = pp; + st->foots[st->footsz++] = tmpbuf; + break; + default: + TAILQ_FOREACH(child, &n->children, entries) + if (!rndr(ob, mq, st, child)) + return 0; + break; + } + + /* Output non-child or trailing content. 
*/ + + hbuf_truncate(st->tmp); + + switch (n->type) { + case LOWDOWN_HRULE: + if (!HBUF_PUTSL(st->tmp, "~~~~~~~~")) + return 0; + if (!rndr_buf(st, ob, n, st->tmp)) + return 0; + break; + case LOWDOWN_FOOTNOTE: + if (!hbuf_printf(st->tmp, "[%zu]", st->footsz)) + return 0; + if (!rndr_buf(st, ob, n, st->tmp)) + return 0; + break; + case LOWDOWN_RAW_HTML: + if (!rndr_buf(st, ob, n, &n->rndr_raw_html.text)) + return 0; + break; + case LOWDOWN_MATH_BLOCK: + if (!rndr_buf(st, ob, n, &n->rndr_math.text)) + return 0; + break; + case LOWDOWN_ENTITY: + entity = entity_find_iso(&n->rndr_entity.text); + if (entity > 0) { + if (!rndr_entity(st->tmp, entity)) + return 0; + if (!rndr_buf(st, ob, n, st->tmp)) + return 0; + } else { + if (!rndr_buf(st, ob, n, &n->rndr_entity.text)) + return 0; + } + break; + case LOWDOWN_BLOCKCODE: + if (!rndr_buf(st, ob, n, &n->rndr_blockcode.text)) + return 0; + break; + case LOWDOWN_BLOCKHTML: + if (!rndr_buf(st, ob, n, &n->rndr_blockhtml.text)) + return 0; + break; + case LOWDOWN_CODESPAN: + if (!rndr_buf(st, ob, n, &n->rndr_codespan.text)) + return 0; + break; + case LOWDOWN_IMAGE: + if (!rndr_buf(st, ob, n, &n->rndr_image.alt)) + return 0; + /* FALLTHROUGH */ + case LOWDOWN_LINK: + case LOWDOWN_LINK_AUTO: + if (IS_STANDALONE_LINK(n, prev) || + (st->flags & LOWDOWN_GEMINI_LINK_IN)) + break; + if (st->nolinkqsz == 0) { + if ((l = calloc(1, sizeof(struct link))) == NULL) + return 0; + l->n = n; + l->id = ++st->linkqsz; + TAILQ_INSERT_TAIL(&st->linkq, l, entries); + i = l->id; + } else + i = st->nolinkqsz++; + if (!rndr_link_ref(st, st->tmp, i, 0)) + return 0; + if (!rndr_buf(st, ob, n, st->tmp)) + return 0; + break; + case LOWDOWN_NORMAL_TEXT: + if (!rndr_buf(st, ob, n, &n->rndr_normal_text.text)) + return 0; + break; + case LOWDOWN_ROOT: + if (!TAILQ_EMPTY(&st->linkq) && + (st->flags & LOWDOWN_GEMINI_LINK_END)) { + if (!rndr_buf_vspace(st, ob, 2)) + return 0; + if (!rndr_flush_linkq(st, ob)) + return 0; + } + if (st->footsz == 0) + break; + if 
(!HBUF_PUTSL(ob, "~~~~~~~~\n\n")) + return 0; + for (i = 0; i < st->footsz; i++) { + if (!hbuf_putb(ob, st->foots[i])) + return 0; + if (!HBUF_PUTSL(ob, "\n")) + return 0; + } + break; + case LOWDOWN_DOC_HEADER: + if (!rndr_doc_header(st, ob, mq)) + return 0; + break; + default: + break; + } + + /* Trailing block spaces. */ + + hbuf_truncate(st->tmp); + + switch (n->type) { + case LOWDOWN_TABLE_BLOCK: + case LOWDOWN_BLOCKCODE: + case LOWDOWN_BLOCKHTML: + if (!HBUF_PUTSL(st->tmp, "```")) + return 0; + if (!rndr_buf(st, ob, n, st->tmp)) + return 0; + st->last_blank = 0; + if (!rndr_buf_vspace(st, ob, 2)) + return 0; + break; + case LOWDOWN_DOC_HEADER: + if ((st->flags & LOWDOWN_STANDALONE) && + !rndr_buf_vspace(st, ob, 2)) + return 0; + break; + case LOWDOWN_BLOCKQUOTE: + case LOWDOWN_DEFINITION: + case LOWDOWN_HEADER: + case LOWDOWN_LIST: + case LOWDOWN_PARAGRAPH: + if (!rndr_buf_vspace(st, ob, 2)) + return 0; + break; + case LOWDOWN_MATH_BLOCK: + if (n->rndr_math.blockmode && + !rndr_buf_vspace(st, ob, 1)) + return 0; + break; + case LOWDOWN_DEFINITION_DATA: + case LOWDOWN_DEFINITION_TITLE: + case LOWDOWN_HRULE: + case LOWDOWN_LISTITEM: + case LOWDOWN_META: + case LOWDOWN_TABLE_ROW: + if (!rndr_buf_vspace(st, ob, 1)) + return 0; + break; + case LOWDOWN_IMAGE: + case LOWDOWN_LINK: + case LOWDOWN_LINK_AUTO: + if (IS_STANDALONE_LINK(n, prev) || + (st->flags & LOWDOWN_GEMINI_LINK_IN)) + if (!rndr_buf_vspace(st, ob, 1)) + return 0; + break; + case LOWDOWN_ROOT: + /* + * Special case: snip any trailing newlines that may + * have been printed as trailing vertical space. + * This tidies up the output. 
+ */ + + if (!rndr_buf_vspace(st, ob, 1)) + return 0; + while (ob->size && ob->data[ob->size - 1] == '\n') + ob->size--; + if (!HBUF_PUTSL(ob, "\n")) + return 0; + break; + default: + break; + } + + if (!st->nolinkflush && + st->nolinkqsz == 0 && st->last_blank > 1 && + !TAILQ_EMPTY(&st->linkq) && + !(st->flags & LOWDOWN_GEMINI_LINK_END)) { + if (!rndr_flush_linkq(st, ob)) + return 0; + if (!HBUF_PUTSL(ob, "\n")) + return 0; + st->last_blank = 2; + } + + return 1; +} + +int +lowdown_gemini_rndr(struct lowdown_buf *ob, + void *arg, const struct lowdown_node *n) +{ + struct gemini *st = arg; + int rc; + size_t i; + struct lowdown_metaq metaq; + + TAILQ_INIT(&metaq); + st->last_blank = 0; + st->headers_offs = 1; + + rc = rndr(ob, &metaq, st, n); + + link_freeq(&st->linkq); + st->linkqsz = 0; + st->nolinkqsz = 0; + + for (i = 0; i < st->footsz; i++) + hbuf_free(st->foots[i]); + + free(st->foots); + st->footsz = 0; + st->foots = NULL; + lowdown_metaq_free(&metaq); + return rc; +} + +void * +lowdown_gemini_new(const struct lowdown_opts *opts) +{ + struct gemini *p; + + if ((p = calloc(1, sizeof(struct gemini))) == NULL) + return NULL; + + TAILQ_INIT(&p->linkq); + p->flags = opts != NULL ? opts->oflags : 0; + + /* Only use one kind of flag output. */ + + if ((p->flags & LOWDOWN_GEMINI_LINK_IN) && + (p->flags & LOWDOWN_GEMINI_LINK_END)) + p->flags &= ~LOWDOWN_GEMINI_LINK_IN; + + if ((p->tmp = hbuf_new(32)) == NULL) { + free(p); + return NULL; + } + + return p; +} + +void +lowdown_gemini_free(void *arg) +{ + struct gemini *p = arg; + + if (p == NULL) + return; + + hbuf_free(p->tmp); + free(p->buf); + free(p); +} diff --git a/gemini.o b/gemini.o Binary files differ. 
diff --git a/html.c b/html.c @@ -0,0 +1,1379 @@ +/* $Id$ */ +/* + * Copyright (c) 2008, Natacha Porté + * Copyright (c) 2011, Vicent Martí + * Copyright (c) 2014, Xavier Mendez, Devin Torres and the Hoedown authors + * Copyright (c) 2016--2021 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <inttypes.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "lowdown.h" +#include "extern.h" + +/* + * Our internal state object. + */ +struct html { + struct hentryq headers_used; /* headers we've seen */ + ssize_t headers_offs; /* header offset */ + unsigned int flags; /* "oflags" in lowdown_opts */ + int noescape; /* don't escape text */ + struct lowdown_buf **foots; /* footnotes */ + size_t footsz; /* footnotes size */ +}; + +/* + * Escape regular text that shouldn't be HTML. + * Return zero on failure, non-zero on success. 
+ */ +static int +escape_html(struct lowdown_buf *ob, const char *source, + size_t length, const struct html *st) +{ + + assert(st->noescape == 0); + return hesc_html(ob, source, length, + st->flags & LOWDOWN_HTML_OWASP, 0, + st->flags & LOWDOWN_HTML_NUM_ENT); +} + +/* + * See escape_html(). + */ +static int +escape_htmlb(struct lowdown_buf *ob, + const struct lowdown_buf *in, const struct html *st) +{ + + return st->noescape ? + hbuf_putb(ob, in) : + escape_html(ob, in->data, in->size, st); +} + +/* + * Escape literal text. + * Like escape_html() except more restrictive. + * Return zero on failure, non-zero on success. + */ +static int +escape_literal(struct lowdown_buf *ob, + const struct lowdown_buf *in, const struct html *st) +{ + + assert(st->noescape == 0); + return hesc_html(ob, in->data, in->size, + st->flags & LOWDOWN_HTML_OWASP, 1, + st->flags & LOWDOWN_HTML_NUM_ENT); +} + +/* + * Escape an href link. + * Return zero on failure, non-zero on success. + */ +static int +escape_href(struct lowdown_buf *ob, const struct lowdown_buf *in, + const struct html *st) +{ + + assert(st->noescape == 0); + return hesc_href(ob, in->data, in->size); +} + +/* + * Escape an HTML attribute. + * Return zero on failure, non-zero on success. + */ +static int +escape_attr(struct lowdown_buf *ob, const struct lowdown_buf *in) +{ + + return hesc_attr(ob, in->data, in->size); +} + +static int +rndr_autolink(struct lowdown_buf *ob, + const struct rndr_autolink *parm, + const struct html *st) +{ + + if (parm->link.size == 0) + return 1; + + if (!HBUF_PUTSL(ob, "<a href=\"")) + return 0; + if (parm->type == HALINK_EMAIL && !HBUF_PUTSL(ob, "mailto:")) + return 0; + if (!escape_href(ob, &parm->link, st)) + return 0; + if (!HBUF_PUTSL(ob, "\">")) + return 0; + + /* + * Pretty printing: if we get an email address as + * an actual URI, e.g. 
`mailto:foo@bar.com`, we don't + * want to print the `mailto:` prefix + */ + + if (hbuf_strprefix(&parm->link, "mailto:")) { + if (!escape_html(ob, + parm->link.data + 7, + parm->link.size - 7, st)) + return 0; + } else { + if (!escape_htmlb(ob, &parm->link, st)) + return 0; + } + + return HBUF_PUTSL(ob, "</a>"); +} + +static int +rndr_blockcode(struct lowdown_buf *ob, + const struct rndr_blockcode *parm, + const struct html *st) +{ + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + + if (parm->lang.size) { + if (!HBUF_PUTSL(ob, "<pre><code class=\"language-")) + return 0; + if (!escape_href(ob, &parm->lang, st)) + return 0; + if (!HBUF_PUTSL(ob, "\">")) + return 0; + } else { + if (! HBUF_PUTSL(ob, "<pre><code>")) + return 0; + } + + if (!escape_literal(ob, &parm->text, st)) + return 0; + return HBUF_PUTSL(ob, "</code></pre>\n"); +} + +static int +rndr_definition_data(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "<dd>\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "\n</dd>\n"); +} + +static int +rndr_definition_title(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + size_t sz; + + if (!HBUF_PUTSL(ob, "<dt>")) + return 0; + if ((sz = content->size) > 0) { + while (sz && content->data[sz - 1] == '\n') + sz--; + if (!hbuf_put(ob, content->data, sz)) + return 0; + } + return HBUF_PUTSL(ob, "</dt>\n"); +} + +static int +rndr_definition(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (!HBUF_PUTSL(ob, "<dl>\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</dl>\n"); +} + +static int +rndr_blockquote(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (!HBUF_PUTSL(ob, "<blockquote>\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</blockquote>\n"); +} + +static 
int +rndr_codespan(struct lowdown_buf *ob, + const struct rndr_codespan *param, + const struct html *st) +{ + + if (!HBUF_PUTSL(ob, "<code>")) + return 0; + if (!escape_htmlb(ob, ¶m->text, st)) + return 0; + return HBUF_PUTSL(ob, "</code>"); +} + +static int +rndr_strikethrough(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "<del>")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</del>"); +} + +static int +rndr_double_emphasis(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "<strong>")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</strong>"); +} + +static int +rndr_emphasis(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "<em>")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</em>"); +} + +static int +rndr_highlight(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "<mark>")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</mark>"); +} + +static int +rndr_linebreak(struct lowdown_buf *ob) +{ + + return HBUF_PUTSL(ob, "<br/>\n"); +} + +static int +rndr_header(struct lowdown_buf *ob, const struct lowdown_buf *content, + const struct lowdown_node *n, struct html *st) +{ + ssize_t level; + const struct lowdown_buf *buf; + + /* + * The <hN> level take into account shifteheadinglevelby + * metadata, so offset it here. Bound us below <h6>. + */ + + level = (ssize_t)n->rndr_header.level + st->headers_offs; + if (level < 1) + level = 1; + else if (level > 6) + level = 6; + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (!hbuf_printf(ob, "<h%zu", level)) + return 0; + + /* + * Identifiers can either come from header attributes or as + * computed from the content of the header. 
+ */ + + if (n->rndr_header.attr_id.size) { + if (!HBUF_PUTSL(ob, " id=\"")) + return 0; + if (!escape_href(ob, &n->rndr_header.attr_id, st)) + return 0; + if (!HBUF_PUTSL(ob, "\"")) + return 0; + } else if (st->flags & LOWDOWN_HTML_HEAD_IDS) { + if (!HBUF_PUTSL(ob, " id=\"")) + return 0; + buf = hbuf_id(NULL, n, &st->headers_used); + if (buf == NULL) + return 0; + if (!hbuf_putb(ob, buf)) + return 0; + if (!HBUF_PUTSL(ob, "\"")) + return 0; + } + + /* Optional header class. */ + + if (n->rndr_header.attr_cls.size) { + if (!HBUF_PUTSL(ob, " class=\"")) + return 0; + if (!escape_attr(ob, &n->rndr_header.attr_cls)) + return 0; + if (!HBUF_PUTSL(ob, "\"")) + return 0; + } + + if (!HBUF_PUTSL(ob, ">")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return hbuf_printf(ob, "</h%zu>\n", level); +} + +static int +rndr_link(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct rndr_link *param, + const struct html *st) +{ + + if (!HBUF_PUTSL(ob, "<a href=\"") || + !escape_href(ob, ¶m->link, st)) + return 0; + + if (param->title.size) + if (!HBUF_PUTSL(ob, "\" title=\"") || + !escape_attr(ob, ¶m->title)) + return 0; + if (param->attr_cls.size) + if (!HBUF_PUTSL(ob, "\" class=\"") || + !escape_attr(ob, ¶m->attr_cls)) + return 0; + if (param->attr_id.size) + if (!HBUF_PUTSL(ob, "\" id=\"") || + !escape_attr(ob, ¶m->attr_id)) + return 0; + + if (!HBUF_PUTSL(ob, "\">") || + !hbuf_putb(ob, content) || + !HBUF_PUTSL(ob, "</a>")) + return 0; + + return 1; +} + +static int +rndr_list(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct rndr_list *param) +{ + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (param->flags & HLIST_FL_ORDERED) { + if (param->start > 1) { + if (!hbuf_printf(ob, + "<ol start=\"%zu\">\n", param->start)) + return 0; + } else { + if (!HBUF_PUTSL(ob, "<ol>\n")) + return 0; + } + } else if (!HBUF_PUTSL(ob, "<ul>\n")) + return 0; + + if (!hbuf_putb(ob, content)) + return 0; + + return (param->flags 
& HLIST_FL_ORDERED) ? + HBUF_PUTSL(ob, "</ol>\n") : + HBUF_PUTSL(ob, "</ul>\n"); +} + +static int +rndr_listitem(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct lowdown_node *n) +{ + size_t size; + int blk = 0; + + /* + * If we're in block mode (which can be assigned post factum in + * the parser), make sure that we have an extra <p> around + * non-block content. + */ + + if (((n->rndr_listitem.flags & HLIST_FL_DEF) && + n->parent != NULL && + n->parent->parent != NULL && + n->parent->parent->type == LOWDOWN_DEFINITION && + (n->parent->parent->rndr_definition.flags & + HLIST_FL_BLOCK)) || + (!(n->rndr_listitem.flags & HLIST_FL_DEF) && + n->parent != NULL && + n->parent->type == LOWDOWN_LIST && + (n->parent->rndr_list.flags & HLIST_FL_BLOCK))) { + if (!(hbuf_strprefix(content, "<ul") || + hbuf_strprefix(content, "<ol") || + hbuf_strprefix(content, "<dl") || + hbuf_strprefix(content, "<div") || + hbuf_strprefix(content, "<table") || + hbuf_strprefix(content, "<blockquote") || + hbuf_strprefix(content, "<pre>") || + hbuf_strprefix(content, "<h") || + hbuf_strprefix(content, "<p>"))) + blk = 1; + } + + /* Only emit <li> if we're not a <dl> list. */ + + if (!(n->rndr_listitem.flags & HLIST_FL_DEF) && + !HBUF_PUTSL(ob, "<li>")) + return 0; + + if (blk && !HBUF_PUTSL(ob, "<p>")) + return 0; + + if (n->rndr_listitem.flags & + (HLIST_FL_CHECKED|HLIST_FL_UNCHECKED)) + HBUF_PUTSL(ob, "<input type=\"checkbox\" "); + if (n->rndr_listitem.flags & HLIST_FL_CHECKED) + HBUF_PUTSL(ob, "checked=\"checked\" "); + if (n->rndr_listitem.flags & + (HLIST_FL_CHECKED|HLIST_FL_UNCHECKED)) + HBUF_PUTSL(ob, "/>"); + + /* Cut off any trailing space. 
*/ + + if ((size = content->size) > 0) { + while (size && content->data[size - 1] == '\n') + size--; + if (!hbuf_put(ob, content->data, size)) + return 0; + } + + if (blk && !HBUF_PUTSL(ob, "</p>")) + return 0; + if (!(n->rndr_listitem.flags & HLIST_FL_DEF) && + !HBUF_PUTSL(ob, "</li>\n")) + return 0; + + return 1; +} + +static int +rndr_paragraph(struct lowdown_buf *ob, + const struct lowdown_buf *content, + struct html *st) +{ + size_t i = 0, org; + + if (content->size == 0) + return 1; + while (i < content->size && + isspace((unsigned char)content->data[i])) + i++; + if (i == content->size) + return 1; + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (!HBUF_PUTSL(ob, "<p>")) + return 0; + + if (st->flags & LOWDOWN_HTML_HARD_WRAP) { + for ( ; i < content->size; i++) { + org = i; + while (i < content->size && + content->data[i] != '\n') + i++; + + if (i > org && !hbuf_put + (ob, content->data + org, i - org)) + return 0; + + /* + * Do not insert a line break if this newline is + * the last character on the paragraph. + */ + + if (i >= content->size - 1) + break; + if (!rndr_linebreak(ob)) + return 0; + } + } else { + if (!hbuf_put(ob, + content->data + i, content->size - i)) + return 0; + } + + return HBUF_PUTSL(ob, "</p>\n"); +} + +static int +rndr_raw_block(struct lowdown_buf *ob, + const struct rndr_blockhtml *param, + const struct html *st) +{ + size_t org, sz; + + if ((st->flags & LOWDOWN_HTML_SKIP_HTML)) + return 1; + if ((st->flags & LOWDOWN_HTML_ESCAPE)) + return escape_htmlb(ob, ¶m->text, st); + + /* + * FIXME: Do we *really* need to trim the HTML? How does that + * make a difference? 
+ */ + + sz = param->text.size; + while (sz > 0 && param->text.data[sz - 1] == '\n') + sz--; + + org = 0; + while (org < sz && param->text.data[org] == '\n') + org++; + + if (org >= sz) + return 1; + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + + if (!hbuf_put(ob, param->text.data + org, sz - org)) + return 0; + return hbuf_putc(ob, '\n'); +} + +static int +rndr_triple_emphasis(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "<strong><em>")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</em></strong>"); +} + +static int +rndr_hrule(struct lowdown_buf *ob) +{ + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + return hbuf_puts(ob, "<hr/>\n"); +} + +static int +rndr_image(struct lowdown_buf *ob, + const struct rndr_image *param, + const struct html *st) +{ + char dimbuf[32]; + unsigned int x, y; + int rc = 0; + + /* + * Scan in our dimensions, if applicable. + * It's unreasonable for them to be over 32 characters, so use + * that as a cap to the size. + */ + + if (param->dims.size && + param->dims.size < sizeof(dimbuf) - 1) { + memset(dimbuf, 0, sizeof(dimbuf)); + memcpy(dimbuf, param->dims.data, param->dims.size); + rc = sscanf(dimbuf, "%ux%u", &x, &y); + } + + /* Require an "alt", even if blank. 
*/ + + if (!HBUF_PUTSL(ob, "<img src=\"") || + !escape_href(ob, ¶m->link, st) || + !HBUF_PUTSL(ob, "\" alt=\"") || + !escape_attr(ob, ¶m->alt) || + !HBUF_PUTSL(ob, "\"")) + return 0; + + if (param->attr_cls.size) + if (!HBUF_PUTSL(ob, " class=\"") || + !escape_attr(ob, ¶m->attr_cls) || + !HBUF_PUTSL(ob, "\"")) + return 0; + if (param->attr_id.size) + if (!HBUF_PUTSL(ob, " id=\"") || + !escape_attr(ob, ¶m->attr_id) || + !HBUF_PUTSL(ob, "\"")) + return 0; + + if (param->attr_width.size || param->attr_height.size) { + if (!HBUF_PUTSL(ob, " style=\"")) + return 0; + if (param->attr_width.size) + if (!HBUF_PUTSL(ob, "width:") || + !escape_attr(ob, ¶m->attr_width) || + !HBUF_PUTSL(ob, ";")) + return 0; + if (param->attr_height.size) + if (!HBUF_PUTSL(ob, "height:") || + !escape_attr(ob, ¶m->attr_height) || + !HBUF_PUTSL(ob, ";")) + return 0; + if (!HBUF_PUTSL(ob, "\"")) + return 0; + } else if (param->dims.size && rc > 0) { + if (!hbuf_printf(ob, " width=\"%u\"", x)) + return 0; + if (rc > 1 && !hbuf_printf(ob, " height=\"%u\"", y)) + return 0; + } + + if (param->title.size) + if (!HBUF_PUTSL(ob, " title=\"") || + !escape_htmlb(ob, ¶m->title, st) || + !HBUF_PUTSL(ob, "\"")) + return 0; + + return hbuf_puts(ob, " />"); +} + +static int +rndr_raw_html(struct lowdown_buf *ob, + const struct rndr_raw_html *param, + const struct html *st) +{ + + if (st->flags & LOWDOWN_HTML_SKIP_HTML) + return 1; + + return (st->flags & LOWDOWN_HTML_ESCAPE) ? 
+ escape_htmlb(ob, ¶m->text, st) : + hbuf_putb(ob, ¶m->text); +} + +static int +rndr_table(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (!HBUF_PUTSL(ob, "<table>\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</table>\n"); +} + +static int +rndr_table_header(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (!HBUF_PUTSL(ob, "<thead>\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</thead>\n"); +} + +static int +rndr_table_body(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (!HBUF_PUTSL(ob, "<tbody>\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</tbody>\n"); +} + +static int +rndr_tablerow(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "<tr>\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</tr>\n"); +} + +static int +rndr_tablecell(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct rndr_table_cell *param) +{ + + if (param->flags & HTBL_FL_HEADER) { + if (!HBUF_PUTSL(ob, "<th")) + return 0; + } else { + if (!HBUF_PUTSL(ob, "<td")) + return 0; + } + + switch (param->flags & HTBL_FL_ALIGNMASK) { + case HTBL_FL_ALIGN_CENTER: + if (!HBUF_PUTSL(ob, " style=\"text-align: center\">")) + return 0; + break; + case HTBL_FL_ALIGN_LEFT: + if (!HBUF_PUTSL(ob, " style=\"text-align: left\">")) + return 0; + break; + case HTBL_FL_ALIGN_RIGHT: + if (!HBUF_PUTSL(ob, " style=\"text-align: right\">")) + return 0; + break; + default: + if (!HBUF_PUTSL(ob, ">")) + return 0; + break; + } + + if (!hbuf_putb(ob, content)) + return 0; + + return (param->flags & HTBL_FL_HEADER) ? 
+ HBUF_PUTSL(ob, "</th>\n") : + HBUF_PUTSL(ob, "</td>\n"); +} + +static int +rndr_superscript(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "<sup>")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</sup>"); +} + +static int +rndr_normal_text(struct lowdown_buf *ob, + const struct rndr_normal_text *param, + const struct html *st) +{ + + return escape_htmlb(ob, ¶m->text, st); +} + +static int +rndr_footnote_def(struct lowdown_buf *ob, + const struct lowdown_buf *content, size_t num) +{ + size_t i = 0; + int pfound = 0; + + /* Insert anchor at the end of first paragraph block. */ + + while ((i + 3) < content->size) { + if (content->data[i++] != '<') + continue; + if (content->data[i++] != '/') + continue; + if (content->data[i++] != 'p' && + content->data[i] != 'P') + continue; + if (content->data[i] != '>') + continue; + i -= 3; + pfound = 1; + break; + } + + if (!hbuf_printf(ob, "\n<li id=\"fn%zu\">\n", num)) + return 0; + + if (pfound) { + if (!hbuf_put(ob, content->data, i)) + return 0; + if (!hbuf_printf(ob, " " + "<a href=\"#fnref%zu\" rev=\"footnote\">" + "↩" + "</a>", num)) + return 0; + if (!hbuf_put(ob, + content->data + i, content->size - i)) + return 0; + } else { + if (!hbuf_putb(ob, content)) + return 0; + } + + return HBUF_PUTSL(ob, "</li>\n"); +} + +static int +rndr_footnote_ref(struct lowdown_buf *ob, + const struct lowdown_buf *content, struct html *st) +{ + void *pp; + size_t num = st->footsz + 1; + + /* + * Keep a reference to this footnote definition, as we're going + * to print it out at the end of the document. For now, + * suppress printing of the content. 
+ */ + + pp = recallocarray(st->foots, st->footsz, + st->footsz + 1, sizeof(struct lowdown_buf *)); + if (pp == NULL) + return 0; + st->foots = pp; + if ((st->foots[st->footsz++] = hbuf_dup(content)) == NULL) + return 0; + + return hbuf_printf(ob, + "<sup id=\"fnref%zu\">" + "<a href=\"#fn%zu\" rel=\"footnote\">" + "%zu</a></sup>", num, num, num); +} + +static int +rndr_math(struct lowdown_buf *ob, + const struct rndr_math *param, + const struct html *st) +{ + + if (param->blockmode && !HBUF_PUTSL(ob, "\\[")) + return 0; + else if (!param->blockmode && !HBUF_PUTSL(ob, "\\(")) + return 0; + + if (!escape_htmlb(ob, ¶m->text, st)) + return 0; + + return param->blockmode ? + HBUF_PUTSL(ob, "\\]") : + HBUF_PUTSL(ob, "\\)"); +} + +static int +rndr_doc_footer(struct lowdown_buf *ob, const struct html *st) +{ + size_t i; + + /* + * Start by flushing out our footnotes. Footnotes are "sparse" + * in that we may not have them all defined (?). + */ + + if (st->footsz > 0) { + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (!HBUF_PUTSL(ob, + "<div class=\"footnotes\">\n<hr/>\n<ol>\n")) + return 0; + for (i = 0; i < st->footsz; i++) + if (!rndr_footnote_def(ob, st->foots[i], i + 1)) + return 0; + if (!HBUF_PUTSL(ob, "\n</ol>\n</div>\n")) + return 0; + } + + return (st->flags & LOWDOWN_STANDALONE) ? + HBUF_PUTSL(ob, "</body>\n") : 1; +} + +static int +rndr_root(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct html *st) +{ + + if ((st->flags & LOWDOWN_STANDALONE) && + !HBUF_PUTSL(ob, "<!DOCTYPE html>\n<html>\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + if (!rndr_doc_footer(ob, st)) + return 0; + if (st->flags & LOWDOWN_STANDALONE) + return HBUF_PUTSL(ob, "</html>\n"); + return 1; +} + +/* + * Split "val" into multiple strings delimited by two or more whitespace + * characters, padding the output with "starttag" and "endtag". + * Return zero on failure, non-zero on success. 
+ */ +static int +rndr_meta_multi(struct lowdown_buf *ob, const char *b, int href, + const char *starttag, const char *endtag) +{ + const char *start; + size_t sz, i, bsz; + + if (b == NULL) + return 1; + + bsz = strlen(b); + + for (i = 0; i < bsz; i++) { + while (i < bsz && + isspace((unsigned char)b[i])) + i++; + if (i == bsz) + continue; + start = &b[i]; + + for (; i < bsz; i++) + if (i < bsz - 1 && + isspace((unsigned char)b[i]) && + isspace((unsigned char)b[i + 1])) + break; + + if ((sz = &b[i] - start) == 0) + continue; + + if (!hbuf_puts(ob, starttag)) + return 0; + if (!HBUF_PUTSL(ob, "\"")) + return 0; + if (!href && !hesc_attr(ob, start, sz)) + return 0; + else if (href && !hesc_href(ob, start, sz)) + return 0; + if (!HBUF_PUTSL(ob, "\"")) + return 0; + if (!hbuf_puts(ob, endtag)) + return 0; + if (!HBUF_PUTSL(ob, "\n")) + return 0; + } + + return 1; +} + +/* + * Allocate a meta-data value on the queue "mq". + * Return zero on failure, non-zero on success. + */ +static int +rndr_meta(struct lowdown_buf *ob, + const struct lowdown_buf *content, + struct lowdown_metaq *mq, + const struct lowdown_node *n, struct html *st) +{ + struct lowdown_meta *m; + ssize_t val; + const char *ep; + + m = calloc(1, sizeof(struct lowdown_meta)); + if (m == NULL) + return 0; + TAILQ_INSERT_TAIL(mq, m, entries); + + m->key = strndup(n->rndr_meta.key.data, + n->rndr_meta.key.size); + if (m->key == NULL) + return 0; + m->value = strndup(content->data, content->size); + if (m->value == NULL) + return 0; + + if (strcmp(m->key, "shiftheadinglevelby") == 0) { + val = (ssize_t)strtonum + (m->value, -100, 100, &ep); + if (ep == NULL) + st->headers_offs = val + 1; + } else if (strcmp(m->key, "baseheaderlevel") == 0) { + val = (ssize_t)strtonum + (m->value, 1, 100, &ep); + if (ep == NULL) + st->headers_offs = val; + } + + return 1; +} + +static int +rndr_doc_header(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct lowdown_metaq *mq, + const struct html *st) +{ 
+ const struct lowdown_meta *m; + const char *author = NULL, *title = NULL, + *affil = NULL, *date = NULL, + *copy = NULL, *rcsauthor = NULL, + *rcsdate = NULL, *css = NULL, + *script = NULL; + + if (!(st->flags & LOWDOWN_STANDALONE)) + return 1; + + TAILQ_FOREACH(m, mq, entries) + if (strcasecmp(m->key, "author") == 0) + author = m->value; + else if (strcasecmp(m->key, "copyright") == 0) + copy = m->value; + else if (strcasecmp(m->key, "affiliation") == 0) + affil = m->value; + else if (strcasecmp(m->key, "date") == 0) + date = m->value; + else if (strcasecmp(m->key, "rcsauthor") == 0) + rcsauthor = rcsauthor2str(m->value); + else if (strcasecmp(m->key, "rcsdate") == 0) + rcsdate = rcsdate2str(m->value); + else if (strcasecmp(m->key, "title") == 0) + title = m->value; + else if (strcasecmp(m->key, "css") == 0) + css = m->value; + else if (strcasecmp(m->key, "javascript") == 0) + script = m->value; + + if (!hbuf_putb(ob, content)) + return 0; + + if (!HBUF_PUTSL(ob, + "<head>\n" + "<meta charset=\"utf-8\" />\n" + "<meta name=\"viewport\"" + " content=\"width=device-width,initial-scale=1\" />\n")) + return 0; + + /* Overrides. */ + + if (title == NULL) + title = "Untitled article"; + if (rcsdate != NULL) + date = rcsdate; + if (rcsauthor != NULL) + author = rcsauthor; + + if (!rndr_meta_multi(ob, affil, 0, + "<meta name=\"creator\" content=", " />")) + return 0; + + if (!rndr_meta_multi(ob, author, 0, + "<meta name=\"author\" content=", " />")) + return 0; + + if (!rndr_meta_multi(ob, copy, 0, + "<meta name=\"copyright\" content=", " />")) + return 0; + + /* + * FIXME: don't use "scheme" if the date isn't in the + * appropriate format, or modify it depending upon the position + * of the year? 
+ */ + + if (date != NULL) { + if (!hbuf_printf(ob, "<meta name=" + "\"date\" scheme=\"YYYY-MM-DD\" content=\"")) + return 0; + if (!hesc_attr(ob, date, strlen(date))) + return 0; + if (!HBUF_PUTSL(ob, "\" />\n")) + return 0; + } + + if (!rndr_meta_multi(ob, css, 1, + "<link rel=\"stylesheet\" href=", " />")) + return 0; + + if (!rndr_meta_multi(ob, script, 1, + "<script src=", "></script>")) + return 0; + + if (!HBUF_PUTSL(ob, "<title>")) + return 0; + if (!hesc_html(ob, title, strlen(title), + st->flags & LOWDOWN_HTML_OWASP, 0, + st->flags & LOWDOWN_HTML_NUM_ENT)) + return 0; + if (!HBUF_PUTSL(ob, "</title>\n")) + return 0; + return HBUF_PUTSL(ob, "</head>\n<body>\n"); +} + +static int +rndr(struct lowdown_buf *ob, + struct lowdown_metaq *mq, void *ref, + const struct lowdown_node *n) +{ + const struct lowdown_node *child; + struct lowdown_buf *tmp; + int32_t ent; + struct html *st = ref; + int ret = 1, rc = 1; + + if ((tmp = hbuf_new(64)) == NULL) + return 0; + + /* + * If we're processing metadata, don't escape the content as we + * read and parse it. This prevents double-escaping. We'll + * properly escape things as we inline them (standalone mode) or + * when we write body text. + */ + + if (n->type == LOWDOWN_META) + st->noescape = 1; + + TAILQ_FOREACH(child, &n->children, entries) + if (!rndr(tmp, mq, st, child)) + goto out; + + /* + * If we're in the doc header, don't emit any insert or delete, + * as HTML doesn't allow them. 
+ */ + + if (n->chng == LOWDOWN_CHNG_INSERT && n->type != LOWDOWN_META && + !HBUF_PUTSL(ob, "<ins>")) + goto out; + if (n->chng == LOWDOWN_CHNG_DELETE && n->type != LOWDOWN_META && + !HBUF_PUTSL(ob, "<del>")) + goto out; + + switch (n->type) { + case LOWDOWN_ROOT: + rc = rndr_root(ob, tmp, st); + break; + case LOWDOWN_BLOCKCODE: + rc = rndr_blockcode(ob, &n->rndr_blockcode, st); + break; + case LOWDOWN_BLOCKQUOTE: + rc = rndr_blockquote(ob, tmp); + break; + case LOWDOWN_DEFINITION: + rc = rndr_definition(ob, tmp); + break; + case LOWDOWN_DEFINITION_TITLE: + rc = rndr_definition_title(ob, tmp); + break; + case LOWDOWN_DEFINITION_DATA: + rc = rndr_definition_data(ob, tmp); + break; + case LOWDOWN_DOC_HEADER: + rc = rndr_doc_header(ob, tmp, mq, st); + break; + case LOWDOWN_META: + st->noescape = 0; + if (n->chng != LOWDOWN_CHNG_DELETE) + rc = rndr_meta(ob, tmp, mq, n, st); + break; + case LOWDOWN_HEADER: + rc = rndr_header(ob, tmp, n, st); + break; + case LOWDOWN_HRULE: + rc = rndr_hrule(ob); + break; + case LOWDOWN_LIST: + rc = rndr_list(ob, tmp, &n->rndr_list); + break; + case LOWDOWN_LISTITEM: + rc = rndr_listitem(ob, tmp, n); + break; + case LOWDOWN_PARAGRAPH: + rc = rndr_paragraph(ob, tmp, st); + break; + case LOWDOWN_TABLE_BLOCK: + rc = rndr_table(ob, tmp); + break; + case LOWDOWN_TABLE_HEADER: + rc = rndr_table_header(ob, tmp); + break; + case LOWDOWN_TABLE_BODY: + rc = rndr_table_body(ob, tmp); + break; + case LOWDOWN_TABLE_ROW: + rc = rndr_tablerow(ob, tmp); + break; + case LOWDOWN_TABLE_CELL: + rc = rndr_tablecell(ob, tmp, &n->rndr_table_cell); + break; + case LOWDOWN_BLOCKHTML: + rc = rndr_raw_block(ob, &n->rndr_blockhtml, st); + break; + case LOWDOWN_LINK_AUTO: + rc = rndr_autolink(ob, &n->rndr_autolink, st); + break; + case LOWDOWN_CODESPAN: + rc = rndr_codespan(ob, &n->rndr_codespan, st); + break; + case LOWDOWN_DOUBLE_EMPHASIS: + rc = rndr_double_emphasis(ob, tmp); + break; + case LOWDOWN_EMPHASIS: + rc = rndr_emphasis(ob, tmp); + break; + case 
LOWDOWN_HIGHLIGHT: + rc = rndr_highlight(ob, tmp); + break; + case LOWDOWN_IMAGE: + rc = rndr_image(ob, &n->rndr_image, st); + break; + case LOWDOWN_LINEBREAK: + rc = rndr_linebreak(ob); + break; + case LOWDOWN_LINK: + rc = rndr_link(ob, tmp, &n->rndr_link, st); + break; + case LOWDOWN_TRIPLE_EMPHASIS: + rc = rndr_triple_emphasis(ob, tmp); + break; + case LOWDOWN_STRIKETHROUGH: + rc = rndr_strikethrough(ob, tmp); + break; + case LOWDOWN_SUPERSCRIPT: + rc = rndr_superscript(ob, tmp); + break; + case LOWDOWN_FOOTNOTE: + rc = rndr_footnote_ref(ob, tmp, st); + break; + case LOWDOWN_MATH_BLOCK: + rc = rndr_math(ob, &n->rndr_math, st); + break; + case LOWDOWN_RAW_HTML: + rc = rndr_raw_html(ob, &n->rndr_raw_html, st); + break; + case LOWDOWN_NORMAL_TEXT: + rc = rndr_normal_text(ob, &n->rndr_normal_text, st); + break; + case LOWDOWN_ENTITY: + if (!(st->flags & LOWDOWN_HTML_NUM_ENT)) { + rc = hbuf_putb(ob, &n->rndr_entity.text); + break; + } + + /* + * Prefer numeric entities. + * This is because we're emitting XML (XHTML5) and it's + * not clear whether the processor can handle HTML + * entities. + */ + + ent = entity_find_iso(&n->rndr_entity.text); + rc = ent > 0 ? + hbuf_printf(ob, "&#%" PRId32 ";", ent) : + hbuf_putb(ob, &n->rndr_entity.text); + break; + default: + rc = hbuf_putb(ob, tmp); + break; + } + if (!rc) + goto out; + + /* + * If we're in the doc header, don't emit any insert or delete, + * as HTML doesn't allow them. 
+ */ + + if (n->chng == LOWDOWN_CHNG_INSERT && n->type != LOWDOWN_META && + n->parent != NULL && + n->parent->type != LOWDOWN_DOC_HEADER && + !HBUF_PUTSL(ob, "</ins>")) + goto out; + if (n->chng == LOWDOWN_CHNG_DELETE && n->type != LOWDOWN_META && + !HBUF_PUTSL(ob, "</del>")) + goto out; + + ret = 1; +out: + hbuf_free(tmp); + return ret; +} + +int +lowdown_html_rndr(struct lowdown_buf *ob, + void *arg, const struct lowdown_node *n) +{ + struct html *st = arg; + struct lowdown_metaq metaq; + int rc; + size_t i; + + TAILQ_INIT(&st->headers_used); + TAILQ_INIT(&metaq); + st->headers_offs = 1; + + rc = rndr(ob, &metaq, st, n); + + for (i = 0; i < st->footsz; i++) + hbuf_free(st->foots[i]); + + free(st->foots); + st->footsz = 0; + st->foots = NULL; + lowdown_metaq_free(&metaq); + hentryq_clear(&st->headers_used); + return rc; +} + +void * +lowdown_html_new(const struct lowdown_opts *opts) +{ + struct html *p; + + if ((p = calloc(1, sizeof(struct html))) == NULL) + return NULL; + + p->flags = opts == NULL ? 0 : opts->oflags; + return p; +} + +void +lowdown_html_free(void *arg) +{ + + free(arg); +} diff --git a/html.o b/html.o Binary files differ. diff --git a/html_escape.c b/html_escape.c @@ -0,0 +1,309 @@ +/* $Id$ */ +/* + * Copyright (c) 2008, Natacha Porté + * Copyright (c) 2011, Vicent Martí + * Copyright (c) 2014, Xavier Mendez, Devin Torres and the Hoedown authors + * Copyright (c) 2016--2017, 2020 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include "lowdown.h" +#include "extern.h" + +/* + * The following characters will not be escaped: + * + * -_.+!*'(),%#@?=;:/,+&$~ alphanum + * + * Note that this character set is the addition of: + * + * - The characters which are safe to be in an URL + * - The characters which are *not* safe to be in an URL because they + * are RESERVED characters. + * + * We assume (lazily) that any RESERVED char that appears inside an URL + * is actually meant to have its native function (i.e. as an URL + * component/separator) and hence needs no escaping. + * + * There are two exceptions: the chacters & (amp) and ' (single quote) + * do not appear in the table. They are meant to appear in the URL as + * components, yet they require special HTML-entity escaping to generate + * valid HTML markup. + * + * All other characters will be escaped to %XX. 
+ */ +static const int href_tbl[UINT8_MAX + 1] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/* + * For each 8-bit character, if non-zero, the HTML entity we need to + * substitute for safe output. According to the OWASP rules: + * & --> & + * < --> < + * > --> > optional + * " --> " optional + * ' --> ' optional: ' is not recommended + * / --> / optional: end an HTML entity + */ +static const int esc_tbl[UINT8_MAX + 1] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + 
+/* + * Maximum value of optional entity subsititute. + * Above this (>ESC_TBL_OWASP_MAX) is mandatory. + */ +#define ESC_TBL_OWASP_MAX 3 + +/* + * For literal contexts, maximum value of optional entity subsititute. + * Above this is mandatory. + */ +#define ESC_TBL_LITERAL_MAX 3 + +/* + * Named entities (mostly). + */ +static const char *esc_name[] = { + "", + "", /* oops */ + "'", + "/", + ">", + "<", + "&", +}; + +/* + * Numeric entities. + */ +static const char *esc_num[] = { + "", + "", /* oops */ + "'", + "/", + ">", + "<", + "&", +}; + +/* + * Escape general HTML attributes. + * This is modelled after the main Markdown parser. + */ +int +hesc_attr(struct lowdown_buf *ob, const char *data, size_t size) +{ + size_t i, mark; + int rc; + + if (size == 0) + return 1; + + for (i = 0; i < size; i++) { + mark = i; + while (i < size && data[i] != '"' && data[i] != '&') + i++; + + if (mark == 0 && i >= size) + return hbuf_put(ob, data, size); + + if (i > mark && + !hbuf_put(ob, data + mark, i - mark)) + return 0; + + if (i >= size) + break; + + rc = 1; + if (data[i] == '"') + rc = HBUF_PUTSL(ob, """); + else if (data[i] == '&') + rc = HBUF_PUTSL(ob, "&"); + if (!rc) + return 0; + } + + return 1; +} + +/* + * Escape (part of) a URL inside HTML. + * Return zero on failure (memory), non-zero otherwise. + */ +int +hesc_href(struct lowdown_buf *ob, const char *data, size_t size) +{ + static const char hex_chars[] = "0123456789ABCDEF"; + size_t i, mark; + char hex_str[3]; + int rc; + + if (size == 0) + return 1; + + hex_str[0] = '%'; + + for (i = 0; i < size; i++) { + mark = i; + while (i < size && href_tbl[(unsigned char)data[i]]) + i++; + + /* + * Optimization for cases where there's nothing to + * escape. + */ + + if (mark == 0 && i >= size) + return hbuf_put(ob, data, size); + + if (i > mark && + !hbuf_put(ob, data + mark, i - mark)) + return 0; + + /* Escaping... 
*/ + + if (i >= size) + break; + + switch (data[i]) { + case '&': + /* + * Amp appears all the time in URLs, but needs + * HTML-entity escaping to be inside an href. + */ + rc = HBUF_PUTSL(ob, "&"); + break; + case '\'': + /* + * The single quote is a valid URL character + * according to the standard; it needs HTML + * entity escaping too. + */ + rc = HBUF_PUTSL(ob, "'"); + break; + default: + /* + * Every other character goes with a %XX + * escaping. + */ + hex_str[1] = hex_chars[(data[i] >> 4) & 0xF]; + hex_str[2] = hex_chars[data[i] & 0xF]; + rc = hbuf_put(ob, hex_str, 3); + break; + } + if (!rc) + return 0; + } + + return 1; +} + +/* + * Escape HTML. + * If "literal", we also want to escape some extra characters. + * If "secure", also escape characters as suggested by OWASP rules. + * If "num", use only numeric escapes. + * Does nothing if "size" is zero. + * Return zero on failure (memory), non-zero otherwise. + */ +int +hesc_html(struct lowdown_buf *ob, const char *data, + size_t size, int secure, int literal, int num) +{ + size_t i, mark; + int max = 0, rc; + unsigned char ch; + + if (size == 0) + return 1; + + if (!literal && !secure) + max = ESC_TBL_OWASP_MAX; + else if (literal && !secure) + max = ESC_TBL_LITERAL_MAX; + + for (i = 0; ; i++) { + mark = i; + while (i < size && + esc_tbl[(unsigned char)data[i]] == 0) + i++; + + /* Case where there's nothing to escape. */ + + if (mark == 0 && i >= size) + return hbuf_put(ob, data, size); + + if (i > mark && + !hbuf_put(ob, data + mark, i - mark)) + return 0; + + if (i >= size) + break; + + ch = (unsigned char)data[i]; + + if (esc_tbl[ch] <= max) + rc = hbuf_putc(ob, data[i]); + else + rc = hbuf_puts(ob, num ? + esc_num[esc_tbl[ch]] : + esc_name[esc_tbl[ch]]); + if (!rc) + return 0; + } + + return 1; +} diff --git a/html_escape.o b/html_escape.o Binary files differ. 
diff --git a/latex.c b/latex.c @@ -0,0 +1,1032 @@ +/* $Id$ */ +/* + * Copyright (c) 2020--2021 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <inttypes.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "lowdown.h" +#include "extern.h" + +struct latex { + unsigned int oflags; /* same as in lowdown_opts */ + struct hentryq headers_used; /* headers we've seen */ + ssize_t headers_offs; /* header offset */ + size_t footsz; /* current footnote */ +}; + +/* + * Return zero on failure, non-zero on success. 
+ */ +static int +rndr_escape_text(struct lowdown_buf *ob, const char *data, size_t sz) +{ + size_t i; + + for (i = 0; i < sz; i++) + switch (data[i]) { + case '&': + case '%': + case '$': + case '#': + case '_': + case '{': + case '}': + if (!hbuf_putc(ob, '\\')) + return 0; + if (!hbuf_putc(ob, data[i])) + return 0; + break; + case '~': + if (!HBUF_PUTSL(ob, "\\textasciitilde{}")) + return 0; + break; + case '^': + if (!HBUF_PUTSL(ob, "\\textasciicircum{}")) + return 0; + break; + case '\\': + if (!HBUF_PUTSL(ob, "\\textbackslash{}")) + return 0; + break; + default: + if (!hbuf_putc(ob, data[i])) + return 0; + break; + } + + return 1; +} + +/* + * Return zero on failure, non-zero on success. + */ +static int +rndr_escape(struct lowdown_buf *ob, const struct lowdown_buf *dat) +{ + + return rndr_escape_text(ob, dat->data, dat->size); +} + +static int +rndr_autolink(struct lowdown_buf *ob, + const struct rndr_autolink *param) +{ + + if (param->link.size == 0) + return 1; + if (!HBUF_PUTSL(ob, "\\url{")) + return 0; + if (param->type == HALINK_EMAIL && !HBUF_PUTSL(ob, "mailto:")) + return 0; + if (!rndr_escape(ob, ¶m->link)) + return 0; + return HBUF_PUTSL(ob, "}"); +} + +static int +rndr_entity(struct lowdown_buf *ob, + const struct rndr_entity *param) +{ + const char *tex; + unsigned char texflags; + + tex = entity_find_tex(¶m->text, &texflags); + if (tex == NULL) + return rndr_escape(ob, ¶m->text); + + if ((texflags & TEX_ENT_MATH) && (texflags & TEX_ENT_ASCII)) + return hbuf_printf(ob, "$\\mathrm{%s}$", tex); + if (texflags & TEX_ENT_ASCII) + return hbuf_puts(ob, tex); + if (texflags & TEX_ENT_MATH) + return hbuf_printf(ob, "$\\mathrm{\\%s}$", tex); + return hbuf_printf(ob, "\\%s", tex); +} + +static int +rndr_blockcode(struct lowdown_buf *ob, + const struct rndr_blockcode *param) +{ + + if (ob->size && !HBUF_PUTSL(ob, "\n")) + return 0; + +#if 0 + HBUF_PUTSL(ob, "\\begin{lstlisting}"); + if (lang->size) { + HBUF_PUTSL(ob, "[language="); + rndr_escape(ob, lang); 
+ HBUF_PUTSL(ob, "]\n\n"); + } else + HBUF_PUTSL(ob, "\n"); +#else + HBUF_PUTSL(ob, "\\begin{verbatim}\n"); +#endif + if (!hbuf_putb(ob, ¶m->text)) + return 0; +#if 0 + HBUF_PUTSL(ob, "\\end{lstlisting}\n"); +#else + return HBUF_PUTSL(ob, "\\end{verbatim}\n"); +#endif +} + +static int +rndr_definition_title(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "\\item [")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "] "); +} + +static int +rndr_definition(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "\\begin{description}\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "\\end{description}\n"); +} + +static int +rndr_blockquote(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (ob->size && !HBUF_PUTSL(ob, "\n")) + return 0; + if (!HBUF_PUTSL(ob, "\\begin{quotation}\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "\\end{quotation}\n"); +} + +static int +rndr_codespan(struct lowdown_buf *ob, + const struct rndr_codespan *param) +{ +#if 0 + HBUF_PUTSL(ob, "\\lstinline{"); + hbuf_putb(ob, text); +#else + if (!HBUF_PUTSL(ob, "\\texttt{")) + return 0; + if (!rndr_escape(ob, ¶m->text)) + return 0; +#endif + return HBUF_PUTSL(ob, "}"); +} + +static int +rndr_triple_emphasis(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "\\textbf{\\emph{")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "}}"); +} + +static int +rndr_double_emphasis(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "\\textbf{")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "}"); +} + +static int +rndr_emphasis(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "\\emph{")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + 
return HBUF_PUTSL(ob, "}"); +} + +static int +rndr_highlight(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "\\underline{")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "}"); +} + +static int +rndr_linebreak(struct lowdown_buf *ob) +{ + + return HBUF_PUTSL(ob, "\\linebreak\n"); +} + +static int +rndr_header(struct lowdown_buf *ob, const struct lowdown_buf *content, + const struct lowdown_node *n, struct latex *st) +{ + const char *type; + ssize_t level; + struct lowdown_buf *buf = NULL; + const struct lowdown_buf *id; + int rc = 0; + + if (n->rndr_header.attr_id.size) { + if ((buf = hbuf_new(32)) == NULL) + goto out; + if (!rndr_escape(buf, &n->rndr_header.attr_id)) + goto out; + id = buf; + } else { + id = hbuf_id(NULL, n, &st->headers_used); + if (id == NULL) + goto out; + } + + if (ob->size && !HBUF_PUTSL(ob, "\n")) + goto out; + + if (!HBUF_PUTSL(ob, "\\hypertarget{")) + goto out; + if (!hbuf_putb(ob, id)) + goto out; + if (!HBUF_PUTSL(ob, "}{%\n")) + goto out; + + level = (ssize_t)n->rndr_header.level + st->headers_offs; + if (level < 1) + level = 1; + + switch (level) { + case 1: + type = "\\section"; + break; + case 2: + type = "\\subsection"; + break; + case 3: + type = "\\subsubsection"; + break; + case 4: + type = "\\paragraph"; + break; + default: + type = "\\subparagraph"; + break; + } + + if (!hbuf_puts(ob, type)) + goto out; + if (!(st->oflags & LOWDOWN_LATEX_NUMBERED) && + !HBUF_PUTSL(ob, "*")) + goto out; + if (!HBUF_PUTSL(ob, "{")) + goto out; + if (!hbuf_putb(ob, content)) + goto out; + if (!HBUF_PUTSL(ob, "}\\label{")) + goto out; + if (!hbuf_putb(ob, id)) + goto out; + if (!HBUF_PUTSL(ob, "}}\n")) + goto out; + rc = 1; +out: + hbuf_free(buf); + return rc; +} + +static int +rndr_link(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct rndr_link *param) +{ + int loc; + + loc = param->link.size > 0 && + param->link.data[0] == '#'; + + if 
(param->attr_id.size > 0) { + if (!HBUF_PUTSL(ob, "\\hypertarget{")) + return 0; + if (!hbuf_putb(ob, &param->attr_id)) + return 0; + if (!HBUF_PUTSL(ob, "}{%\n")) + return 0; + } + + if (loc && !HBUF_PUTSL(ob, "\\hyperlink{")) + return 0; + else if (!loc && !HBUF_PUTSL(ob, "\\href{")) + return 0; + + if (loc && !rndr_escape_text + (ob, &param->link.data[1], param->link.size - 1)) + return 0; + else if (!loc && !rndr_escape(ob, &param->link)) + return 0; + if (!HBUF_PUTSL(ob, "}{")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + if (param->attr_id.size > 0 && !HBUF_PUTSL(ob, "}")) + return 0; + return HBUF_PUTSL(ob, "}"); +} + +static int +rndr_list(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct rndr_list *param) +{ + const char *type; + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + + /* TODO: HLIST_FL_ORDERED and param->start */ + + type = (param->flags & HLIST_FL_ORDERED) ? + "enumerate" : "itemize"; + + if (!hbuf_printf(ob, "\\begin{%s}\n", type)) + return 0; + if (!(param->flags & HLIST_FL_BLOCK) && + !HBUF_PUTSL(ob, "\\itemsep -0.2em\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return hbuf_printf(ob, "\\end{%s}\n", type); +} + +static int +rndr_listitem(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct rndr_listitem *param) +{ + size_t size; + + /* Only emit <li> if we're not a <dl> list. */ + + if (!(param->flags & HLIST_FL_DEF)) { + if (!HBUF_PUTSL(ob, "\\item")) + return 0; + if ((param->flags & HLIST_FL_CHECKED) && + !HBUF_PUTSL(ob, "[$\\rlap{$\\checkmark$}\\square$]")) + return 0; + if ((param->flags & HLIST_FL_UNCHECKED) && + !HBUF_PUTSL(ob, "[$\\square$]")) + return 0; + if (!HBUF_PUTSL(ob, " ")) + return 0; + } + + /* Cut off any trailing space. 
*/ + + if ((size = content->size) > 0) { + while (size && content->data[size - 1] == '\n') + size--; + if (!hbuf_put(ob, content->data, size)) + return 0; + } + + return HBUF_PUTSL(ob, "\n"); +} + +static int +rndr_paragraph(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + size_t i = 0; + + if (content->size == 0) + return 1; + while (i < content->size && + isspace((unsigned char)content->data[i])) + i++; + if (i == content->size) + return 1; + + if (!HBUF_PUTSL(ob, "\n")) + return 0; + if (!hbuf_put(ob, content->data + i, content->size - i)) + return 0; + return HBUF_PUTSL(ob, "\n"); +} + +static int +rndr_raw_block(struct lowdown_buf *ob, + const struct rndr_blockhtml *param, + const struct latex *st) +{ + size_t org = 0, sz = param->text.size; + + if (st->oflags & LOWDOWN_LATEX_SKIP_HTML) + return 1; + while (sz > 0 && param->text.data[sz - 1] == '\n') + sz--; + while (org < sz && param->text.data[org] == '\n') + org++; + if (org >= sz) + return 1; + + if (ob->size && !HBUF_PUTSL(ob, "\n")) + return 0; + if (!HBUF_PUTSL(ob, "\\begin{verbatim}\n")) + return 0; + if (!hbuf_put(ob, param->text.data + org, sz - org)) + return 0; + return HBUF_PUTSL(ob, "\\end{verbatim}\n"); +} + +static int +rndr_hrule(struct lowdown_buf *ob) +{ + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + return HBUF_PUTSL(ob, "\\noindent\\hrulefill\n"); +} + +static int +rndr_image(struct lowdown_buf *ob, + const struct rndr_image *param) +{ + const char *cp; + char dimbuf[32]; + unsigned int x, y; + float pct; + int rc = 0; + + /* + * Scan in our dimensions, if applicable. + * It's unreasonable for them to be over 32 characters, so use + * that as a cap to the size. + */ + + if (param->dims.size && + param->dims.size < sizeof(dimbuf) - 1) { + memset(dimbuf, 0, sizeof(dimbuf)); + memcpy(dimbuf, param->dims.data, param->dims.size); + rc = sscanf(dimbuf, "%ux%u", &x, &y); + } + + /* Extended attributes override dimensions. 
*/ + + if (!HBUF_PUTSL(ob, "\\includegraphics[")) + return 0; + if (param->attr_width.size || param->attr_height.size) { + if (param->attr_width.size && + param->attr_width.size < sizeof(dimbuf) - 1) { + memset(dimbuf, 0, sizeof(dimbuf)); + memcpy(dimbuf, param->attr_width.data, + param->attr_width.size); + + /* Try to parse as a percentage. */ + + if (sscanf(dimbuf, "%e%%", &pct) == 1) { + if (!hbuf_printf(ob, "width=%.2f" + "\\linewidth", pct / 100.0)) + return 0; + } else { + if (!hbuf_printf(ob, "width=%.*s", + (int)param->attr_width.size, + param->attr_width.data)) + return 0; + } + } + if (param->attr_height.size && + param->attr_height.size < sizeof(dimbuf) - 1) { + if (param->attr_width.size && + !HBUF_PUTSL(ob, ", ")) + return 0; + if (!hbuf_printf(ob, "height=%.*s", + (int)param->attr_height.size, + param->attr_height.data)) + return 0; + } + } else if (rc > 0) { + if (!hbuf_printf(ob, "width=%upx", x)) + return 0; + if (rc > 1 && !hbuf_printf(ob, ", height=%upx", y)) + return 0; + } + + if (!HBUF_PUTSL(ob, "]{")) + return 0; + cp = memrchr(param->link.data, '.', param->link.size); + if (cp != NULL) { + if (!HBUF_PUTSL(ob, "{")) + return 0; + if (!rndr_escape_text + (ob, param->link.data, cp - param->link.data)) + return 0; + if (!HBUF_PUTSL(ob, "}")) + return 0; + if (!rndr_escape_text(ob, cp, + param->link.size - (cp - param->link.data))) + return 0; + } else { + if (!rndr_escape(ob, &param->link)) + return 0; + } + return HBUF_PUTSL(ob, "}"); +} + +static int +rndr_raw_html(struct lowdown_buf *ob, + const struct rndr_raw_html *param, + const struct latex *st) +{ + + if (st->oflags & LOWDOWN_LATEX_SKIP_HTML) + return 1; + return rndr_escape(ob, &param->text); +} + +static int +rndr_table(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + /* Open the table in rndr_table_header. 
*/ + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "\\end{longtable}\n"); +} + +static int +rndr_table_header(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct rndr_table_header *param) +{ + size_t i; + char align; + int fl; + + if (!HBUF_PUTSL(ob, "\\begin{longtable}[]{")) + return 0; + + for (i = 0; i < param->columns; i++) { + fl = param->flags[i] & HTBL_FL_ALIGNMASK; + if (fl == HTBL_FL_ALIGN_CENTER) + align = 'c'; + else if (fl == HTBL_FL_ALIGN_RIGHT) + align = 'r'; + else + align = 'l'; + if (!hbuf_putc(ob, align)) + return 0; + } + if (!HBUF_PUTSL(ob, "}\n")) + return 0; + return hbuf_putb(ob, content); +} + +static int +rndr_tablecell(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct rndr_table_cell *param) +{ + + if (!hbuf_putb(ob, content)) + return 0; + return (param->col < param->columns - 1) ? + HBUF_PUTSL(ob, " & ") : + HBUF_PUTSL(ob, " \\\\\n"); +} + +static int +rndr_superscript(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "\\textsuperscript{")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "}"); +} + +static int +rndr_normal_text(struct lowdown_buf *ob, + const struct rndr_normal_text *param) +{ + + return rndr_escape(ob, &param->text); +} + +static int +rndr_footnote_ref(struct lowdown_buf *ob, + const struct lowdown_buf *content, struct latex *st) +{ + + if (!hbuf_printf(ob, "\\footnote[%zu]{", ++st->footsz)) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "}\n"); +} + +static int +rndr_math(struct lowdown_buf *ob, + const struct rndr_math *param) +{ + + if (param->blockmode && !HBUF_PUTSL(ob, "\\[")) + return 0; + else if (!param->blockmode && !HBUF_PUTSL(ob, "\\(")) + return 0; + if (!hbuf_putb(ob, &param->text)) + return 0; + if (param->blockmode && !HBUF_PUTSL(ob, "\\]")) + return 0; + else if (!param->blockmode && 
!HBUF_PUTSL(ob, "\\)")) + return 0; + return 1; +} + +static int +rndr_doc_footer(struct lowdown_buf *ob, const struct latex *st) +{ + + if (st->oflags & LOWDOWN_STANDALONE) + return HBUF_PUTSL(ob, "\\end{document}\n"); + return 1; +} + +static int +rndr_doc_header(struct lowdown_buf *ob, + const struct lowdown_metaq *mq, const struct latex *st) +{ + const struct lowdown_meta *m; + const char *author = NULL, *title = NULL, + *affil = NULL, *date = NULL, + *rcsauthor = NULL, + *rcsdate = NULL; + + if (!(st->oflags & LOWDOWN_STANDALONE)) + return 1; + + if (!HBUF_PUTSL(ob, + "% Options for packages loaded elsewhere\n" + "\\PassOptionsToPackage{unicode}{hyperref}\n" + "\\PassOptionsToPackage{hyphens}{url}\n" + "%\n" + "\\documentclass[11pt,a4paper]{article}\n" + "\\usepackage{amsmath,amssymb}\n" + "\\usepackage{lmodern}\n" + "\\usepackage{iftex}\n" + "\\ifPDFTeX\n" + " \\usepackage[T1]{fontenc}\n" + " \\usepackage[utf8]{inputenc}\n" + " \\usepackage{textcomp} % provide euro and other symbols\n" + "\\else % if luatex or xetex\n" + " \\usepackage{unicode-math}\n" + " \\defaultfontfeatures{Scale=MatchLowercase}\n" + " \\defaultfontfeatures[\\rmfamily]{Ligatures=TeX,Scale=1}\n" + "\\fi\n" + "\\usepackage{xcolor}\n" + "\\usepackage{graphicx}\n" + "\\usepackage{longtable}\n" + "\\usepackage{hyperref}\n" + "\\begin{document}\n")) + return 0; + + TAILQ_FOREACH(m, mq, entries) + if (strcasecmp(m->key, "author") == 0) + author = m->value; + else if (strcasecmp(m->key, "affiliation") == 0) + affil = m->value; + else if (strcasecmp(m->key, "date") == 0) + date = m->value; + else if (strcasecmp(m->key, "rcsauthor") == 0) + rcsauthor = rcsauthor2str(m->value); + else if (strcasecmp(m->key, "rcsdate") == 0) + rcsdate = rcsdate2str(m->value); + else if (strcasecmp(m->key, "title") == 0) + title = m->value; + + /* Overrides. 
*/ + + if (title == NULL) + title = "Untitled article"; + if (rcsauthor != NULL) + author = rcsauthor; + if (rcsdate != NULL) + date = rcsdate; + + if (!hbuf_printf(ob, "\\title{%s}\n", title)) + return 0; + + if (author != NULL) { + if (!hbuf_printf(ob, "\\author{%s", author)) + return 0; + if (affil != NULL && + !hbuf_printf(ob, " \\\\ %s", affil)) + return 0; + if (!HBUF_PUTSL(ob, "}\n")) + return 0; + } + + if (date != NULL && !hbuf_printf(ob, "\\date{%s}\n", date)) + return 0; + + return HBUF_PUTSL(ob, "\\maketitle\n"); +} + +static int +rndr_meta(struct lowdown_buf *ob, + const struct lowdown_buf *content, + struct lowdown_metaq *mq, + const struct lowdown_node *n, struct latex *st) +{ + struct lowdown_meta *m; + ssize_t val; + const char *ep; + + if ((m = calloc(1, sizeof(struct lowdown_meta))) == NULL) + return 0; + TAILQ_INSERT_TAIL(mq, m, entries); + + m->key = strndup(n->rndr_meta.key.data, + n->rndr_meta.key.size); + if (m->key == NULL) + return 0; + m->value = strndup(content->data, content->size); + if (m->value == NULL) + return 0; + + if (strcmp(m->key, "shiftheadinglevelby") == 0) { + val = (ssize_t)strtonum + (m->value, -100, 100, &ep); + if (ep == NULL) + st->headers_offs = val + 1; + } else if (strcmp(m->key, "baseheaderlevel") == 0) { + val = (ssize_t)strtonum + (m->value, 1, 100, &ep); + if (ep == NULL) + st->headers_offs = val; + } + + return 1; +} + +static int +rndr(struct lowdown_buf *ob, + struct lowdown_metaq *mq, void *arg, + const struct lowdown_node *n) +{ + struct lowdown_buf *tmp; + struct latex *st = arg; + const struct lowdown_node *child; + int ret = 0; + + if ((tmp = hbuf_new(64)) == NULL) + return 0; + + TAILQ_FOREACH(child, &n->children, entries) + if (!rndr(tmp, mq, st, child)) + goto out; + + /* + * These elements can be put in either a block or an inline + * context, so we're safe to just use them and forget. 
+ */ + + if (n->chng == LOWDOWN_CHNG_INSERT && + !HBUF_PUTSL(ob, "{\\color{blue} ")) + goto out; + if (n->chng == LOWDOWN_CHNG_DELETE && + !HBUF_PUTSL(ob, "{\\color{red} ")) + goto out; + + switch (n->type) { + case LOWDOWN_BLOCKCODE: + if (!rndr_blockcode(ob, &n->rndr_blockcode)) + return 0; + break; + case LOWDOWN_BLOCKQUOTE: + if (!rndr_blockquote(ob, tmp)) + return 0; + break; + case LOWDOWN_DEFINITION: + if (!rndr_definition(ob, tmp)) + return 0; + break; + case LOWDOWN_DEFINITION_TITLE: + if (!rndr_definition_title(ob, tmp)) + return 0; + break; + case LOWDOWN_DOC_HEADER: + if (!rndr_doc_header(ob, mq, st)) + return 0; + break; + case LOWDOWN_META: + if (n->chng != LOWDOWN_CHNG_DELETE && + !rndr_meta(ob, tmp, mq, n, st)) + return 0; + break; + case LOWDOWN_HEADER: + if (!rndr_header(ob, tmp, n, st)) + return 0; + break; + case LOWDOWN_HRULE: + if (!rndr_hrule(ob)) + return 0; + break; + case LOWDOWN_LIST: + if (!rndr_list(ob, tmp, &n->rndr_list)) + return 0; + break; + case LOWDOWN_LISTITEM: + if (!rndr_listitem(ob, tmp, &n->rndr_listitem)) + return 0; + break; + case LOWDOWN_PARAGRAPH: + if (!rndr_paragraph(ob, tmp)) + return 0; + break; + case LOWDOWN_TABLE_BLOCK: + if (!rndr_table(ob, tmp)) + return 0; + break; + case LOWDOWN_TABLE_HEADER: + if (!rndr_table_header(ob, tmp, &n->rndr_table_header)) + return 0; + break; + case LOWDOWN_TABLE_CELL: + if (!rndr_tablecell(ob, tmp, &n->rndr_table_cell)) + return 0; + break; + case LOWDOWN_BLOCKHTML: + if (!rndr_raw_block(ob, &n->rndr_blockhtml, st)) + return 0; + break; + case LOWDOWN_LINK_AUTO: + if (!rndr_autolink(ob, &n->rndr_autolink)) + return 0; + break; + case LOWDOWN_CODESPAN: + if (!rndr_codespan(ob, &n->rndr_codespan)) + return 0; + break; + case LOWDOWN_DOUBLE_EMPHASIS: + if (!rndr_double_emphasis(ob, tmp)) + return 0; + break; + case LOWDOWN_EMPHASIS: + if (!rndr_emphasis(ob, tmp)) + return 0; + break; + case LOWDOWN_HIGHLIGHT: + if (!rndr_highlight(ob, tmp)) + return 0; + break; + case LOWDOWN_IMAGE: 
+ if (!rndr_image(ob, &n->rndr_image)) + return 0; + break; + case LOWDOWN_LINEBREAK: + if (!rndr_linebreak(ob)) + return 0; + break; + case LOWDOWN_LINK: + if (!rndr_link(ob, tmp, &n->rndr_link)) + return 0; + break; + case LOWDOWN_TRIPLE_EMPHASIS: + if (!rndr_triple_emphasis(ob, tmp)) + return 0; + break; + case LOWDOWN_SUPERSCRIPT: + if (!rndr_superscript(ob, tmp)) + return 0; + break; + case LOWDOWN_FOOTNOTE: + if (!rndr_footnote_ref(ob, tmp, st)) + return 0; + break; + case LOWDOWN_MATH_BLOCK: + if (!rndr_math(ob, &n->rndr_math)) + return 0; + break; + case LOWDOWN_RAW_HTML: + if (!rndr_raw_html(ob, &n->rndr_raw_html, st)) + return 0; + break; + case LOWDOWN_NORMAL_TEXT: + if (!rndr_normal_text(ob, &n->rndr_normal_text)) + return 0; + break; + case LOWDOWN_ENTITY: + if (!rndr_entity(ob, &n->rndr_entity)) + return 0; + break; + case LOWDOWN_ROOT: + if (!hbuf_putb(ob, tmp)) + return 0; + if (!rndr_doc_footer(ob, st)) + return 0; + break; + default: + if (!hbuf_putb(ob, tmp)) + return 0; + break; + } + + if ((n->chng == LOWDOWN_CHNG_INSERT || + n->chng == LOWDOWN_CHNG_DELETE) && !HBUF_PUTSL(ob, "}")) + goto out; + + ret = 1; +out: + hbuf_free(tmp); + return ret; +} + +int +lowdown_latex_rndr(struct lowdown_buf *ob, + void *arg, const struct lowdown_node *n) +{ + struct latex *st = arg; + struct lowdown_metaq metaq; + int rc; + + TAILQ_INIT(&st->headers_used); + TAILQ_INIT(&metaq); + st->headers_offs = 1; + st->footsz = 0; + + rc = rndr(ob, &metaq, st, n); + + lowdown_metaq_free(&metaq); + hentryq_clear(&st->headers_used); + return rc; +} + +void * +lowdown_latex_new(const struct lowdown_opts *opts) +{ + struct latex *p; + + if ((p = calloc(1, sizeof(struct latex))) == NULL) + return NULL; + + p->oflags = opts == NULL ? 0 : opts->oflags; + return p; +} + +void +lowdown_latex_free(void *arg) +{ + + free(arg); +} diff --git a/latex.o b/latex.o Binary files differ. 
diff --git a/libdiff.c b/libdiff.c @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2013 Tatsuhiko Kubo <cubicdaiya@gmail.com> + * Copyright (c) 2018 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "config.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "libdiff.h" + +struct onp_coord { + int x; + int y; + int k; +}; + +struct onp_diff { + const void *a; /* shorter subsequence */ + const void *b; /* longer subsequence */ + size_t m; /* length of "a" */ + size_t n; /* length of "b" */ + diff_cmp cmp; /* comparison function */ + int *path; + size_t delta; + size_t offset; + size_t size; /* matrix size */ + size_t sz; /* data element width */ + struct onp_coord *pathcoords; + size_t pathcoordsz; + int swapped; /* seqs swapped from input */ + struct diff *result; +}; + +#define ONP_CMP(_d, _o1, _o2) \ + ((_d)->cmp((_d)->a + (_d)->sz * (_o1), \ + (_d)->b + (_d)->sz * (_o2))) + +/* + * Search shortest path and record the path. + */ +static int +onp_snake(struct onp_diff *diff, int k, int above, int below) +{ + int r, y, x; + void *pp; + + y = above > below ? above : below; + x = y - k; + + r = above > below ? + diff->path[k - 1 + diff->offset] : + diff->path[k + 1 + diff->offset]; + + while (x < (int)diff->m && y < (int)diff->n && + ONP_CMP(diff, x, y)) { + ++x; + ++y; + } + + diff->path[k + diff->offset] = diff->pathcoordsz; + + pp = reallocarray + (diff->pathcoords, + diff->pathcoordsz + 1, + sizeof(struct onp_coord)); + if (NULL == pp) + return -1; + diff->pathcoords = pp; + + assert(x >= 0); + assert(y >= 0); + + diff->pathcoords[diff->pathcoordsz].x = x; + diff->pathcoords[diff->pathcoordsz].y = y; + diff->pathcoords[diff->pathcoordsz].k = r; + diff->pathcoordsz++; + + return y; +} + +static int +onp_addlcs(struct onp_diff *diff, const void *e) +{ + void *pp; + + pp = reallocarray + (diff->result->lcs, + diff->result->lcssz + 1, + sizeof(void *)); + if (NULL == pp) + return 0; + diff->result->lcs = pp; + diff->result->lcs[diff->result->lcssz] = e; + diff->result->lcssz++; + return 1; +} + +static int +onp_addses(struct onp_diff *diff, const void *e, + size_t originIdx, size_t targetIdx, enum difft type) +{ + void 
*pp; + + pp = reallocarray + (diff->result->ses, + diff->result->sessz + 1, + sizeof(struct diff_ses)); + if (NULL == pp) + return 0; + diff->result->ses = pp; + diff->result->ses[diff->result->sessz].originIdx = originIdx; + diff->result->ses[diff->result->sessz].targetIdx = targetIdx; + diff->result->ses[diff->result->sessz].type = type; + diff->result->ses[diff->result->sessz].e = e; + diff->result->sessz++; + return 1; +} + +static int +onp_genseq(struct onp_diff *diff, const struct onp_coord* v, size_t vsz) +{ + size_t xpos, ypos; + size_t x_idx, y_idx; /* offset+1 numbers */ + int px_idx, py_idx; /* coordinates */ + int complete = 0; + int rc; + size_t i; + + x_idx = y_idx = 1; + px_idx = py_idx = 0; + xpos = ypos = 0; + + assert(vsz); + + for (i = vsz - 1; ! complete; --i) { + while (px_idx < v[i].x || py_idx < v[i].y) { + if (v[i].y - v[i].x > py_idx - px_idx) { + rc = ! diff->swapped ? + onp_addses(diff, + diff->b + (ypos * diff->sz), + 0, y_idx, DIFF_ADD) : + onp_addses(diff, + diff->b + (ypos * diff->sz), + y_idx, 0, DIFF_DELETE); + ++ypos; + ++y_idx; + ++py_idx; + } else if (v[i].y - v[i].x < py_idx - px_idx) { + rc = ! diff->swapped ? + onp_addses(diff, + diff->a + (xpos * diff->sz), + x_idx, 0, DIFF_DELETE) : + onp_addses(diff, + diff->a + (xpos * diff->sz), + 0, x_idx, DIFF_ADD); + ++xpos; + ++x_idx; + ++px_idx; + } else { + rc = ! diff->swapped ? + onp_addses(diff, + diff->a + (xpos * diff->sz), + x_idx, y_idx, DIFF_COMMON) : + onp_addses(diff, + diff->b + (ypos * diff->sz), + y_idx, x_idx, DIFF_COMMON); + if (rc) + rc = ! diff->swapped ? + onp_addlcs(diff, diff->a + + (xpos * diff->sz)) : + onp_addlcs(diff, diff->b + + (ypos * diff->sz)); + ++xpos; + ++ypos; + ++x_idx; + ++y_idx; + ++px_idx; + ++py_idx; + } + if ( ! 
rc) + return -1; + } + complete = 0 == i; + } + + return x_idx > diff->m && y_idx > diff->n; +} + +static struct onp_diff * +onp_alloc(diff_cmp cmp, size_t sz, + const void *a, size_t alen, + const void *b, size_t blen) +{ + struct onp_diff *diff; + + diff = calloc(1, sizeof(struct onp_diff)); + + if (NULL == diff) + return NULL; + + if (alen > blen) { + diff->a = b; + diff->b = a; + diff->m = blen; + diff->n = alen; + diff->swapped = 1; + } else { + diff->a = a; + diff->b = b; + diff->m = alen; + diff->n = blen; + diff->swapped = 0; + } + + assert(diff->n >= diff->m); + diff->cmp = cmp; + diff->sz = sz; + diff->delta = diff->n - diff->m; + diff->offset = diff->m + 1; + diff->size = diff->m + diff->n + 3; + + return diff; +} + +static void +onp_free(struct onp_diff *diff) +{ + + free(diff->path); + free(diff->pathcoords); + free(diff); +} + +static int +onp_compose(struct onp_diff *diff, struct diff *result) +{ + int rc = 0; + int p = -1; + int k; + int *fp = NULL; + int r; + struct onp_coord *epc = NULL; + size_t epcsz = 0; + size_t i; + void *pp; + + /* Initialise the path from origin to target. */ + + fp = malloc(sizeof(int) * diff->size); + diff->path = malloc(sizeof(int) * diff->size); + diff->result = result; + + if (NULL == fp || NULL == diff->path) + goto out; + + for (i = 0; i < diff->size; i++) + fp[i] = diff->path[i] = -1; + + /* + * Run the actual algorithm. + * This computes the full path in diff->path from the origin to + * the target. 
+ */ + + do { + p++; + for (k = -p; + k <= (ssize_t)diff->delta - 1; k++) { + fp[k + diff->offset] = onp_snake(diff, k, + fp[k - 1 + diff->offset] + 1, + fp[k + 1 + diff->offset]); + if (fp[k + diff->offset] < 0) + goto out; + } + for (k = diff->delta + p; + k >= (ssize_t)diff->delta + 1; k--) { + fp[k + diff->offset] = onp_snake(diff, k, + fp[k - 1 + diff->offset] + 1, + fp[k + 1 + diff->offset]); + if (fp[k + diff->offset] < 0) + goto out; + } + + fp[diff->delta + diff->offset] = + onp_snake(diff, diff->delta, + fp[diff->delta - 1 + diff->offset] + 1, + fp[diff->delta + 1 + diff->offset]); + if (fp[diff->delta + diff->offset] < 0) + goto out; + } while (fp[diff->delta + diff->offset] != (ssize_t)diff->n); + + /* Now compute edit distance. */ + + assert(p >= 0); + diff->result->editdist = diff->delta + 2 * p; + + /* + * Here we compute the shortest edit script and the least common + * subsequence from the path. + */ + + r = diff->path[diff->delta + diff->offset]; + + while(-1 != r) { + pp = reallocarray + (epc, epcsz + 1, + sizeof(struct onp_coord)); + if (NULL == pp) + goto out; + epc = pp; + epc[epcsz].x = diff->pathcoords[r].x; + epc[epcsz].y = diff->pathcoords[r].y; + epcsz++; + r = diff->pathcoords[r].k; + } + + if (epcsz) + onp_genseq(diff, epc, epcsz); + + rc = 1; +out: + free(fp); + free(epc); + return rc; +} + +int +diff(struct diff *d, diff_cmp cmp, size_t size, + const void *base1, size_t nmemb1, + const void *base2, size_t nmemb2) +{ + struct onp_diff *p; + int rc; + + p = onp_alloc(cmp, size, base1, nmemb1, base2, nmemb2); + if (NULL == p) + return 0; + + rc = onp_compose(p, d); + onp_free(p); + return rc; +} diff --git a/libdiff.h b/libdiff.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2013 Tatsuhiko Kubo <cubicdaiya@gmail.com> + * Copyright (c) 2018 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal 
in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef DIFF_H +#define DIFF_H + +typedef int (*diff_cmp)(const void *, const void *); + +enum difft { + DIFF_ADD, + DIFF_DELETE, + DIFF_COMMON +}; + +struct diff_ses { + size_t originIdx; /* if >0, index+1 in origin array */ + size_t targetIdx; /* if >0, index+1 in target array */ + enum difft type; /* type of edit */ + const void *e; /* pointer to object */ +}; + +struct diff { + const void **lcs; /* longest common subsequence */ + size_t lcssz; + struct diff_ses *ses; /* shortest edit script */ + size_t sessz; + size_t editdist; /* edit distance */ +}; + +int diff(struct diff *, diff_cmp, size_t, + const void *, size_t, const void *, size_t); + +#endif /* ! DIFF_H */ diff --git a/libdiff.o b/libdiff.o Binary files differ. diff --git a/liblowdown.a b/liblowdown.a Binary files differ. diff --git a/liblowdown.so b/liblowdown.so @@ -0,0 +1 @@ +liblowdown.so.1 +\ No newline at end of file diff --git a/liblowdown.so.1 b/liblowdown.so.1 Binary files differ. 
diff --git a/library.c b/library.c @@ -0,0 +1,245 @@ +/* $Id$ */ +/* + * Copyright (c) 2017--2021 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> + +#include "lowdown.h" +#include "extern.h" + +/* + * Starting size for input and output buffers. + */ +#define HBUF_START_BIG 4096 + +/* + * Starting size for metadata buffers. + */ +#define HBUF_START_SMALL 128 + +/* + * Return FALSE on failure, TRUE on success. + */ +static int +lowdown_render(const struct lowdown_opts *opts, + struct lowdown_buf *ob, const struct lowdown_node *n) +{ + void *rndr; + int c = 0; + + switch (opts == NULL ? 
LOWDOWN_HTML : opts->type) { + case LOWDOWN_GEMINI: + if ((rndr = lowdown_gemini_new(opts)) == NULL) + return 0; + c = lowdown_gemini_rndr(ob, rndr, n); + lowdown_gemini_free(rndr); + break; + case LOWDOWN_HTML: + if ((rndr = lowdown_html_new(opts)) == NULL) + return 0; + c = lowdown_html_rndr(ob, rndr, n); + lowdown_html_free(rndr); + break; + case LOWDOWN_LATEX: + if ((rndr = lowdown_latex_new(opts)) == NULL) + return 0; + c = lowdown_latex_rndr(ob, rndr, n); + lowdown_latex_free(rndr); + break; + case LOWDOWN_MAN: + case LOWDOWN_NROFF: + if ((rndr = lowdown_nroff_new(opts)) == NULL) + return 0; + c = lowdown_nroff_rndr(ob, rndr, n); + lowdown_nroff_free(rndr); + break; + case LOWDOWN_FODT: + if ((rndr = lowdown_odt_new(opts)) == NULL) + return 0; + c = lowdown_odt_rndr(ob, rndr, n); + lowdown_odt_free(rndr); + break; + case LOWDOWN_TERM: + if ((rndr = lowdown_term_new(opts)) == NULL) + return 0; + c = lowdown_term_rndr(ob, rndr, n); + lowdown_term_free(rndr); + break; + case LOWDOWN_TREE: + c = lowdown_tree_rndr(ob, n); + break; + default: + c = 1; + break; + } + + return c; +} + +int +lowdown_buf(const struct lowdown_opts *opts, + const char *data, size_t datasz, + char **res, size_t *rsz, + struct lowdown_metaq *metaq) +{ + struct lowdown_buf *ob = NULL; + struct lowdown_doc *doc; + size_t maxn; + enum lowdown_type t; + struct lowdown_node *n = NULL; + int rc = 0; + + t = opts == NULL ? 
LOWDOWN_HTML : opts->type; + + if ((doc = lowdown_doc_new(opts)) == NULL) + goto err; + + n = lowdown_doc_parse(doc, &maxn, data, datasz, metaq); + if (n == NULL) + goto err; + assert(n->type == LOWDOWN_ROOT); + + if (opts != NULL && (opts->oflags & LOWDOWN_SMARTY)) + if (!smarty(n, maxn, t)) + goto err; + + if ((ob = lowdown_buf_new(HBUF_START_BIG)) == NULL) + goto err; + + if (!lowdown_render(opts, ob, n)) + goto err; + + *res = ob->data; + *rsz = ob->size; + ob->data = NULL; + rc = 1; +err: + lowdown_buf_free(ob); + lowdown_node_free(n); + lowdown_doc_free(doc); + return rc; +} + +int +lowdown_buf_diff(const struct lowdown_opts *opts, + const char *new, size_t newsz, + const char *old, size_t oldsz, + char **res, size_t *rsz) +{ + struct lowdown_buf *ob = NULL; + struct lowdown_doc *doc = NULL; + enum lowdown_type t; + struct lowdown_node *nnew = NULL, *nold = NULL, + *ndiff = NULL; + size_t maxn; + int rc = 0; + + t = opts == NULL ? LOWDOWN_HTML : opts->type; + + if ((doc = lowdown_doc_new(opts)) == NULL) + goto err; + + nnew = lowdown_doc_parse(doc, NULL, new, newsz, NULL); + if (nnew == NULL) + goto err; + nold = lowdown_doc_parse(doc, NULL, old, oldsz, NULL); + if (nold == NULL) + goto err; + + ndiff = lowdown_diff(nold, nnew, &maxn); + + if (opts != NULL && (opts->oflags & LOWDOWN_SMARTY)) + if (!smarty(ndiff, maxn, t)) + goto err; + + if ((ob = lowdown_buf_new(HBUF_START_BIG)) == NULL) + goto err; + + if (!lowdown_render(opts, ob, ndiff)) + goto err; + + *res = ob->data; + *rsz = ob->size; + ob->data = NULL; + rc = 1; +err: + lowdown_buf_free(ob); + lowdown_node_free(ndiff); + lowdown_node_free(nnew); + lowdown_node_free(nold); + lowdown_doc_free(doc); + return rc; +} + +int +lowdown_file(const struct lowdown_opts *opts, FILE *fin, + char **res, size_t *rsz, struct lowdown_metaq *metaq) +{ + struct lowdown_buf *bin = NULL; + int rc = 0; + + if ((bin = lowdown_buf_new(HBUF_START_BIG)) == NULL) + goto out; + if (!hbuf_putf(bin, fin)) + goto out; + + if 
(!lowdown_buf(opts, + bin->data, bin->size, res, rsz, metaq)) + goto out; + rc = 1; +out: + lowdown_buf_free(bin); + return rc; +} + +int +lowdown_file_diff(const struct lowdown_opts *opts, + FILE *fnew, FILE *fold, char **res, size_t *rsz) +{ + struct lowdown_buf *bnew = NULL, *bold = NULL; + int rc = 0; + + if ((bnew = lowdown_buf_new(HBUF_START_BIG)) == NULL) + goto out; + if ((bold = lowdown_buf_new(HBUF_START_BIG)) == NULL) + goto out; + if (!hbuf_putf(bold, fold)) + goto out; + if (!hbuf_putf(bnew, fnew)) + goto out; + + if (!lowdown_buf_diff(opts, + bnew->data, bnew->size, + bold->data, bold->size, + res, rsz)) + goto out; + rc = 1; +out: + lowdown_buf_free(bnew); + lowdown_buf_free(bold); + return rc; +} + diff --git a/library.o b/library.o Binary files differ. diff --git a/lowdown b/lowdown Binary files differ. diff --git a/lowdown-diff b/lowdown-diff Binary files differ. diff --git a/lowdown.h b/lowdown.h @@ -0,0 +1,423 @@ +/* $Id$ */ +/* + * Copyright (c) 2017--2021 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef LOWDOWN_H +#define LOWDOWN_H + +/* + * All of this is documented in lowdown.3. + * If it's not documented, don't use it. + * Or report it as a bug. + */ + +/* We need this for compilation on musl systems. 
*/ + +#ifndef __BEGIN_DECLS +# ifdef __cplusplus +# define __BEGIN_DECLS extern "C" { +# else +# define __BEGIN_DECLS +# endif +#endif +#ifndef __END_DECLS +# ifdef __cplusplus +# define __END_DECLS } +# else +# define __END_DECLS +# endif +#endif + +enum lowdown_type { + LOWDOWN_GEMINI, + LOWDOWN_HTML, + LOWDOWN_LATEX, + LOWDOWN_MAN, + LOWDOWN_NROFF, + LOWDOWN_FODT, + LOWDOWN_TERM, + LOWDOWN_TREE, + LOWDOWN_NULL +}; + +/* + * All types of Markdown nodes that lowdown understands. + */ +enum lowdown_rndrt { + LOWDOWN_ROOT, + LOWDOWN_BLOCKCODE, + LOWDOWN_BLOCKQUOTE, + LOWDOWN_DEFINITION, + LOWDOWN_DEFINITION_TITLE, + LOWDOWN_DEFINITION_DATA, + LOWDOWN_HEADER, + LOWDOWN_HRULE, + LOWDOWN_LIST, + LOWDOWN_LISTITEM, + LOWDOWN_PARAGRAPH, + LOWDOWN_TABLE_BLOCK, + LOWDOWN_TABLE_HEADER, + LOWDOWN_TABLE_BODY, + LOWDOWN_TABLE_ROW, + LOWDOWN_TABLE_CELL, + LOWDOWN_BLOCKHTML, + LOWDOWN_LINK_AUTO, + LOWDOWN_CODESPAN, + LOWDOWN_DOUBLE_EMPHASIS, + LOWDOWN_EMPHASIS, + LOWDOWN_HIGHLIGHT, + LOWDOWN_IMAGE, + LOWDOWN_LINEBREAK, + LOWDOWN_LINK, + LOWDOWN_TRIPLE_EMPHASIS, + LOWDOWN_STRIKETHROUGH, + LOWDOWN_SUPERSCRIPT, + LOWDOWN_FOOTNOTE, + LOWDOWN_MATH_BLOCK, + LOWDOWN_RAW_HTML, + LOWDOWN_ENTITY, + LOWDOWN_NORMAL_TEXT, + LOWDOWN_DOC_HEADER, + LOWDOWN_META, + LOWDOWN__MAX +}; + +struct lowdown_buf { + char *data; /* actual character data */ + size_t size; /* size of the string */ + size_t maxsize; /* allocated size (0 = volatile) */ + size_t unit; /* realloc unit size (0 = read-only) */ + int buffer_free; /* obj should be freed */ +}; + +TAILQ_HEAD(lowdown_nodeq, lowdown_node); + +enum htbl_flags { + HTBL_FL_ALIGN_LEFT = 1, + HTBL_FL_ALIGN_RIGHT = 2, + HTBL_FL_ALIGN_CENTER = 3, + HTBL_FL_ALIGNMASK = 3, + HTBL_FL_HEADER = 4 +}; + +enum halink_type { + HALINK_NONE, /* used internally when it is not an autolink */ + HALINK_NORMAL, + HALINK_EMAIL +}; + +enum hlist_fl { + HLIST_FL_ORDERED = (1 << 0), /* <ol> list item */ + HLIST_FL_BLOCK = (1 << 1), /* <li> containing block data */ + 
HLIST_FL_UNORDERED = (1 << 2), /* <ul> list item */ + HLIST_FL_DEF = (1 << 3), /* <dl> list item */ + HLIST_FL_CHECKED = (1 << 4), /* <li> with checked box */ + HLIST_FL_UNCHECKED = (1 << 5), /* <li> with unchecked box */ +}; + +/* + * Meta-data keys and values. + * Both of these are non-NULL (but possibly empty). + */ +struct lowdown_meta { + char *key; + char *value; + TAILQ_ENTRY(lowdown_meta) entries; +}; + +TAILQ_HEAD(lowdown_metaq, lowdown_meta); + +enum lowdown_chng { + LOWDOWN_CHNG_NONE = 0, + LOWDOWN_CHNG_INSERT, + LOWDOWN_CHNG_DELETE, +}; + +struct rndr_meta { + struct lowdown_buf key; +}; + +struct rndr_paragraph { + size_t lines; + int beoln; +}; + +struct rndr_normal_text { + struct lowdown_buf text; +}; + +struct rndr_entity { + struct lowdown_buf text; +}; + +struct rndr_autolink { + struct lowdown_buf link; + enum halink_type type; +}; + +struct rndr_raw_html { + struct lowdown_buf text; +}; + +struct rndr_link { + struct lowdown_buf link; + struct lowdown_buf title; + struct lowdown_buf attr_cls; + struct lowdown_buf attr_id; +}; + +struct rndr_blockcode { + struct lowdown_buf text; + struct lowdown_buf lang; +}; + +struct rndr_definition { + enum hlist_fl flags; +}; + +struct rndr_codespan { + struct lowdown_buf text; +}; + +struct rndr_table{ + size_t columns; +}; + +struct rndr_table_header { + enum htbl_flags *flags; + size_t columns; +}; + +struct rndr_table_cell { + enum htbl_flags flags; + size_t col; + size_t columns; +}; + +struct rndr_blockhtml { + struct lowdown_buf text; +}; + +struct rndr_list { + enum hlist_fl flags; + size_t start; +}; + +struct rndr_listitem { + enum hlist_fl flags; + size_t num; +}; + +struct rndr_header{ + size_t level; + struct lowdown_buf attr_cls; + struct lowdown_buf attr_id; +}; + +struct rndr_image { + struct lowdown_buf link; + struct lowdown_buf title; + struct lowdown_buf dims; + struct lowdown_buf alt; + struct lowdown_buf attr_width; + struct lowdown_buf attr_height; + struct lowdown_buf attr_cls; + 
struct lowdown_buf attr_id; +}; + +struct rndr_math { + struct lowdown_buf text; + int blockmode; +}; + +/* + * Node parsed from input document. + * Each node is part of the parse tree. + */ +struct lowdown_node { + enum lowdown_rndrt type; + enum lowdown_chng chng; /* change type */ + size_t id; /* unique identifier */ + union { + struct rndr_meta rndr_meta; + struct rndr_list rndr_list; + struct rndr_paragraph rndr_paragraph; + struct rndr_listitem rndr_listitem; + struct rndr_header rndr_header; + struct rndr_normal_text rndr_normal_text; + struct rndr_entity rndr_entity; + struct rndr_autolink rndr_autolink; + struct rndr_raw_html rndr_raw_html; + struct rndr_link rndr_link; + struct rndr_blockcode rndr_blockcode; + struct rndr_definition rndr_definition; + struct rndr_codespan rndr_codespan; + struct rndr_table rndr_table; + struct rndr_table_header rndr_table_header; + struct rndr_table_cell rndr_table_cell; + struct rndr_image rndr_image; + struct rndr_math rndr_math; + struct rndr_blockhtml rndr_blockhtml; + }; + struct lowdown_node *parent; + struct lowdown_nodeq children; + TAILQ_ENTRY(lowdown_node) entries; +}; + +struct lowdown_opts_odt { + const char *sty; +}; + +struct lowdown_opts { + enum lowdown_type type; + union { + struct lowdown_opts_odt odt; + }; + size_t maxdepth; + size_t cols; + size_t hmargin; + size_t vmargin; + unsigned int feat; +#define LOWDOWN_TABLES 0x01 +#define LOWDOWN_FENCED 0x02 +#define LOWDOWN_FOOTNOTES 0x04 +#define LOWDOWN_AUTOLINK 0x08 +#define LOWDOWN_STRIKE 0x10 +/* Omitted 0x20 */ +#define LOWDOWN_HILITE 0x40 +/* Omitted 0x80 */ +#define LOWDOWN_SUPER 0x100 +#define LOWDOWN_MATH 0x200 +#define LOWDOWN_NOINTEM 0x400 +/* Disabled LOWDOWN_MATHEXP 0x1000 */ +#define LOWDOWN_NOCODEIND 0x2000 +#define LOWDOWN_METADATA 0x4000 +#define LOWDOWN_COMMONMARK 0x8000 +#define LOWDOWN_DEFLIST 0x10000 +#define LOWDOWN_IMG_EXT 0x20000 /* -> LOWDOWN_ATTRS */ +#define LOWDOWN_TASKLIST 0x40000 +#define LOWDOWN_ATTRS 0x80000 + unsigned int 
oflags; +#define LOWDOWN_GEMINI_LINK_END 0x8000 /* links at end */ +#define LOWDOWN_GEMINI_LINK_IN 0x10000 /* links inline */ +#define LOWDOWN_GEMINI_LINK_NOREF 0x200000 /* for !inline, no names */ +#define LOWDOWN_GEMINI_LINK_ROMAN 0x400000 /* roman link names */ +#define LOWDOWN_HTML_NUM_ENT 0x1000 /* use &#nn; if possible */ +#define LOWDOWN_HTML_OWASP 0x800 /* use OWASP escaping */ +#define LOWDOWN_ODT_SKIP_HTML 0x2000000 /* skip all HTML */ +#define LOWDOWN_SMARTY 0x40 /* smart typography */ +#define LOWDOWN_TERM_NOANSI 0x1000000 /* no ANSI escapes at all */ +#define LOWDOWN_TERM_NOCOLOUR 0x800000 /* no ANSI colours */ +#define LOWDOWN_GEMINI_METADATA 0x100000 /* show metadata */ +#define LOWDOWN_HTML_ESCAPE 0x02 /* escape HTML (if not skip) */ +#define LOWDOWN_HTML_HARD_WRAP 0x04 /* paragraph line breaks */ +#define LOWDOWN_HTML_HEAD_IDS 0x100 /* <hN id="the_name"> */ +#define LOWDOWN_HTML_SKIP_HTML 0x01 /* skip all HTML */ +#define LOWDOWN_LATEX_NUMBERED 0x4000 /* numbered sections */ +#define LOWDOWN_LATEX_SKIP_HTML 0x2000 /* skip all HTML */ +#define LOWDOWN_NROFF_GROFF 0x20 /* use groff extensions */ +/* Disable LOWDOWN_NROFF_HARD_WRAP 0x10 */ +#define LOWDOWN_NROFF_NOLINK 0x80000 /* don't show URLs */ +#define LOWDOWN_NROFF_NUMBERED 0x80 /* numbered section headers */ +#define LOWDOWN_NROFF_SHORTLINK 0x40000 /* shorten URLs */ +#define LOWDOWN_NROFF_SKIP_HTML 0x08 /* skip all HTML */ +#define LOWDOWN_STANDALONE 0x200 /* emit complete document */ +#define LOWDOWN_TERM_NOLINK 0x20000 /* don't show URLs */ +#define LOWDOWN_TERM_SHORTLINK 0x400 /* shorten URLs */ + char **meta; + size_t metasz; + char **metaovr; + size_t metaovrsz; +}; + +struct lowdown_doc; + +__BEGIN_DECLS + +/* + * High-level functions. + * These use the "lowdown_opts" to determine how to parse and render + * content, and extract that content from a buffer, file, or descriptor. 
+ */ +int lowdown_buf(const struct lowdown_opts *, + const char *, size_t, + char **, size_t *, struct lowdown_metaq *); +int lowdown_buf_diff(const struct lowdown_opts *, + const char *, size_t, const char *, size_t, + char **, size_t *); +int lowdown_file(const struct lowdown_opts *, + FILE *, char **, size_t *, struct lowdown_metaq *); +int lowdown_file_diff(const struct lowdown_opts *, FILE *, + FILE *, char **, size_t *); + +/* + * Low-level functions. + * These actually parse and render the AST from a buffer in various + * ways. + */ + +struct lowdown_buf + *lowdown_buf_new(size_t) __attribute__((malloc)); +void lowdown_buf_free(struct lowdown_buf *); + +struct lowdown_doc + *lowdown_doc_new(const struct lowdown_opts *); +struct lowdown_node + *lowdown_doc_parse(struct lowdown_doc *, size_t *, + const char *, size_t, struct lowdown_metaq *); +struct lowdown_node + *lowdown_diff(const struct lowdown_node *, + const struct lowdown_node *, size_t *); +void lowdown_doc_free(struct lowdown_doc *); +void lowdown_metaq_free(struct lowdown_metaq *); + +void lowdown_node_free(struct lowdown_node *); + +void lowdown_html_free(void *); +void *lowdown_html_new(const struct lowdown_opts *); +int lowdown_html_rndr(struct lowdown_buf *, void *, + const struct lowdown_node *); + +void lowdown_gemini_free(void *); +void *lowdown_gemini_new(const struct lowdown_opts *); +int lowdown_gemini_rndr(struct lowdown_buf *, void *, + const struct lowdown_node *); + +void lowdown_term_free(void *); +void *lowdown_term_new(const struct lowdown_opts *); +int lowdown_term_rndr(struct lowdown_buf *, void *, + const struct lowdown_node *); + +void lowdown_nroff_free(void *); +void *lowdown_nroff_new(const struct lowdown_opts *); +int lowdown_nroff_rndr(struct lowdown_buf *, void *, + const struct lowdown_node *); + +int lowdown_tree_rndr(struct lowdown_buf *, + const struct lowdown_node *); + +void lowdown_latex_free(void *); +void *lowdown_latex_new(const struct lowdown_opts *); +int 
lowdown_latex_rndr(struct lowdown_buf *, void *, + const struct lowdown_node *); + +void lowdown_odt_free(void *); +void *lowdown_odt_new(const struct lowdown_opts *); +int lowdown_odt_rndr(struct lowdown_buf *, void *, + const struct lowdown_node *); + +__END_DECLS + +#endif /* !LOWDOWN_H */ diff --git a/lowdown.in.pc b/lowdown.in.pc @@ -0,0 +1,13 @@ +prefix=@PREFIX@ +exec_prefix=${prefix} +libdir=@LIBDIR@ +includedir=@INCLUDEDIR@ + +Name: lowdown +Description: simple markdown translator library +URL: https://kristaps.bsd.lv/lowdown +Version: @VERSION@ +Requires: +Libs.private: +Libs: -L${libdir} -llowdown -lm +Cflags: -I${includedir} diff --git a/lowdown.pc b/lowdown.pc @@ -0,0 +1,13 @@ +prefix=/usr/local +exec_prefix=${prefix} +libdir=/usr/local/lib +includedir=/usr/local/include + +Name: lowdown +Description: simple markdown translator library +URL: https://kristaps.bsd.lv/lowdown +Version: 1.0.0 +Requires: +Libs.private: +Libs: -L${libdir} -llowdown -lm +Cflags: -I${includedir} diff --git a/main.c b/main.c @@ -0,0 +1,606 @@ +/* $Id$ */ +/* + * Copyright (c) 2016, 2017, 2020 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif +#include <sys/param.h> +#if HAVE_CAPSICUM +# include <sys/resource.h> +# include <sys/capsicum.h> +#endif +#include <sys/ioctl.h> +#include <sys/stat.h> + +#include <assert.h> +#if HAVE_ERR +# include <err.h> +#endif +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <limits.h> /* INT_MAX */ +#include <locale.h> /* setlocale() */ +#if HAVE_SANDBOX_INIT +# include <sandbox.h> +#endif +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <termios.h> /* struct winsize */ +#include <unistd.h> + +#include "lowdown.h" + +/* + * Start with all of the sandboxes. + * The sandbox_pre() happens before we open our input file for reading, + * while the sandbox_post() happens afterward. + */ + +#if HAVE_PLEDGE + +static void +sandbox_post(int fdin, int fddin, int fdout) +{ + /* Called once all files are open: restrict the process to "stdio" only. */ + if (pledge("stdio", NULL) == -1) + err(1, "pledge"); +} + +static void +sandbox_pre(void) +{ + /* Files are opened after this call, so path promises are still requested. */ + if (pledge("stdio rpath wpath cpath", NULL) == -1) + err(1, "pledge"); +} + +#elif HAVE_SANDBOX_INIT + +static void +sandbox_post(int fdin, int fddin, int fdout) +{ + char *ep; + int rc; + + rc = sandbox_init + (kSBXProfilePureComputation, + SANDBOX_NAMED, &ep); + if (rc != 0) + errx(1, "sandbox_init: %s", ep); +} + +static void +sandbox_pre(void) +{ + + /* Do nothing. 
*/ +} + +#elif HAVE_CAPSICUM + +static void +sandbox_post(int fdin, int fddin, int fdout) +{ + cap_rights_t rights; + + cap_rights_init(&rights); + + cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_FSTAT); + if (cap_rights_limit(fdin, &rights) < 0) + err(1, "cap_rights_limit"); + + if (fddin != -1) { + cap_rights_init(&rights, + CAP_EVENT, CAP_READ, CAP_FSTAT); + if (cap_rights_limit(fddin, &rights) < 0) + err(1, "cap_rights_limit"); + } + + cap_rights_init(&rights, CAP_EVENT, CAP_WRITE, CAP_FSTAT); + if (cap_rights_limit(STDERR_FILENO, &rights) < 0) + err(1, "cap_rights_limit"); + + cap_rights_init(&rights, CAP_EVENT, CAP_WRITE, CAP_FSTAT); + if (cap_rights_limit(fdout, &rights) < 0) + err(1, "cap_rights_limit"); + + if (cap_enter()) + err(1, "cap_enter"); +} + +static void +sandbox_pre(void) +{ + + /* Do nothing. */ +} + +#else /* No sandbox. */ + +#warning Compiling without sandbox support. + +static void +sandbox_post(int fdin, int fddin, int fdout) +{ + + /* Do nothing. */ +} + +static void +sandbox_pre(void) +{ + + /* Do nothing. */ +} + +#endif + +static size_t +get_columns(void) +{ + struct winsize size; + + memset(&size, 0, sizeof(struct winsize)); + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &size) == -1) + return 72; + return size.ws_col; +} + +/* + * Recognise the metadata format of "foo = bar" and "foo: bar". + * Translates from the former into the latter. + * This way "foo = : bar" -> "foo : : bar", etc. + * Errors out if the metadata is malformed (no colon or equal sign). 
+ */ +static void +metadata_parse(char opt, char ***vals, size_t *valsz, const char *arg) +{ + const char *loceq, *loccol; + char *cp; + + loceq = strchr(arg, '='); + loccol = strchr(arg, ':'); + + if ((loceq != NULL && loccol == NULL) || + (loceq != NULL && loccol != NULL && loceq < loccol)) { + if (asprintf(&cp, "%.*s: %s\n", + (int)(loceq - arg), arg, loceq + 1) == -1) + err(1, NULL); + *vals = reallocarray(*vals, *valsz + 1, sizeof(char *)); + if (*vals == NULL) + err(1, NULL); + (*vals)[*valsz] = cp; + (*valsz)++; + return; + } + if ((loccol != NULL && loceq == NULL) || + (loccol != NULL && loceq != NULL && loccol < loceq)) { + if (asprintf(&cp, "%s\n", arg) == -1) + err(1, NULL); + *vals = reallocarray(*vals, *valsz + 1, sizeof(char *)); + if (*vals == NULL) + err(1, NULL); + (*vals)[*valsz] = cp; + (*valsz)++; + return; + } + errx(1, "-%c: malformed metadata", opt); +} + +int +main(int argc, char *argv[]) +{ + FILE *fin = stdin, *fout = stdout, + *din = NULL; + const char *fnin = "<stdin>", *fnout = NULL, + *fndin = NULL, *extract = NULL, *er, + *mainopts = "M:m:sT:t:o:X:", + *diffopts = "M:m:sT:t:o:", *odtstyfn = NULL; + struct lowdown_opts opts; + struct stat st; + int c, diff = 0, fd, + status = 0, aoflag = 0, roflag = 0, + aiflag = 0, riflag = 0, centre = 0; + char *ret = NULL, *cp, *odtsty = NULL; + size_t i, retsz = 0, rcols, sz; + ssize_t ssz; + struct lowdown_meta *m; + struct lowdown_metaq mq; + struct option lo[] = { + { "html-skiphtml", no_argument, &aoflag, LOWDOWN_HTML_SKIP_HTML }, + { "html-no-skiphtml", no_argument, &roflag, LOWDOWN_HTML_SKIP_HTML }, + { "html-escapehtml", no_argument, &aoflag, LOWDOWN_HTML_ESCAPE }, + { "html-no-escapehtml", no_argument, &roflag, LOWDOWN_HTML_ESCAPE }, + { "html-hardwrap", no_argument, &aoflag, LOWDOWN_HTML_HARD_WRAP }, + { "html-no-hardwrap", no_argument, &roflag, LOWDOWN_HTML_HARD_WRAP }, + { "html-head-ids", no_argument, &aoflag, LOWDOWN_HTML_HEAD_IDS }, + { "html-no-head-ids", no_argument, &roflag, 
LOWDOWN_HTML_HEAD_IDS }, + { "html-owasp", no_argument, &aoflag, LOWDOWN_HTML_OWASP }, + { "html-no-owasp", no_argument, &roflag, LOWDOWN_HTML_OWASP }, + { "html-num-ent", no_argument, &aoflag, LOWDOWN_HTML_NUM_ENT }, + { "html-no-num-ent", no_argument, &roflag, LOWDOWN_HTML_NUM_ENT }, + { "latex-numbered", no_argument, &aoflag, LOWDOWN_LATEX_NUMBERED }, + { "latex-no-numbered", no_argument, &roflag, LOWDOWN_LATEX_NUMBERED }, + { "latex-skiphtml", no_argument, &aoflag, LOWDOWN_LATEX_SKIP_HTML }, + { "latex-no-skiphtml", no_argument, &roflag, LOWDOWN_LATEX_SKIP_HTML }, + { "nroff-skiphtml", no_argument, &aoflag, LOWDOWN_NROFF_SKIP_HTML }, + { "nroff-no-skiphtml", no_argument, &roflag, LOWDOWN_NROFF_SKIP_HTML }, + { "nroff-groff", no_argument, &aoflag, LOWDOWN_NROFF_GROFF }, + { "nroff-no-groff", no_argument, &roflag, LOWDOWN_NROFF_GROFF }, + { "nroff-numbered", no_argument, &aoflag, LOWDOWN_NROFF_NUMBERED }, + { "nroff-no-numbered", no_argument, &roflag, LOWDOWN_NROFF_NUMBERED }, + { "nroff-shortlinks", no_argument, &aoflag, LOWDOWN_NROFF_SHORTLINK }, + { "nroff-no-shortlinks",no_argument, &roflag, LOWDOWN_NROFF_SHORTLINK }, + { "nroff-nolinks", no_argument, &aoflag, LOWDOWN_NROFF_NOLINK }, + { "nroff-no-nolinks", no_argument, &roflag, LOWDOWN_NROFF_NOLINK }, + { "odt-skiphtml", no_argument, &aoflag, LOWDOWN_ODT_SKIP_HTML }, + { "odt-no-skiphtml", no_argument, &roflag, LOWDOWN_ODT_SKIP_HTML }, + { "odt-style", required_argument, NULL, 6 }, + { "term-width", required_argument, NULL, 1 }, + { "term-hmargin", required_argument, NULL, 2 }, + { "term-vmargin", required_argument, NULL, 3 }, + { "term-columns", required_argument, NULL, 4 }, + { "gemini-link-end", no_argument, &aoflag, LOWDOWN_GEMINI_LINK_END }, + { "gemini-no-link-end", no_argument, &roflag, LOWDOWN_GEMINI_LINK_END }, + { "gemini-link-roman", no_argument, &aoflag, LOWDOWN_GEMINI_LINK_ROMAN }, + { "gemini-no-link-roman", no_argument, &roflag, LOWDOWN_GEMINI_LINK_ROMAN }, + { "gemini-link-noref", 
no_argument, &aoflag, LOWDOWN_GEMINI_LINK_NOREF }, + { "gemini-no-link-noref", no_argument, &roflag, LOWDOWN_GEMINI_LINK_NOREF }, + { "gemini-link-inline", no_argument, &aoflag, LOWDOWN_GEMINI_LINK_IN }, + { "gemini-no-link-inline",no_argument, &roflag, LOWDOWN_GEMINI_LINK_IN }, + { "gemini-metadata", no_argument, &aoflag, LOWDOWN_GEMINI_METADATA }, + { "gemini-no-metadata", no_argument, &roflag, LOWDOWN_GEMINI_METADATA }, + { "term-shortlinks", no_argument, &aoflag, LOWDOWN_TERM_SHORTLINK }, + { "term-no-shortlinks", no_argument, &roflag, LOWDOWN_TERM_SHORTLINK }, + { "term-nolinks", no_argument, &aoflag, LOWDOWN_TERM_NOLINK }, + { "term-no-nolinks", no_argument, &roflag, LOWDOWN_TERM_NOLINK }, + { "term-no-colour", no_argument, &aoflag, LOWDOWN_TERM_NOCOLOUR }, + { "term-colour", no_argument, &roflag, LOWDOWN_TERM_NOCOLOUR }, + { "term-no-ansi", no_argument, &aoflag, LOWDOWN_TERM_NOANSI }, + { "term-ansi", no_argument, &roflag, LOWDOWN_TERM_NOANSI }, + { "out-smarty", no_argument, &aoflag, LOWDOWN_SMARTY }, + { "out-no-smarty", no_argument, &roflag, LOWDOWN_SMARTY }, + { "out-standalone", no_argument, &aoflag, LOWDOWN_STANDALONE }, + { "out-no-standalone", no_argument, &roflag, LOWDOWN_STANDALONE }, + { "parse-hilite", no_argument, &aiflag, LOWDOWN_HILITE }, + { "parse-no-hilite", no_argument, &riflag, LOWDOWN_HILITE }, + { "parse-tables", no_argument, &aiflag, LOWDOWN_TABLES }, + { "parse-no-tables", no_argument, &riflag, LOWDOWN_TABLES }, + { "parse-fenced", no_argument, &aiflag, LOWDOWN_FENCED }, + { "parse-no-fenced", no_argument, &riflag, LOWDOWN_FENCED }, + { "parse-footnotes", no_argument, &aiflag, LOWDOWN_FOOTNOTES }, + { "parse-no-footnotes", no_argument, &riflag, LOWDOWN_FOOTNOTES }, + { "parse-autolink", no_argument, &aiflag, LOWDOWN_AUTOLINK }, + { "parse-no-autolink", no_argument, &riflag, LOWDOWN_AUTOLINK }, + { "parse-strike", no_argument, &aiflag, LOWDOWN_STRIKE }, + { "parse-no-strike", no_argument, &riflag, LOWDOWN_STRIKE }, + { "parse-super", 
no_argument, &aiflag, LOWDOWN_SUPER }, + { "parse-no-super", no_argument, &riflag, LOWDOWN_SUPER }, + { "parse-math", no_argument, &aiflag, LOWDOWN_MATH }, + { "parse-no-math", no_argument, &riflag, LOWDOWN_MATH }, + { "parse-codeindent", no_argument, &riflag, LOWDOWN_NOCODEIND }, + { "parse-no-codeindent",no_argument, &aiflag, LOWDOWN_NOCODEIND }, + { "parse-intraemph", no_argument, &riflag, LOWDOWN_NOINTEM }, + { "parse-no-intraemph", no_argument, &aiflag, LOWDOWN_NOINTEM }, + { "parse-metadata", no_argument, &aiflag, LOWDOWN_METADATA }, + { "parse-no-metadata", no_argument, &riflag, LOWDOWN_METADATA }, + { "parse-cmark", no_argument, &aiflag, LOWDOWN_COMMONMARK }, + { "parse-no-cmark", no_argument, &riflag, LOWDOWN_COMMONMARK }, + { "parse-deflists", no_argument, &aiflag, LOWDOWN_DEFLIST }, + { "parse-no-deflists", no_argument, &riflag, LOWDOWN_DEFLIST }, + { "parse-img-ext", no_argument, &aiflag, LOWDOWN_IMG_EXT }, /* TODO: remove */ + { "parse-no-img-ext", no_argument, &riflag, LOWDOWN_IMG_EXT }, /* TODO: remove */ + { "parse-ext-attrs", no_argument, &aiflag, LOWDOWN_ATTRS }, + { "parse-no-ext-attrs", no_argument, &riflag, LOWDOWN_ATTRS }, + { "parse-tasklists", no_argument, &aiflag, LOWDOWN_TASKLIST }, + { "parse-no-tasklists", no_argument, &riflag, LOWDOWN_TASKLIST }, + { "parse-maxdepth", required_argument, NULL, 5 }, + { NULL, 0, NULL, 0 } + }; + + /* Get the real number of columns or 72. 
*/ + + rcols = get_columns(); + + sandbox_pre(); + + TAILQ_INIT(&mq); + memset(&opts, 0, sizeof(struct lowdown_opts)); + + opts.maxdepth = 128; + opts.type = LOWDOWN_HTML; + opts.feat = + LOWDOWN_ATTRS | + LOWDOWN_AUTOLINK | + LOWDOWN_COMMONMARK | + LOWDOWN_DEFLIST | + LOWDOWN_FENCED | + LOWDOWN_FOOTNOTES | + LOWDOWN_METADATA | + LOWDOWN_STRIKE | + LOWDOWN_SUPER | + LOWDOWN_TABLES | + LOWDOWN_TASKLIST; + opts.oflags = + LOWDOWN_HTML_ESCAPE | + LOWDOWN_HTML_HEAD_IDS | + LOWDOWN_HTML_NUM_ENT | + LOWDOWN_HTML_OWASP | + LOWDOWN_HTML_SKIP_HTML | + LOWDOWN_NROFF_GROFF | + LOWDOWN_NROFF_NUMBERED | + LOWDOWN_NROFF_SKIP_HTML | + LOWDOWN_ODT_SKIP_HTML | + LOWDOWN_LATEX_SKIP_HTML | + LOWDOWN_LATEX_NUMBERED | + LOWDOWN_SMARTY; + + if (strcasecmp(getprogname(), "lowdown-diff") == 0) + diff = 1; + + while ((c = getopt_long(argc, argv, + diff ? diffopts : mainopts, lo, NULL)) != -1) + switch (c) { + case 'M': + metadata_parse(c, &opts.metaovr, + &opts.metaovrsz, optarg); + break; + case 'm': + metadata_parse(c, &opts.meta, + &opts.metasz, optarg); + break; + case 'o': + fnout = optarg; + break; + case 's': + opts.oflags |= LOWDOWN_STANDALONE; + break; + case 't': + case 'T': + if (strcasecmp(optarg, "ms") == 0) + opts.type = LOWDOWN_NROFF; + else if (strcasecmp(optarg, "gemini") == 0) + opts.type = LOWDOWN_GEMINI; + else if (strcasecmp(optarg, "html") == 0) + opts.type = LOWDOWN_HTML; + else if (strcasecmp(optarg, "latex") == 0) + opts.type = LOWDOWN_LATEX; + else if (strcasecmp(optarg, "man") == 0) + opts.type = LOWDOWN_MAN; + else if (strcasecmp(optarg, "fodt") == 0) + opts.type = LOWDOWN_FODT; + else if (strcasecmp(optarg, "term") == 0) + opts.type = LOWDOWN_TERM; + else if (strcasecmp(optarg, "tree") == 0) + opts.type = LOWDOWN_TREE; + else if (strcasecmp(optarg, "null") == 0) + opts.type = LOWDOWN_NULL; + else + goto usage; + break; + case 'X': + extract = optarg; + break; + case 0: + if (roflag) + opts.oflags &= ~roflag; + if (aoflag) + opts.oflags |= aoflag; + if (riflag) 
+ opts.feat &= ~riflag; + if (aiflag) + opts.feat |= aiflag; + break; + case 1: + opts.cols = strtonum(optarg, 0, INT_MAX, &er); + if (er == NULL) + break; + errx(1, "--term-width: %s", er); + case 2: + /* + * Accept both spellings of the keyword; anything + * else must be a numeric margin. + */ + if (strcmp(optarg, "centre") == 0 || + strcmp(optarg, "center") == 0) { + centre = 1; + break; + } + opts.hmargin = strtonum + (optarg, 0, INT_MAX, &er); + if (er == NULL) + break; + errx(1, "--term-hmargin: %s", er); + case 3: + opts.vmargin = strtonum(optarg, 0, INT_MAX, &er); + if (er == NULL) + break; + errx(1, "--term-vmargin: %s", er); + case 4: + rcols = strtonum(optarg, 1, INT_MAX, &er); + if (er == NULL) + break; + errx(1, "--term-columns: %s", er); + case 5: + opts.maxdepth = strtonum(optarg, 0, INT_MAX, &er); + if (er == NULL) + break; + errx(1, "--parse-maxdepth: %s", er); + case 6: + odtstyfn = optarg; + break; + default: + goto usage; + } + + argc -= optind; + argv += optind; + + if (opts.type == LOWDOWN_TERM || + opts.type == LOWDOWN_GEMINI) + setlocale(LC_CTYPE, ""); + + /* + * By default, try to show 80 columns. + * Don't show more than the number of available columns. + */ + + if (opts.cols == 0) { + if ((opts.cols = rcols) > 80) + opts.cols = 80; + } else if (opts.cols > rcols) + opts.cols = rcols; + + /* If we're centred, set our margins. */ + + if (centre && opts.cols < rcols) + opts.hmargin = (rcols - opts.cols) / 2; + + /* + * Diff mode takes two arguments: the first is mandatory (the + * old file) and the second (the new one) is optional. + * Non-diff mode takes an optional single argument. 
+ */ + + if ((diff && (argc == 0 || argc > 2)) || (!diff && argc > 1)) + goto usage; + + if (diff) { + if (argc > 1 && strcmp(argv[1], "-")) { + fnin = argv[1]; + if ((fin = fopen(fnin, "r")) == NULL) + err(1, "%s", fnin); + } + fndin = argv[0]; + if ((din = fopen(fndin, "r")) == NULL) + err(1, "%s", fndin); + } else { + if (argc && strcmp(argv[0], "-")) { + fnin = argv[0]; + if ((fin = fopen(fnin, "r")) == NULL) + err(1, "%s", fnin); + } + } + + /* + * If we have a style sheet specified for -Tfodt, load it now + * before we drop privileges. + */ + + if (opts.type == LOWDOWN_FODT && odtstyfn != NULL) { + if ((fd = open(odtstyfn, O_RDONLY)) == -1) + err(1, "%s", odtstyfn); + if (fstat(fd, &st) == -1) + err(1, "%s", odtstyfn); + if ((uint64_t)st.st_size > SIZE_MAX - 1) + errx(1, "%s: file too long", odtstyfn); + sz = (size_t)st.st_size; + if ((odtsty = cp = malloc(sz + 1)) == NULL) + err(1, NULL); + while (sz > 0) { + if ((ssz = read(fd, cp, sz)) == -1) + err(1, "%s", odtstyfn); + if (ssz == 0) + errx(1, "%s: short file", odtstyfn); + sz -= (size_t)ssz; + cp += ssz; + } + *cp = '\0'; + close(fd); + opts.odt.sty = odtsty; + } + + /* Configure the output file. */ + + if (fnout != NULL && strcmp(fnout, "-") && + (fout = fopen(fnout, "w")) == NULL) + err(1, "%s", fnout); + + sandbox_post(fileno(fin), din == NULL ? + -1 : fileno(din), fileno(fout)); + + /* We're now completely sandboxed. */ + + /* Require metadata when extracting. */ + + if (extract) + opts.feat |= LOWDOWN_METADATA; + + /* + * Allow NO_COLOUR to dictate colours. + * This only works for -Tterm output when not in diff mode. 
+ */ + + if (getenv("NO_COLOR") != NULL || + getenv("NO_COLOUR") != NULL) + opts.oflags |= LOWDOWN_TERM_NOCOLOUR; + + if (diff) { + opts.oflags &= ~LOWDOWN_TERM_NOCOLOUR; + if (!lowdown_file_diff + (&opts, fin, din, &ret, &retsz)) + errx(1, "%s: failed parse", fnin); + } else { + if (!lowdown_file(&opts, fin, &ret, &retsz, &mq)) + errx(1, "%s: failed parse", fnin); + } + + if (extract != NULL) { + assert(!diff); + TAILQ_FOREACH(m, &mq, entries) + if (strcasecmp(m->key, extract) == 0) + break; + if (m != NULL) { + fprintf(fout, "%s\n", m->value); + } else { + status = 1; + warnx("%s: unknown keyword", extract); + } + } else + fwrite(ret, 1, retsz, fout); + + free(ret); + free(odtsty); + + if (fout != stdout) + fclose(fout); + if (din != NULL) + fclose(din); + if (fin != stdin) + fclose(fin); + + for (i = 0; i < opts.metasz; i++) + free(opts.meta[i]); + for (i = 0; i < opts.metaovrsz; i++) + free(opts.metaovr[i]); + + free(opts.meta); + free(opts.metaovr); + + lowdown_metaq_free(&mq); + return status; +usage: + if (!diff) { + fprintf(stderr, + "usage: lowdown [-s] [input_options] [output_options] [-M metadata]\n" + " [-m metadata] [-o output] [-t mode] [-X keyword] [file]\n"); + } else + fprintf(stderr, + "usage: lowdown-diff [-s] [input_options] [output_options] [-M metadata]\n" + " [-m metadata] [-o output] [-t mode] oldfile [newfile]\n"); + return 1; +} diff --git a/main.o b/main.o Binary files differ. diff --git a/man/lowdown-diff.1 b/man/lowdown-diff.1 @@ -0,0 +1,738 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2016--2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN-DIFF 1 +.Os +.Sh NAME +.Nm lowdown-diff +.Nd view differences in markdown files +.Sh SYNOPSIS +.Nm lowdown-diff +.Op input_options +.Op output_options +.Op Fl s +.Op Fl M Ar metadata +.Op Fl m Ar metadata +.Op Fl o Ar file +.Op Fl t Ar mode +.Ar oldfile +.Op Ar newfile +.Sh DESCRIPTION +Shows differences between +.Xr lowdown 5 +documents as formatted output. +Results are written to standard output. +.Pp +The arguments are as follows: +.Bl -tag -width Ds +.It Fl s +Standalone mode. +This emits a document envelope surrounding the output by drawing from +document metadata. +See +.Sx Metadata +on providing information to the document envelope. +This applies to +.Fl t Ns Ar gemini , +.Fl t Ns Ar html , +.Fl t Ns Ar latex , +.Fl t Ns Ar ms , +.Fl t Ns Ar man , +and +.Fl t Ns Ar fodt . +.It Fl M Ar metadata +Provide a single metadata key-value pair. +This may be in the usual +.Xr lowdown 5 +colon-separated metadata format or instead separated by an equal sign, +depending upon which character comes first. +Exits with an error if given neither colon nor equal sign. +May be invoked multiple times for each pair. +This overrides +.Fl m +and what's parsed from the document. +.It Fl m Ar metadata +Like +.Fl M , +but is overridden by what's parsed from the document, then +.Fl M . +.It Fl o Ar file +Send output to +.Ar file +unless it's +.Dq - , +in which case fall back to the default of standard output. +.It Fl t Ar mode , Fl T Ar mode +The output mode. 
+This may be +.Ar html +for HTML5 output, +.Ar latex +for LaTeX, +.Ar gemini +for the Gemini format, +.Ar ms +for roff output using the classic (i.e., no-extension) +.Fl ms +package and needing table support, +.Ar term +for ANSI-compatible UTF-8 terminal output, +.Ar man +for roff output using the classic +.Fl man +package, +.Ar tree , +to show the parse tree of the input document, and +.Ar null +to parse the document but do no rendering. +See +.Sx Output modes . +The +.Fl T Ar mode +form is retained for backward compatibility. +.It Ar oldfile , newfile +Markdown documents used for comparison. +If +.Ar newfile +is not given or +.Dq - , +it is read from standard input. +.El +.Pp +The following are options for input parsing. +These affect the parse tree passed to all outputs. +.Bl -tag -width Ds +.It Fl -parse-hilite +Enable highlight span support. +This is disabled by default because it may be erroneously interpreted +as section headers. +.It Fl -parse-math +Recognise mathematics equations. +.It Fl -parse-maxdepth=depth +The maximum depth of nested elements. +This defaults to 128, which is probably more than enough for any +real-world document. +If the maximum is hit, the system exits as if memory were exhausted. +Set to zero for no maximum. +.It Fl -parse-no-autolink +Do not parse +.Li http , +.Li https , +.Li ftp , +.Li mailto , +and relative links or link fragments. +.It Fl -parse-no-cmark +Do not parse with CommonMark constraints. +This also disables using the first ordered list value instead of +starting all lists at one. +.It Fl -parse-no-codeindent +Do not parse indented content as code blocks. +.It Fl -parse-no-deflists +Do not parse PHP extra definition lists. +.It Fl -parse-no-ext-attrs +Do not parse PHP extra extended attributes. +.It Fl -parse-no-fenced +Do not parse GFM fenced (language-specific) code blocks. +.It Fl -parse-no-footnotes +Do not parse MMD footnotes. +.It Fl -parse-no-img-ext +Deprecated. +See +.Fl -parse-no-ext-attrs . 
+.It Fl -parse-no-intraemph +Do not parse emphasis within words and links. +.It Fl -parse-no-metadata +Do not parse MMD metadata. +For the first paragraph to count as metadata, the first line must have +a colon in it. +This does not affect metadata given on +.Fl m +or +.Fl M . +.It Fl -parse-no-strike +Do not parse strikethroughs. +.It Fl -parse-no-super +Do not parse super-scripts. +.It Fl -parse-no-tables +Do not parse GFM tables. +.It Fl -parse-no-tasklists +Do not parse GFM task lists. +.El +.Pp +There are many output options. +The following are shared by all output modes: +.Bl -tag -width Ds +.It Fl -out-standalone +Alias for +.Fl s . +.It Fl -out-no-smarty +Do not use the smart typography filter. +By default, certain character sequences are translated into +output-specific glyphs. +.El +.Pp +What follows are per-output options. +For HTML with +.Fl t Ns Ar html , +these are as follows: +.Bl -tag -width Ds +.It Fl -html-hardwrap +Hard-wrap paragraph content by outputting line breaks where applicable. +.It Fl -html-no-escapehtml +If +.Fl -html-no-skiphtml +has been specified, this causes embedded HTML not to be escaped, and is +instead output verbatim. +This has no effect if +.Fl -html-no-skiphtml +has not been specified. +.It Fl -html-no-head-ids +Do not output +.Li id +attributes for headers. +.It Fl -html-no-num-ent +Don't normalise HTML entities (when possible) as numeric ones and +instead use the entities as given on input. +.It Fl -html-no-owasp +Don't follow the OWASP recommendations for escaping text, and do only +the minimal escaping to make sure that regular content isn't interpreted +as HTML. +.It Fl -html-no-skiphtml +Output embedded HTML. +By default, embedded HTML is not output at all. +See +.Fl -html-no-escapehtml . 
+.El +.Pp +For both +.Fl t Ns Ar man +and +.Fl t Ns Ar ms , +the following apply: +.Bl -tag -width Ds +.It Fl -nroff-no-groff +Don't use +.Xr groff 1 +style section headings, PDF hyperlinks and multi-page tables (these further +require +.Fl t Ns Ar ms +mode and +.Fl m Ns Ar spdf +passed to +.Xr groff 1 ) , +or Unicode sequence syntax. +The output is compatible with traditional +.Xr troff 1 . +Applies to +.Fl t Ns Ar man +and +.Fl t Ns Ar ms . +.It Fl -nroff-no-numbered +Don't output numbered headings. +Only applies to +.Fl t Ns Ar ms . +.It Fl -nroff-no-skiphtml +Output embedded HTML. +This usually doesn't make sense because the HTML won't be interpreted by +the output reader. +By default, HTML is omitted. +.It Fl -nroff-nolinks +Don't show URLs for images and links (autolinks are still shown). +.Pq Link content is still shown. +Overrides +.Fl -nroff-shortlinks +for images and links. +Applies to +.Fl t Ns Ar man +or when +.Fl -nroff-no-groff +is specified. +.It Fl -nroff-shortlinks +Shorten URLs for images, links, and autolinks to only the domain name +and final path. +Applies to +.Fl t Ns Ar man +or when +.Fl -nroff-no-groff +is specified. +.El +.Pp +The +.Fl t Ns Ar term +output has the following: +.Bl -tag -width Ds +.It Fl -term-columns=columns +The number of columns in the screen. +Useful for when running in a pipe. +Defaults to what the terminal reports or 72 if in a pipe. +.It Fl -term-hmargin=margin +The number of left margin spaces. +Truncated to the number of columns. +Defaults to zero. +.It Fl -term-no-ansi +Don't show ANSI styles at all. +This implies +.Fl -term-no-colour . +.It Fl -term-no-colour +Don't show ANSI colours. +This will still decorate text with underlines, bolds, and italics, but +not emit any colour codes. +.It Fl -term-nolinks +Don't show URLs for images and links (autolinks are still shown). +.Pq Link content is still shown. +Overrides +.Fl -term-shortlinks +for images and links.
+.It Fl -term-shortlinks +Shorten URLs for images, links, and autolinks to only the domain name +and final path. +.It Fl -term-vmargin=margin +The number of top and bottom margin newlines. +Defaults to zero. +.It Fl -term-width=width +Set the soft limit on the number of characters per line. +This may be exceeded by literal text. +The default (or if zero) is the number of terminal columns or 80 at +most. +.El +.Pp +The +.Fl t Ns Ar gemini +output has several flags that control the placement of links. +By default, links (images, autolinks, and links) are queued when +specified in-line then emitted in a block sequence after the nearest +block element. +.Bl -tag -width Ds +.It Fl -gemini-link-end +Emit the queue of links at the end of the document instead of after the +nearest block element. +.It Fl -gemini-link-inline +Render all links within the flow of text. +This will cause breakage with nested links, such as images within links, +links in blockquotes, etc. +It should not be used unless in carefully crafted documents. +.It Fl -gemini-link-noref +Do not format link labels. +Takes precedence over +.Fl -gemini-link-roman . +.It Fl -gemini-link-roman +When formatting link labels, use lower-case Roman numerals instead of the +default lower-case hexavigesimal (i.e., +.Dq a , +.Dq b , +\&..., +.Dq aa , +.Dq ab , +\&...). +.It Fl -gemini-metadata +Print metadata as the canonicalised key followed by a colon then the +value, each on one line (newlines replaced by spaces). +The metadata block is terminated by a double newline. +If there is no metadata, this does nothing. +.El +.Pp +The +.Fl t Ns Ar latex +output has the following options: +.Bl -tag -width Ds +.It Fl -latex-no-numbered +Don't number sections (and subsections, etc.). +.It Fl -latex-no-skiphtml +Output embedded HTML. +This usually doesn't make sense because the HTML won't be interpreted by +the output reader. +By default, HTML is omitted.
+.El +.Pp +The +.Fl t Ns Ar fodt +output has the following options: +.Bl -tag -width Ds +.It Fl -odt-no-skiphtml +Output embedded HTML. +This usually doesn't make sense because the HTML won't be interpreted by +the output reader. +By default, HTML is omitted. +.It Fl -odt-style Ns = Ns Ar file +Specify an OpenDocument style file, which must consist of at least +.Li <office:font-face-decls> , +.Li <office:scripts> , +and +.Li <office:styles> +XML elements in the root of the document. +This is not syntax-checked in any way. +.El +.Ss Output modes +The output media is specified by +.Fl t , +which defaults to +.Fl t Ns Ar html . +.Bl -tag -width Ds +.It Fl t Ns Ar fodt +.Dq Flat +OpenDocument output. +Automatic styles (those conditional upon document state) are generated +with output. +Classes specified by PHP extended attributes are not checked for +existence. +Differences are rendered using document tracking. +.It Fl t Ns Ar gemini +Gemini protocol output. +This output mode is experimental. +Differences are not currently rendered. +.It Fl t Ns Ar html +HTML5 output with UTF-8 encoding. +Differences are rendered using the +.Li <ins> +and +.Li <del> +elements. +.It Fl t Ns Ar latex +Simple LaTeX output. +The following packages are required: +.Li amsmath +and +.Li amssymb +for maths, +.Li graphicx +for images, +.Li inputenc Pq utf8 +for UTF-8 input, +.Li fontenc Pq T1 +and +.Li textcomp +for output glyphs, +.Li lmodern +for Latin modern font, +.Li xcolor +for the difference engine output, and +.Li hyperref +for links. +Differences are rendered by colouring in blue (insert) and red (delete) +(this format is not fixed). +.It Fl t Ns Ar man +The +.Ar man +macro package suitable for reading by +.Xr groff 1 , +.Xr mandoc 1 , +or traditional +.Xr troff 1 . +Does not support equations and images. +Table support is provided by +.Xr tbl 1 . +Since UTF-8 may be passed as input values, +.Xr preconv 1 +may need to be used.
+Differences are rendered by colouring in blue (insert) and red (delete) +(this format is not fixed). +.It Fl t Ns Ar ms +The +.Ar ms +macro package suitable for reading by +.Xr groff 1 +or traditional +.Xr troff 1 . +Does not support equations and has only limited image support for encapsulated +postscript (PS and EPS suffix) images. +Images are always block-formatted. +Image dimensions and extended attributes are ignored, though images are +downsized if larger than the current text width. +Table support is provided by +.Xr tbl 1 . +Since UTF-8 may be passed as input values, +.Xr preconv 1 +may need to be used. +Differences are rendered by colouring in blue (insert) and red (delete) +(this format is not fixed). +.It Fl t Ns Ar term +ANSI-escaped UTF-8 output suitable for reading on the terminal. +Images and equations not supported. +Differences are rendered by background-colouring in blue (insert) and +red (delete) (this format is not fixed). +.It Fl t Ns Ar tree +Debugging output: not for general use. +.El +.Ss Standalone documents +When +.Fl s +is specified, additional content may be added to output: +.Bl -tag -width Ds +.It Fl t Ns Ar fodt +Envelope +.Li <office:document> +and prologue +.Li <office:automatic-styles> , +.Li <office:master-styles> , +and +.Li <office:body> . +.It Fl t Ns Ar html +Envelope +.Li <html> +and prologue +.Li <head> . +.It Fl t Ns Ar latex +Prologue +.Li documentclass +and +.Li usepackage +statements, and surrounding +.Li begin{document} +statements. +.It Fl t Ns Ar man , Fl t Ns Ar ms +Prologue macros. +.El +.Pp +If parsed from the document or as given by +.Fl m +or +.Fl M , +the following metadata keys are used by additional content. +The metadata keys are canonicalised in lowercase and without spaces. +.Pp +Metadata values should not be encoded in their output format, e.g., +.Dq css: foo&bar . +The renderer will perform any necessary output encoding. +.Bl -tag -width Ds +.It Li affiliation +Author affiliation (organisation or institution).
+Multiple affiliations may be separated by two or more spaces (including +newlines). +Used in +.Fl t Ns Ar html , +.Fl t Ns Ar latex , +and +.Fl t Ns Ar ms . +.It Li author +Document author. +Multiple authors may be separated by two or more spaces (including +newlines). +Overridden by +.Li rcsauthor . +Used in +.Fl t Ns Ar fodt , +.Fl t Ns Ar html , +.Fl t Ns Ar latex , +and +.Fl t Ns Ar ms . +.It Li baseheaderlevel +Added to each header level. +Deprecated in favour of +.Li shiftheadinglevelby . +.It Li copyright +A document copyright (without the word +.Dq Copyright ) , +for example, +.Dq 2017, Kristaps Dzonsons . +Used in +.Fl t Ns Ar ms +and +.Fl t Ns Ar html . +.It Li css +A CSS file included in the HTML5 document head. +Multiple CSS files (in order) may be separated by two or more spaces +(including newlines). +Only used in +.Fl t Ns Ar html . +.It Li date +Document date in ISO-8601 YYYY-MM-DD format. +Overridden by +.Li rcsdate . +Used in +.Fl t Ns Ar fodt , +.Fl t Ns Ar html , +.Fl t Ns Ar latex , +.Fl t Ns Ar man , +and +.Fl t Ns Ar ms . +.It Li javascript +A JavaScript file included in the HTML5 document head. +Multiple script files (in order) may be separated by two or more spaces +(including newlines). +Only used in +.Fl t Ns Ar html . +.It Li rcsauthor +Like +.Li author , +but in RCS author format. +Overrides +.Li author . +.It Li rcsdate +Like +.Li date , +but in RCS date format. +Overrides +.Li date . +.It Li section +Man page section, defaulting to +.Dq 7 . +Only used in +.Fl t Ns Ar man . +.It Li shiftheadinglevelby +Shift all headers by the given number. +For example, a value of 1 causes headers originally at level 1 +.Pq Dq <h1> +to be level 2 +.Pq Dq <h2> , +while a value of -1 moves level 2 to 1. +Levels will not move to less than 1. +Takes precedence over +.Li baseheaderlevel . +If unset or not a valid number, defaults to zero. +Used in +.Fl t Ns Ar fodt , +.Fl t Ns Ar html , +.Fl t Ns Ar latex , +.Fl t Ns Ar man , +and +.Fl t Ns Ar ms . 
+.It Li source +Man page source (organisation providing the manual). +Only used in +.Fl t Ns Ar man . +.It Li volume +Man page volume (describes the manual page section). +Only used in +.Fl t Ns Ar man . +.It Li title +Document title, defaulting to +.Dq Untitled article . +Used in +.Fl t Ns Ar fodt , +.Fl t Ns Ar html , +.Fl t Ns Ar latex , +.Fl t Ns Ar man , +and +.Fl t Ns Ar ms . +.El +.Pp +Metadata values are parsed and may be used as variables in markdown +documents regardless of whether +.Fl s +is specified or not. +.Pp +Default values, such as +.Dq Untitled article +for the +.Li title , +are not set as metadata values, and will not appear if the metadata key +is used as a variable. +.Pp +Differences in additional content metadata are rendered differently than +in the document body: deleted metadata key-value pairs are not processed +in the output, so only inserted or retained metadata are processed. +.Pp +In formats where metadata are part of the document body, such as +.Fl t Ns Ar term +and +.Fl t Ns Ar tree , +all metadata are shown as if in the document body. +.Sh ENVIRONMENT +.Bl -tag -width Ds +.It Ev NO_COLOR +Do not emit colours when in +.Fl t Ns Ar term +mode. +Synonym for +.Ev NO_COLOUR . +Same as +.Fl -term-no-colour . +.El +.Sh FILES +.Bl -tag -width Ds +.It Pa share/odt/styles.xml +Default styles used when generating standalone +.Fl t Ns Ar fodt +documents. +Template for +.Fl -odt-style +styles.
+.El +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +To view Markdown differences on an ANSI-compatible, UTF-8 terminal: +.Pp +.Dl lowdown-diff -tterm old.md new.md | less -R +.Pp +The terminal may also be used with +.Xr groff 1 +rendering: +.Bd -literal -offset indent +lowdown-diff -stms old.md new.md | \e + groff -itk -mspdf -Tutf8 | less -R +lowdown-diff -stman old.md new.md | \e + groff -itk -man -Tutf8 | less -R +.Ed +.Pp +To emit a standalone HTML5 document: +.Pp +.Dl lowdown-diff -s old.md new.md > foo.html +.Pp +To use +.Xr groff 1 +to format as a PS file: +.Bd -literal -offset indent +lowdown-diff -stms old.md new.md | \e + groff -itk -mspdf > foo.ps +.Ed +.Pp +Or with LaTeX: +.Bd -literal -offset indent +lowdown-diff -stlatex old.md new.md > foo.latex +pslatex foo.latex +.Ed +.Pp +PDF generation follows similar logic: +.Bd -literal -offset indent +lowdown-diff -stms old.md new.md | \e + pdfroff -itk -mspdf > foo.pdf +lowdown-diff -stlatex old.md new.md > foo.latex +pdflatex foo.latex +.Ed +.Pp +UTF-8 support for +.Xr groff 1 +PDF or PS output requires appropriate fonts, such as the Unicode Times +font. +This and other Unicode fonts are not always installed by default. +They may be found, for PDF output, in the +.Pa devpdf +set of the +.Xr groff 1 +font directory and are prefixed with +.Sq U . +.Bd -literal -offset indent +lowdown-diff -stms old.md new.md | \e + pdfroff -itk -mspdf -FU-T > foo.pdf +.Ed +.Sh SEE ALSO +.Xr lowdown 1 , +.Xr lowdown 3 , +.Xr lowdown 5 +.Sh AUTHORS +.Nm +was written by +.An Kristaps Dzonsons , +.Mt kristaps@bsd.lv . +.Sh CAVEATS +When viewing +.Fl t Ns Ar man +differences with mandoc, the marker colours are not rendered. +The +.Fl t Ns Ar gemini +output also currently has no way of encoding differences. 
diff --git a/man/lowdown.1 b/man/lowdown.1 @@ -0,0 +1,728 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2016--2017, 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN 1 +.Os +.Sh NAME +.Nm lowdown +.Nd simple markdown translator +.Sh SYNOPSIS +.Nm lowdown +.Op input_options +.Op output_options +.Op Fl s +.Op Fl M Ar metadata +.Op Fl m Ar metadata +.Op Fl o Ar file +.Op Fl t Ar mode +.Op Fl X Ar keyword +.Op Ar file +.Sh DESCRIPTION +Translate from +.Xr lowdown 5 +into diverse output formats. +Results are written to standard output. +.Pp +The arguments are as follows: +.Bl -tag -width Ds +.It Fl s +Standalone mode. +This emits a document envelope surrounding the output by drawing from +document metadata. +See +.Sx Standalone documents +on providing information to the document envelope. +This applies to +.Fl t Ns Ar gemini , +.Fl t Ns Ar html , +.Fl t Ns Ar latex , +.Fl t Ns Ar ms , +.Fl t Ns Ar man , +and +.Fl t Ns Ar fodt . +.It Fl M Ar metadata +Provide a single metadata key-value pair. +This may be in the usual +.Xr lowdown 5 +colon-separated metadata format or instead separated by an equal sign, +depending upon which character comes first. +Exits with an error if given neither colon nor equal sign.
+May be invoked multiple times for each pair. +This overrides +.Fl m +and what's parsed from the document. +.It Fl m Ar metadata +Like +.Fl M , +but is overridden by what's parsed from the document, then +.Fl M . +.It Fl o Ar file +Send output to +.Ar file +unless it's +.Dq - , +in which case fall back to the default of standard output. +.It Fl t Ar mode , Fl T Ar mode +The output mode. +This may be +.Ar html +for HTML5 output, +.Ar latex +for LaTeX, +.Ar gemini +for the Gemini format, +.Ar ms +for roff output using the classic (i.e., no-extension) +.Fl ms +package and needing table support, +.Ar term +for ANSI-compatible UTF-8 terminal output, +.Ar man +for roff output using the classic +.Fl man +package, +.Ar tree , +to show the parse tree of the input document, and +.Ar null +to parse the document but do no rendering. +See +.Sx Output modes . +The +.Fl T Ar mode +form is retained for backward compatibility. +.It Fl X Ar keyword +Output the metadata value of +.Ar keyword +or an empty string if not found. +The +.Fl t +mode is ignored. +.It Ar file +Input Markdown document. +If not given or if +.Ar file +is +.Dq - , +it is read from standard input. +.El +.Pp +The following are options for input parsing. +These affect the parse tree passed to all outputs. +.Bl -tag -width Ds +.It Fl -parse-hilite +Enable highlight span support. +This is disabled by default because it may be erroneously interpreted +as section headers. +.It Fl -parse-math +Recognise mathematics equations. +.It Fl -parse-maxdepth=depth +The maximum depth of nested elements. +This defaults to 128, which is probably more than enough for any +real-world document. +If the maximum is hit, the system exits as if memory were exhausted. +Set to zero for no maximum. +.It Fl -parse-no-autolink +Do not parse +.Li http , +.Li https , +.Li ftp , +.Li mailto , +and relative links or link fragments. +.It Fl -parse-no-cmark +Do not parse with CommonMark constraints.
+This also disables using the first ordered list value instead of +starting all lists at one. +.It Fl -parse-no-codeindent +Do not parse indented content as code blocks. +.It Fl -parse-no-deflists +Do not parse PHP extra definition lists. +.It Fl -parse-no-ext-attrs +Do not parse PHP extra extended attributes. +.It Fl -parse-no-fenced +Do not parse GFM fenced (language-specific) code blocks. +.It Fl -parse-no-footnotes +Do not parse MMD footnotes. +.It Fl -parse-no-img-ext +Deprecated. +See +.Fl -parse-no-ext-attrs . +.It Fl -parse-no-intraemph +Do not parse emphasis within words and links. +.It Fl -parse-no-metadata +Do not parse MMD metadata. +For the first paragraph to count as metadata, the first line must have +a colon in it. +This does not affect metadata given on +.Fl m +or +.Fl M . +.It Fl -parse-no-strike +Do not parse strikethroughs. +.It Fl -parse-no-super +Do not parse super-scripts. +.It Fl -parse-no-tables +Do not parse GFM tables. +.It Fl -parse-no-tasklists +Do not parse GFM task lists. +.El +.Pp +There are many output options. +The following are shared by all output modes: +.Bl -tag -width Ds +.It Fl -out-standalone +Alias for +.Fl s . +.It Fl -out-no-smarty +Do not use the smart typography filter. +By default, certain character sequences are translated into +output-specific glyphs. +.El +.Pp +What follows are per-output options. +For HTML with +.Fl t Ns Ar html , +these are as follows: +.Bl -tag -width Ds +.It Fl -html-hardwrap +Hard-wrap paragraph content by outputting line breaks where applicable. +.It Fl -html-no-escapehtml +If +.Fl -html-no-skiphtml +has been specified, this causes embedded HTML not to be escaped, and is +instead output verbatim. +This has no effect if +.Fl -html-no-skiphtml +has not been specified. +.It Fl -html-no-head-ids +Do not output +.Li id +attributes for headers. +.It Fl -html-no-num-ent +Don't normalise HTML entities (when possible) as numeric ones and +instead use the entities as given on input. 
+.It Fl -html-no-owasp +Don't follow the OWASP recommendations for escaping text, and do only +the minimal escaping to make sure that regular content isn't interpreted +as HTML. +.It Fl -html-no-skiphtml +Output embedded HTML. +By default, embedded HTML is not output at all. +See +.Fl -html-no-escapehtml . +.El +.Pp +For both +.Fl t Ns Ar man +and +.Fl t Ns Ar ms , +the following apply: +.Bl -tag -width Ds +.It Fl -nroff-no-groff +Don't use +.Xr groff 1 +style section headings, PDF hyperlinks and multi-page tables (these further +require +.Fl t Ns Ar ms +mode and +.Fl m Ns Ar spdf +passed to +.Xr groff 1 ) , +or Unicode sequence syntax. +The output is compatible with traditional +.Xr troff 1 . +Applies to +.Fl t Ns Ar man +and +.Fl t Ns Ar ms . +.It Fl -nroff-no-numbered +Don't output numbered headings. +Only applies to +.Fl t Ns Ar ms . +.It Fl -nroff-no-skiphtml +Output embedded HTML. +This usually doesn't make sense because the HTML won't be interpreted by +the output reader. +By default, HTML is omitted. +.It Fl -nroff-nolinks +Don't show URLs for images and links (autolinks are still shown). +.Pq Link content is still shown. +Overrides +.Fl -nroff-shortlinks +for images and links. +Applies to +.Fl t Ns Ar man +or when +.Fl -nroff-no-groff +is specified. +.It Fl -nroff-shortlinks +Shorten URLs for images, links, and autolinks to only the domain name +and final path. +Applies to +.Fl t Ns Ar man +or when +.Fl -nroff-no-groff +is specified. +.El +.Pp +The +.Fl t Ns Ar term +output has the following: +.Bl -tag -width Ds +.It Fl -term-columns=columns +The number of columns in the screen. +Useful for when running in a pipe. +Defaults to what the terminal reports or 72 if in a pipe. +.It Fl -term-hmargin=margin +The number of left margin spaces. +Truncated to the number of columns. +Defaults to zero. +.It Fl -term-no-ansi +Don't show ANSI styles at all. +This implies +.Fl -term-no-colour . +.It Fl -term-no-colour +Don't show ANSI colours.
+This will still decorate text with underlines, bolds, and italics, but +not emit any colour codes. +.It Fl -term-nolinks +Don't show URLs for images and links (autolinks are still shown). +.Pq Link content is still shown. +Overrides +.Fl -term-shortlinks +for images and links. +.It Fl -term-shortlinks +Shorten URLs for images, links, and autolinks to only the domain name +and final path. +.It Fl -term-vmargin=margin +The number of top and bottom margin newlines. +Defaults to zero. +.It Fl -term-width=width +Set the soft limit on the number of characters per line. +This may be exceeded by literal text. +The default (or if zero) is the number of terminal columns or 80 at +most. +.El +.Pp +The +.Fl t Ns Ar gemini +output has several flags that control the placement of links. +By default, links (images, autolinks, and links) are queued when +specified in-line then emitted in a block sequence after the nearest +block element. +.Bl -tag -width Ds +.It Fl -gemini-link-end +Emit the queue of links at the end of the document instead of after the +nearest block element. +.It Fl -gemini-link-inline +Render all links within the flow of text. +This will cause breakage with nested links, such as images within links, +links in blockquotes, etc. +It should not be used unless in carefully crafted documents. +.It Fl -gemini-link-noref +Do not format link labels. +Takes precedence over +.Fl -gemini-link-roman . +.It Fl -gemini-link-roman +When formatting link labels, use lower-case Roman numerals instead of the +default lower-case hexavigesimal (i.e., +.Dq a , +.Dq b , +\&..., +.Dq aa , +.Dq ab , +\&...). +.It Fl -gemini-metadata +Print metadata as the canonicalised key followed by a colon then the +value, each on one line (newlines replaced by spaces). +The metadata block is terminated by a double newline. +If there is no metadata, this does nothing.
+.El +.Pp +The +.Fl t Ns Ar latex +output has the following options: +.Bl -tag -width Ds +.It Fl -latex-no-numbered +Don't number sections (and subsections, etc.). +.It Fl -latex-no-skiphtml +Output embedded HTML. +This usually doesn't make sense because the HTML won't be interpreted by +the output reader. +By default, HTML is omitted. +.El +.Pp +The +.Fl t Ns Ar fodt +output has the following options: +.Bl -tag -width Ds +.It Fl -odt-no-skiphtml +Output embedded HTML. +This usually doesn't make sense because the HTML won't be interpreted by +the output reader. +By default, HTML is omitted. +.It Fl -odt-style Ns = Ns Ar file +Specify an OpenDocument style file, which must consist of at least +.Li <office:font-face-decls> , +.Li <office:scripts> , +and +.Li <office:styles> +XML elements in the root of the document. +This is not syntax-checked in any way. +.El +.Ss Output modes +The output media is specified by +.Fl t , +which defaults to +.Fl t Ns Ar html . +.Bl -tag -width Ds +.It Fl t Ns Ar fodt +.Dq Flat +OpenDocument output. +Automatic styles (those conditional upon document state) are generated +with output. +Classes specified by PHP extended attributes are not checked for +existence. +.It Fl t Ns Ar gemini +Gemini protocol output. +This output mode is experimental. +.It Fl t Ns Ar html +HTML5 output with UTF-8 encoding. +.It Fl t Ns Ar latex +Simple LaTeX output. +The following packages are required: +.Li amsmath +and +.Li amssymb +for maths, +.Li graphicx +for images, +.Li inputenc Pq utf8 +for UTF-8 input, +.Li fontenc Pq T1 +and +.Li textcomp +for output glyphs, +.Li lmodern +for Latin modern font, +.Li xcolor +for the difference engine output, and +.Li hyperref +for links. +.It Fl t Ns Ar man +The +.Ar man +macro package suitable for reading by +.Xr groff 1 , +.Xr mandoc 1 , +or traditional +.Xr troff 1 . +Does not support equations and images. +Table support is provided by +.Xr tbl 1 .
+Since UTF-8 may be passed as input values, +.Xr preconv 1 +may need to be used. +.It Fl t Ns Ar ms +The +.Ar ms +macro package suitable for reading by +.Xr groff 1 +or traditional +.Xr troff 1 . +Does not support equations and has only limited image support for encapsulated +postscript (PS and EPS suffix) images. +Images are always block-formatted. +Image dimensions and extended attributes are ignored, though images are +downsized if larger than the current text width. +Table support is provided by +.Xr tbl 1 . +Since UTF-8 may be passed as input values, +.Xr preconv 1 +may need to be used. +.It Fl t Ns Ar term +ANSI-escaped UTF-8 output suitable for reading on the terminal. +Images and equations not supported. +.It Fl t Ns Ar tree +Debugging output: not for general use. +.El +.Ss Standalone documents +When +.Fl s +is specified, additional content may be added to output: +.Bl -tag -width Ds +.It Fl t Ns Ar fodt +Envelope +.Li <office:document> +and prologue +.Li <office:automatic-styles> , +.Li <office:master-styles> , +and +.Li <office:body> . +.It Fl t Ns Ar html +Envelope +.Li <html> +and prologue +.Li <head> . +.It Fl t Ns Ar latex +Prologue +.Li documentclass +and +.Li usepackage +statements, and surrounding +.Li begin{document} +statements. +.It Fl t Ns Ar man , Fl t Ns Ar ms +Prologue macros. +.El +.Pp +If parsed from the document or as given by +.Fl m +or +.Fl M , +the following metadata keys are used by additional content. +The metadata keys are canonicalised in lowercase and without spaces. +.Pp +Metadata values should not be encoded in their output format, e.g., +.Dq css: foo&bar . +The renderer will perform any necessary output encoding. +.Bl -tag -width Ds +.It Li affiliation +Author affiliation (organisation or institution). +Multiple affiliations may be separated by two or more spaces (including +newlines). +Used in +.Fl t Ns Ar html , +.Fl t Ns Ar latex , +and +.Fl t Ns Ar ms . +.It Li author +Document author.
+Multiple authors may be separated by two or more spaces (including +newlines). +Overridden by +.Li rcsauthor . +Used in +.Fl t Ns Ar fodt , +.Fl t Ns Ar html , +.Fl t Ns Ar latex , +and +.Fl t Ns Ar ms . +.It Li baseheaderlevel +Added to each header level. +Deprecated in favour of +.Li shiftheadinglevelby . +.It Li copyright +A document copyright (without the word +.Dq Copyright ) , +for example, +.Dq 2017, Kristaps Dzonsons . +Used in +.Fl t Ns Ar ms +and +.Fl t Ns Ar html . +.It Li css +A CSS file included in the HTML5 document head. +Multiple CSS files (in order) may be separated by two or more spaces +(including newlines). +Only used in +.Fl t Ns Ar html . +.It Li date +Document date in ISO-8601 YYYY-MM-DD format. +Overridden by +.Li rcsdate . +Used in +.Fl t Ns Ar fodt , +.Fl t Ns Ar html , +.Fl t Ns Ar latex , +.Fl t Ns Ar man , +and +.Fl t Ns Ar ms . +.It Li javascript +A JavaScript file included in the HTML5 document head. +Multiple script files (in order) may be separated by two or more spaces +(including newlines). +Only used in +.Fl t Ns Ar html . +.It Li rcsauthor +Like +.Li author , +but in RCS author format. +Overrides +.Li author . +.It Li rcsdate +Like +.Li date , +but in RCS date format. +Overrides +.Li date . +.It Li section +Man page section, defaulting to +.Dq 7 . +Only used in +.Fl t Ns Ar man . +.It Li shiftheadinglevelby +Shift all headers by the given number. +For example, a value of 1 causes headers originally at level 1 +.Pq Dq <h1> +to be level 2 +.Pq Dq <h2> , +while a value of -1 moves level 2 to 1. +Levels will not move to less than 1. +Takes precedence over +.Li baseheaderlevel . +If unset or not a valid number, defaults to zero. +Used in +.Fl t Ns Ar fodt , +.Fl t Ns Ar html , +.Fl t Ns Ar latex , +.Fl t Ns Ar man , +and +.Fl t Ns Ar ms . +.It Li source +Man page source (organisation providing the manual). +Only used in +.Fl t Ns Ar man . +.It Li volume +Man page volume (describes the manual page section). 
+Only used in +.Fl t Ns Ar man . +.It Li title +Document title, defaulting to +.Dq Untitled article . +Used in +.Fl t Ns Ar fodt , +.Fl t Ns Ar html , +.Fl t Ns Ar latex , +.Fl t Ns Ar man , +and +.Fl t Ns Ar ms . +.El +.Pp +Metadata values are parsed and may be used as variables in markdown +documents regardless of whether +.Fl s +is specified or not. +.Pp +Default values, such as +.Dq Untitled article +for the +.Li title , +are not set as metadata values, and will not appear if the metadata key +is used as a variable. +.Sh ENVIRONMENT +.Bl -tag -width Ds +.It Ev NO_COLOR +Do not emit colours when in +.Fl t Ns Ar term +mode. +Synonym for +.Ev NO_COLOUR . +Same as +.Fl -term-no-colour . +.El +.Sh FILES +.Bl -tag -width Ds +.It Pa share/odt/styles.xml +Default styles used when generating standalone +.Fl t Ns Ar fodt +documents. +Template for +.Fl -odt-style +styles. +.El +.Sh EXIT STATUS +.Ex -std +.Pp +If the +.Fl X +flag is used, +.Nm lowdown +exits with an error if the given keyword is not found.
+.Sh EXAMPLES +To view a Markdown file on an ANSI-compatible, UTF-8 terminal: +.Pp +.Dl lowdown -tterm foo.md | less -R +.Pp +The terminal may also be used with +.Xr groff 1 +or +.Xr mandoc 1 +rendering: +.Bd -literal -offset indent +lowdown -stms foo.md | groff -itk -mspdf -Tutf8 | less -R +lowdown -stman foo.md | groff -itk -man -Tutf8 | less -R +lowdown -stman foo.md | mandoc | less +.Ed +.Pp +To emit a standalone HTML5 document: +.Pp +.Dl lowdown -s foo.md > foo.html +.Pp +To use +.Xr groff 1 +or +.Xr mandoc 1 +to format as a PS file: +.Bd -literal -offset indent +lowdown -stms foo.md | groff -itk -mspdf > foo.ps +lowdown -stman foo.md | mandoc -Tps > foo.ps +.Ed +.Pp +Or with LaTeX: +.Bd -literal -offset indent +lowdown -stlatex foo.md > foo.latex +pslatex foo.latex +.Ed +.Pp +PDF generation follows similar logic: +.Bd -literal -offset indent +lowdown -stms foo.md | pdfroff -itk -mspdf > foo.pdf +lowdown -stman foo.md | mandoc -Tpdf > foo.pdf +lowdown -stlatex foo.md > foo.latex +pdflatex foo.latex +.Ed +.Pp +UTF-8 support for +.Xr groff 1 +PDF or PS output requires appropriate fonts, such as the Unicode Times +font. +This and other Unicode fonts are not always installed by default. +They may be found, for PDF output, in the +.Pa devpdf +set of the +.Xr groff 1 +font directory and are prefixed with +.Sq U . +.Bd -literal -offset indent +lowdown -stms foo.md | pdfroff -itk -mspdf -FU-T > foo.pdf +.Ed +.Pp +To extract the HTML-escaped title from a file's metadata: +.Pp +.Dl lowdown -X title foo.md +.Sh SEE ALSO +.Xr lowdown-diff 1 , +.Xr lowdown 3 , +.Xr lowdown 5 +.Sh AUTHORS +.Nm lowdown +was forked from +.Lk https://github.com/hoedown/hoedown hoedown +by +.An Kristaps Dzonsons , +.Mt kristaps@bsd.lv . +It has been considerably modified since. 
diff --git a/man/lowdown.3 b/man/lowdown.3 @@ -0,0 +1,984 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017, 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN 3 +.Os +.Sh NAME +.Nm lowdown +.Nd simple markdown translator library +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Vt "struct lowdown_metadata" +.Vt "struct lowdown_node" +.Vt "struct lowdown_opts" +.Sh DESCRIPTION +This library parses +.Xr lowdown 5 +into various output formats. +.Pp +The library consists first of a high-level interface consisting of +.Xr lowdown_buf 3 , +.Xr lowdown_buf_diff 3 , +.Xr lowdown_file 3 , +and +.Xr lowdown_file_diff 3 . +.Pp +The high-level functions interface with low-level functions that perform +parsing and formatting. +These consist of +.Xr lowdown_doc_new 3 , +.Xr lowdown_doc_parse 3 , +and +.Xr lowdown_doc_free 3 +for parsing +.Xr lowdown 5 +documents into an abstract syntax tree. +.Pp +The front-end functions for freeing, allocation, and rendering are as +follows. 
+.Bl -bullet +.It +HTML5: +.Bl -item -compact +.It +.Xr lowdown_html_free 3 +.It +.Xr lowdown_html_new 3 +.It +.Xr lowdown_html_rndr 3 +.El +.It +gemini: +.Bl -item -compact +.It +.Xr lowdown_gemini_free 3 +.It +.Xr lowdown_gemini_new 3 +.It +.Xr lowdown_gemini_rndr 3 +.El +.It +LaTeX: +.Bl -item -compact +.It +.Xr lowdown_latex_free 3 +.It +.Xr lowdown_latex_new 3 +.It +.Xr lowdown_latex_rndr 3 +.El +.It +OpenDocument: +.Bl -item -compact +.It +.Xr lowdown_odt_free 3 +.It +.Xr lowdown_odt_new 3 +.It +.Xr lowdown_odt_rndr 3 +.El +.It +roff: +.Bl -item -compact +.It +.Xr lowdown_nroff_free 3 +.It +.Xr lowdown_nroff_new 3 +.It +.Xr lowdown_nroff_rndr 3 +.El +.It +UTF-8 ANSI terminal: +.Bl -item -compact +.It +.Xr lowdown_term_free 3 +.It +.Xr lowdown_term_new 3 +.It +.Xr lowdown_term_rndr 3 +.El +.It +debugging: +.Bl -item -compact +.It +.Xr lowdown_tree_rndr 3 +.El +.El +.Pp +To compile and link, use +.Xr pkg-config 1 : +.Bd -literal +% cc `pkg-config --cflags lowdown` -c -o sample.o sample.c +% cc -o sample sample.o `pkg-config --libs lowdown` +.Ed +.Ss Pledge Promises +The +.Nm lowdown +library is built to operate in security-sensitive environments, such as +those using +.Xr pledge 2 +on +.Ox . +The only promise required is +.Va stdio +for +.Xr lowdown_file_diff 3 +and +.Xr lowdown_file 3 : +both require access to the stream for reading input. +.Ss Types +All +.Nm lowdown +functions use one or more of the following structures. +.Pp +The +.Vt struct lowdown_opts +structure manages features. +It has the following fields: +.Bl -tag -width Ds -offset indent +.It Va unsigned int feat +Features used during the parse. +This bit-field may have the following bits OR'd: +.Pp +.Bl -tag -width Ds -compact +.It Dv LOWDOWN_ATTRS +Parse PHP extra link, header, and image attributes. +.It Dv LOWDOWN_AUTOLINK +Parse +.Li http , +.Li https , +.Li ftp , +.Li mailto , +and relative links or link fragments. 
+.It Dv LOWDOWN_COMMONMARK +Tighten input parsing to the CommonMark specification. +This also uses the first ordered list value instead of starting all +lists at one. +This feature is +.Em experimental +and +.Em incomplete . +.It Dv LOWDOWN_DEFLIST +Parse PHP extra definition lists. +This is currently constrained to single-key lists. +.It Dv LOWDOWN_FENCED +Parse GFM fenced (language-specific) code blocks. +.It Dv LOWDOWN_FOOTNOTES +Parse MMD style footnotes. +This only supports the referenced footnote style, not the +.Qq inline +style. +.It Dv LOWDOWN_HILITE +Parse highlighted sequences. +This is disabled by default because they may be erroneously interpreted +as section headers. +.It Dv LOWDOWN_IMG_EXT +Deprecated. +Use +.Dv LOWDOWN_ATTRS +instead. +.It Dv LOWDOWN_MATH +Parse mathematics equations. +.It Dv LOWDOWN_METADATA +Parse in-document MMD metadata. +For the first paragraph to count as meta-data, the first line must have +a colon in it. +.It Dv LOWDOWN_NOCODEIND +Do not parse indented content as code blocks. +.It Dv LOWDOWN_NOINTEM +Do not parse emphasis within words. +.It Dv LOWDOWN_STRIKE +Parse strikethrough sequences. +.It Dv LOWDOWN_SUPER +Parse super-scripts. +This accepts foo^bar, which puts the parts following the caret until +whitespace in superscripts; or foo^(bar), which puts only the parts in +parentheses. +.It Dv LOWDOWN_TABLES +Parse GFM tables. +.It Dv LOWDOWN_TASKLIST +Parse GFM task list items. +.El +.Pp +The default value is zero (none). +.It Va unsigned int oflags +Features used by the output generators. +This bit-field may have the following enabled. +Note that bits are by definition specific to an output +.Va type . +.Pp +For +.Dv LOWDOWN_HTML : +.Pp +.Bl -tag -width Ds -compact +.It Dv LOWDOWN_HTML_ESCAPE +If +.Dv LOWDOWN_HTML_SKIP_HTML +has not been set, escapes in-document HTML so that it is rendered as +opaque text. +.It Dv LOWDOWN_HTML_HARD_WRAP +Retain line-breaks within paragraphs. 
+.It Dv LOWDOWN_HTML_HEAD_IDS +Have an identifier written with each header element consisting of an +HTML-escaped version of the header contents. +.It Dv LOWDOWN_HTML_OWASP +When escaping text, be extra paranoid in following the OWASP suggestions +for which characters to escape. +.It Dv LOWDOWN_HTML_NUM_ENT +Convert, when possible, HTML entities to their numeric form. +If not set, the entities are used as given in the input. +.It Dv LOWDOWN_HTML_SKIP_HTML +Do not render in-document HTML at all. +.El +.Pp +For +.Dv LOWDOWN_GEMINI , +there are several flags for controlling link placement. +By default, links (images, autolinks, and links) are queued when +specified in-line then emitted in a block sequence after the nearest +block element. +.Pp +.Bl -tag -width Ds -compact +.It Dv LOWDOWN_GEMINI_LINK_END +Emit the queue of links at the end of the document instead of after the +nearest block element. +.It Dv LOWDOWN_GEMINI_LINK_IN +Render all links within the flow of text. +This will cause breakage with nested links, such as images within links, +links in blockquotes, etc. +It should not be used except in carefully crafted documents. +.It Dv LOWDOWN_GEMINI_LINK_NOREF +Do not format link labels. +Takes precedence over +.Dv LOWDOWN_GEMINI_LINK_ROMAN . +.It Dv LOWDOWN_GEMINI_LINK_ROMAN +When formatting link labels, use lower-case Roman numerals instead of +the default lowercase hexavigesimal (i.e., +.Dq a , +.Dq b , +\&..., +.Dq aa , +.Dq ab , +\&...). +.It Dv LOWDOWN_GEMINI_METADATA +Print metadata as the canonicalised key followed by a colon then the +value, each on one line (newlines replaced by spaces). +The metadata block is terminated by a double newline. +If there is no metadata, this does nothing. +.El +.Pp +There may only be one of +.Dv LOWDOWN_GEMINI_LINK_END +or +.Dv LOWDOWN_GEMINI_LINK_IN . +If both are specified, the latter is unset. 
+.Pp +For +.Dv LOWDOWN_FODT : +.Pp +.Bl -tag -width Ds -compact +.It Dv LOWDOWN_ODT_SKIP_HTML +Do not render in-document HTML at all. +Text within HTML elements remains. +.El +.Pp +For +.Dv LOWDOWN_LATEX : +.Pp +.Bl -tag -width Ds -compact +.It Dv LOWDOWN_LATEX_NUMBERED +Use the default numbering scheme for sections, subsections, etc. +If not specified, these are inhibited. +.It Dv LOWDOWN_LATEX_SKIP_HTML +Do not render in-document HTML at all. +Text within HTML elements remains. +.El +.Pp +And for +.Dv LOWDOWN_MAN +and +.Dv LOWDOWN_NROFF : +.Pp +.Bl -tag -width Ds -compact +.It Dv LOWDOWN_NROFF_GROFF +Use GNU extensions (i.e., for +.Xr groff 1 ) +when rendering output. +The groff arguments must include +.Fl m Ns Ar pdfmark +for formatting links with +.Dv LOWDOWN_MAN +or +.Fl m Ns Ar spdf +instead of +.Fl m Ns Ar s +for +.Dv LOWDOWN_NROFF . +Applies to the +.Dv LOWDOWN_MAN +and +.Dv LOWDOWN_NROFF +output types. +.It Dv LOWDOWN_NROFF_NUMBERED +Use numbered sections if +.Dv LOWDOWN_NROFF_GROFF +is not specified. +Only applies to the +.Dv LOWDOWN_NROFF +output type. +.It Dv LOWDOWN_NROFF_SKIP_HTML +Do not render in-document HTML at all. +Text within HTML elements remains. +.It Dv LOWDOWN_NROFF_SHORTLINK +Render link URLs in short form. +Applies to images, autolinks, and regular links. +Only in +.Dv LOWDOWN_MAN +or when +.Dv LOWDOWN_NROFF_GROFF +is not specified. +.It Dv LOWDOWN_NROFF_NOLINK +Don't show links at all if they have embedded text. +Applies to images and regular links. +Only in +.Dv LOWDOWN_MAN +or when +.Dv LOWDOWN_NROFF_GROFF +is not specified. +.El +.Pp +For +.Dv LOWDOWN_TERM : +.Pp +.Bl -tag -width Ds -compact +.It Dv LOWDOWN_TERM_NOANSI +Don't apply ANSI style codes at all. +This implies +.Dv LOWDOWN_TERM_NOCOLOUR . +.It Dv LOWDOWN_TERM_NOCOLOUR +Don't apply ANSI colour codes. +This will still show underline, bold, etc. +This should not be used in difference mode, as the output will make no +sense. +.It Dv LOWDOWN_TERM_NOLINK +Don't show links at all. 
+Applies to images and regular links: autolinks are still shown. +This may be combined with +.Dv LOWDOWN_TERM_SHORTLINK +to also shorten autolinks. +.It Dv LOWDOWN_TERM_SHORTLINK +Render link URLs in short form. +Applies to images, autolinks, and regular links. +This may be combined with +.Dv LOWDOWN_TERM_NOLINK +to only show shortened autolinks. +.El +.Pp +For any mode, you may specify: +.Pp +.Bl -tag -width Ds -compact +.It Dv LOWDOWN_SMARTY +Don't use smart typography formatting. +.It Dv LOWDOWN_STANDALONE +Emit a full document instead of a document fragment. +This envelope is largely populated from metadata if +.Dv LOWDOWN_METADATA +was provided as an option or as given in +.Va meta +or +.Va metaovr . +.El +.It Va size_t maxdepth +The maximum parse depth before the parser exits. +Most documents will have a parse depth in the single digits. +.It Va size_t cols +For +.Dv LOWDOWN_TERM , +the +.Qq soft limit +for width of terminal output not including margins. +If zero, 80 shall be used. +.It Va size_t hmargin +For +.Dv LOWDOWN_TERM , +the left margin (space characters). +.It Va size_t vmargin +For +.Dv LOWDOWN_TERM , +the top/bottom margin (newlines). +.It Va enum lowdown_type type +May be set to +.Dv LOWDOWN_HTML +for HTML5 output, +.Dv LOWDOWN_LATEX +for LaTeX, +.Dv LOWDOWN_MAN +for +.Fl m Ns Ar an +macros, +.Dv LOWDOWN_FODT +for +.Dq flat +OpenDocument, +.Dv LOWDOWN_TERM +for ANSI-compatible UTF-8 terminal output, +.Dv LOWDOWN_GEMINI +for the Gemini format, or +.Dv LOWDOWN_NROFF +for +.Fl m Ns Ar s +macros. +The +.Dv LOWDOWN_TREE +type causes a debug tree to be written. +.It Va struct lowdown_opts_odt odt +If +.Va type +is +.Dv LOWDOWN_FODT , +this contains +.Vt "const char *sty" , +which is either +.Dv NULL +or the OpenDocument styles used when creating standalone documents. +If +.Dv NULL , +the default styles are used. +.It Va char **meta +An array of metadata key-value pairs or +.Dv NULL . 
+Each pair must appear as if provided on one line (or multiple lines) of +the input, including the terminating newline character. +If not consisting of a valid pair (e.g., no newline, no colon), then it is +ignored. +When processed, these values are overridden by those in the document (if +.Dv LOWDOWN_METADATA +is specified) or by those in +.Va metaovr . +.It Va size_t metasz +Number of pairs in +.Va meta . +.It Va char **metaovr +See +.Va meta . +The difference is that +.Va metaovr +is applied after +.Va meta +and in-document metadata, so it overrides prior values. +.It Va size_t metaovrsz +Number of pairs in +.Va metaovr . +.El +.Pp +Another common structure is +.Vt "struct lowdown_metadata" , +which is used to hold parsed (and output-formatted) metadata keys and +values if +.Dv LOWDOWN_METADATA +was provided as an input bit. +This structure consists of the following fields: +.Bl -tag -width Ds -offset indent +.It Va char *key +The metadata key in its lowercase, canonical form. +.It Va char *value +The metadata value as rendered in the current output format. +This may be an empty string. +.El +.Pp +The abstract syntax tree is encoded in +.Vt struct lowdown_node , +which consists of the following. +.Bl -tag -width Ds -offset indent +.It Va enum lowdown_rndrt type +The node type. +.Pq Described below. +.It Va size_t id +An identifier unique within the document. +This can be used as a table index since the number is assigned from a +monotonically increasing point during the parse. +.It Va struct lowdown_node *parent +The parent of the node, or +.Dv NULL +at the root. +.It Va enum lowdown_chng chng +Change tracking: whether this node was inserted +.Pq Dv LOWDOWN_CHNG_INSERT , +deleted +.Pq Dv LOWDOWN_CHNG_DELETE , +or neither +.Pq Dv LOWDOWN_CHNG_NONE . +.It Va struct lowdown_nodeq children +A possibly-empty list of child nodes. +.It Va <anon union> +An anonymous union of type-specific structures. +See below for a description of each one. 
+.El +.Pp +The nodes may be one of the following types, with default rendering in +HTML5 to illustrate functionality. +.Bl -tag -width Ds -offset indent +.It Dv LOWDOWN_BLOCKCODE +A block-level (and possibly language-specific) snippet of code. +Described by the +.Li <pre><code> +elements. +.It Dv LOWDOWN_BLOCKHTML +A block-level snippet of HTML. +This is simply opaque HTML content. +(Only if configured during parse.) +.It Dv LOWDOWN_BLOCKQUOTE +A block-level quotation. +Described by the +.Li <blockquote> +element. +.It Dv LOWDOWN_CODESPAN +A snippet of code. +Described by the +.Li <code> +element. +.It Dv LOWDOWN_DOC_HEADER +A header with data gathered from document metadata (if configured). +Described by the +.Li <head> +element. +(Only if configured during parse.) +.It Dv LOWDOWN_DOUBLE_EMPHASIS +Bold (or otherwise notable) content. +Described by the +.Li <strong> +element. +.It Dv LOWDOWN_EMPHASIS +Italic (or otherwise notable) content. +Described by the +.Li <em> +element. +.It Dv LOWDOWN_ENTITY +An HTML entity, which may either be named or numeric. +.It Dv LOWDOWN_FOOTNOTE +A footnote. +(Only if configured during parse.) +.It Dv LOWDOWN_HEADER +A block-level header. +Described (in the HTML case) by one of +.Li <h1> +through +.Li <h6> . +.It Dv LOWDOWN_HIGHLIGHT +Marked text. +Described by the +.Li <mark> +element. +(Only if configured during parse.) +.It Dv LOWDOWN_HRULE +A horizontal line. +Described by +.Li <hr> . +.It Dv LOWDOWN_IMAGE +An image. +Described by the +.Li <img> +element. +.It Dv LOWDOWN_LINEBREAK +A hard line-break within a block context. +Described by the +.Li <br> +element. +.It Dv LOWDOWN_LINK +A link to external media. +Described by the +.Li <a> +element. +.It Dv LOWDOWN_LINK_AUTO +Like +.Dv LOWDOWN_LINK , +except inferred from text content. +Described by the +.Li <a> +element. +(Only if configured during parse.) +.It Dv LOWDOWN_LIST +A block-level list enclosure. +Described by +.Li <ul> +or +.Li <ol> . 
+.It Dv LOWDOWN_LISTITEM +A block-level list item, always appearing within a +.Dv LOWDOWN_LIST . +Described by +.Li <li> . +.It Dv LOWDOWN_MATH_BLOCK +A block (or inline) of mathematical text in LaTeX format. +Described within +.Li \e[xx\e] +or +.Li \e(xx\e) . +This is usually (in HTML) externally handled by a JavaScript renderer. +(Only if configured during parse.) +.It Dv LOWDOWN_META +Meta-data keys and values. +(Only if configured during parse.) +These are described by elements in the +.Li <head> +element. +.It Dv LOWDOWN_NORMAL_TEXT +Normal text content. +.It Dv LOWDOWN_PARAGRAPH +A block-level paragraph. +Described by the +.Li <p> +element. +.It Dv LOWDOWN_RAW_HTML +An inline of raw HTML. +(Only if configured during parse.) +.It Dv LOWDOWN_ROOT +The root of the document. +This is always the topmost node, and the only node where the +.Va parent +field is +.Dv NULL . +.It Dv LOWDOWN_STRIKETHROUGH +Content struck through. +Described by the +.Li <del> +element. +(Only if configured during parse.) +.It Dv LOWDOWN_SUPERSCRIPT +A superscript. +Described by the +.Li <sup> +element. +(Only if configured during parse.) +.It Dv LOWDOWN_TABLE_BLOCK +A table block. +Described by +.Li <table> . +(Only if configured during parse.) +.It Dv LOWDOWN_TABLE_BODY +A table body section. +Described by +.Li <tbody> . +Parent is always +.Dv LOWDOWN_TABLE_BLOCK . +(Only if configured during parse.) +.It Dv LOWDOWN_TABLE_CELL +A table cell. +Described by +.Li <td> +or +.Li <th> +if in the header. +Parent is always +.Dv LOWDOWN_TABLE_ROW . +(Only if configured during parse.) +.It Dv LOWDOWN_TABLE_HEADER +A table header section. +Described by +.Li <thead> . +Parent is always +.Dv LOWDOWN_TABLE_BLOCK . +(Only if configured during parse.) +.It Dv LOWDOWN_TABLE_ROW +A table row. +Described by +.Li <tr> . +Parent is always +.Dv LOWDOWN_TABLE_HEADER +or +.Dv LOWDOWN_TABLE_BODY . +(Only if configured during parse.) 
+.It Dv LOWDOWN_TRIPLE_EMPHASIS +Combination of +.Dv LOWDOWN_EMPHASIS +and +.Dv LOWDOWN_DOUBLE_EMPHASIS . +.El +.Pp +The following anonymous union structures correspond to certain nodes. +Note that all buffers may be zero-length. +.Bl -tag -width Ds -offset indent +.It Va rndr_autolink +For +.Dv LOWDOWN_LINK_AUTO , +the link address as +.Va link +and the link type +.Va type , +which may be one of +.Dv HALINK_EMAIL +for e-mail links and +.Dv HALINK_NORMAL +otherwise. +Any buffer may be empty-sized. +.It Va rndr_blockcode +For +.Dv LOWDOWN_BLOCKCODE , +the opaque +.Va text +of the block and the optional +.Va lang +of the code language. +.It Va rndr_blockhtml +For +.Dv LOWDOWN_BLOCKHTML , +the opaque HTML +.Va text . +.It Va rndr_codespan +The opaque +.Va text +of the contents. +.It Va rndr_definition +For +.Dv LOWDOWN_DEFINITION , +containing +.Va flags +that may be +.Dv HLIST_FL_BLOCK +if the definition list should be interpreted as containing block +elements. +.It Va rndr_entity +For +.Dv LOWDOWN_ENTITY , +the entity +.Va text . +.It Va rndr_header +For +.Dv LOWDOWN_HEADER , +the +.Va level +of the header starting at zero (this value is relative to the metadata +base header level, defaulting to one), optional space-separated class +list +.Va attr_cls , +and optional single identifier +.Va attr_id . +.It Va rndr_image +For +.Dv LOWDOWN_IMAGE , +the image address +.Va link , +the image title +.Va title , +dimensions NxN (width by height) in +.Va dims , +and alternate text +.Va alt . +CSS in-line style for width and height may be given in +.Va attr_width +and/or +.Va attr_height , +and a space-separated list of classes may be in +.Va attr_cls +and a single identifier may be in +.Va attr_id . +.It Va rndr_link +Like +.Va rndr_autolink , +but without a type and further defining an optional link title +.Va title , +optional space-separated class list +.Va attr_cls , +and optional single identifier +.Va attr_id . 
+.It Va rndr_list +For +.Dv LOWDOWN_LIST , +consists of a bitfield +.Va flags +that may be set to +.Dv HLIST_FL_ORDERED +for an ordered list and +.Dv HLIST_FL_UNORDERED +for an unordered one. +If +.Dv HLIST_FL_BLOCK +is set, the list should be output as if items were separate blocks. +The +.Va start +value for +.Dv HLIST_FL_ORDERED +is the starting list item position, which is one by default and never +zero. +.It Va rndr_listitem +For +.Dv LOWDOWN_LISTITEM , +consists of a bitfield +.Va flags +that may be set to +.Dv HLIST_FL_ORDERED +for an ordered list, +.Dv HLIST_FL_UNORDERED +for an unordered list, +.Dv HLIST_FL_DEF +for definition list data, +.Dv HLIST_FL_CHECKED +or +.Dv HLIST_FL_UNCHECKED +for an unordered +.Dq task +list element, and/or +.Dv HLIST_FL_BLOCK +for list item output as if containing block elements. +The +.Dv HLIST_FL_BLOCK +should not be used: use the parent list (or definition list) flags for +this. +The +.Va num +is the index in a +.Dv HLIST_FL_ORDERED +list. +It is monotonically increasing with each item in the list, starting at +the +.Va start +variable given in +.Vt struct rndr_list . +.It Va rndr_math +For +.Dv LOWDOWN_MATH_BLOCK , +the mode of display in +.Va blockmode : +if 1, in-line math; if 2, multi-line. +The opaque equation, which is assumed to be in LaTeX format, is in the +opaque +.Va text . +.It Va rndr_meta +Each +.Dv LOWDOWN_META +key-value pair is represented. +The keys are lower-case without spaces or non-ASCII characters. +If provided, enclosed nodes may consist only of +.Dv LOWDOWN_NORMAL_TEXT +and +.Dv LOWDOWN_ENTITY . +.It Va rndr_normal_text +The basic +.Va text +content for +.Dv LOWDOWN_NORMAL_TEXT . +.It Va rndr_paragraph +For +.Dv LOWDOWN_PARAGRAPH , +specifies how many +.Va lines +the paragraph has in the input file and +.Va beoln , +set to non-zero if the paragraph ends with an empty line instead of a +breaking block element. +.It Va rndr_raw_html +For +.Dv LOWDOWN_RAW_HTML , +the opaque HTML +.Va text . 
+.It Va rndr_table +For +.Dv LOWDOWN_TABLE_BLOCK , +the number of +.Va columns +in each row or header row. +The number of columns in +.Va rndr_table , +.Va rndr_table_header , +and +.Va rndr_table_cell +are the same. +.It Va rndr_table_cell +For +.Dv LOWDOWN_TABLE_CELL , +the current +.Va col +column number out of +.Va columns . +See +.Va rndr_table_header +for a description of the bits in +.Va flags . +The number of columns in +.Va rndr_table , +.Va rndr_table_header , +and +.Va rndr_table_cell +are the same. +.It Va rndr_table_header +For +.Dv LOWDOWN_TABLE_HEADER , +the number of +.Va columns +in each row and the per-column +.Va flags , +which may be tested for equality against +.Dv HTBL_FL_ALIGN_LEFT , +.Dv HTBL_FL_ALIGN_RIGHT , +or +.Dv HTBL_FL_ALIGN_CENTER +after being masked with +.Dv HTBL_FL_ALIGNMASK ; +or +.Dv HTBL_FL_HEADER . +If no alignment is specified after the mask, the default should be +left-aligned. +The number of columns in +.Va rndr_table , +.Va rndr_table_header , +and +.Va rndr_table_cell +are the same. +.El +.Sh SEE ALSO +.Xr lowdown 1 , +.Xr lowdown_buf 3 , +.Xr lowdown_buf_diff 3 , +.Xr lowdown_diff 3 , +.Xr lowdown_doc_free 3 , +.Xr lowdown_doc_new 3 , +.Xr lowdown_doc_parse 3 , +.Xr lowdown_file 3 , +.Xr lowdown_file_diff 3 , +.Xr lowdown_gemini_free 3 , +.Xr lowdown_gemini_new 3 , +.Xr lowdown_gemini_rndr 3 , +.Xr lowdown_html_free 3 , +.Xr lowdown_html_new 3 , +.Xr lowdown_html_rndr 3 , +.Xr lowdown_latex_free 3 , +.Xr lowdown_latex_new 3 , +.Xr lowdown_latex_rndr 3 , +.Xr lowdown_metaq_free 3 , +.Xr lowdown_nroff_free 3 , +.Xr lowdown_nroff_new 3 , +.Xr lowdown_nroff_rndr 3 , +.Xr lowdown_odt_free 3 , +.Xr lowdown_odt_new 3 , +.Xr lowdown_odt_rndr 3 , +.Xr lowdown_term_free 3 , +.Xr lowdown_term_new 3 , +.Xr lowdown_term_rndr 3 , +.Xr lowdown_tree_rndr 3 , +.Xr lowdown 5 +.Sh AUTHORS +.Nm lowdown +was forked from +.Lk https://github.com/hoedown/hoedown hoedown +by +.An Kristaps Dzonsons , +.Mt kristaps@bsd.lv . 
+It has been considerably modified since. diff --git a/man/lowdown.5 b/man/lowdown.5 @@ -0,0 +1,948 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017 Christina Sophonpanich <huck@divelog.blue> +.\" Copyright (c) 2017--2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN 5 +.Os +. +. +.Sh NAME +.Nm lowdown +.Nd Markdown reference for lowdown +. +. +.Sh DESCRIPTION +Markdown is a simple, plain-text formatting language. +.Dq Plain-text +in this case means the document input looks similar to the output, less +the formatting niceties (boxed tables, italics, clickable links, etc.) +provided by the output medium. +For example: +.Bd -literal -offset indent +# How to be a Picard fan + +## Introduction + +In order to develop fandom skills one must first and foremost +know *whom* one idolises. Therefore: **who is Captain Picard**? + +1. Picard was named the \e*Best Star Trek Captain\e*, according +to a [5-week poll](poll.html). + + > Picard continued his winning ways in the final week, + > with fans naming him the most inspiring captain. + +2. Picard is handsome.  +3. Picard knows how to code: `make engage` + +--------------------------------- + +## Picard Fandom + +Here's why everyone wants to be a fan... 
+.Ed +.Pp +This example consists of a series of block elements: section header, +sub-section header, paragraph, set of list elements, horizontal rule, +then another sub-section header. +Each block element contains span elements: normal text, emphasised text +(bold and italicised), an image, a link, and a span of code. +.Pp +This document describes the Markdown syntax accepted by +.Xr lowdown 1 . +. +. +.Sh BLOCK ELEMENTS +A block element starts on a new line and extends to the next blank line +or block element. +A block element contains span elements. +. +.Ss Paragraphs and Line Breaks +A paragraph is made up of one or more lines of text possibly containing +span elements. +Paragraphs are separated by blank lines. +.Pp +To insert a hard line break (i.e., a line-break in the input that is +reproduced in the output), insert two spaces at the end of the line. +If commonmark input parsing is enabled, this may also be effected by +escaping the newline: +.Bd -literal -offset indent +Darmok and Jalad...\e +at Tanagra. +.Ed +. +.Ss Headers +There are two styles of headers: underlined +.Pq Dq setext +and hash-marked +.Pq Dq atx . +For underlined headers, underline the given word using equal signs +.Pq Dq = +for first-level headers and dashes +.Pq Dq \&- +for second-level headers. +.Bd -literal -offset indent +This is an underlined header 1 +============================== +.Ed +.Pp +For hash-marked headers, use the corresponding number of hash characters +to the corresponding level of header, up to 6 levels, at the start of +the line separated by one space followed by the header. +.Bd -literal -offset indent +## This is a hash-marked header 2 +.Ed +.Pp +If commonmark input parsing is enabled, the space is required after the +hash-marks in any hash-marked header. +.Pp +Both types support PHP Extra attributes enclosed in curly braces. +These may begin at any point and must end at the end of the line. 
+.Bd -literal -offset indent +## Star Trek: Enterprise { #stent } + +Star Trek: Enterprise { .reboots } +--------------------- +.Ed +.Pp +Non-empty values with a leading period are interpreted as HTML (CSS) or +OpenDocument classes, and values with a leading pound symbol are +interpreted as in-document link identifiers. +.Pp +Extra attribute identifiers override the default mechanism for creating +header identifiers. +They should contain only ASCII alphanumeric characters. +.Ss Block Quotes +Block quoted sections are invoked with a single right-angle bracket +.Pq Dq > +followed by a space at the start of each line and between paragraphs. +.Bd -literal -offset indent +> The Prime Directive is not just a set of rules; +> it is a philosophy... and a very correct one. +> +> (It goes on for a few paragraphs). +.Ed +.Pp +Block quotes may also have a non-multiline invocation: you need only +invoke the right-angle bracket at the start of a paragraph and omit it +entirely between paragraphs. +.Bd -literal -offset indent +> You cannot explain away a wantonly immoral act because +you think it is connected to some higher purpose. + +> Here is another paragraph about Picard wisdom. +.Ed +.Pp +Consecutive blockquotes as above will be merged as paragraphs within a +single block quote on output, even if styles +.Pq non-multiline and otherwise +are mixed. +.Pp +Block quotes may be nested within other block quotes, as may any other +block elements such as headers, ordered/unordered lists, and code +blocks. +.Bd -literal -offset indent +> ### hash-marked header 3 +> +> > I'd be delighted to offer any advice +> > I have on understanding women. +> > When I have some, I'll let you know. +> +> 1. advice list item 1 +> 2. advice list item 2 +> +> Here's the code to implement JLP's advice: + +> yes | read engage +.Ed +. +.Ss Lists +Lists may be specified as ordered (numbered) or unordered. +Ordered lists are invoked as numbers followed by periods +.Pq e.g., Dq 1. 
+and rendered in a similar format. +.Em Note : +it does not matter which order or which numbers you use in your ordered +lists, as all ordered lists start at one. +.Bd -literal -offset indent +1. Make. +2. It. +1. So. (Not 1. again!) +.Ed +.Pp +If commonmark input parsing is enabled, list items may alternatively +terminate with the right parenthesis: +.Bd -literal -offset indent +1) Live long +2) Prosper +.Ed +.Pp +To prevent lists erroneously started by a paragraph beginning with a +number and period, use a backslash before the period. +.Bd -literal -offset indent +1987. The year TNG premiered. + +1987\e. The year TNG premiered. +.Ed +.Pp +Unordered lists, on the other hand, can be invoked using either +asterisk +.Pq Dq * , +pluses +.Pq Dq + , +or hyphens +.Pq Dq \- , +and can be a mix of all three styles. +Regardless the style, list items are rendered the same way. +.Bd -literal -offset indent +- Earl Grey tea. +* Shakespeare. ++ Exotic fish. +.Ed +.Pp +All nested block elements need a new line break, otherwise they will be +rendered on the same line as the list item on output. +To insert paragraphs into a list item, indent each paragraph with either +four spaces or one tab. +.Bd -literal -offset indent +- First list item + + Courage can be an emotion too. + + Things are only impossible until they're not. ++ Second list item ++ Third list item +.Ed +.Pp +To insert block quotes into a list item, indent the block quote with +four spaces or one tab prior to the right-angle bracket +.Pq Dq > . +.Bd -literal -offset indent +* List item 1 +* List item 2 + + > I am Locutus of Borg. + + > That is the cutest of Borg. +.Ed +.Pp +Code blocks need to be indented twice (two tabs or eight leading spaces): once +for being recognised within the list item, another for the code block itself. 
+.Bd -literal -offset indent +* Here is a list item for an indented code block: + + alias path='echo -e ${PATH//:/\\n}' +.Ed +.Pp +To make list elements occur in tight sequence \(em like a grocery list +\(em don't have an empty line between the items. +.Bd -literal -offset indent +- Phaser +- Communicator +.Ed +.Pp +On the other hand, if you want to render lists separated by white-space, +use the following syntax: +.Bd -literal -offset indent +- A phaser is a type of weapon. + +- A communicator keeps Riker in contact with Troi. +.Ed +.Pp +This applies to ordered and unordered list types. +. +.Ss Task lists +One form of an unordered list is task lists, a GFM extension. +These begin with checkboxes (checked or not), rendered similarly in the output. +.Bd -literal -offset indent +Star Trek series with episodes in the Delta quadrant: + +- [ ] Original series +- [x] TNG +- [ ] DS9 +- [x] Voyager +- [ ] Enterprise +- [ ] Discovery +.Ed +.Pp +The check may be upper or lower case. +A space must follow the right square bracket. +. +.Ss Definition Lists +Definition lists are a PHP Extra extension. +They're similar to lists except in having key and value pairs, with keys +being preceded by a blank line: +.Bd -literal -offset indent +Best understated characters: + +*Quark* +: Armin Shimerman + +*Weyoun* +: Jeffrey Combs +.Ed +.Pp +Keys consist of a single line and may contain inline elements. +Like other lists, values may consist of arbitrary nested blocks. +There may be multiple consecutive values per key. +If the key and value are separated by a blank line, the list is emitted +as if it contained block elements (usually output as spacing between +key-value pairs). +. +.Ss Code Blocks +Code blocks consist of pre-formatted text, such as source code. +Each code block contains opaque/literal text. +This means that new lines and white spaces are retained \(em they're not +formatted in any way, and any text inside the code block is not +interpreted. 
+To invoke a code block, create a line break then indent each line with four +spaces or one tab. +.Bd -literal -offset indent +Here is a paragraph about Bridge protocol + + Here is a code block for the command "Engage" +.Ed +.Pp +Within a code block, text is escaped given the output format. +Therefore, characters that would normally need to be escaped in other +text processing languages such as ampersands +.Pq Dq & +do not need to be escaped. +.Bd -literal -offset indent +Here is how you start the program xterm: + + xterm & +.Ed +. +.Ss Horizontal Rules +A horizontal rule is a line that goes across an output page. +These are invoked with three or more asterisks +.Pq Dq * , +hyphens +.Pq Dq \- , +or underscores +.Pq Dq _ +on their own line. +Spaces between these characters are disregarded. +.Bd -literal -offset indent +*** +* * * +--- +- - - +___ +_ _ _ +___________________________ +.Ed +. +. +.Ss Metadata +Documents can include metadata that is not part of the main text. +The syntax loosely follows the +.Qq Multimarkdown +specification. +.Pp +The metadata block begins on the document's first line and continues +until the first blank line. +It consists of one or more key-value pairs, with keys and values +separated by a colon, and pairs separated by a newline. +A key (and following value) must exist on the line beginning the +metadata pair, but the value may span multiple lines. +.Bd -literal -offset indent +Title: Captain's log +Author: Captain J-L Picard +Summary: As part of an exchange program, we're taking + aboard a Klingon officer to return the recent visit + of Commander Riker to the cruiser Pagh. +Stardate: 43917.4 +.Ed +.Pp +If there are multiple lines of text in a metadata value, subsequent +lines should (but need not) be offset with whitespace. +Otherwise, they must not have a colon in the value, else they will be +construed as a subsequent pair's key. 
+.Pp +End each line with two spaces to ensure linebreaks are rendered on +output for non-conforming Markdown renderers. +Moreover, beginning a document with a regular sentence containing a +colon might invoke metadata. +To escape this, add one blank line to the beginning of the document. +.Pp +Metadata keys must consist of alphanumeric ASCII characters, the hyphen +.Pq Qq \&- , +or the underscore +.Pq Qq \&_ . +They must have at least one character and are stripped of white-space +and converted to lower case. +.Pp +Metadata values are opaque text: Markdown statements (e.g., italics, +entities, etc.) are copied as-is. +The values will have leading white-space stripped, i.e., space following +the colon. +.Pp +If multiple metadata keys resolve to the same name, the last invocation +is retained. +This check happens after canonicalising the name by stripping spaces, +converting to lower-case, and substituting unknown characters. +.Pp +Metadata values may be pasted into a document by referencing the +.Li \&[%key] , +such as using the above example, again with the caveat that Markdown +annotations (italics, etc.) are copied verbatim: +.Bd -literal -offset indent +Stardate: 43917.4 + +It's currently stardate [%stardate]. +.Ed +. +. +.Ss Mathematics +Mathematics support is an extension of Markdown. +The extension only describes how the math blocks begin and end: the +contained equations are usually in LaTeX and implemented in the +front-end (e.g., HTML). +There are two types: inline and block. +Both may occur anywhere in a text stream. +Inline equations are rendered as part of the text; block equations are +rendered on their own. +.Bd -literal -offset indent +This is an inline $f(x)$ function. +This is a block $$f(x)$$ function. +This is also an inline \e\e(f(x)\e\e) function. +This is also a block \e\e[f(x)\e\e] function. +.Ed +. +.Ss Tables +Tables are a GFM (GitHub-flavoured Markdown) extension of the basic +syntax. 
+They consist of a table header and body, and columns may be left, right, +or centre justified. +.Bd -literal -offset indent +| Officer | Rank | +| --------------: | -------------------- | +| Jean-Luc Picard | Captain | +| Worf | Lieutenant Commander | +| Data | Lieutenant Commander | +| William Riker | Commander | +.Ed +.Pp +The table header must be followed by a line of hyphens with at least +three hyphen/colons per column. +Columns are separated by vertical bars. +The colon indicates alignment: a colon at the beginning means left +justified; at the right for right justified, and both for centred. +.Pp +The leading and trailing column separator is superfluous. +Table data is not necessary, but the table header is. +The minimum table structure for the above is: +.Bd -literal -offset indent +Officer | Rank +--:|--- +Jean-Luc Picard | Captain +.Ed +.Pp +Table columns may contain arbitrary span elements. +. +.Ss Footnote Definition +Footnotes are a MMD (Multimarkdown) extension of the basic syntax. +Footnote definitions may occur anywhere in the text (except within +blocks) and are +.Dq pointed to +by a +.Sx Footnote Reference . +They consist of the footnote name (in square brackets, preceded by the +caret), a colon, then everything remaining in the block is the footnote +content. +.Bd -literal -offset indent +[^pt]: + Klingon insult, meaning something like "weirdo," deriving from + the verb "to be weird" (**taQ**), with and [sic] you (plural) + imperative prefix (**pe-**). +.Ed +.Pp +Footnote contents may be on the same line as the colon. +The footnote name is rendered as a number. +If a footnote definition is not referred to, it is not printed. +. +.Ss HTML Blocks +Embedded HTML is discouraged, as it inhibits formatting into non-HTML +output, but is still accepted. +Blocks of HTML must begin with a recognised HTML block-level element. +.Pp +In the original Markdown, block-level elements were well-defined by +HTML4. 
+HTML5 elements are also accepted, but as there is no concept of +block-level in HTML5, these are non-canonical. +Accepted elements are +.Li <address> , +.Li <article> , +.Li <aside> , +.Li <blockquote> , +.Li <del> , +.Li <details> , +.Li <dialog> , +.Li <dd> , +.Li <div> , +.Li <dl> , +.Li <dt> , +.Li <fieldset> , +.Li <figcaption> , +.Li <figure> , +.Li <footer> , +.Li <form> , +.Li <h1> , +.Li <h2> , +.Li <h3> , +.Li <h4> , +.Li <h5> , +.Li <h6> , +.Li <header> , +.Li <hgroup> , +.Li <iframe> , +.Li <ins> , +.Li <li> , +.Li <main> , +.Li <math> , +.Li <nav> , +.Li <noscript> , +.Li <ol> , +.Li <p> , +.Li <pre> , +.Li <section> , +.Li <script> , +.Li <style> , +.Li <table> , +.Li <ul> , +and self-closing +.Li <hr /> . +.Sh SPAN ELEMENTS +Span elements are inline elements (including normal text) within block +elements, for example, a span of emphasised text or a hyperlink. +A span element cannot contain a block element, but can contain other +span elements. +. +.Ss Emphasis +There are two different styles of emphasis: strong, usually rendered as +bold; and emphasis, usually rendered as italics. +This is confusing, so sometimes the former is referred to as a +.Dq double-emphasis +while the latter is a +.Dq single-emphasis . +.Pp +Text surrounded by a single asterisk +.Pq Dq * +or underscore +.Pq Dq _ , +the single-emphasis variant, is traditionally rendered with italics. +.Bd -literal -offset indent +*Captain Picard* +_Captain Picard_ +.Ed +.Pp +Text surrounded by a double asterisk +.Pq Dq ** +or underscore +.Pq Dq __ , +the double-emphasis variant, is traditionally rendered as bold. +.Bd -literal -offset indent +**Jean-Luc Picard** +__Jean-Luc Picard__ +.Ed +.Pp +Emphasis may occur within the middle of a word: +.Bd -literal -offset indent +En*ter*prise +.Ed +.Pp +In order to produce a literal asterisk +.Pq Dq * +or underscore +.Pq Dq _ +simply surround the character by white space. 
+.Bd -literal -offset indent +The ship * USS Enterprise * will not be emphasized +.Ed +.Pp +Two additional types of double-emphasis are the strike-through and +highlight. +These are produced by pairs of tilde and equal characters, respectively: +.Bd -literal -offset indent +~~Kirk~~Picard is the best ==captain==. +.Ed +.Pp +The highlight variant may be enabled in +.Xr lowdown 1 +with highlight parsing enabled. +It's disabled by default because if used at the beginning of a line it +may be erroneously interpreted as a section. +. +.Ss Links +There are two types of links: inline and reference. +In both cases, the linked text is denoted by square brackets +.Pq Dq \&[] . +An inline link uses parentheses +.Pq Dq \&() +containing the URL immediately following the linked text in square +brackets to invoke the link. +.Bd -literal -offset indent +[text to link](https://bsd.lv) +.Ed +.Pp +Local references may be absolute or relative paths: +.Bd -literal -offset indent +[Picard](/Picard) +.Ed +.Pp +A reference link, on the other hand, keeps the URL outside of the text +\(em usually in the footnotes. +Define a reference link anywhere in a document by a title in square +brackets +.Pq Dq \&[] +followed by a colon +.Pq Dq \&: +followed by the corresponding URL or path, then an optional title. +.Bd -literal -offset indent +[link1]: https://www.bsd.lv/picard.jpg "Optional Title" +.Ed +.Pp +The title may be delimited with single quotes, double quotes, or +parentheses. +It is only rendered in HTML output. +It encompasses all text until the last delimiter before the end of line, +so it may contain delimiters. +The title may be on its own line. +.Pp +Reference the link anywhere in your text using [text to the link] and +the same [link title], both in square brackets +.Pq Dq \&[] +next to each other: +.Bd -literal -offset indent +Text about [Captain Picard][link1]. +.Ed +.Pp +References need not follow the definition: both may appear anywhere in +relation to the other. 
+.Pp +Reference and inline links may be followed by PHP Extra attributes. +.Bd -literal -offset indent +Lowdown [homepage][home] or +[github](https://github.com/kristapsdz/lowdown){ .gh #link1 }. + +[home]: https://kristaps.bsd.lv/lowdown { .home #link2 } +.Ed +.Pp +For inline links, the open brace must immediately follow the closing +parenthesis. +Attributes are separated by spaces. +.Pp +Values with a leading period +.Pq Qq \&.class +are interpreted as HTML (CSS) or OpenDocument classes, and values with +a leading pound symbol +.Pq Qq \&#id +are interpreted as in-document link identifiers. +. +.Ss Automatic Links +Automatic links are links to URLs or email addresses that do not require link +text; rather, the full link or email address is inferred from the +text. +To invoke an automatic link, surround the link or email address with +angle brackets +.Pq Dq \&<> , +for example: +.Bd -literal -offset indent +<https://bsd.lv/> +<kristaps@localhost> +.Ed +. +.Ss Images +The image syntax resembles the links syntax. +The key difference is that images require an exclamation +mark +.Pq Dq \&! +before the text to link surrounded by square brackets +.Pq Dq \&[] . +.Bd -literal -offset indent +![Picard](https://bsd.lv/picard.jpg) +.Ed +.Pp +Just like with links, there are both inline and reference image links. +.Pp +The inline style consists of an exclamation mark +.Pq Dq \&! +followed by the alternate text (which may be empty) surrounded by square +brackets +.Pq Dq \&[] +followed by the URL or the path in parentheses +.Pq Dq \&() . +.Pp +Unlike link text within square brackets, the alternate text is interpreted +as-is. +Thus, Markdown or HTML entities will be passed directly to output +(escaped according to output medium). +Alternate text +.Em may not +begin with the caret +.Pq Dq \&^ +or percent +.Pq Dq \&% , +else they will be interpreted as footnote or metadata references, +respectively. 
+.Pp +The parentheses may contain optional dimensions +.Pq Ar width Ns x Ns Op Ar height +starting with an equal sign or a quoted (single or double quotes) title +in any order after the URL or path. +These dimensions are pixel sizes. +.Bd -literal -offset indent +![Picard](https://bsd.lv/picard.jpg =250x250 "Picture of Picard") +.Ed +.Pp +The reference style definition consists of an image identifier +surrounded by square brackets +.Pq Dq \&[] +followed by a colon +.Pq Dq \&: +followed by an image URL or path to image and optional title attribute +in double quotation marks. +.Bd -literal -offset indent +[image1]: https://bsd.lv/picard.jpg "Picture of Picard" +.Ed +.Pp +Invoking the image reference is as follows: +.Bd -literal -offset indent +A picture of the captain: ![Captain Picard][image1] +.Ed +.Pp +As with regular reference links, the definition and references may occur +anywhere in relation to each other. +.Pp +Images may also be followed by PHP Extra attributes for classes, +identifiers, and width and height. +Implementation of these depends on the output medium. +.Bd -literal -offset indent +![Picard](https://bsd.lv/picard.jpg){width=20% .class} +.Ed +.Pp +The open brace must immediately follow the closing parenthesis. +Attributes are separated by spaces. +.Pp +Value pairs +.Qq width=xx +and +.Qq height=xx +are interpreted as HTML (CSS), OpenOffice, or LaTeX dimensions. +These override set pixel dimensions. +Percentages are understood by all three media; otherwise, dimension +units are interpreted according to the medium. +.Pp +Values with a leading period +.Pq Qq \&.class +are interpreted as HTML (CSS) or OpenDocument classes, and values with a +leading pound symbol +.Pq Qq \&#id +are interpreted as in-document link identifiers. +. +.Ss Code +In addition to code blocks, inline code spans may be specified within +paragraphs or other block or span elements. +To invoke a span of code, surround the code using backtick quotes +.Pq Dq \&` . +.Bd -literal -offset indent +I need your IP address to scp you Picard pics. +Use the `ifconfig iwm0` command. 
+.Ed +.Pp +To include literal backticks +.Pq Dq \&` +within a span of code, +surround the code using multiple backticks +.Pq Dq \&`` . +.Bd -literal -offset indent +``Here is a span of code with `backticks` inside it.`` +.Ed +.Pp +If you have a literal backtick at the start or end of the span of code, +leave a space between the literal backtick and the delimiting backticks. +.Bd -literal -offset indent +`` `So many backticks.` `` +.Ed +. +.Ss Footnote Reference +Footnotes are a MMD (Multimarkdown) extension of the basic syntax. +Footnote references point into a block-level +.Sx Footnote Definition . +They consist of the footnote name in square brackets, preceded by the +caret. +.Bd -literal -offset indent +P'tahk[^pt], tell me who you are, or I will kill you right here! +.Ed +.Pp +The footnote name is rendered as a number. +There may only be one footnote reference per definition. +If a footnote reference refers to an unknown definition, or if it has +already been used in referring to a definition, it is printed as-is. +Footnote definitions without references are not printed. +Nested footnotes are not allowed. +. +.Ss Superscripts +Uses the caret +.Pq Dq \(ha +to start a superscript. +The superscripted material continues to white-space or, if starting with +an open parenthesis, the close parenthesis. +.Bd -literal -offset indent +Though a great book, Q\(ha2 (Q\(ha(squared)) isn't Star Trek canon. +.Ed +. +.Ss HTML Content +While block-level HTML must begin with a recognised block-level HTML +element, span-level HTML need only begin and end with angle brackets, +and not contain a hyperlink. +.Pp +Thus, +.Li <p> , +.Li <Leonard Nimoy> , +and +.Li <span class="foo"> +are all accepted. +Even malformed content, such as +.Li <span class="foo> +is accepted, so long as it begins and ends with angle brackets. +. +.Sh ESCAPES +. +.Ss Automatic Escapes +Output is automatically escaped depending upon the medium. 
+For example, HTML output will properly escape angle brackets +.Dq Pq \&< +and ampersands +.Dq Pq \&& +to produce conformant HTML. +The same goes with +.Xr man 7 +output in escaping leading periods and so forth. +. +.Ss Backslash Escapes +Backslash escapes render literal characters that would otherwise invoke +a particular block or span element. +For example, surrounding a phrase with single asterisks renders it as an +emphasis: +.Bd -literal -offset indent +*Captain Picard* +.Ed +.Pp +However, if you want to invoke those italics as literal characters, +escape those asterisks using backslashes +.Pq Dq \e . +.Bd -literal -offset indent +\e*Captain Picard\e* +.Ed +.Pp +The following characters may be escaped to produce literal text: +.Pp +.Bl -tag -width Ds -compact -offset indent +.It Li * +asterisk +.It Li \e +backslash +.It Li ` +backtick +.It Li { +curly brace +.It Li \&! +exclamation mark +.It Li # +hash mark +.It Li - +minus sign +.It Li \&( +parentheses +.It Li \&. +period +.It Li + +plus sign +.It Li \&[ +square bracket +.It Li _ +underscore +.El +. +. +.Sh TYPOGRAPHY +.Xr lowdown 1 +renders certain character sequences for easier reading. +This is called +.Qq smart formatting . +The following character sequences are converted to output-specific +glyphs. +The table shows whether the sequences must be on word boundaries. +.Bd -filled -offset indent +.TS +l l l. +(c) copyright +(r) registered +(tm) trademark +(sm) service mark +\&... ellipsis +\&. . . 
ellipsis +--- em-dash +-- en-dash +1/4 one-quarter full word boundary +1/4th one-quarter full word boundary +3/4 three-quarters full word boundary +3/4th three-quarters full word boundary +3/4ths three-quarters full word boundary +1/2 one-half full word boundary +" left-double left word boundary +" right-double right word boundary +\&' left-single left word boundary +\&' right-single not left word boundary +.TE +.Ed +.Pp +Word boundaries are defined by white-space (including the end of blocks, +such as paragraphs, or end of file) or punctuation. +Left word boundary refers to white-space or a left parenthesis or square +bracket to the left of the sequence. +Right refers to white-space or punctuation to the right. +.Pp +Smart quotes (single and double) are not context aware: using a left or +right quote depends upon the characters surrounding the quote, not +whether a prior quote mark has already been used. +. +.Sh SEE ALSO +.Xr lowdown 1 +.Sh STANDARDS +The Markdown syntax accepted by +.Xr lowdown 1 +conforms to John Gruber's original Markdown implementation. +Extensions to the language are specifically noted. +They include: +.Bl -tag -width Ds +.It Lk http://commonmark.org CommonMark +.It Lk https://github.github.com/gfm GFM +.It Lk http://fletcherpenney.net/multimarkdown Multimarkdown +.It Lk https://michelf.ca/projects/php-markdown/extra PHP Extra +.El +.Sh AUTHORS +.An -nosplit +The +.Nm +reference was originally written by +.An Christina Sophonpanich +and is maintained by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . diff --git a/man/lowdown_buf.3 b/man/lowdown_buf.3 @@ -0,0 +1,126 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017, 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. 
+.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_BUF 3 +.Os +.Sh NAME +.Nm lowdown_buf +.Nd parse a Markdown buffer into formatted output +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft int +.Fo lowdown_buf +.Fa "const struct lowdown_opts *opts" +.Fa "const char *buf" +.Fa "size_t bufsz" +.Fa "char **ret" +.Fa "size_t *retsz" +.Fa "struct lowdown_metaq *metaq" +.Fc +.Sh DESCRIPTION +Parses a +.Xr lowdown 5 +buffer +.Fa buf +of size +.Fa bufsz +into an output buffer +.Fa ret +of size +.Fa retsz +according to a configuration +.Fa opts . +The output format is specified by +.Fa opts->type . +If +.Dv LOWDOWN_METADATA +is set in +.Fa opts->feat +and +.Fa metaq +is not +.Dv NULL , +.Fa metaq +is filled with metadata rendered in the given output format. +.Pp +The caller is responsible for freeing +.Fa ret +and +.Fa metaq . +.Sh RETURN VALUES +Returns zero on failure, non-zero on success. +On failure, the values pointed to by +.Fa ret +and +.Fa retsz +are undefined. +.Sh EXAMPLES +The following parses standard input into a standalone HTML5 document. +It enables footnotes, autolinks, tables, superscript, strikethrough, +fenced codeblocks, commonmark, definition lists, extended image +attributes, and metadata processing. +The output passes through raw HTML and has smart typography. 
+.Bd -literal -offset indent +struct lowdown_opts opts; +char *buf = NULL, *obuf; +char rbuf[1024]; +size_t sz, bufsz = 0, obufsz; + +while (!(feof(stdin) || ferror(stdin))) { + sz = fread(rbuf, 1, sizeof(rbuf), stdin); + if (sz == 0) + break; + buf = realloc(buf, bufsz + sz); + if (buf == NULL) + err(1, NULL); + memcpy(buf + bufsz, rbuf, sz); + bufsz += sz; +} + +if (ferror(stdin)) + err(1, "fread"); + +memset(&opts, 0, sizeof(struct lowdown_opts)); +opts.type = LOWDOWN_HTML; +opts.feat = LOWDOWN_FOOTNOTES | + LOWDOWN_AUTOLINK | + LOWDOWN_TABLES | + LOWDOWN_SUPER | + LOWDOWN_STRIKE | + LOWDOWN_FENCED | + LOWDOWN_COMMONMARK | + LOWDOWN_DEFLIST | + LOWDOWN_IMG_EXT | + LOWDOWN_METADATA; +opts.oflags = LOWDOWN_HTML_HEAD_IDS | + LOWDOWN_HTML_NUM_ENT | + LOWDOWN_HTML_OWASP | + LOWDOWN_SMARTY | + LOWDOWN_STANDALONE; +if (!lowdown_buf(&opts, buf, bufsz, &obuf, &obufsz, NULL)) + errx(1, "lowdown_buf"); +fwrite(obuf, 1, obufsz, stdout); +free(buf); +free(obuf); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_metaq_free 3 diff --git a/man/lowdown_buf_diff.3 b/man/lowdown_buf_diff.3 @@ -0,0 +1,69 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2018, 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_BUF_DIFF 3 +.Os +.Sh NAME +.Nm lowdown_buf_diff +.Nd parse and diff Markdown buffers into formatted output +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft int +.Fo lowdown_buf_diff +.Fa "const struct lowdown_opts *opts" +.Fa "const char *new" +.Fa "size_t newsz" +.Fa "const char *old" +.Fa "size_t oldsz" +.Fa "char **ret" +.Fa "size_t *retsz" +.Fc +.Sh DESCRIPTION +Parses +.Xr lowdown 5 +buffers +.Fa new +of size +.Fa newsz +and +.Fa old +of size +.Fa oldsz +and produces an edit script in +.Fa ret +of size +.Fa retsz +according to configurations +.Fa opts . +The script defines differences from +.Fa old +to +.Fa new . +The output format is specified by +.Fa opts->type . +.Pp +The caller is responsible for freeing +.Fa ret . +.Sh RETURN VALUES +Returns zero on failure, non-zero on success. +Failure occurs from memory exhaustion. +.Sh SEE ALSO +.Xr lowdown 3 diff --git a/man/lowdown_buf_free.3 b/man/lowdown_buf_free.3 @@ -0,0 +1,43 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_BUF_FREE 3 +.Os +.Sh NAME +.Nm lowdown_buf_free +.Nd free a dynamic buffer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void +.Fo lowdown_buf_free +.Fa "struct lowdown_buf *buf" +.Fc +.Sh DESCRIPTION +Frees a dynamic buffer created with +.Xr lowdown_buf_new 3 . +If +.Va buf +is +.Dv NULL , +the function does nothing. +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_buf_new 3 diff --git a/man/lowdown_buf_new.3 b/man/lowdown_buf_new.3 @@ -0,0 +1,44 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_BUF_NEW 3 +.Os +.Sh NAME +.Nm lowdown_buf_new +.Nd allocate a dynamic buffer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft struct lowdown_buf * +.Fo lowdown_buf_new +.Fa "size_t growsz" +.Fc +.Sh DESCRIPTION +Allocates a dynamic buffer that grows in increments of size +.Fa growsz , +which may not be zero. +.Sh RETURN VALUES +Returns a pointer to a buffer or +.Dv NULL +on memory failure. +The pointer must be freed with +.Xr lowdown_buf_free 3 . 
+.Sh SEE ALSO +.Xr lowdown 3 diff --git a/man/lowdown_diff.3 b/man/lowdown_diff.3 @@ -0,0 +1,123 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_DIFF 3 +.Os +.Sh NAME +.Nm lowdown_diff +.Nd compute difference between parsed Markdown trees +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft "struct lowdown_node *" +.Fo lowdown_diff +.Fa "const struct lowdown_node *nold" +.Fa "const struct lowdown_node *nnew" +.Fa "size_t *maxn" +.Fc +.Sh DESCRIPTION +Computes the difference between two Markdown trees, the source +.Fa nold +and destination +.Fa nnew , +parsed by +.Xr lowdown_doc_parse 3 . +It uses the +.Vt enum lowdown_chng +type in the return tree's nodes to dictate insertions into and deletions +from +.Fa nold . +The +.Fa maxn +argument, if not +.Dv NULL , +is set to one greater than the highest node identifier of the returned +tree. +.Sh RETURN VALUES +Returns a pointer to the difference tree or +.Dv NULL +on memory exhaustion. +The pointer must be freed with +.Xr lowdown_node_free 3 . +.Sh EXAMPLES +The following parses and compares +.Va old +of length +.Va osz +and +.Va new +of length +.Va nsz . 
+It first allocates the parser, then the document, then the renderer +(HTML is used in this case). +Then it passes output to the renderer, prints it, and cleans up +resources. +On any memory errors, it exits with +.Xr err 3 . +.Bd -literal -offset indent +struct lowdown_doc *doc; +struct lowdown_node *no, *nn, *diff; +struct lowdown_buf *ob; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((no = lowdown_doc_parse(doc, NULL, old, osz, NULL)) == NULL) + err(1, NULL); +if ((nn = lowdown_doc_parse(doc, NULL, new, nsz, NULL)) == NULL) + err(1, NULL); +if ((diff = lowdown_diff(no, nn, NULL)) == NULL) + err(1, NULL); +if ((rndr = lowdown_html_new(NULL)) == NULL) + err(1, NULL); +if ((ob = lowdown_buf_new(1024)) == NULL) + err(1, NULL); +if (!lowdown_html_rndr(ob, rndr, diff)) + err(1, NULL); + +fwrite(ob->data, 1, ob->size, stdout); + +lowdown_buf_free(ob); +lowdown_html_rndr_free(rndr); +lowdown_node_free(no); +lowdown_node_free(nn); +lowdown_node_free(diff); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 +.Rs +.%A Gregory Cobena +.%A Serge Abiteboul +.%A Amelie Marian +.%D 2002 +.%T "Detecting Changes in XML Documents" +.%U https://www.cs.rutgers.edu/~amelie/papers/2002/diff.pdf +.Re +.Rs +.%A Wu Sun +.%A Manber Udi +.%A Myers Gene +.%T "An O(NP) sequence comparison algorithm" +.%J Information Processing Letters +.%V Volume 35 +.%I Issue 6 +.%D 1990 +.Re diff --git a/man/lowdown_doc_free.3 b/man/lowdown_doc_free.3 @@ -0,0 +1,43 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_DOC_FREE 3 +.Os +.Sh NAME +.Nm lowdown_doc_free +.Nd free a Markdown parser instance +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void +.Fo lowdown_doc_free +.Fa "struct lowdown_doc *doc" +.Fc +.Sh DESCRIPTION +Frees a parser created with +.Xr lowdown_doc_new 3 . +If +.Va doc +is +.Dv NULL , +the function does nothing. +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_doc_new 3 diff --git a/man/lowdown_doc_new.3 b/man/lowdown_doc_new.3 @@ -0,0 +1,81 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_DOC_NEW 3 +.Os +.Sh NAME +.Nm lowdown_doc_new +.Nd allocate a Markdown parser +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft struct lowdown_doc * +.Fo lowdown_doc_new +.Fa "const struct lowdown_opts *opts" +.Fc +.Sh DESCRIPTION +Allocates a Markdown parser instance with configuration +.Fa opts . +If +.Fa opts +is +.Dv NULL , +all values are assumed to be zero except for the maximum parse depth, +which is fixed at 128. +.Pp +The returned instance may be used with multiple invocations of +.Xr lowdown_doc_parse 3 . +.Sh RETURN VALUES +Returns a pointer to the parser or +.Dv NULL +on memory allocation failure. +The returned pointer must be freed with a call to +.Xr lowdown_doc_free 3 . +.Pp +Any pointer values in +.Fa opts , +such as those in +.Va meta +and +.Va metaovr , +are copied over, so they need not persist after being passed to +.Fn lowdown_doc_new . +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and throws away the result. +.Bd -literal -offset indent +struct lowdown_doc *doc; +struct lowdown_node *n; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); + +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_doc_free 3 , +.Xr lowdown_doc_parse 3 diff --git a/man/lowdown_doc_parse.3 b/man/lowdown_doc_parse.3 @@ -0,0 +1,110 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017--2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_DOC_PARSE 3 +.Os +.Sh NAME +.Nm lowdown_doc_parse +.Nd parse a Markdown document into an AST +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft "struct lowdown_node *" +.Fo lowdown_doc_parse +.Fa "struct lowdown_doc *doc" +.Fa "size_t *maxn" +.Fa "const char *input" +.Fa "size_t inputsz" +.Fa "struct lowdown_metaq *metaq" +.Fc +.Sh DESCRIPTION +Parse a +.Xr lowdown 5 +document +.Fa input +of length +.Fa inputsz +into an AST with the parser +.Fa doc . +The +.Fa maxn +argument, if not +.Dv NULL , +is set to one greater than the highest node identifier. +Its value is undefined if the function returns +.Dv NULL . +.Pp +If +.Fa metaq +is not +.Dv NULL , +it is filled in with document metadata (if any). +Metadata key names are canonicalised and duplicate names are ignored. +The results should be freed with +.Xr lowdown_metaq_free 3 . +.Pp +This function may be invoked multiple times with a single +.Fa doc +and different input. +.Sh RETURN VALUES +Returns the root of the parse tree or +.Dv NULL +on memory allocation failure. +If not +.Dv NULL , +the returned node is always of type +.Dv LOWDOWN_ROOT . +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz . +It first allocates the parser, then the document, then the renderer +(HTML is used in this case). +Then it passes output to the renderer, prints it, and cleans up +resources. +On any memory errors, it exits with +.Xr err 3 . 
+.Bd -literal -offset indent +struct lowdown_doc *doc; +struct lowdown_node *n; +struct lowdown_buf *ob; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((rndr = lowdown_html_new(NULL)) == NULL) + err(1, NULL); +if ((ob = lowdown_buf_new(1024)) == NULL) + err(1, NULL); +if (!lowdown_html_rndr(ob, rndr, n)) + err(1, NULL); + +fwrite(ob->data, 1, ob->size, stdout); + +lowdown_buf_free(ob); +lowdown_html_free(rndr); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 diff --git a/man/lowdown_file.3 b/man/lowdown_file.3 @@ -0,0 +1,107 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017, 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_FILE 3 +.Os +.Sh NAME +.Nm lowdown_file +.Nd parse a Markdown file into formatted output +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft int +.Fo lowdown_file +.Fa "const struct lowdown_opts *opts" +.Fa "FILE *in" +.Fa "char **ret" +.Fa "size_t *retsz" +.Fa "struct lowdown_metaq *metaq" +.Fc +.Sh DESCRIPTION +Parses a +.Xr lowdown 5 +file stream +.Fa in +into an output buffer +.Fa ret +of size +.Fa retsz +according to configuration +.Fa opts . +The output format is specified by +.Fa opts->type . +If +.Dv LOWDOWN_METADATA +is set in +.Fa opts->feat +and +.Fa metaq +is not +.Dv NULL , +.Fa metaq +is filled with metadata rendered in the given output format. +.Pp +On success, the caller is responsible for freeing +.Fa ret +and +.Fa metaq . +.Sh RETURN VALUES +Returns zero on failure, non-zero on success. +On failure, the values pointed to by +.Fa ret +and +.Fa retsz +are undefined. +.Sh EXAMPLES +The following parses standard input into a standalone HTML5 document. +It enables footnotes, autolinks, tables, superscript, strikethrough, +fenced codeblocks, commonmark, definition lists, extended image +attributes, and metadata processing. +The output passes through raw HTML and has smart typography. 
+.Bd -literal -offset indent +struct lowdown_opts opts; +char *buf; +size_t bufsz; + +memset(&opts, 0, sizeof(struct lowdown_opts)); +opts.type = LOWDOWN_HTML; +opts.feat = LOWDOWN_FOOTNOTES | + LOWDOWN_AUTOLINK | + LOWDOWN_TABLES | + LOWDOWN_SUPER | + LOWDOWN_STRIKE | + LOWDOWN_FENCED | + LOWDOWN_COMMONMARK | + LOWDOWN_DEFLIST | + LOWDOWN_IMG_EXT | + LOWDOWN_METADATA; +opts.oflags = LOWDOWN_HTML_HEAD_IDS | + LOWDOWN_HTML_NUM_ENT | + LOWDOWN_HTML_OWASP | + LOWDOWN_SMARTY | + LOWDOWN_STANDALONE; +if (!lowdown_file(&opts, stdin, &buf, &bufsz, NULL)) + errx(1, "lowdown_file"); +fwrite(buf, 1, bufsz, stdout); +free(buf); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_metaq_free 3 diff --git a/man/lowdown_file_diff.3 b/man/lowdown_file_diff.3 @@ -0,0 +1,64 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2018, 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_FILE_DIFF 3 +.Os +.Sh NAME +.Nm lowdown_file_diff +.Nd parse and diff Markdown files into formatted output +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft int +.Fo lowdown_file_diff +.Fa "const struct lowdown_opts *opts" +.Fa "FILE *fnew" +.Fa "FILE *fold" +.Fa "char **ret" +.Fa "size_t *retsz" +.Fc +.Sh DESCRIPTION +Parses +.Xr lowdown 5 +file streams +.Fa fnew +and +.Fa fold +and produces an edit script in +.Fa ret +of size +.Fa retsz +according to configurations +.Fa opts . +The output format is specified by +.Fa opts->type . +.Pp +On success, the caller is responsible for freeing +.Fa ret . +.Sh RETURN VALUES +Returns zero on failure, non-zero on success. +Failure occurs when the file read failed or on memory exhaustion. +On failure, the contents of +.Fa ret +and +.Fa retsz +are undefined. +.Sh SEE ALSO +.Xr lowdown 3 diff --git a/man/lowdown_gemini_free.3 b/man/lowdown_gemini_free.3 @@ -0,0 +1,79 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020--2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_GEMINI_FREE 3 +.Os +.Sh NAME +.Nm lowdown_gemini_free +.Nd free a Markdown gemini renderer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void +.Fo lowdown_gemini_free +.Fa "void *arg" +.Fc +.Sh DESCRIPTION +Frees the gemini renderer created with +.Xr lowdown_gemini_new 3 . +If +.Va arg +is +.Dv NULL , +the function does nothing. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in Gemini format. +.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) + err(1, NULL); +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_gemini_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_gemini_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_gemini_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_gemini_new 3 +.Sh STANDARDS +The gemini format is documented in +.Lk https://gemini.circumlunar.space/docs/specification.html Project Gemini . +The version at the time of writing is 0.14.3. diff --git a/man/lowdown_gemini_new.3 b/man/lowdown_gemini_new.3 @@ -0,0 +1,102 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020--2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_GEMINI_NEW 3 +.Os +.Sh NAME +.Nm lowdown_gemini_new +.Nd allocate a Markdown gemini renderer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void * +.Fo lowdown_gemini_new +.Fa "const struct lowdown_opts *opts" +.Fc +.Sh DESCRIPTION +Allocates a Gemini renderer using +.Fa opts->oflags , +or zero if +.Fa opts +is +.Dv NULL . +The returned pointer may be used with multiple invocations of +.Xr lowdown_gemini_rndr 3 +and must be freed with +.Xr lowdown_gemini_free 3 . +.Pp +The bits recognised in +.Fa opts->oflags +are +.Dv LOWDOWN_GEMINI_LINK_IN , +.Dv LOWDOWN_GEMINI_LINK_END , +.Dv LOWDOWN_GEMINI_LINK_NOREF , +.Dv LOWDOWN_GEMINI_LINK_ROMAN , +.Dv LOWDOWN_GEMINI_METADATA , +and +.Dv LOWDOWN_STANDALONE . +These are documented in +.Xr lowdown 3 . +.Sh RETURN VALUES +Returns a pointer to the renderer or +.Dv NULL +on memory failure. +The returned pointer must be freed with +.Xr lowdown_gemini_free 3 . +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in Gemini format. 
+.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) + err(1, NULL); +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_gemini_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_gemini_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_gemini_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_gemini_free 3 , +.Xr lowdown_gemini_rndr 3 +.Sh STANDARDS +The gemini format is documented in +.Lk https://gemini.circumlunar.space/docs/specification.html Project Gemini . +The version at the time of writing is 0.14.3. diff --git a/man/lowdown_gemini_rndr.3 b/man/lowdown_gemini_rndr.3 @@ -0,0 +1,98 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020--2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_GEMINI_RNDR 3 +.Os +.Sh NAME +.Nm lowdown_gemini_rndr +.Nd render Markdown into gemini +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft int +.Fo lowdown_gemini_rndr +.Fa "struct lowdown_buf *out" +.Fa "void *arg" +.Fa "const struct lowdown_node *n" +.Fc +.Sh DESCRIPTION +Renders a node tree +.Fa n +created by +.Xr lowdown_doc_parse 3 +or +.Xr lowdown_diff 3 +using the gemini renderer +.Fa arg +as returned by +.Xr lowdown_gemini_new 3 . +The output is written into +.Fa out , +which must be initialised and freed by the caller. +.Pp +The caller is expected to have invoked +.Xr setlocale 3 +to a +.Qq UTF-8 +character encoding prior to using this function, otherwise UTF-8 +sequences will not be properly recognised. +This is used when formatting table column widths. +.Sh RETURN VALUES +Returns zero on failure to allocate memory, non-zero on success. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in Gemini format. +.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) + err(1, NULL); +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_gemini_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_gemini_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_gemini_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_gemini_free 3 , +.Xr lowdown_gemini_new 3 +.Sh STANDARDS +The gemini format is documented in +.Lk https://gemini.circumlunar.space/docs/specification.html Project Gemini . +The version at the time of writing is 0.14.3. 
diff --git a/man/lowdown_html_free.3 b/man/lowdown_html_free.3 @@ -0,0 +1,77 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_HTML_FREE 3 +.Os +.Sh NAME +.Nm lowdown_html_free +.Nd free a Markdown HTML renderer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void +.Fo lowdown_html_free +.Fa "void *arg" +.Fc +.Sh DESCRIPTION +Frees the HTML renderer created with +.Xr lowdown_html_new 3 . +If +.Va arg +is +.Dv NULL , +the function does nothing. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in HTML format. 
+.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_html_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_html_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_html_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_html_new 3 +.Sh STANDARDS +The referenced HTML5 standard is +.Lk https://www.w3.org/TR/html52 HTML5.2 . +Output is compatible with prior HTML5 standards. diff --git a/man/lowdown_html_new.3 b/man/lowdown_html_new.3 @@ -0,0 +1,101 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017, 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_HTML_NEW 3 +.Os +.Sh NAME +.Nm lowdown_html_new +.Nd allocate a Markdown HTML renderer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void * +.Fo lowdown_html_new +.Fa "const struct lowdown_opts *opts" +.Fc +.Sh DESCRIPTION +Allocates an HTML5 renderer using +.Fa opts->oflags , +or zero if +.Fa opts +is +.Dv NULL . +This field is documented in +.Xr lowdown 3 . +The returned pointer may be used with multiple invocations of +.Xr lowdown_html_rndr 3 +and must be freed with +.Xr lowdown_html_free 3 . +.Pp +The bits recognised in +.Fa opts->oflags +are +.Dv LOWDOWN_HTML_OWASP , +.Dv LOWDOWN_HTML_NUM_ENT , +.Dv LOWDOWN_HTML_HEAD_IDS , +.Dv LOWDOWN_HTML_HARD_WRAP , +.Dv LOWDOWN_HTML_SKIP_HTML , +.Dv LOWDOWN_HTML_ESCAPE , +and +.Dv LOWDOWN_STANDALONE . +.Sh RETURN VALUES +Returns a pointer to the renderer or +.Dv NULL +on memory failure. +The returned pointer must be freed with +.Xr lowdown_html_free 3 . +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in HTML format. +.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_html_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_html_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_html_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_html_free 3 , +.Xr lowdown_html_rndr 3 +.Sh STANDARDS +The referenced HTML5 standard is +.Lk https://www.w3.org/TR/html52 HTML5.2 . +Output is compatible with prior HTML5 standards. 
diff --git a/man/lowdown_html_rndr.3 b/man/lowdown_html_rndr.3 @@ -0,0 +1,90 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017, 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_HTML_RNDR 3 +.Os +.Sh NAME +.Nm lowdown_html_rndr +.Nd render Markdown into HTML +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft int +.Fo lowdown_html_rndr +.Fa "struct lowdown_buf *out" +.Fa "void *arg" +.Fa "const struct lowdown_node *n" +.Fc +.Sh DESCRIPTION +Renders a node tree +.Fa n +created by +.Xr lowdown_doc_parse 3 +or +.Xr lowdown_diff 3 +using the HTML renderer +.Fa arg +as returned by +.Xr lowdown_html_new 3 . +The output is written into +.Fa out , +which must be initialised and freed by the caller. +.Pp +The output consists of a UTF-8 HTML5 document. +.Sh RETURN VALUES +Returns zero on failure to allocate memory, non-zero on success. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in HTML format. 
+.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_html_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_html_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_html_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_html_free 3 , +.Xr lowdown_html_new 3 +.Sh STANDARDS +The referenced HTML5 standard is +.Lk https://www.w3.org/TR/html52 HTML5.2 . +Output is compatible with prior HTML5 standards. diff --git a/man/lowdown_latex_free.3 b/man/lowdown_latex_free.3 @@ -0,0 +1,73 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_LATEX_FREE 3 +.Os +.Sh NAME +.Nm lowdown_latex_free +.Nd free a Markdown LaTeX renderer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void +.Fo lowdown_latex_free +.Fa "void *arg" +.Fc +.Sh DESCRIPTION +Frees the LaTeX renderer created with +.Xr lowdown_latex_new 3 . +If +.Va arg +is +.Dv NULL , +the function does nothing. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in LaTeX format. +.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_latex_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_latex_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_latex_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_latex_new 3 diff --git a/man/lowdown_latex_new.3 b/man/lowdown_latex_new.3 @@ -0,0 +1,93 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_LATEX_NEW 3 +.Os +.Sh NAME +.Nm lowdown_latex_new +.Nd allocate a Markdown LaTeX renderer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void * +.Fo lowdown_latex_new +.Fa "const struct lowdown_opts *opts" +.Fc +.Sh DESCRIPTION +Allocates a LaTeX renderer using +.Fa opts->oflags , +or zero if +.Fa opts +is +.Dv NULL . +This field is documented in +.Xr lowdown 3 . +The returned pointer may be used with multiple invocations of +.Xr lowdown_latex_rndr 3 +and must be freed with +.Xr lowdown_latex_free 3 . +.Pp +The bits recognised in +.Fa opts->oflags +are +.Dv LOWDOWN_LATEX_NUMBERED , +.Dv LOWDOWN_LATEX_SKIP_HTML , +and +.Dv LOWDOWN_STANDALONE . +.Sh RETURN VALUES +Returns a pointer to the renderer or +.Dv NULL +on memory failure. +The returned pointer must be freed with +.Xr lowdown_latex_free 3 . +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in LaTeX format. 
+.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_latex_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_latex_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_latex_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_latex_free 3 , +.Xr lowdown_latex_rndr 3 diff --git a/man/lowdown_latex_rndr.3 b/man/lowdown_latex_rndr.3 @@ -0,0 +1,84 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_LATEX_RNDR 3 +.Os +.Sh NAME +.Nm lowdown_latex_rndr +.Nd render Markdown into LaTeX +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft int +.Fo lowdown_latex_rndr +.Fa "struct lowdown_buf *out" +.Fa "void *arg" +.Fa "const struct lowdown_node *n" +.Fc +.Sh DESCRIPTION +Renders a node tree +.Fa n +created by +.Xr lowdown_doc_parse 3 +or +.Xr lowdown_diff 3 +using the LaTeX renderer +.Fa arg +as returned by +.Xr lowdown_latex_new 3 . +The output is written into +.Fa out , +which must be initialised and freed by the caller. +.Sh RETURN VALUES +Returns zero on failure to allocate memory, non-zero on success. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in LaTeX format. +.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_latex_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_latex_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_latex_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_latex_free 3 , +.Xr lowdown_latex_new 3 diff --git a/man/lowdown_metaq_free.3 b/man/lowdown_metaq_free.3 @@ -0,0 +1,50 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. 
+.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_METAQ_FREE 3 +.Os +.Sh NAME +.Nm lowdown_metaq_free +.Nd free rendered metadata key-value pairs +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void +.Fo lowdown_metaq_free +.Fa "struct lowdown_metaq *q" +.Fc +.Sh DESCRIPTION +Frees rendered metadata +.Fa q +as created by +.Xr lowdown_buf 3 , +.Xr lowdown_buf_diff 3 , +.Xr lowdown_file 3 , +.Xr lowdown_file_diff 3 , +or the low-level rendering functions. +.Pp +If +.Fa q +is +.Dv NULL , +the function does nothing. +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_doc_new 3 diff --git a/man/lowdown_node_free.3 b/man/lowdown_node_free.3 @@ -0,0 +1,49 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_NODE_FREE 3 +.Os +.Sh NAME +.Nm lowdown_node_free +.Nd free a parsed Markdown tree +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void +.Fo lowdown_node_free +.Fa "struct lowdown_node *n" +.Fc +.Sh DESCRIPTION +Frees a parsed tree +.Fa n +as created with +.Xr lowdown_doc_parse 3 +or +.Xr lowdown_diff 3 , +including all of its descendents. +If +.Fa n +is +.Dv NULL , +the function does nothing. +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_diff 3 , +.Xr lowdown_doc_new 3 diff --git a/man/lowdown_nroff_free.3 b/man/lowdown_nroff_free.3 @@ -0,0 +1,97 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017, 2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_NROFF_FREE 3 +.Os +.Sh NAME +.Nm lowdown_nroff_free +.Nd free a Markdown roff renderer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void +.Fo lowdown_nroff_free +.Fa "void *arg" +.Fc +.Sh DESCRIPTION +Frees the roff renderer created with +.Xr lowdown_nroff_new 3 . +If +.Va arg +is +.Dv NULL , +the function does nothing. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in +.Xr groff_ms 7 +format. +.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_nroff_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_nroff_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_nroff_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_nroff_new 3 +.Pp +This uses both the original troff +.Ar man +macros for +.At v7 , +defined in +.Xr man 7 , +and the +.Ar man-ext +groff extensions. +Both are implemented in mandoc. +.Pp +The troff +.Ar ms +macros are defined in +.Xr groff_ms 7 , +with the +.Ar mspdf +groff extensions described in +.Qq Portable Document Format Publishing with GNU Troff +by Keith Marshall. +Neither are implemented in mandoc. diff --git a/man/lowdown_nroff_new.3 b/man/lowdown_nroff_new.3 @@ -0,0 +1,151 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017--2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. 
+.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_NROFF_NEW 3 +.Os +.Sh NAME +.Nm lowdown_nroff_new +.Nd allocate a roff renderer for lowdown documents +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void * +.Fo lowdown_nroff_new +.Fa "const struct lowdown_opts *opts" +.Fc +.Sh DESCRIPTION +Allocates a roff renderer using +.Fa opts->oflags +and +.Fa opts->type , +or zero and +.Dv LOWDOWN_NROFF , +respectively, if +.Fa opts +is +.Dv NULL . +These are documented in +.Xr lowdown 3 . +The returned pointer may be used with multiple invocations of +.Xr lowdown_nroff_rndr 3 +and must be freed with +.Xr lowdown_nroff_free 3 . +.Pp +The bits recognised in +.Fa opts->oflags +are +.Dv LOWDOWN_NROFF_GROFF , +.Dv LOWDOWN_NROFF_NOLINK , +.Dv LOWDOWN_NROFF_NUMBERED , +.Dv LOWDOWN_NROFF_SHORTLINK , +.Dv LOWDOWN_NROFF_SKIP_HTML , +and +.Dv LOWDOWN_STANDALONE . +.Pp +The values recognised in +.Fa opts->type +are +.Dv LOWDOWN_MAN +and +.Dv LOWDOWN_NROFF : +anything else triggers +.Dv LOWDOWN_NROFF . +.Pp +If +.Dv LOWDOWN_NROFF_GROFF +is set in +.Dv LOWDOWN_MAN +mode, macros from the +.Ar man-ext +package as well as the original +.Ar man +are used in output. +These are supported by both groff and mandoc. +If in +.Dv LOWDOWN_NROFF +mode, GNU extensions to +.Ar ms +are used along with +.Ar mspdf . +These are only supported by groff. +.Sh RETURN VALUES +Returns a pointer to the renderer or +.Dv NULL +on memory failure. 
+The returned pointer must be freed with +.Xr lowdown_nroff_free 3 . +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in +.Xr groff_ms 7 +format. +.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_nroff_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_nroff_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_nroff_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_nroff_free 3 , +.Xr lowdown_nroff_rndr 3 +.Pp +This uses both the original troff +.Ar man +macros for +.At v7 , +defined in +.Xr man 7 , +and the +.Ar man-ext +groff extensions. +Both are implemented in mandoc. +.Pp +The troff +.Ar ms +macros are defined in +.Xr groff_ms 7 , +with the +.Ar mspdf +groff extensions described in +.Qq Portable Document Format Publishing with GNU Troff +by Keith Marshall. +Neither are implemented in mandoc. diff --git a/man/lowdown_nroff_rndr.3 b/man/lowdown_nroff_rndr.3 @@ -0,0 +1,114 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017--2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_NROFF_RNDR 3 +.Os +.Sh NAME +.Nm lowdown_nroff_rndr +.Nd render Markdown into roff +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft int +.Fo lowdown_nroff_rndr +.Fa "struct lowdown_buf *out" +.Fa "void *arg" +.Fa "const struct lowdown_node *n" +.Fc +.Sh DESCRIPTION +Renders a node tree +.Fa n +created by +.Xr lowdown_doc_parse 3 +or +.Xr lowdown_diff 3 +using the roff renderer +.Fa arg +as returned by +.Xr lowdown_nroff_new 3 . +The output is written into +.Fa out , +which must be initialised and freed by the caller. +.Pp +The output consists of roff output using the +.Ar ms +or +.Ar man +macro packages. +.Sh RETURN VALUES +Returns zero on failure to allocate memory, non-zero on success. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in +.Xr groff_ms 7 +format. 
+.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_nroff_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_nroff_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_nroff_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_nroff_free 3 , +.Xr lowdown_nroff_new 3 +.Pp +This uses both the original troff +.Ar man +macros for +.At v7 , +defined in +.Xr man 7 , +and the +.Ar man-ext +groff extensions. +Both are implemented in mandoc. +.Pp +The troff +.Ar ms +macros are defined in +.Xr groff_ms 7 , +with the +.Ar mspdf +groff extensions described in +.Qq Portable Document Format Publishing with GNU Troff +by Keith Marshall. +Neither are implemented in mandoc. diff --git a/man/lowdown_odt_free.3 b/man/lowdown_odt_free.3 @@ -0,0 +1,76 @@ +.\" Copyright (c) 2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_ODT_FREE 3 +.Os +.Sh NAME +.Nm lowdown_odt_free +.Nd free a Markdown OpenDocument renderer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void +.Fo lowdown_odt_free +.Fa "void *arg" +.Fc +.Sh DESCRIPTION +Frees the OpenDocument renderer created with +.Xr lowdown_odt_new 3 . +If +.Va arg +is +.Dv NULL , +the function does nothing. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in +.Dq flat +OpenDocument format. +.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_odt_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_odt_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_odt_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_odt_new 3 +.Sh STANDARDS +The referenced OpenDocument standard is +.Lk https://docs.oasis-open.org/office/OpenDocument/v1.3/ 1.3 . diff --git a/man/lowdown_odt_new.3 b/man/lowdown_odt_new.3 @@ -0,0 +1,106 @@ +.\" Copyright (c) 2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_ODT_NEW 3 +.Os +.Sh NAME +.Nm lowdown_odt_new +.Nd allocate a Markdown OpenDocument renderer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void * +.Fo lowdown_odt_new +.Fa "const struct lowdown_opts *opts" +.Fc +.Sh DESCRIPTION +Allocates an OpenDocument renderer using +.Fa opts->oflags , +or zero if +.Fa opts +is +.Dv NULL . +This field is documented in +.Xr lowdown 3 . +The returned pointer may be used with multiple invocations of +.Xr lowdown_odt_rndr 3 +and must be freed with +.Xr lowdown_odt_free 3 . +.Pp +The bits recognised in +.Fa opts->oflags +are +.Dv LOWDOWN_ODT_SKIP_HTML +and +.Dv LOWDOWN_STANDALONE . +.Pp +The +.Fa opts->odt.sty +field, if not +.Dv NULL , +overrides the default +.Li <office:font-face-decls> , +.Li <office:scripts> , +and +.Li <office:styles> +elements of the document styles. +.Sh RETURN VALUES +Returns a pointer to the renderer or +.Dv NULL +on memory failure. +The returned pointer must be freed with +.Xr lowdown_odt_free 3 . +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in +.Dq flat +OpenDocument format. 
+.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_odt_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_odt_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_odt_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_odt_free 3 , +.Xr lowdown_odt_rndr 3 +.Sh STANDARDS +The referenced OpenDocument standard is +.Lk https://docs.oasis-open.org/office/OpenDocument/v1.3/ 1.3 . diff --git a/man/lowdown_odt_rndr.3 b/man/lowdown_odt_rndr.3 @@ -0,0 +1,89 @@ +.\" Copyright (c) 2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_ODT_RNDR 3 +.Os +.Sh NAME +.Nm lowdown_odt_rndr +.Nd render Markdown into OpenDocument +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft int +.Fo lowdown_odt_rndr +.Fa "struct lowdown_buf *out" +.Fa "void *arg" +.Fa "const struct lowdown_node *n" +.Fc +.Sh DESCRIPTION +Renders a node tree +.Fa n +created by +.Xr lowdown_doc_parse 3 +or +.Xr lowdown_diff 3 +using the OpenDocument renderer +.Fa arg +as returned by +.Xr lowdown_odt_new 3 . +The output is written into +.Fa out , +which must be initialised and freed by the caller. +.Pp +The output consists of an OpenDocument document. +.Sh RETURN VALUES +Returns zero on failure to allocate memory, non-zero on success. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in +.Dq flat +OpenDocument format. +.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_odt_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_odt_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_odt_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_odt_free 3 , +.Xr lowdown_odt_new 3 +.Sh STANDARDS +The referenced OpenDocument standard is +.Lk https://docs.oasis-open.org/office/OpenDocument/v1.3/ 1.3 . 
diff --git a/man/lowdown_term_free.3 b/man/lowdown_term_free.3 @@ -0,0 +1,78 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_TERM_FREE 3 +.Os +.Sh NAME +.Nm lowdown_term_free +.Nd free a Markdown terminal renderer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void +.Fo lowdown_term_free +.Fa "void *arg" +.Fc +.Sh DESCRIPTION +Frees the terminal renderer created with +.Xr lowdown_term_new 3 . +If +.Va arg +is +.Dv NULL , +the function does nothing. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in ANSI terminal format. 
+.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) + err(1, NULL); + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_term_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_term_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_term_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_term_new 3 +.Sh STANDARDS +ANSI escape codes are described in ISO/IEC 6429, previously ECMA-48. diff --git a/man/lowdown_term_new.3 b/man/lowdown_term_new.3 @@ -0,0 +1,102 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_TERM_NEW 3 +.Os +.Sh NAME +.Nm lowdown_term_new +.Nd allocate a Markdown terminal renderer +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft void * +.Fo lowdown_term_new +.Fa "const struct lowdown_opts *opts" +.Fc +.Sh DESCRIPTION +Allocates a terminal renderer using +.Fa opts->cols , +.Fa opts->hmargin , +.Fa opts->vmargin , +and +.Fa opts->oflags , +or 80 and all others zero, respectively, if +.Fa opts +is +.Dv NULL . +These fields are documented in +.Xr lowdown 3 . +The returned pointer may be used with multiple invocations of +.Xr lowdown_term_rndr 3 +and must be freed with +.Xr lowdown_term_free 3 . +.Pp +The bits recognised in +.Fa opts->oflags +are +.Dv LOWDOWN_TERM_SHORTLINK , +.Dv LOWDOWN_TERM_NOCOLOUR , +and +.Dv LOWDOWN_TERM_NOLINK . +.Sh RETURN VALUES +Returns a pointer to the renderer or +.Dv NULL +on memory failure. +The returned pointer must be freed with +.Xr lowdown_term_free 3 . +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in ANSI terminal format. +.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) + err(1, NULL); + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_term_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_term_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_term_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_term_free 3 , +.Xr lowdown_term_rndr 3 +.Sh STANDARDS +ANSI escape codes are described in ISO/IEC 6429, previously ECMA-48. 
diff --git a/man/lowdown_term_rndr.3 b/man/lowdown_term_rndr.3 @@ -0,0 +1,99 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2020 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_TERM_RNDR 3 +.Os +.Sh NAME +.Nm lowdown_term_rndr +.Nd render Markdown into terminal output +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft int +.Fo lowdown_term_rndr +.Fa "struct lowdown_buf *out" +.Fa "void *arg" +.Fa "const struct lowdown_node *n" +.Fc +.Sh DESCRIPTION +Renders a node tree +.Fa n +created by +.Xr lowdown_doc_parse 3 +or +.Xr lowdown_diff 3 +using the terminal renderer +.Fa arg +as returned by +.Xr lowdown_term_new 3 . +The output is written into +.Fa out , +which must be initialised and freed by the caller. +.Pp +The output consists of UTF-8 encoded characters and ANSI (really ISO/IEC +6429) escape sequences. +.Pp +The caller is expected to have invoked +.Xr setlocale 3 +to a +.Qq UTF-8 +character encoding prior to using this function, otherwise UTF-8 +sequences will not be properly recognised. +.Sh RETURN VALUES +Returns zero on failure to allocate memory, non-zero on success. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs in ANSI terminal format. 
+.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) + err(1, NULL); + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if ((rndr = lowdown_term_new(NULL)) == NULL) + err(1, NULL); +if (!lowdown_term_rndr(out, rndr, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_term_free(rndr); +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 , +.Xr lowdown_term_free 3 , +.Xr lowdown_term_new 3 +.Sh STANDARDS +ANSI escape codes are described in ISO/IEC 6429, previously ECMA-48. diff --git a/man/lowdown_tree_rndr.3 b/man/lowdown_tree_rndr.3 @@ -0,0 +1,81 @@ +.\" $Id$ +.\" +.\" Copyright (c) 2017, 2021 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt LOWDOWN_TREE_RNDR 3 +.Os +.Sh NAME +.Nm lowdown_tree_rndr +.Nd render Markdown into debugging output +.Sh LIBRARY +.Lb liblowdown +.Sh SYNOPSIS +.In sys/queue.h +.In stdio.h +.In lowdown.h +.Ft int +.Fo lowdown_tree_rndr +.Fa "struct lowdown_buf *out" +.Fa "const struct lowdown_node *n" +.Fc +.Sh DESCRIPTION +Renders a node tree +.Fa n +created by +.Xr lowdown_doc_parse 3 +or +.Xr lowdown_diff 3 . +The output is written into +.Fa out , +which must be initialised and freed by the caller. +.Pp +The output consists of a UTF-8 tree. +The format is not standardised and subject to change. +.Pp +Unlike other renderers, +.Fn lowdown_tree_rndr +accepts no options and thus has no context. +.Sh RETURN VALUES +Returns zero on failure to allocate memory, non-zero on success. +.Sh EXAMPLES +The following parses +.Va b +of length +.Va bsz +and outputs the parse tree. +.Bd -literal -offset indent +struct lowdown_buf *out; +struct lowdown_doc *doc; +struct lowdown_node *n; +void *rndr; + +if ((doc = lowdown_doc_new(NULL)) == NULL) + err(1, NULL); +if ((n = lowdown_doc_parse(doc, NULL, b, bsz, NULL)) == NULL) + err(1, NULL); +if ((out = lowdown_buf_new(256)) == NULL) + err(1, NULL); +if (!lowdown_tree_rndr(out, n)) + err(1, NULL); + +fwrite(out->data, 1, out->size, stdout); + +lowdown_buf_free(out); +lowdown_node_free(n); +lowdown_doc_free(doc); +.Ed +.Sh SEE ALSO +.Xr lowdown 3 diff --git a/nroff.c b/nroff.c @@ -0,0 +1,1940 @@ +/* $Id$ */ +/* + * Copyright (c) 2008, Natacha Porté + * Copyright (c) 2011, Vicent Martí + * Copyright (c) 2014, Xavier Mendez, Devin Torres and the Hoedown authors + * Copyright (c) 2016--2021 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "lowdown.h" +#include "extern.h" + +enum nfont { + NFONT_ITALIC = 0, /* italic */ + NFONT_BOLD, /* bold */ + NFONT_FIXED, /* fixed-width */ + NFONT__MAX +}; + +struct nroff { + struct hentryq headers_used; /* headers we've seen */ + int man; /* whether man(7) */ + int post_para; /* for choosing PP/LP */ + unsigned int flags; /* output flags */ + ssize_t headers_offs; /* header offset */ + enum nfont fonts[NFONT__MAX]; /* see bqueue_font() */ + struct bnodeq **foots; /* footnotes */ + size_t footsz; /* footnote size */ +}; + +enum bscope { + BSCOPE_BLOCK = 0, + BSCOPE_SPAN, + BSCOPE_PDFHREF, + BSCOPE_LITERAL, + BSCOPE_FONT, + BSCOPE_COLOUR +}; + +/* + * Instead of writing directly into the output buffer, we write + * temporarily into bnodes, which are converted into output. These + * nodes are aware of whether they need surrounding newlines. 
+ */ +struct bnode { + char *nbuf; /* (safe) 1st data */ + char *buf; /* (unsafe) 2nd data */ + char *nargs; /* (safe) 1st args */ + char *args; /* (unsafe) 2nd args */ + int close; /* BNODE_COLOUR/FONT */ + int tblhack; /* BSCOPE_SPAN */ + enum bscope scope; /* scope */ + unsigned int font; /* if BNODE_FONT */ +#define BFONT_ITALIC 0x01 +#define BFONT_BOLD 0x02 +#define BFONT_FIXED 0x04 + unsigned int colour; /* if BNODE_COLOUR */ +#define BFONT_BLUE 0x01 +#define BFONT_RED 0x02 + TAILQ_ENTRY(bnode) entries; +}; + +TAILQ_HEAD(bnodeq, bnode); + +/* + * Escape unsafe text into roff output such that no roff features are + * invoked by the text (macros, escapes, etc.). + * If "oneline" is non-zero, newlines are replaced with spaces. + * If "literal", doesn't strip leading space. + * Return zero on failure, non-zero on success. + */ +static int +hesc_nroff(struct lowdown_buf *ob, const char *data, + size_t size, int oneline, int literal, int esc) +{ + size_t i = 0; + unsigned char ch; + + if (size == 0) + return 1; + + if (!esc && oneline) { + assert(!literal); + for (i = 0; i < size; i++) { + ch = data[i] == '\n' ? ' ' : data[i]; + if (!hbuf_putc(ob, ch)) + return 0; + if (ch != ' ') + continue; + while (i < size && isspace((unsigned char)data[i])) + i++; + i--; + } + return 1; + } else if (!esc) + return hbuf_put(ob, data, size); + + /* Strip leading whitespace. */ + + if (!literal && ob->size > 0 && ob->data[ob->size - 1] == '\n') + while (i < size && (data[i] == ' ' || data[i] == '\n')) + i++; + + /* + * According to mandoc_char(7), we need to escape the backtick, + * single apostrophe, and tilde or else they'll be considered as + * special Unicode output. + * Slashes need to be escaped too. + * We also escape double-quotes because this text might be used + * within quoted macro arguments. 
+ */ + + for ( ; i < size; i++) + switch (data[i]) { + case '^': + if (!HBUF_PUTSL(ob, "\\(ha")) + return 0; + break; + case '~': + if (!HBUF_PUTSL(ob, "\\(ti")) + return 0; + break; + case '`': + if (!HBUF_PUTSL(ob, "\\(ga")) + return 0; + break; + case '"': + if (!HBUF_PUTSL(ob, "\\(dq")) + return 0; + break; + case '\n': + if (!hbuf_putc(ob, oneline ? ' ' : '\n')) + return 0; + if (literal) + break; + + /* Prevent leading spaces on the line. */ + + while (i + 1 < size && + (data[i + 1] == ' ' || + data[i + 1] == '\n')) + i++; + break; + case '\\': + if (!HBUF_PUTSL(ob, "\\e")) + return 0; + break; + case '\'': + case '.': + if (!oneline && + ob->size > 0 && + ob->data[ob->size - 1] == '\n' && + !HBUF_PUTSL(ob, "\\&")) + return 0; + /* FALLTHROUGH */ + default: + if (!hbuf_putc(ob, data[i])) + return 0; + break; + } + + return 1; +} + +static const char * +nstate_colour_buf(unsigned int ft) +{ + static char fonts[10]; + + fonts[0] = '\0'; + if (ft == BFONT_BLUE) + strlcat(fonts, "blue", sizeof(fonts)); + else if (ft == BFONT_RED) + strlcat(fonts, "red", sizeof(fonts)); + else + strlcat(fonts, "black", sizeof(fonts)); + return fonts; +} + +/* + * For compatibility with traditional troff, return non-block font code + * using the correct sequence of \fX, \f(xx, and \f[xxx]. 
+ */
+static const char *
+nstate_font_buf(unsigned int ft, int blk)
+{
+	static char	 fonts[10]; /* returned to the caller; rewritten on every call */
+	char		*cp = fonts;
+	size_t		 len = 0; /* number of font letters that will be written */
+
+	if (ft & BFONT_FIXED)
+		len++;
+	if (ft & BFONT_BOLD)
+		len++;
+	if (ft & BFONT_ITALIC)
+		len++;
+	if (ft == 0)
+		len++;
+
+	/* Only the inline (non-block) form brackets multi-letter names. */
+
+	if (!blk && len == 3)
+		(*cp++) = '[';
+	else if (!blk && len == 2)
+		(*cp++) = '(';
+
+	if (ft & BFONT_FIXED)
+		(*cp++) = 'C';
+	if (ft & BFONT_BOLD)
+		(*cp++) = 'B';
+	if (ft & BFONT_ITALIC)
+		(*cp++) = 'I';
+	if (ft == 0)
+		(*cp++) = 'R'; /* no font bits set: roman */
+
+	if (!blk && len == 3)
+		(*cp++) = ']';
+
+	(*cp++) = '\0';
+	return fonts;
+}
+
+static int
+bqueue_colour(struct bnodeq *bq, enum lowdown_chng chng, int close)
+{
+	struct bnode	*bn;
+
+	if ((bn = calloc(1, sizeof(struct bnode))) == NULL)
+		return 0;
+	TAILQ_INSERT_TAIL(bq, bn, entries);
+	bn->scope = BSCOPE_COLOUR;
+	bn->close = close;
+	/* Closing resets to 0; else blue for insertions, red for anything else. */
+	bn->colour = close ? 0 :
+		chng == LOWDOWN_CHNG_INSERT ?
+		BFONT_BLUE : BFONT_RED;
+	return 1;
+}
+
+static int
+bqueue_font(const struct nroff *st, struct bnodeq *bq, int close)
+{
+	struct bnode	*bn;
+
+	if ((bn = calloc(1, sizeof(struct bnode))) == NULL)
+		return 0;
+	TAILQ_INSERT_TAIL(bq, bn, entries);
+	bn->scope = BSCOPE_FONT;
+	bn->close = close;
+	/* Snapshot every font whose counter in "st" is currently non-zero. */
+	if (st->fonts[NFONT_FIXED])
+		bn->font |= BFONT_FIXED;
+	if (st->fonts[NFONT_BOLD])
+		bn->font |= BFONT_BOLD;
+	if (st->fonts[NFONT_ITALIC])
+		bn->font |= BFONT_ITALIC;
+	return 1;
+}
+
+static struct bnode *
+bqueue_node(struct bnodeq *bq, enum bscope scope, const char *text)
+{
+	struct bnode	*bn;
+
+	if ((bn = calloc(1, sizeof(struct bnode))) == NULL)
+		return NULL;
+	bn->scope = scope;
+	/* "text", if given, is copied into the safe (unescaped) buffer. */
+	if (text != NULL && (bn->nbuf = strdup(text)) == NULL) {
+		free(bn);
+		return NULL;
+	}
+	TAILQ_INSERT_TAIL(bq, bn, entries); /* queued only once fully built */
+	return bn;
+}
+
+static struct bnode *
+bqueue_span(struct bnodeq *bq, const char *text)
+{
+
+	return bqueue_node(bq, BSCOPE_SPAN, text);
+}
+
+static struct bnode *
+bqueue_block(struct bnodeq *bq, const char *text)
+{
+
+	return bqueue_node(bq, BSCOPE_BLOCK, text);
+}
+
+static struct bnode *
+bqueue_semiblock(struct bnodeq *bq, const char *text)
+{
+
+	return bqueue_node(bq, BSCOPE_PDFHREF, text);
+}
+
+static void
+bnode_free(struct bnode *bn)
+{
+
+	free(bn->args);
+	free(bn->nargs);
+	free(bn->nbuf);
+	free(bn->buf);
+	free(bn);
+}
+
+static void
+bqueue_free(struct bnodeq *bq)
+{
+	struct bnode	*bn;
+
+	/* Empties the queue; the queue head itself is not freed. */
+	while ((bn = TAILQ_FIRST(bq)) != NULL) {
+		TAILQ_REMOVE(bq, bn, entries);
+		bnode_free(bn);
+	}
+}
+
+static void
+bqueue_strip_paras(struct bnodeq *bq)
+{
+	struct bnode	*bn;
+
+	/* Drop leading ".PP", ".IP" and ".LP" blocks; stop at anything else. */
+	while ((bn = TAILQ_FIRST(bq)) != NULL) {
+		if (bn->scope != BSCOPE_BLOCK || bn->nbuf == NULL)
+			break;
+		if (strcmp(bn->nbuf, ".PP") &&
+		    strcmp(bn->nbuf, ".IP") &&
+		    strcmp(bn->nbuf, ".LP"))
+			break;
+		TAILQ_REMOVE(bq, bn, entries);
+		bnode_free(bn);
+	}
+}
+
+static int
+bqueue_flush(struct lowdown_buf *ob, const struct bnodeq *bq, int esc)
+{
+	const struct bnode	*bn, *chk, *next;
+	const char		*cp;
+	int		 	 nextblk; /* node is written as a line of its own */
+
+	TAILQ_FOREACH(bn, bq, entries) {
+		nextblk = 0;
+
+		/* A link macro following text: "\c" joins it to that text. */
+
+		if (bn->scope == BSCOPE_PDFHREF &&
+		    ob->size > 0 &&
+		    ob->data[ob->size - 1] != '\n' &&
+		    !hbuf_puts(ob, "\\c"))
+			return 0;
+
+		/*
+		 * Block scopes start with a newline.
+		 * Also have colours use their own block, as otherwise
+		 * (bugs in groff?) inline colour selection after a
+		 * hyperlink macro causes line breaks.
+		 * Besides, having spaces around changing colour, which
+		 * indicates differences, improves readability.
+		 */
+
+		if (bn->scope == BSCOPE_BLOCK ||
+		    bn->scope == BSCOPE_PDFHREF ||
+		    bn->scope == BSCOPE_COLOUR) {
+			if (ob->size > 0 &&
+			    ob->data[ob->size - 1] != '\n' &&
+			    !hbuf_putc(ob, '\n'))
+				return 0;
+			nextblk = 1;
+		}
+
+		/*
+		 * Fonts can either be macros or inline depending upon
+		 * where they sit relative to a macro block: an opening
+		 * font looks at the node after it, a closing font at
+		 * the node before it.
+		 */
+
+		if (bn->scope == BSCOPE_FONT) {
+			chk = bn->close ?
+				TAILQ_PREV(bn, bnodeq, entries) :
+				TAILQ_NEXT(bn, entries);
+			if (chk != NULL &&
+			    (chk->scope == BSCOPE_PDFHREF ||
+			     chk->scope == BSCOPE_BLOCK)) {
+				if (ob->size > 0 &&
+				    ob->data[ob->size - 1] != '\n' &&
+				    !hbuf_putc(ob, '\n'))
+					return 0;
+				nextblk = 1;
+			}
+		}
+
+		/* Print font and colour escapes. */
+
+		if (bn->scope == BSCOPE_FONT && nextblk) {
+			if (!hbuf_printf(ob, ".ft %s",
+			    nstate_font_buf(bn->font, nextblk)))
+				return 0;
+		} else if (bn->scope == BSCOPE_FONT) {
+			if (!hbuf_printf(ob, "\\f%s",
+			    nstate_font_buf(bn->font, nextblk)))
+				return 0;
+		} else if (bn->scope == BSCOPE_COLOUR) {
+			assert(nextblk);
+			if (!hbuf_printf(ob, ".gcolor %s",
+			    nstate_colour_buf(bn->colour)))
+				return 0;
+		}
+
+		/*
+		 * A "tblhack" is used by a span macro to indicate
+		 * that it should start its own line, but that data
+		 * continues to flow after it. This is only used in
+		 * tables with T}, at this point.
+		 */
+
+		if (bn->scope == BSCOPE_SPAN && bn->tblhack &&
+		    ob->size > 0 && ob->data[ob->size - 1] != '\n')
+			if (!hbuf_putc(ob, '\n'))
+				return 0;
+
+		/*
+		 * If we're a span, double-check to see whether we
+		 * should introduce a line with an escape: safe text
+		 * beginning a line with a period or apostrophe gets
+		 * a leading "\&".
+		 */
+
+		if (bn->scope == BSCOPE_SPAN &&
+		    bn->nbuf != NULL && ob->size > 0 &&
+		    ob->data[ob->size - 1] == '\n' &&
+		    (bn->nbuf[0] == '.' || bn->nbuf[0] == '\'') &&
+		    !HBUF_PUTSL(ob, "\\&"))
+			return 0;
+
+		/* Safe data need not be escaped. */
+
+		if (bn->nbuf != NULL && !hbuf_puts(ob, bn->nbuf))
+			return 0;
+
+		/* Unsafe data must be escaped. */
+
+		if (bn->scope == BSCOPE_LITERAL) {
+			assert(bn->buf != NULL);
+			if (!hesc_nroff(ob, bn->buf,
+			    strlen(bn->buf), 0, 1, esc))
+				return 0;
+		} else if (bn->buf != NULL)
+			if (!hesc_nroff(ob, bn->buf,
+			    strlen(bn->buf), 0, 0, esc))
+				return 0;
+
+		/*
+		 * A link directly followed by a span whose unsafe text
+		 * doesn't begin with a space or newline gets "-A \c".
+		 */
+
+		if (bn->scope == BSCOPE_PDFHREF &&
+		    (next = TAILQ_NEXT(bn, entries)) != NULL &&
+		    next->scope == BSCOPE_SPAN &&
+		    next->buf != NULL &&
+		    next->buf[0] != ' ' &&
+		    next->buf[0] != '\n' &&
+		    !HBUF_PUTSL(ob, " -A \"\\c\""))
+			return 0;
+
+		/*
+		 * Macro arguments follow after space. For links, these
+		 * must all be printed on the same line.
+		 */
+
+		if (bn->nargs != NULL &&
+		    (bn->scope == BSCOPE_BLOCK ||
+		     bn->scope == BSCOPE_PDFHREF)) {
+			assert(nextblk);
+			if (!hbuf_putc(ob, ' '))
+				return 0;
+			for (cp = bn->nargs; *cp != '\0'; cp++)
+				if (!hbuf_putc(ob,
+				    *cp == '\n' ? ' ' : *cp))
+					return 0;
+		}
+
+		if (bn->args != NULL) {
+			assert(nextblk);
+			assert(bn->scope == BSCOPE_BLOCK ||
+			    bn->scope == BSCOPE_PDFHREF);
+			if (!hbuf_putc(ob, ' '))
+				return 0;
+			if (!hesc_nroff(ob, bn->args,
+			    strlen(bn->args), 1, 0, esc))
+				return 0;
+		}
+
+		/* Finally, trailing newline. */
+
+		if (nextblk && ob->size > 0 &&
+		    ob->data[ob->size - 1] != '\n' &&
+		    !hbuf_putc(ob, '\n'))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Convert a link into a short-link and place the escaped output into a
+ * returned string.
+ * Returns NULL on memory allocation failure.
+ */
+static char *
+hbuf2shortlink(const struct lowdown_buf *link)
+{
+	struct lowdown_buf	*tmp = NULL, *slink = NULL;
+	char			*ret = NULL;
+
+	if ((tmp = hbuf_new(32)) == NULL)
+		goto out;
+	if ((slink = hbuf_new(32)) == NULL)
+		goto out;
+	if (!hbuf_shortlink(tmp, link))
+		goto out;
+	if (!hesc_nroff(slink, tmp->data, tmp->size, 1, 0, 1))
+		goto out;
+	ret = strndup(slink->data, slink->size); /* NULL here also means failure */
+out:
+	hbuf_free(tmp);
+	hbuf_free(slink);
+	return ret;
+}
+
+/*
+ * Manage hypertext linking with the groff "pdfhref" macro or simply
+ * using italics. 
XXX: use italics because the UR/UE macro doesn't + * support leading un-spaced content, so "x[foo](https://foo.com)y" + * wouldn't work. Until a solution is found, let's just italicise the + * link text (or link, if no text is found). Return FALSE on error + * (memory), TRUE on success. + */ +static int +putlink(struct bnodeq *obq, struct nroff *st, + const struct lowdown_buf *link, + const struct lowdown_buf *id, + struct bnodeq *bq, enum halink_type type) +{ + struct lowdown_buf *ob = NULL; + struct bnode *bn; + size_t i; + int rc = 0, local = 0; + + /* + * For -Tman or without .pdfhref, format the link as-is, with + * text then link, or use the various shorteners. + */ + + if (st->man || !(st->flags & LOWDOWN_NROFF_GROFF)) { + if (bq == NULL) { + st->fonts[NFONT_ITALIC]++; + if (!bqueue_font(st, obq, 0)) + goto out; + if ((bn = bqueue_span(obq, NULL)) == NULL) + goto out; + if (st->flags & LOWDOWN_NROFF_SHORTLINK) { + bn->nbuf = hbuf2shortlink(link); + if (bn->nbuf == NULL) + goto out; + } else { + bn->buf = strndup(link->data, link->size); + if (bn->buf == NULL) + goto out; + } + st->fonts[NFONT_ITALIC]--; + if (!bqueue_font(st, obq, 1)) + goto out; + rc = 1; + goto out; + } + st->fonts[NFONT_BOLD]++; + if (!bqueue_font(st, obq, 0)) + goto out; + TAILQ_CONCAT(obq, bq, entries); + st->fonts[NFONT_BOLD]--; + if (!bqueue_font(st, obq, 1)) + goto out; + if (st->flags & LOWDOWN_NROFF_NOLINK) { + rc = 1; + goto out; + } + if (bqueue_span(obq, " <") == NULL) + goto out; + st->fonts[NFONT_ITALIC]++; + if (!bqueue_font(st, obq, 0)) + goto out; + if ((bn = bqueue_span(obq, NULL)) == NULL) + goto out; + if (st->flags & LOWDOWN_NROFF_SHORTLINK) { + bn->nbuf = hbuf2shortlink(link); + if (bn->nbuf == NULL) + goto out; + } else { + bn->buf = strndup(link->data, link->size); + if (bn->buf == NULL) + goto out; + } + st->fonts[NFONT_ITALIC]--; + if (!bqueue_font(st, obq, 1)) + goto out; + if (bqueue_span(obq, ">") == NULL) + goto out; + rc = 1; + goto out; + } + + /* Otherwise, 
use .pdfhref. */ + + if ((ob = hbuf_new(32)) == NULL) + goto out; + + /* Encode the URL. */ + + local = type != HALINK_EMAIL && + link->size && link->data[0] == '#'; + + if (!HBUF_PUTSL(ob, "-D ")) + goto out; + if (type == HALINK_EMAIL && !HBUF_PUTSL(ob, "mailto:")) + goto out; + for (i = local ? 1 : 0; i < link->size; i++) { + if (!isprint((unsigned char)link->data[i]) || + strchr("<>\\^`{|}\"", link->data[i]) != NULL) { + if (!hbuf_printf(ob, "%%%.2X", link->data[i])) + goto out; + } else if (!hbuf_putc(ob, link->data[i])) + goto out; + } + + if (!HBUF_PUTSL(ob, " -- ")) + goto out; + if (bq == NULL && !hbuf_putb(ob, link)) + goto out; + else if (bq != NULL && !bqueue_flush(ob, bq, 1)) + goto out; + + /* + * If we have an ID, emit it before the link part. This is + * important because this isn't printed, so using "-A \c" will + * have no effect, so that's used on the subsequent link. + */ + + if (id != NULL && id->size > 0) { + bn = bqueue_semiblock(obq, ".pdfhref M"); + if (bn == NULL) + goto out; + bn->args = strndup(id->data, id->size); + if (bn->args == NULL) + goto out; + } + + /* Finally, emit the link contents. */ + + bn = local ? + bqueue_semiblock(obq, ".pdfhref L") : + bqueue_semiblock(obq, ".pdfhref W"); + if (bn == NULL) + goto out; + if ((bn->nargs = strndup(ob->data, ob->size)) == NULL) + goto out; + + rc = 1; +out: + hbuf_free(ob); + return rc; +} + +/* + * Return FALSE on failure, TRUE on success. + */ +static int +rndr_autolink(struct nroff *st, struct bnodeq *obq, + const struct rndr_autolink *param) +{ + + return putlink(obq, st, ¶m->link, NULL, NULL, param->type); +} + +static int +rndr_blockcode(const struct nroff *st, struct bnodeq *obq, + const struct rndr_blockcode *param) +{ + struct bnode *bn; + + /* + * XXX: intentionally don't use LD/DE because it introduces + * vertical space. This means that subsequent blocks + * (paragraphs, etc.) will have a double-newline. 
+ */ + + if (bqueue_block(obq, ".LP") == NULL) + return 0; + + if (st->man && (st->flags & LOWDOWN_NROFF_GROFF)) { + if (bqueue_block(obq, ".EX") == NULL) + return 0; + } else { + if (bqueue_block(obq, ".nf") == NULL) + return 0; + if (bqueue_block(obq, ".ft CR") == NULL) + return 0; + } + + if ((bn = calloc(1, sizeof(struct bnode))) == NULL) + return 0; + TAILQ_INSERT_TAIL(obq, bn, entries); + bn->scope = BSCOPE_LITERAL; + bn->buf = strndup(param->text.data, param->text.size); + if (bn->buf == NULL) + return 0; + + if (st->man && (st->flags & LOWDOWN_NROFF_GROFF)) + return bqueue_block(obq, ".EE") != NULL; + + if (bqueue_block(obq, ".ft") == NULL) + return 0; + return bqueue_block(obq, ".fi") != NULL; +} + +static int +rndr_definition_title(struct bnodeq *obq, struct bnodeq *bq) +{ + + if (bqueue_block(obq, ".LP") == NULL) + return 0; + TAILQ_CONCAT(obq, bq, entries); + return 1; +} + +static int +rndr_definition_data(struct bnodeq *obq, struct bnodeq *bq) +{ + /* + * The IP creates an empty vertical space til I figure out a + * better way to do hanging lists, so account for it by backing + * up first. + * + * XXX: this produces different results on mandoc and groff as + * of 2022-02-19: mandoc backs up one space, while groff backs + * up two. The groff behaviour is what we want, so that the + * text is flush on the next line, but this is good enough. + */ + + if (bqueue_block(obq, ".if n \\\n.sp -1v") == NULL) + return 0; + if (bqueue_block(obq, ".if t \\\n.sp -0.25v\n") == NULL) + return 0; + if (bqueue_block(obq, ".IP \"\" \\*(PI") == NULL) + return 0; + + /* Strip out leading paragraphs. */ + + bqueue_strip_paras(bq); + TAILQ_CONCAT(obq, bq, entries); + return 1; +} + +static int +rndr_list(struct nroff *st, struct bnodeq *obq, + const struct lowdown_node *n, struct bnodeq *bq) +{ + /* + * If we have a nested list, we need to use RS/RE to indent the + * nested component. Otherwise the "IP" used for the titles and + * contained paragraphs won't indent properly. 
+ */ + + for (n = n->parent; n != NULL; n = n->parent) + if (n->type == LOWDOWN_LISTITEM) + break; + + if (n != NULL && bqueue_block(obq, ".RS") == NULL) + return 0; + TAILQ_CONCAT(obq, bq, entries); + if (n != NULL && bqueue_block(obq, ".RE") == NULL) + return 0; + + st->post_para = 1; + return 1; +} + +static int +rndr_blockquote(struct nroff *st, + struct bnodeq *obq, struct bnodeq *bq) +{ + + if (bqueue_block(obq, ".RS") == NULL) + return 0; + TAILQ_CONCAT(obq, bq, entries); + st->post_para = 1; + return bqueue_block(obq, ".RE") != NULL; +} + +static int +rndr_codespan(struct bnodeq *obq, const struct rndr_codespan *param) +{ + struct bnode *bn; + + if ((bn = bqueue_span(obq, NULL)) == NULL) + return 0; + bn->buf = strndup(param->text.data, param->text.size); + return bn->buf != NULL; +} + +static int +rndr_linebreak(struct bnodeq *obq) +{ + + return bqueue_block(obq, ".br") != NULL; +} + +static int +rndr_header(struct nroff *st, struct bnodeq *obq, + struct bnodeq *bq, const struct lowdown_node *n) +{ + ssize_t level; + struct bnode *bn; + struct lowdown_buf *buf = NULL; + const struct lowdown_buf *nbuf; + int rc = 0; + + level = (ssize_t)n->rndr_header.level + st->headers_offs; + if (level < 1) + level = 1; + + /* + * For man(7), we use SH for the first-level section, SS for + * other sections. TODO: use PP then italics or something for + * third-level etc. + */ + + if (st->man) { + bn = level == 1 ? + bqueue_block(obq, ".SH") : + bqueue_block(obq, ".SS"); + if (bn == NULL) + return 0; + TAILQ_CONCAT(obq, bq, entries); + st->post_para = 1; + return 1; + } + + /* + * If we're using ms(7) w/groff extensions and w/o numbering, + * used the numbered version of the SH macro. + * If we're numbered ms(7), use NH. + */ + + bn = (st->flags & LOWDOWN_NROFF_NUMBERED) ? 
+ bqueue_block(obq, ".NH") : bqueue_block(obq, ".SH"); + if (bn == NULL) + goto out; + + if ((st->flags & LOWDOWN_NROFF_NUMBERED) || + (st->flags & LOWDOWN_NROFF_GROFF)) + if (asprintf(&bn->nargs, "%zd", level) == -1) { + bn->nargs = NULL; + goto out; + } + + TAILQ_CONCAT(obq, bq, entries); + st->post_para = 1; + + /* + * Used in -mspdf output for creating a TOC and intra-document + * linking. + */ + + if (st->flags & LOWDOWN_NROFF_GROFF) { + if ((buf = hbuf_new(32)) == NULL) + goto out; + if (!hbuf_extract_text(buf, n)) + goto out; + + if ((bn = bqueue_block(obq, ".pdfhref")) == NULL) + goto out; + if (asprintf(&bn->nargs, "O %zd", level) == -1) { + bn->nargs = NULL; + goto out; + } + + /* + * No need to quote: quotes will be converted by + * escaping into roff. + */ + + bn->args = strndup(buf->data, buf->size); + if (bn->args == NULL) + goto out; + + if ((bn = bqueue_block(obq, ".pdfhref M")) == NULL) + goto out; + + /* + * If the identifier comes from the user, we need to + * escape it accordingly; otherwise, use it directly as + * the hbuf_id() function will take care of it. + */ + + if (n->rndr_header.attr_id.size) { + bn->args = strndup + (n->rndr_header.attr_id.data, + n->rndr_header.attr_id.size); + if (bn->args == NULL) + goto out; + } else { + nbuf = hbuf_id(buf, NULL, &st->headers_used); + if (nbuf == NULL) + goto out; + bn->nargs = strndup(nbuf->data, nbuf->size); + if (bn->nargs == NULL) + goto out; + } + } + + rc = 1; +out: + hbuf_free(buf); + return rc; +} + +/* + * Return FALSE on failure, TRUE on success. 
+ */ +static ssize_t +rndr_link(struct nroff *st, struct bnodeq *obq, struct bnodeq *bq, + const struct rndr_link *param) +{ + + return putlink(obq, st, ¶m->link, + ¶m->attr_id, bq, HALINK_NORMAL); +} + +static int +rndr_listitem(struct bnodeq *obq, const struct lowdown_node *n, + struct bnodeq *bq, const struct rndr_listitem *param) +{ + struct bnode *bn; + const char *box; + + if (param->flags & HLIST_FL_ORDERED) { + if ((bn = bqueue_block(obq, ".IP")) == NULL) + return 0; + if (asprintf(&bn->nargs, + "\"%zu. \"", param->num) == -1) + return 0; + } else if (param->flags & HLIST_FL_UNORDERED) { + if (param->flags & HLIST_FL_CHECKED) + box = "[u2611]"; + else if (param->flags & HLIST_FL_UNCHECKED) + box = "[u2610]"; + else + box = "(bu"; + if ((bn = bqueue_block(obq, ".IP")) == NULL) + return 0; + if (asprintf(&bn->nargs, "\"\\%s\" 2", box) == -1) + return 0; + } + + /* Strip out all leading redundant paragraphs. */ + + bqueue_strip_paras(bq); + TAILQ_CONCAT(obq, bq, entries); + + /* + * Suppress trailing space if we're not in a block and there's a + * list item that comes after us (i.e., anything after us). + */ + + if ((n->rndr_listitem.flags & HLIST_FL_BLOCK) || + (n->rndr_listitem.flags & HLIST_FL_DEF)) + return 1; + + if (TAILQ_NEXT(n, entries) != NULL) { + if (bqueue_block(obq, ".if n \\\n.sp -1") == NULL) + return 0; + if (bqueue_block(obq, ".if t \\\n.sp -0.25v\n") == NULL) + return 0; + } + + return 1; +} + +static int +rndr_paragraph(struct nroff *st, const struct lowdown_node *n, + struct bnodeq *obq, struct bnodeq *nbq) +{ + struct bnode *bn; + + /* + * Subsequent paragraphs get a PP for the indentation; otherwise, use + * LP and forego the indentation. If we're in a list item, make sure + * that we don't reset our text indent by using an "IP". 
+ */ + + for ( ; n != NULL; n = n->parent) + if (n->type == LOWDOWN_LISTITEM) + break; + if (n != NULL) + bn = bqueue_block(obq, ".IP"); + else if (st->post_para) + bn = bqueue_block(obq, ".LP"); + else + bn = bqueue_block(obq, ".PP"); + if (bn == NULL) + return 0; + + TAILQ_CONCAT(obq, nbq, entries); + st->post_para = 0; + return 1; +} + +static int +rndr_raw_block(const struct nroff *st, + struct bnodeq *obq, const struct rndr_blockhtml *param) +{ + struct bnode *bn; + + if (st->flags & LOWDOWN_NROFF_SKIP_HTML) + return 1; + if ((bn = calloc(1, sizeof(struct bnode))) == NULL) + return 0; + TAILQ_INSERT_TAIL(obq, bn, entries); + bn->scope = BSCOPE_LITERAL; + bn->buf = strndup(param->text.data, param->text.size); + return bn->buf != NULL; +} + +static int +rndr_hrule(struct nroff *st, struct bnodeq *obq) +{ + + /* The LP is to reset the margins. */ + + if (bqueue_block(obq, ".LP") == NULL) + return 0; + + /* Set post_para so we get a following LP not PP. */ + + st->post_para = 1; + + if (st->man) + return bqueue_block(obq, "\\l\'2i'") != NULL; + + return bqueue_block(obq, + ".ie d HR \\{\\\n" + ".HR\n" + "\\}\n" + ".el \\{\\\n" + ".sp 1v\n" + "\\l'\\n(.lu'\n" + ".sp 1v\n" + ".\\}") != NULL; +} + +static int +rndr_image(struct nroff *st, struct bnodeq *obq, + const struct rndr_image *param) +{ + const char *cp; + size_t sz; + struct bnode *bn; + + if (!st->man) { + cp = memrchr(param->link.data, '.', param->link.size); + if (cp != NULL) { + cp++; + sz = param->link.size - (cp - param->link.data); + if ((sz == 2 && memcmp(cp, "ps", 2) == 0) || + (sz == 3 && memcmp(cp, "eps", 3) == 0)) { + bn = bqueue_block(obq, ".PSPIC"); + if (bn == NULL) + return 0; + bn->args = strndup(param->link.data, + param->link.size); + return bn->args != NULL; + } + } + } + + /* In -Tman, we have no images: treat as a link. 
*/ + + st->fonts[NFONT_BOLD]++; + if (!bqueue_font(st, obq, 0)) + return 0; + if ((bn = bqueue_span(obq, NULL)) == NULL) + return 0; + bn->buf = strndup(param->alt.data, param->alt.size); + if (bn->buf == NULL) + return 0; + st->fonts[NFONT_BOLD]--; + if (!bqueue_font(st, obq, 1)) + return 0; + if (st->flags & LOWDOWN_NROFF_NOLINK) + return bqueue_span(obq, " (Image)") != NULL; + if (bqueue_span(obq, " (Image: ") == NULL) + return 0; + st->fonts[NFONT_ITALIC]++; + if (!bqueue_font(st, obq, 0)) + return 0; + if ((bn = bqueue_span(obq, NULL)) == NULL) + return 0; + if (st->flags & LOWDOWN_NROFF_SHORTLINK) { + bn->nbuf = hbuf2shortlink(¶m->link); + if (bn->nbuf == NULL) + return 0; + } else { + bn->buf = strndup(param->link.data, param->link.size); + if (bn->buf == NULL) + return 0; + } + st->fonts[NFONT_ITALIC]--; + if (!bqueue_font(st, obq, 1)) + return 0; + return bqueue_span(obq, ")") != NULL; +} + +static int +rndr_raw_html(const struct nroff *st, + struct bnodeq *obq, const struct rndr_raw_html *param) +{ + struct bnode *bn; + + if (st->flags & LOWDOWN_NROFF_SKIP_HTML) + return 1; + if ((bn = calloc(1, sizeof(struct bnode))) == NULL) + return 0; + TAILQ_INSERT_TAIL(obq, bn, entries); + bn->scope = BSCOPE_LITERAL; + bn->buf = strndup(param->text.data, param->text.size); + return bn->buf != NULL; +} + +static int +rndr_table(struct nroff *st, struct bnodeq *obq, struct bnodeq *bq) +{ + const char *macro; + + macro = st->man || !(st->flags & LOWDOWN_NROFF_GROFF) ? 
+ ".TS" : ".TS H"; + if (bqueue_block(obq, macro) == NULL) + return 0; + if (bqueue_block(obq, "tab(|) expand allbox;") == NULL) + return 0; + TAILQ_CONCAT(obq, bq, entries); + st->post_para = 1; + return bqueue_block(obq, ".TE") != NULL; +} + +static int +rndr_table_header(const struct nroff *st, struct bnodeq *obq, + struct bnodeq *bq, const struct rndr_table_header *param) +{ + size_t i; + struct lowdown_buf *ob; + struct bnode *bn; + int rc = 0; + + if ((ob = hbuf_new(32)) == NULL) + return 0; + + /* + * This specifies the header layout. + * We make the header bold, but this is arbitrary. + */ + + if ((bn = bqueue_block(obq, NULL)) == NULL) + goto out; + for (i = 0; i < param->columns; i++) { + if (i > 0 && !HBUF_PUTSL(ob, " ")) + goto out; + switch (param->flags[i] & HTBL_FL_ALIGNMASK) { + case HTBL_FL_ALIGN_CENTER: + if (!HBUF_PUTSL(ob, "cb")) + goto out; + break; + case HTBL_FL_ALIGN_RIGHT: + if (!HBUF_PUTSL(ob, "rb")) + goto out; + break; + default: + if (!HBUF_PUTSL(ob, "lb")) + goto out; + break; + } + } + if ((bn->nbuf = strndup(ob->data, ob->size)) == NULL) + goto out; + + /* Now the body layout. 
*/ + + hbuf_truncate(ob); + if ((bn = bqueue_block(obq, NULL)) == NULL) + goto out; + for (i = 0; i < param->columns; i++) { + if (i > 0 && !HBUF_PUTSL(ob, " ")) + goto out; + switch (param->flags[i] & HTBL_FL_ALIGNMASK) { + case HTBL_FL_ALIGN_CENTER: + if (!HBUF_PUTSL(ob, "c")) + goto out; + break; + case HTBL_FL_ALIGN_RIGHT: + if (!HBUF_PUTSL(ob, "r")) + goto out; + break; + default: + if (!HBUF_PUTSL(ob, "l")) + goto out; + break; + } + } + if (!hbuf_putc(ob, '.')) + goto out; + if ((bn->nbuf = strndup(ob->data, ob->size)) == NULL) + goto out; + + TAILQ_CONCAT(obq, bq, entries); + + if (!st->man && (st->flags & LOWDOWN_NROFF_GROFF) && + bqueue_block(obq, ".TH") == NULL) + goto out; + + rc = 1; +out: + hbuf_free(ob); + return rc; +} + +static int +rndr_table_row(struct bnodeq *obq, struct bnodeq *bq) +{ + + TAILQ_CONCAT(obq, bq, entries); + return bqueue_block(obq, NULL) != NULL; +} + +static int +rndr_table_cell(struct bnodeq *obq, struct bnodeq *bq, + const struct rndr_table_cell *param) +{ + struct bnode *bn; + + if (param->col > 0 && bqueue_span(obq, "|") == NULL) + return 0; + if (bqueue_span(obq, "T{\n") == NULL) + return 0; + TAILQ_CONCAT(obq, bq, entries); + if ((bn = bqueue_span(obq, "T}")) == NULL) + return 0; + bn->tblhack = 1; + return 1; +} + +static int +rndr_superscript(struct bnodeq *obq, struct bnodeq *bq) +{ + + if (bqueue_span(obq, "\\u\\s-3") == NULL) + return 0; + TAILQ_CONCAT(obq, bq, entries); + return bqueue_span(obq, "\\s+3\\d") != NULL; +} + +static int +rndr_footnote_def(const struct nroff *st, struct bnodeq *obq, + struct bnodeq *bq, size_t num) +{ + struct bnode *bn; + + /* + * Use groff_ms(7)-style footnotes. + * We know that the definitions are delivered in the same order + * as the footnotes are made, so we can use the automatic + * ordering facilities. 
+ */ + + if (!st->man) { + if (bqueue_block(obq, ".FS") == NULL) + return 0; + bqueue_strip_paras(bq); + TAILQ_CONCAT(obq, bq, entries); + return bqueue_block(obq, ".FE") != NULL; + } + + /* + * For man(7), just print as normal, with a leading footnote + * number in italics and superscripted. + */ + + if (bqueue_block(obq, ".LP") == NULL) + return 0; + if ((bn = bqueue_span(obq, NULL)) == NULL) + return 0; + if (asprintf(&bn->nbuf, + "\\0\\fI\\u\\s-3%zu\\s+3\\d\\fP\\0", num) == -1) { + bn->nbuf = NULL; + return 0; + } + bqueue_strip_paras(bq); + TAILQ_CONCAT(obq, bq, entries); + return 1; +} + +static int +rndr_footnotes(const struct nroff *st, struct bnodeq *obq) +{ + size_t i; + + if (st->footsz == 0) + return 1; + + if (st->man) { + if (bqueue_block(obq, ".LP") == NULL) + return 0; + if (bqueue_block(obq, ".sp 3") == NULL) + return 0; + if (bqueue_block(obq, "\\l\'2i'") == NULL) + return 0; + } + + for (i = 0; i < st->footsz; i++) + if (!rndr_footnote_def(st, obq, st->foots[i], i + 1)) + return 0; + + return 1; +} + +static int +rndr_footnote_ref(struct nroff *st, struct bnodeq *obq, + struct bnodeq *bq) +{ + struct bnode *bn; + void *pp; + size_t num = st->footsz; + + /* + * Use groff_ms(7)-style automatic footnoting, else just put a + * reference number in small superscripts. + */ + + if ((bn = bqueue_span(obq, NULL)) == NULL) + return 0; + + if (!st->man) + bn->nbuf = strdup("\\**"); + else if (asprintf(&bn->nbuf, + "\\u\\s-3%zu\\s+3\\d", num + 1) == -1) + bn->nbuf = NULL; + + if (bn->nbuf == NULL) + return 0; + + /* + * For -Tman, queue the footnote for printing at the end of the + * document. For -Tms, emit it now in a FS/FE block. 
+ */ + + if (st->man) { + pp = recallocarray(st->foots, st->footsz, + st->footsz + 1, sizeof(struct bnodeq *)); + if (pp == NULL) + return 0; + st->foots = pp; + st->foots[st->footsz++] = malloc(sizeof(struct bnodeq)); + if (st->foots[num] == NULL) + return 0; + TAILQ_INIT(st->foots[num]); + TAILQ_CONCAT(st->foots[num], bq, entries); + return 1; + } else { + if (bqueue_block(obq, ".FS") == NULL) + return 0; + bqueue_strip_paras(bq); + TAILQ_CONCAT(obq, bq, entries); + return bqueue_block(obq, ".FE") != NULL; + } +} + +static int +rndr_entity(const struct nroff *st, + struct bnodeq *obq, const struct rndr_entity *param) +{ + char buf[32]; + const char *ent; + struct bnode *bn; + int32_t iso; + size_t sz; + + /* + * Handle named entities if "ent" is non-NULL, use unicode + * escapes for values above 126, and just the regular character + * if within the ASCII set. + */ + + if ((ent = entity_find_nroff(¶m->text, &iso)) != NULL) { + sz = strlen(ent); + if (sz == 1) + snprintf(buf, sizeof(buf), "\\%s", ent); + else if (sz == 2) + snprintf(buf, sizeof(buf), "\\(%s", ent); + else + snprintf(buf, sizeof(buf), "\\[%s]", ent); + return bqueue_span(obq, buf) != NULL; + } else if (iso > 0 && iso > 126) { + if (st->flags & LOWDOWN_NROFF_GROFF) + snprintf(buf, sizeof(buf), "\\[u%.4llX]", + (unsigned long long)iso); + else + snprintf(buf, sizeof(buf), "\\U\'%.4llX\'", + (unsigned long long)iso); + return bqueue_span(obq, buf) != NULL; + } else if (iso > 0) { + snprintf(buf, sizeof(buf), "%c", iso); + return bqueue_span(obq, buf) != NULL; + } + + if ((bn = bqueue_span(obq, NULL)) == NULL) + return 0; + bn->buf = strndup(param->text.data, param->text.size); + return bn->buf != NULL; +} + +/* + * Split "b" at sequential white-space, outputting the results in the + * line-based "env" macro. The content in "b" has already been escaped. 
+ */ +static int +rndr_meta_multi(struct bnodeq *obq, const char *b, const char *env) +{ + const char *start; + size_t sz, i, bsz; + struct bnode *bn; + char macro[32]; + + if (b == NULL) + return 1; + + assert(strlen(env) < sizeof(macro) - 1); + snprintf(macro, sizeof(macro), ".%s", env); + bsz = strlen(b); + + for (i = 0; i < bsz; i++) { + while (i < bsz && + isspace((unsigned char)b[i])) + i++; + if (i == bsz) + continue; + start = &b[i]; + + for (; i < bsz; i++) + if (i < bsz - 1 && + isspace((unsigned char)b[i]) && + isspace((unsigned char)b[i + 1])) + break; + if ((sz = &b[i] - start) == 0) + continue; + + if (bqueue_block(obq, macro) == NULL) + return 0; + if ((bn = bqueue_span(obq, NULL)) == NULL) + return 0; + if ((bn->nbuf = strndup(start, sz)) == NULL) + return 0; + } + + return 1; +} + +/* + * Fill "mq" by serialising child nodes into strings. The serialised + * strings are escaped. + */ +static int +rndr_meta(struct nroff *st, const struct bnodeq *bq, + struct lowdown_metaq *mq, const struct rndr_meta *params) +{ + struct lowdown_meta *m; + struct lowdown_buf *ob; + ssize_t val; + const char *ep; + + if ((m = calloc(1, sizeof(struct lowdown_meta))) == NULL) + return 0; + TAILQ_INSERT_TAIL(mq, m, entries); + + m->key = strndup(params->key.data, params->key.size); + if (m->key == NULL) + return 0; + + if ((ob = hbuf_new(32)) == NULL) + return 0; + if (!bqueue_flush(ob, bq, 1)) { + hbuf_free(ob); + return 0; + } + m->value = strndup(ob->data, ob->size); + hbuf_free(ob); + if (m->value == NULL) + return 0; + + if (strcmp(m->key, "shiftheadinglevelby") == 0) { + val = (ssize_t)strtonum + (m->value, -100, 100, &ep); + if (ep == NULL) + st->headers_offs = val + 1; + } else if (strcmp(m->key, "baseheaderlevel") == 0) { + val = (ssize_t)strtonum + (m->value, 1, 100, &ep); + if (ep == NULL) + st->headers_offs = val; + } + + return 1; +} + +static int +rndr_doc_header(const struct nroff *st, + struct bnodeq *obq, const struct lowdown_metaq *mq) +{ + struct 
lowdown_buf *ob = NULL; + struct bnode *bn; + const struct lowdown_meta *m; + int rc = 0; + const char *author = NULL, *title = NULL, + *affil = NULL, *date = NULL, + *copy = NULL, *sec = NULL, + *rcsauthor = NULL, *rcsdate = NULL, + *source = NULL, *volume = NULL; + + if (!(st->flags & LOWDOWN_STANDALONE)) + return 1; + + TAILQ_FOREACH(m, mq, entries) + if (strcasecmp(m->key, "author") == 0) + author = m->value; + else if (strcasecmp(m->key, "copyright") == 0) + copy = m->value; + else if (strcasecmp(m->key, "affiliation") == 0) + affil = m->value; + else if (strcasecmp(m->key, "date") == 0) + date = m->value; + else if (strcasecmp(m->key, "rcsauthor") == 0) + rcsauthor = rcsauthor2str(m->value); + else if (strcasecmp(m->key, "rcsdate") == 0) + rcsdate = rcsdate2str(m->value); + else if (strcasecmp(m->key, "title") == 0) + title = m->value; + else if (strcasecmp(m->key, "section") == 0) + sec = m->value; + else if (strcasecmp(m->key, "source") == 0) + source = m->value; + else if (strcasecmp(m->key, "volume") == 0) + volume = m->value; + + /* Overrides. 
*/ + + if (title == NULL) + title = "Untitled article"; + if (sec == NULL) + sec = "7"; + if (rcsdate != NULL) + date = rcsdate; + if (rcsauthor != NULL) + author = rcsauthor; + + bn = bqueue_block(obq, + ".\\\" -*- mode: troff; coding: utf-8 -*-"); + if (bn == NULL) + goto out; + + if (!st->man) { + if (copy != NULL) { + bn = bqueue_block(obq, + ".ds LF Copyright \\(co"); + if (bn == NULL) + goto out; + if ((bn->nargs = strdup(copy)) == NULL) + goto out; + } + if (date != NULL) { + if (copy != NULL) + bn = bqueue_block(obq, ".ds RF"); + else + bn = bqueue_block(obq, ".DA"); + if (bn == NULL) + goto out; + if ((bn->nargs = strdup(date)) == NULL) + goto out; + } + + if (bqueue_block(obq, ".TL") == NULL) + goto out; + if ((bn = bqueue_span(obq, NULL)) == NULL) + goto out; + if ((bn->nbuf = strdup(title)) == NULL) + goto out; + if (!rndr_meta_multi(obq, author, "AU")) + goto out; + if (!rndr_meta_multi(obq, affil, "AI")) + goto out; + } else { + if ((ob = hbuf_new(32)) == NULL) + goto out; + + /* + * The syntax of this macro, according to man(7), is + * TH name section date [source [volume]]. + */ + + if ((bn = bqueue_block(obq, ".TH")) == NULL) + goto out; + + if (!hbuf_putc(ob, '"') || + !hesc_nroff(ob, title, strlen(title), 1, 0, 0) || + !HBUF_PUTSL(ob, "\" \"") || + !hesc_nroff(ob, sec, strlen(sec), 1, 0, 0) || + !hbuf_putc(ob, '"')) + goto out; + + /* + * We may not have a date (or it may be empty), in which + * case man(7) says the current date is used. + */ + + if (!HBUF_PUTSL(ob, " \"")) + goto out; + if (date != NULL && + !hesc_nroff(ob, date, strlen(date), 1, 0, 0)) + goto out; + if (!HBUF_PUTSL(ob, "\"")) + goto out; + + /* + * Don't print these unless necessary, as the latter + * overrides the default system printing for the + * section. 
+ */ + + if (source != NULL || volume != NULL) { + if (!HBUF_PUTSL(ob, " \"")) + goto out; + if (source != NULL && !hesc_nroff + (ob, source, strlen(source), 1, 0, 0)) + goto out; + if (!HBUF_PUTSL(ob, "\"")) + goto out; + if (!HBUF_PUTSL(ob, " \"")) + goto out; + if (volume != NULL && !hesc_nroff + (ob, volume, strlen(volume), 1, 0, 0)) + goto out; + if (!HBUF_PUTSL(ob, "\"")) + goto out; + } + if ((bn->nargs = strndup(ob->data, ob->size)) == NULL) + goto out; + } + + rc = 1; +out: + hbuf_free(ob); + return rc; +} + +/* + * Actually render the node "n" and all of its children into the output + * buffer "ob", chopping "chop" from the current node if specified. + * Return what (if anything) we should chop from the next node or <0 on + * failure. + */ +static int +rndr(struct lowdown_metaq *mq, struct nroff *st, + const struct lowdown_node *n, struct bnodeq *obq) +{ + const struct lowdown_node *child; + int rc = 1; + enum nfont fonts[NFONT__MAX]; + struct bnodeq tmpbq; + struct bnode *bn; + + TAILQ_INIT(&tmpbq); + + if ((n->chng == LOWDOWN_CHNG_INSERT || + n->chng == LOWDOWN_CHNG_DELETE) && + !bqueue_colour(obq, n->chng, 0)) + goto out; + + /* + * Font management. + * roff doesn't handle its own font stack, so we can't set fonts + * and step out of them in a nested way. 
+ */ + + memcpy(fonts, st->fonts, sizeof(fonts)); + + switch (n->type) { + case LOWDOWN_CODESPAN: + st->fonts[NFONT_FIXED]++; + if (!bqueue_font(st, obq, 0)) + goto out; + break; + case LOWDOWN_EMPHASIS: + st->fonts[NFONT_ITALIC]++; + if (!bqueue_font(st, obq, 0)) + goto out; + break; + case LOWDOWN_HIGHLIGHT: + case LOWDOWN_DOUBLE_EMPHASIS: + st->fonts[NFONT_BOLD]++; + if (!bqueue_font(st, obq, 0)) + goto out; + break; + case LOWDOWN_TRIPLE_EMPHASIS: + st->fonts[NFONT_ITALIC]++; + st->fonts[NFONT_BOLD]++; + if (!bqueue_font(st, obq, 0)) + goto out; + break; + default: + break; + } + + TAILQ_FOREACH(child, &n->children, entries) + if (!rndr(mq, st, child, &tmpbq)) + goto out; + + switch (n->type) { + case LOWDOWN_BLOCKCODE: + rc = rndr_blockcode(st, obq, &n->rndr_blockcode); + break; + case LOWDOWN_BLOCKQUOTE: + rc = rndr_blockquote(st, obq, &tmpbq); + break; + case LOWDOWN_DEFINITION: + rc = rndr_list(st, obq, n, &tmpbq); + break; + case LOWDOWN_DEFINITION_DATA: + rc = rndr_definition_data(obq, &tmpbq); + break; + case LOWDOWN_DEFINITION_TITLE: + rc = rndr_definition_title(obq, &tmpbq); + break; + case LOWDOWN_DOC_HEADER: + rc = rndr_doc_header(st, obq, mq); + break; + case LOWDOWN_META: + if (n->chng != LOWDOWN_CHNG_DELETE) + rc = rndr_meta(st, &tmpbq, mq, &n->rndr_meta); + break; + case LOWDOWN_HEADER: + rc = rndr_header(st, obq, &tmpbq, n); + break; + case LOWDOWN_HRULE: + rc = rndr_hrule(st, obq); + break; + case LOWDOWN_LIST: + rc = rndr_list(st, obq, n, &tmpbq); + break; + case LOWDOWN_LISTITEM: + rc = rndr_listitem(obq, n, &tmpbq, &n->rndr_listitem); + break; + case LOWDOWN_PARAGRAPH: + rc = rndr_paragraph(st, n, obq, &tmpbq); + break; + case LOWDOWN_TABLE_BLOCK: + rc = rndr_table(st, obq, &tmpbq); + break; + case LOWDOWN_TABLE_HEADER: + rc = rndr_table_header(st, + obq, &tmpbq, &n->rndr_table_header); + break; + case LOWDOWN_TABLE_ROW: + rc = rndr_table_row(obq, &tmpbq); + break; + case LOWDOWN_TABLE_CELL: + rc = rndr_table_cell(obq, &tmpbq, 
&n->rndr_table_cell); + break; + case LOWDOWN_ROOT: + TAILQ_CONCAT(obq, &tmpbq, entries); + rc = rndr_footnotes(st, obq); + break; + case LOWDOWN_BLOCKHTML: + rc = rndr_raw_block(st, obq, &n->rndr_blockhtml); + break; + case LOWDOWN_LINK_AUTO: + rc = rndr_autolink(st, obq, &n->rndr_autolink); + break; + case LOWDOWN_CODESPAN: + rc = rndr_codespan(obq, &n->rndr_codespan); + break; + case LOWDOWN_IMAGE: + rc = rndr_image(st, obq, &n->rndr_image); + break; + case LOWDOWN_LINEBREAK: + rc = rndr_linebreak(obq); + break; + case LOWDOWN_LINK: + rc = rndr_link(st, obq, &tmpbq, &n->rndr_link); + break; + case LOWDOWN_SUPERSCRIPT: + rc = rndr_superscript(obq, &tmpbq); + break; + case LOWDOWN_FOOTNOTE: + rc = rndr_footnote_ref(st, obq, &tmpbq); + break; + case LOWDOWN_RAW_HTML: + rc = rndr_raw_html(st, obq, &n->rndr_raw_html); + break; + case LOWDOWN_NORMAL_TEXT: + if ((bn = bqueue_span(obq, NULL)) == NULL) + goto out; + bn->buf = strndup + (n->rndr_normal_text.text.data, + n->rndr_normal_text.text.size); + if (bn->buf == NULL) + goto out; + break; + case LOWDOWN_ENTITY: + rc = rndr_entity(st, obq, &n->rndr_entity); + break; + default: + TAILQ_CONCAT(obq, &tmpbq, entries); + break; + } + + if (!rc) + goto out; + + /* Restore the font stack. 
*/ + + switch (n->type) { + case LOWDOWN_CODESPAN: + case LOWDOWN_EMPHASIS: + case LOWDOWN_HIGHLIGHT: + case LOWDOWN_DOUBLE_EMPHASIS: + case LOWDOWN_TRIPLE_EMPHASIS: + memcpy(st->fonts, fonts, sizeof(fonts)); + if (!bqueue_font(st, obq, 1)) { + rc = 0; + goto out; + } + break; + default: + break; + } + + if ((n->chng == LOWDOWN_CHNG_INSERT || + n->chng == LOWDOWN_CHNG_DELETE) && + !bqueue_colour(obq, n->chng, 1)) { + rc = 0; + goto out; + } + + rc = 1; +out: + bqueue_free(&tmpbq); + return rc; +} + +int +lowdown_nroff_rndr(struct lowdown_buf *ob, + void *arg, const struct lowdown_node *n) +{ + struct nroff *st = arg; + struct lowdown_metaq metaq; + int rc = 0; + struct bnodeq bq; + size_t i; + + TAILQ_INIT(&metaq); + TAILQ_INIT(&bq); + TAILQ_INIT(&st->headers_used); + + memset(st->fonts, 0, sizeof(st->fonts)); + st->headers_offs = 1; + st->post_para = 0; + + if (rndr(&metaq, st, n, &bq)) { + if (!bqueue_flush(ob, &bq, 1)) + goto out; + if (ob->size && ob->data[ob->size - 1] != '\n' && + !hbuf_putc(ob, '\n')) + goto out; + rc = 1; + } + +out: + for (i = 0; i < st->footsz; i++) { + bqueue_free(st->foots[i]); + free(st->foots[i]); + } + + free(st->foots); + st->footsz = 0; + st->foots = NULL; + lowdown_metaq_free(&metaq); + bqueue_free(&bq); + hentryq_clear(&st->headers_used); + return rc; +} + +void * +lowdown_nroff_new(const struct lowdown_opts *opts) +{ + struct nroff *p; + + if ((p = calloc(1, sizeof(struct nroff))) == NULL) + return NULL; + + p->flags = opts != NULL ? opts->oflags : 0; + p->man = opts != NULL && opts->type == LOWDOWN_MAN; + return p; +} + +void +lowdown_nroff_free(void *arg) +{ + + /* No need to check NULL: pass directly to free(). */ + + free(arg); +} diff --git a/nroff.o b/nroff.o Binary files differ. 
diff --git a/odt.c b/odt.c @@ -0,0 +1,2246 @@ +/* + * Copyright (c) 2021 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <inttypes.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "lowdown.h" +#include "extern.h" + +/* + * Maximum length of any style. This should account for fixed prefix + * text (e.g., "Frame" at longest) then an incrementing size_t. + */ +#define STYLE_NAME_LEN 32 + +/* + * Default size for a blockquote (paragraph indent). + */ +static const float TAB_LEN = 1.25; + +/* + * Default size for a list indent. Lists are first indented by the + * number of tabs (starting at zero), then giving a full list indent, + * then each sub-list gets half again this. + */ +static const float LIST_LEN = 1.27; + +/* + * A style in <office-automatic-styles>. 
/*
 * A style in <office-automatic-styles>.
 */
struct odt_sty {
	char		 name[STYLE_NAME_LEN]; /* name */
	size_t		 offs; /* offset ("tabs") from zero */
	size_t		 parent; /* parent (index into the odt "stys" array) or (size_t)-1 */
	enum lowdown_rndrt type; /* specific type of style */
	int		 foot; /* in a footnote */
	int		 fmt; /* general type of style (one of ODT_STY_xxx) */
#define	ODT_STY_TEXT	 1 /* text (inline) */
#define	ODT_STY_PARA	 2 /* paragraph */
#define	ODT_STY_UL	 3 /* unordered list */
#define	ODT_STY_OL	 4 /* ordered list */
#define	ODT_STY_H1	 5 /* h1 heading */
#define	ODT_STY_H2	 6 /* h2 heading */
#define	ODT_STY_H3	 7 /* h3 heading */
#define	ODT_STY_TBL	 8 /* table */
#define	ODT_STY_TBL_PARA 9 /* table contents */
#define	ODT_STY_LIT	 10 /* literal */
};

/*
 * A change.  I'm not sure we'll need anything but "ins", so this could
 * just be an array of int, but whatever.
 */
struct odt_chng {
	int		 ins; /* inserted vs deleted */
};

/*
 * Our internal state object.  Beyond retaining our flags, this also
 * keeps output state in terms of the styles that need printing.
 */
struct odt {
	struct hentryq	 headers_used; /* headers we've seen */
	ssize_t		 headers_offs; /* header offset */
	unsigned int	 flags; /* "oflags" in lowdown_opts */
	struct odt_sty	*stys; /* styles for content */
	size_t		 stysz; /* number of styles */
	size_t		 sty_T; /* "T" styles (next number used when naming one) */
	size_t		 sty_Table; /* "Table" styles */
	size_t		 sty_L; /* "L" styles */
	size_t		 sty_P; /* "P" styles */
	size_t		 offs; /* offs or (size_t)-1 in list */
	size_t		 list; /* root list style or (size_t)-1 */
	int		 foot; /* in footnote or not */
	size_t		 footcount; /* footnote */
	struct odt_chng	*chngs; /* changes in content */
	size_t		 chngsz; /* number of changes */
	char		*sty; /* external styles or NULL */
};

/* Forward declaration of the node renderer (defined later in the file). */
static int rndr(struct lowdown_buf *,
	struct lowdown_metaq *, void *, const struct lowdown_node *);
+ */ +static struct odt_sty * +odt_style_add(struct odt *st) +{ + void *pp; + + pp = reallocarray(st->stys, + st->stysz + 1, sizeof(struct odt_sty)); + if (pp == NULL) + return NULL; + st->stys = pp; + memset(&st->stys[st->stysz], 0, sizeof(struct odt_sty)); + st->stys[st->stysz].parent = (size_t)-1; + return &st->stys[st->stysz++]; +} + +/* + * Create or fetch an inline style corresponding to the node type. + * Return NULL on error or the style name on success. + */ +static const char * +odt_style_add_text(struct odt *st, enum lowdown_rndrt type) +{ + size_t i; + struct odt_sty *s; + + for (i = 0; i < st->stysz; i++) + if (st->stys[i].type == type) { + assert(st->stys[i].fmt == ODT_STY_TEXT); + return st->stys[i].name; + } + + if ((s = odt_style_add(st)) == NULL) + return NULL; + + s->fmt = ODT_STY_TEXT; + s->type = type; + snprintf(s->name, sizeof(s->name), "T%zu", st->sty_T++); + return s->name; +} + +/* + * Flush out all of the styles and automatic styles. Return FALSE on + * failure, TRUE on success. + */ +static int +odt_sty_flush(struct lowdown_buf *ob, + const struct odt *st, const struct odt_sty *sty) +{ + size_t i; + + /* + * Lists and non-lists have a different XML element name, and + * non-lists designate whether in-line or paragraphs. 
+ */ + + if (sty->type == LOWDOWN_LIST && + !HBUF_PUTSL(ob, "<text:list-style")) + return 0; + if (sty->type != LOWDOWN_LIST && + !HBUF_PUTSL(ob, "<style:style")) + return 0; + + switch (sty->fmt) { + case ODT_STY_TEXT: + if (!HBUF_PUTSL(ob, " style:family=\"text\"")) + return 0; + break; + case ODT_STY_TBL_PARA: + case ODT_STY_PARA: + case ODT_STY_LIT: + case ODT_STY_H1: + case ODT_STY_H2: + case ODT_STY_H3: + if (!HBUF_PUTSL(ob, " style:family=\"paragraph\"")) + return 0; + break; + case ODT_STY_TBL: + if (!HBUF_PUTSL(ob, " style:family=\"table\"")) + return 0; + break; + } + + if (!hbuf_printf(ob, " style:name=\"%s\"", sty->name)) + return 0; + + /* + * Paragraphs in lists need to link to the list, then set some + * other crap found in libreoffice output. + */ + + switch (sty->fmt) { + case ODT_STY_LIT: + if (!HBUF_PUTSL(ob, + " style:parent-style-name=\"Preformatted_20_Text\"")) + return 0; + break; + case ODT_STY_PARA: + if (!sty->foot && !HBUF_PUTSL(ob, + " style:parent-style-name=\"Standard\"")) + return 0; + if (sty->foot && !HBUF_PUTSL(ob, + " style:parent-style-name=\"Footnote\"")) + return 0; + if (sty->parent != (size_t)-1 && !hbuf_printf(ob, + " style:list-style-name=\"%s\"", + st->stys[sty->parent].name)) + return 0; + break; + case ODT_STY_TBL_PARA: + if (sty->foot && !HBUF_PUTSL(ob, + " style:parent-style-name=\"Footnote\"")) + return 0; + if (!sty->foot && !HBUF_PUTSL(ob, + " style:parent-style-name=\"Table_20_Contents\"")) + return 0; + break; + case ODT_STY_H1: + if (!HBUF_PUTSL(ob, + " style:parent-style-name=\"Heading_20_1\"")) + return 0; + break; + case ODT_STY_H2: + if (!HBUF_PUTSL(ob, + " style:parent-style-name=\"Heading_20_2\"")) + return 0; + break; + case ODT_STY_H3: + if (!HBUF_PUTSL(ob, + " style:parent-style-name=\"Heading_20_3\"")) + return 0; + break; + default: + break; + } + + if (!HBUF_PUTSL(ob, ">\n")) + return 0; + + /* + * I'm not sure what in this is necessary and what isn't yet. 
+ * The template followed is from libreoffice output. + */ + + switch (sty->type) { + case LOWDOWN_TABLE_BLOCK: + if (!hbuf_printf(ob, + "<style:table-properties" + " fo:margin-left=\"%.3fcm\"" + " fo:margin-right=\"0cm\"" + " table:align=\"margins\"/>\n", + sty->offs * TAB_LEN)) + return 0; + break; + case LOWDOWN_HEADER: + break; + case LOWDOWN_PARAGRAPH: + if (sty->offs == 0) + break; + if (!hbuf_printf(ob, + "<style:paragraph-properties" + " fo:margin-left=\"%.3fcm\"" + " fo:margin-right=\"0cm\"" + " fo:text-indent=\"0cm\"" + " style:auto-text-indent=\"false\"/>\n", + sty->offs * TAB_LEN)) + return 0; + break; + case LOWDOWN_LIST: + for (i = 0; i < 10; i++) { + if (sty->fmt == ODT_STY_OL && !hbuf_printf(ob, + "<text:list-level-style-number" + " text:level=\"%zu\"" + " text:style-name=\"Numbering_20_Symbols\"" + " style:num-suffix=\".\"" + " style:num-format=\"1\">\n" + "<style:list-level-properties" + " text:list-level-position-and-space-mode=" + "\"label-alignment\">\n" + "<style:list-level-label-alignment" + " text:label-followed-by=\"listtab\"" + " text:list-tab-stop-position=\"%.3fcm\"" + " fo:text-indent=\"-0.635cm\"" + " fo:margin-left=\"%.3fcm\"/>\n" + "</style:list-level-properties>\n" + "</text:list-level-style-number>\n", + i + 1, + (TAB_LEN * sty->offs) + LIST_LEN + + ((LIST_LEN / 2.0) * i), + (TAB_LEN * sty->offs) + LIST_LEN + + ((LIST_LEN / 2.0) * i))) + return 0; + if (sty->fmt == ODT_STY_UL && !hbuf_printf(ob, + "<text:list-level-style-bullet" + " text:level=\"%zu\"" + " text:style-name=\"Bullet_20_Symbols\"" + " text:bullet-char=\"•\">\n" + "<style:list-level-properties" + " text:list-level-position-and-space-mode=" + "\"label-alignment\">\n" + "<style:list-level-label-alignment" + " text:label-followed-by=\"listtab\"" + " text:list-tab-stop-position=\"%.3fcm\"" + " fo:text-indent=\"-0.635cm\"" + " fo:margin-left=\"%.3fcm\"/>\n" + "</style:list-level-properties>\n" + "</text:list-level-style-bullet>\n", + i + 1, + (TAB_LEN * sty->offs) + 
LIST_LEN + + ((LIST_LEN / 2.0) * i), + (TAB_LEN * sty->offs) + LIST_LEN + + ((LIST_LEN / 2.0) * i))) + return 0; + } + break; + case LOWDOWN_SUPERSCRIPT: + if (!HBUF_PUTSL(ob, + "<style:text-properties" + " style:text-position=\"super 58%\"/>\n")) + return 0; + break; + case LOWDOWN_TRIPLE_EMPHASIS: + if (!HBUF_PUTSL(ob, + "<style:text-properties" + " fo:font-style=\"italic\"" + " style:font-style-asian=\"italic\"" + " style:font-style-complex=\"italic\"" + " fo:font-weight=\"bold\"" + " style:font-weight-asian=\"bold\"" + " style:font-weight-complex=\"bold\"/>\n")) + return 0; + break; + case LOWDOWN_DOUBLE_EMPHASIS: + if (!HBUF_PUTSL(ob, + "<style:text-properties" + " fo:font-weight=\"bold\"" + " style:font-weight-asian=\"bold\"" + " style:font-weight-complex=\"bold\"/>\n")) + return 0; + break; + case LOWDOWN_EMPHASIS: + if (!HBUF_PUTSL(ob, + "<style:text-properties" + " fo:font-style=\"italic\"" + " style:font-style-asian=\"italic\"" + " style:font-style-complex=\"italic\"/>\n")) + return 0; + break; + case LOWDOWN_STRIKETHROUGH: + if (!HBUF_PUTSL(ob, + "<style:text-properties" + " style:text-line-through-style=\"solid\"" + " style:text-line-through-type=\"single\"/>\n")) + return 0; + break; + case LOWDOWN_HIGHLIGHT: + if (!HBUF_PUTSL(ob, + "<style:text-properties" + " style:text-underline-style=\"solid\"" + " style:text-underline-color=\"font-color\"" + " style:text-underline-width=\"auto\"/>\n")) + return 0; + break; + default: + abort(); + /* NOTREACHED */ + } + + if (sty->type == LOWDOWN_LIST && + !HBUF_PUTSL(ob, "</text:list-style>\n")) + return 0; + if (sty->type != LOWDOWN_LIST && + !HBUF_PUTSL(ob, "</style:style>\n")) + return 0; + + return 1; +} + +/* + * Flush out the "fixed" styles we need for standalone mode. + * XXX: it's possible to put a lot of this into a separate file, + * somehow, but that's a matter for the future. Return FALSE on + * failure, TRUE on success. 
+ */ +static int +odt_styles_flush_fixed(struct lowdown_buf *ob, const struct odt *st) +{ + + if (st->sty != NULL) + return hbuf_puts(ob, st->sty); + + if (!HBUF_PUTSL(ob, + "<office:font-face-decls>\n" + "<style:font-face style:name=\"OpenSymbol\"" + " svg:font-family=\"OpenSymbol\"" + " style:font-charset=\"x-symbol\"/>\n" + "<style:font-face style:name=\"Liberation Mono\"" + " svg:font-family=\"'Liberation Mono'\"" + " style:font-family-generic=\"modern\"" + " style:font-pitch=\"fixed\"/>\n" + "<style:font-face style:name=\"Liberation Serif\"" + " svg:font-family=\"'Liberation Serif'\"" + " style:font-family-generic=\"roman\"" + " style:font-pitch=\"variable\"/>\n" + "<style:font-face style:name=\"Liberation Sans\"" + " svg:font-family=\"'Liberation Sans'\"" + " style:font-family-generic=\"swiss\"" + " style:font-pitch=\"variable\"/>\n" + "</office:font-face-decls>\n")) + return 0; + + /* + * This doesn't appear to make a difference if it's specified or + * not, but I'm adding it because libreoffice does. + */ + + if (!HBUF_PUTSL(ob, + "<office:scripts>\n" + " <office:script script:language=\"ooo:Basic\">\n" + " <ooo:libraries xmlns:ooo=\"http://openoffice.org/2004/office\"" + " xmlns:xlink=\"http://www.w3.org/1999/xlink\"/>\n" + " </office:script>\n" + "</office:scripts>\n")) + return 0; + + if (!HBUF_PUTSL(ob, "<office:styles>\n")) + return 0; + + /* Baseline. */ + + if (!HBUF_PUTSL(ob, + "<style:style" + " style:name=\"Standard\"" + " style:family=\"paragraph\"" + " style:class=\"text\"/>\n")) + return 0; + + /* Text within block. 
*/ + + if (!HBUF_PUTSL(ob, + "<style:style" + " style:name=\"Text_20_body\"" + " style:display-name=\"Text body\"" + " style:family=\"paragraph\"" + " style:parent-style-name=\"Standard\"" + " style:class=\"text\">\n" + "<style:paragraph-properties" + " fo:margin-top=\"0cm\"" + " fo:margin-bottom=\"0.247cm\"" + " style:contextual-spacing=\"false\"" + " fo:line-height=\"115%\"/>\n" + "</style:style>\n")) + return 0; + + /* Horizontal line. */ + + if (!HBUF_PUTSL(ob, + "<style:style" + " style:family=\"paragraph\"" + " style:name=\"Horizontal_20_Line\"" + " style:parent-style-name=\"Standard\"" + " style:display-name=\"Horizontal Line\"" + " style:next-style-name=\"Text_20_body\"" + " style:class=\"html\">\n" + "<style:paragraph-properties" + " fo:margin-top=\"0cm\"" + " fo:margin-bottom=\"0.499cm\"" + " style:contextual-spacing=\"false\"" + " style:border-line-width-bottom=\"0.002cm 0.004cm 0.002cm\"" + " fo:padding=\"0cm\"" + " fo:border-left=\"none\"" + " fo:border-right=\"none\"" + " fo:border-top=\"none\"" + " fo:border-bottom=\"0.14pt double #808080\"" + " text:number-lines=\"false\"" + " text:line-number=\"0\"" + " style:join-border=\"false\"/>\n" + "<style:text-properties" + " fo:font-size=\"6pt\"" + " style:font-size-asian=\"6pt\"" + " style:font-size-complex=\"6pt\"/>\n" + "</style:style>\n")) + return 0; + + /* Images. */ + + if (!HBUF_PUTSL(ob, + "<style:style style:name=\"Graphics\" style:family=\"graphic\">\n" + "<style:graphic-properties" + " text:anchor-type=\"paragraph\"" + " svg:x=\"0cm\" svg:y=\"0cm\"" + " style:wrap=\"dynamic\"" + " style:number-wrapped-paragraphs=\"no-limit\"" + " style:wrap-contour=\"false\"" + " draw:auto-grow-height=\"true\"" + " draw:auto-grow-width=\"true\"" + " style:vertical-pos=\"top\"" + " style:vertical-rel=\"paragraph\"" + " style:horizontal-pos=\"center\"" + " style:horizontal-rel=\"paragraph\"/>" + "</style:style>")) + return 0; + + /* Internet link. 
*/ + + if (!HBUF_PUTSL(ob, + "<style:style" + " style:family=\"text\"" + " style:name=\"Internet_20_Link\"" + " style:display-name=\"Internet Link\">\n" + "<style:text-properties" + " fo:color=\"#000080\"" + " loext:opacity=\"100%\"" + " fo:language=\"zxx\"" + " fo:country=\"none\"" + " style:language-asian=\"zxx\"" + " style:country-asian=\"none\"" + " style:language-complex=\"zxx\"" + " style:country-complex=\"none\"" + " style:text-underline-style=\"solid\"" + " style:text-underline-color=\"font-color\"" + " style:text-underline-width=\"auto\"/>\n" + "</style:style>\n")) + return 0; + + /* Source (preformatted) code. */ + + if (!HBUF_PUTSL(ob, + "<style:style" + " style:family=\"text\"" + " style:name=\"Source_20_Text\"" + " style:display-name=\"Source Text\">\n" + "<style:text-properties" + " style:font-name=\"Liberation Mono\"" + " fo:font-family=\"'Liberation Mono'\"" + " style:font-family-generic=\"modern\"" + " style:font-pitch=\"fixed\"" + " style:font-name-asian=\"Liberation Mono\"" + " style:font-family-asian=" + "\"'Liberation Mono'\"" + " style:font-family-generic-asian=\"modern\"" + " style:font-pitch-asian=\"fixed\"" + " style:font-name-complex=\"Liberation Mono\"" + " style:font-family-complex=" + "\"'Liberation Mono'\"" + " style:font-family-generic-complex=\"modern\"" + " style:font-pitch-complex=\"fixed\"/>\n" + "</style:style>\n")) + return 0; + + /* Frame (tables). 
*/ + + if (!HBUF_PUTSL(ob, + "<style:style" + " style:name=\"Frame\"" + " style:family=\"graphic\">\n" + "<style:graphic-properties" + " text:anchor-type=\"as-char\"" + " svg:x=\"0cm\"" + " svg:y=\"0cm\"" + " fo:margin-left=\"0cm\"" + " fo:margin-right=\"0cm\"" + " fo:margin-top=\"0.201cm\"" + " fo:margin-bottom=\"0.201cm\"" + " style:wrap=\"parallel\"" + " style:number-wrapped-paragraphs=\"no-limit\"" + " style:wrap-contour=\"false\"" + " style:vertical-pos=\"top\"" + " style:vertical-rel=\"paragraph-content\"" + " style:horizontal-pos=\"center\"" + " style:horizontal-rel=\"paragraph-content\"" + " fo:padding=\"0cm\"" + " fo:border=\"0pt solid #000000\"/>\n" + "</style:style>\n")) + return 0; + + /* Preformatted text. */ + + if (!HBUF_PUTSL(ob, + "<style:style" + " style:name=\"Preformatted_20_Text\"" + " style:display-name=\"Preformatted Text\"" + " style:family=\"paragraph\"" + " style:parent-style-name=\"Standard\"" + " style:class=\"html\">\n" + "<style:paragraph-properties" + " fo:margin-top=\"0cm\"" + " fo:margin-bottom=\"0cm\"" + " style:contextual-spacing=\"false\"/>\n" + "<style:text-properties" + " style:font-name=\"Liberation Mono\"" + " fo:font-family=\"'Liberation Mono'\"" + " style:font-family-generic=\"modern\"" + " style:font-pitch=\"fixed\"" + " fo:font-size=\"10pt\"" + " style:font-name-asian=\"Liberation Mono\"" + " style:font-family-asian=\"'Liberation Mono'\"" + " style:font-family-generic-asian=\"modern\"" + " style:font-pitch-asian=\"fixed\"" + " style:font-size-asian=\"10pt\"" + " style:font-name-complex=\"Liberation Mono\"" + " style:font-family-complex=\"'Liberation Mono'\"" + " style:font-family-generic-complex=\"modern\"" + " style:font-pitch-complex=\"fixed\"" + " style:font-size-complex=\"10pt\"/>\n" + "</style:style>\n")) + return 0; + + /* Table contents. 
*/ + + if (!HBUF_PUTSL(ob, + "<style:style" + " style:name=\"Table_20_Contents\"" + " style:display-name=\"Table Contents\"" + " style:family=\"paragraph\"" + " style:parent-style-name=\"Standard\"" + " style:class=\"extra\">\n" + "<style:paragraph-properties" + " fo:orphans=\"0\"" + " fo:widows=\"0\"" + " text:number-lines=\"false\"" + " text:line-number=\"0\"/>\n" + "</style:style>\n")) + return 0; + + /* Headings. */ + + if (!HBUF_PUTSL(ob, + "<style:style" + " style:name=\"Heading\"" + " style:family=\"paragraph\"" + " style:parent-style-name=\"Standard\"" + " style:class=\"text\">\n" + "<style:paragraph-properties" + " fo:margin-top=\"0.423cm\"" + " fo:margin-bottom=\"0.212cm\"" + " style:contextual-spacing=\"false\"" + " fo:keep-with-next=\"always\"/>\n" + "<style:text-properties" + " style:font-name=\"Liberation Sans\"" + " fo:font-family=\"'Liberation Sans'\"" + " style:font-family-generic=\"swiss\"" + " style:font-pitch=\"variable\"" + " fo:font-size=\"14pt\"" + " style:font-name-asian=\"Liberation Sans\"" + " style:font-family-asian=\"'Liberation Sans'\"" + " style:font-family-generic-asian=\"system\"" + " style:font-pitch-asian=\"variable\"" + " style:font-size-asian=\"14pt\"" + " style:font-name-complex=\"Liberation Sans\"" + " style:font-family-complex=\"'Liberation Sans'\"" + " style:font-family-generic-complex=\"system\"" + " style:font-pitch-complex=\"variable\"" + " style:font-size-complex=\"14pt\"/>\n" + "</style:style>\n")) + return 0; + + /* Unordered list. 
*/ + + if (!HBUF_PUTSL(ob, + "<style:style" + " style:name=\"Bullet_20_Symbols\"" + " style:display-name=\"Bullet Symbols\"" + " style:family=\"text\">\n" + "<style:text-properties" + " style:font-name=\"OpenSymbol\"" + " fo:font-family=\"OpenSymbol\"" + " style:font-charset=\"x-symbol\"" + " style:font-name-asian=\"OpenSymbol\"" + " style:font-family-asian=\"OpenSymbol\"" + " style:font-charset-asian=\"x-symbol\"" + " style:font-name-complex=\"OpenSymbol\"" + " style:font-family-complex=\"OpenSymbol\"" + " style:font-charset-complex=\"x-symbol\"/>\n" + "</style:style>\n")) + return 0; + + /* Ordered list. */ + + if (!HBUF_PUTSL(ob, + "<style:style" + " style:name=\"Numbering_20_Symbols\"" + " style:display-name=\"Numbering Symbols\"" + " style:family=\"text\"/>\n")) + return 0; + + /* Headers. */ + + if (!HBUF_PUTSL(ob, + "<style:style" + " style:name=\"Heading_20_1\"" + " style:display-name=\"Heading 1\"" + " style:family=\"paragraph\"" + " style:parent-style-name=\"Heading\"" + " style:next-style-name=\"Text_20_body\"" + " style:default-outline-level=\"1\"" + " style:class=\"text\">\n" + "<style:paragraph-properties" + " fo:margin-top=\"0.423cm\"" + " fo:margin-bottom=\"0.212cm\"" + " style:contextual-spacing=\"false\"/>\n" + "<style:text-properties" + " fo:font-size=\"130%\"" + " fo:font-weight=\"bold\"" + " style:font-size-asian=\"130%\"" + " style:font-weight-asian=\"bold\"" + " style:font-size-complex=\"130%\"" + " style:font-weight-complex=\"bold\"/>\n" + "</style:style>\n")) + return 0; + if (!HBUF_PUTSL(ob, + "<style:style" + " style:name=\"Heading_20_2\"" + " style:display-name=\"Heading 2\"" + " style:family=\"paragraph\"" + " style:parent-style-name=\"Heading\"" + " style:next-style-name=\"Text_20_body\"" + " style:default-outline-level=\"2\"" + " style:class=\"text\">\n" + "<style:paragraph-properties" + " fo:margin-top=\"0.353cm\"" + " fo:margin-bottom=\"0.212cm\"" + " style:contextual-spacing=\"false\"/>\n" + "<style:text-properties" + " 
fo:font-size=\"115%\"" + " fo:font-weight=\"bold\"" + " style:font-size-asian=\"115%\"" + " style:font-weight-asian=\"bold\"" + " style:font-size-complex=\"115%\"" + " style:font-weight-complex=\"bold\"/>\n" + "</style:style>\n")) + return 0; + if (!HBUF_PUTSL(ob, + "<style:style" + " style:name=\"Heading_20_3\"" + " style:display-name=\"Heading 3\"" + " style:family=\"paragraph\"" + " style:parent-style-name=\"Heading\"" + " style:next-style-name=\"Text_20_body\"" + " style:default-outline-level=\"3\"" + " style:class=\"text\">\n" + "<style:paragraph-properties" + " fo:margin-top=\"0.247cm\"" + " fo:margin-bottom=\"0.212cm\"" + " style:contextual-spacing=\"false\"/>\n" + "<style:text-properties" + " fo:font-size=\"101%\"" + " fo:font-weight=\"bold\"" + " style:font-size-asian=\"101%\"" + " style:font-weight-asian=\"bold\"" + " style:font-size-complex=\"101%\"" + " style:font-weight-complex=\"bold\"/>\n" + "</style:style>\n")) + return 0; + + /* Table frames. */ + + if (!HBUF_PUTSL(ob, + "<style:style style:name=\"fr1\"" + " style:family=\"graphic\"" + " style:parent-style-name=\"Frame\">\n" + "<style:graphic-properties" + " style:run-through=\"foreground\"" + " style:wrap=\"parallel\"" + " style:number-wrapped-paragraphs=\"no-limit\"" + " style:vertical-pos=\"middle\"" + " style:vertical-rel=\"baseline\"" + " style:horizontal-pos=\"center\"" + " style:horizontal-rel=\"paragraph\"/>\n" + " </style:style>\n")) + return 0; + + return HBUF_PUTSL(ob, "</office:styles>\n"); +} + +/* + * Flush out the elements for scripts and styles. Return FALSE on + * failure, TRUE on success. 
+ */ +static int +odt_styles_flush(struct lowdown_buf *ob, const struct odt *st) +{ + size_t i; + + if ((st->flags & LOWDOWN_STANDALONE) && + !odt_styles_flush_fixed(ob, st)) + return 0; + + if (!HBUF_PUTSL(ob, "<office:automatic-styles>\n")) + return 0; + for (i = 0; i < st->stysz; i++) + if (!odt_sty_flush(ob, st, &st->stys[i])) + return 0; + + /* + * I'm not sure why the page layout goes into the automatic + * styles and not the fixed styles, but if placed in fixed + * styles, this isn't processed. + */ + + if (!HBUF_PUTSL(ob, + "<style:page-layout style:name=\"pm1\">\n" + "<style:page-layout-properties" + " fo:page-width=\"21.001cm\"" + " fo:page-height=\"29.7cm\"" + " style:num-format=\"1\"" + " style:print-orientation=\"portrait\"" + " fo:margin-top=\"2cm\"" + " fo:margin-bottom=\"2cm\"" + " fo:margin-left=\"2cm\"" + " fo:margin-right=\"2cm\"" + " style:writing-mode=\"lr-tb\"" + " style:footnote-max-height=\"0cm\">\n" + "</style:page-layout-properties>\n" + "</style:page-layout>\n")) + return 0; + + if (!HBUF_PUTSL(ob, "</office:automatic-styles>\n")) + return 0; + + /* + * Since this references an automatic style (pm1), emit this + * regardless of whether we're in standalone or not. + */ + + return HBUF_PUTSL(ob, + "<office:master-styles>\n" + "<style:master-page " + " style:name=\"Standard\"" + " style:page-layout-name=\"pm1\"/>\n" + "</office:master-styles>\n"); +} + +/* + * Use our metadata to grab change identifiers. Return FALSE on + * failure, TRUE on success. 
+ */ +static int +odt_changes_flush(struct lowdown_buf *ob, + const struct lowdown_metaq *mq, + const struct odt *st) +{ + const struct lowdown_meta *m; + const char *author = NULL, *date = NULL, + *rcsauthor = NULL, *rcsdate = NULL; + char buf[64]; + size_t i; + time_t t = time(NULL); + + if (st->chngsz == 0) + return 1; + + TAILQ_FOREACH(m, mq, entries) + if (strcasecmp(m->key, "author") == 0) + author = m->value; + else if (strcasecmp(m->key, "date") == 0) + date = m->value; + else if (strcasecmp(m->key, "rcsauthor") == 0) + rcsauthor = rcsauthor2str(m->value); + else if (strcasecmp(m->key, "rcsdate") == 0) + rcsdate = rcsdate2str(m->value); + + /* Overrides. */ + + if (rcsdate != NULL) + date = rcsdate; + if (rcsauthor != NULL) + author = rcsauthor; + + /* We require at least a date. */ + + if (date == NULL) { + if (strftime(buf, sizeof(buf), + "%Y-%m-%dT%H:%M:%S", localtime(&t)) == 0) + date = "1970-01-01"; + else + date = buf; + } + + if (!HBUF_PUTSL(ob, + "<text:tracked-changes" + " text:track-changes=\"false\">\n")) + return 0; + for (i = 0; i < st->chngsz; i++) { + if (!hbuf_printf(ob, + "<text:changed-region" + " xml:id=\"ct%zu\"" + " text:id=\"ct%zu\">\n" + "<text:%s>\n" + "<office:change-info>\n", i, i, + st->chngs[i].ins ? "insertion" : "deletion")) + return 0; + if (author != NULL) { + if (!HBUF_PUTSL(ob, "<dc:creator>")) + return 0; + if (!hesc_html(ob, author, + strlen(author), 1, 0, 1)) + return 0; + if (!HBUF_PUTSL(ob, "</dc:creator>\n")) + return 0; + } + if (!HBUF_PUTSL(ob, "<dc:date>")) + return 0; + if (!hesc_html(ob, date, strlen(date), 1, 0, 1)) + return 0; + if (!HBUF_PUTSL(ob, "</dc:date>\n")) + return 0; + if (!hbuf_printf(ob, + "</office:change-info>\n" + "</text:%s>\n" + "</text:changed-region>\n", + st->chngs[i].ins ? "insertion" : "deletion")) + return 0; + } + + return HBUF_PUTSL(ob, "</text:tracked-changes>\n"); +} + +/* + * Flush out the <office:meta> element, if applicable. Return FALSE on + * failure, TRUE on success. 
+ */ +static int +odt_metaq_flush(struct lowdown_buf *ob, + const struct lowdown_metaq *mq, + const struct odt *st) +{ + const struct lowdown_meta *m; + const char *author = NULL, *title = NULL, + *date = NULL, *rcsauthor = NULL, + *rcsdate = NULL; + + if (mq == NULL || TAILQ_EMPTY(mq)) + return 1; + + TAILQ_FOREACH(m, mq, entries) + if (strcasecmp(m->key, "author") == 0) + author = m->value; + else if (strcasecmp(m->key, "date") == 0) + date = m->value; + else if (strcasecmp(m->key, "rcsauthor") == 0) + rcsauthor = rcsauthor2str(m->value); + else if (strcasecmp(m->key, "rcsdate") == 0) + rcsdate = rcsdate2str(m->value); + else if (strcasecmp(m->key, "title") == 0) + title = m->value; + + /* Overrides. */ + + if (title == NULL) + title = "Untitled article"; + if (rcsdate != NULL) + date = rcsdate; + if (rcsauthor != NULL) + author = rcsauthor; + + if (!HBUF_PUTSL(ob, "<office:meta>\n")) + return 0; + + if (!HBUF_PUTSL(ob, "<dc:title>")) + return 0; + if (!hesc_html(ob, title, strlen(title), 1, 0, 1)) + return 0; + if (!HBUF_PUTSL(ob, "</dc:title>\n")) + return 0; + + if (author != NULL) { + if (!HBUF_PUTSL(ob, "<dc:creator>")) + return 0; + if (!hesc_html(ob, author, strlen(author), 1, 0, 1)) + return 0; + if (!HBUF_PUTSL(ob, "</dc:creator>\n")) + return 0; + if (!HBUF_PUTSL(ob, "<meta:initial-creator>")) + return 0; + if (!hesc_html(ob, author, strlen(author), 1, 0, 1)) + return 0; + if (!HBUF_PUTSL(ob, "</meta:initial-creator>\n")) + return 0; + } + + if (date != NULL) { + if (!HBUF_PUTSL(ob, "<dc:date>")) + return 0; + if (!hesc_html(ob, date, strlen(date), 1, 0, 1)) + return 0; + if (!HBUF_PUTSL(ob, "</dc:date>\n")) + return 0; + if (!HBUF_PUTSL(ob, "<meta:creation-date>")) + return 0; + if (!hesc_html(ob, date, strlen(date), 1, 0, 1)) + return 0; + if (!HBUF_PUTSL(ob, "</meta:creation-date>\n")) + return 0; + } + + return HBUF_PUTSL(ob, "</office:meta>\n"); +} + +/* + * Escape regular text that shouldn't be HTML. Return FALSE on failure, + * TRUE on success. 
+ */ +static int +escape_html(struct lowdown_buf *ob, const char *source, + size_t length, const struct odt *st) +{ + + return hesc_html(ob, source, length, 1, 0, 1); +} + +/* + * See escape_html(). + */ +static int +escape_htmlb(struct lowdown_buf *ob, + const struct lowdown_buf *in, const struct odt *st) +{ + + return escape_html(ob, in->data, in->size, st); +} + +/* + * Escape an href link. Return FALSE on failure, TRUE on success. + */ +static int +escape_href(struct lowdown_buf *ob, const struct lowdown_buf *in, + const struct odt *st) +{ + + return hesc_href(ob, in->data, in->size); +} + +static int +escape_attr(struct lowdown_buf *ob, const struct lowdown_buf *in) +{ + + return hesc_attr(ob, in->data, in->size); +} + +/* + * Return FALSE on failure, TRUE on success. + */ +static int +rndr_autolink(struct lowdown_buf *ob, + const struct rndr_autolink *parm, + struct odt *st) +{ + + if (parm->link.size == 0) + return 1; + + if (!HBUF_PUTSL(ob, + "<text:a xlink:type=\"simple\"" + " text:style-name=\"Internet_20_Link\" xlink:href=\"")) + return 0; + if (parm->type == HALINK_EMAIL && !HBUF_PUTSL(ob, "mailto:")) + return 0; + if (!escape_href(ob, &parm->link, st)) + return 0; + if (!HBUF_PUTSL(ob, "\">")) + return 0; + + /* + * Pretty printing: if we get an email address as + * an actual URI, e.g. `mailto:foo@bar.com`, we don't + * want to print the `mailto:` prefix + */ + + if (hbuf_strprefix(&parm->link, "mailto:")) { + if (!escape_html(ob, + parm->link.data + 7, + parm->link.size - 7, st)) + return 0; + } else { + if (!escape_htmlb(ob, &parm->link, st)) + return 0; + } + + return HBUF_PUTSL(ob, "</text:a>"); +} + +/* + * Return FALSE on failure, TRUE on success. 
+ */ +static int +rndr_blockcode(struct lowdown_buf *ob, + const struct rndr_blockcode *parm, + struct odt *st) +{ + size_t i, j, sz, ssz; + struct odt_sty *s; + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + + for (i = 0; i < st->stysz; i++) + if (st->stys[i].type == LOWDOWN_PARAGRAPH && + st->stys[i].fmt == ODT_STY_LIT && + st->stys[i].parent == st->list && + st->stys[i].offs == st->offs) + break; + + if (i == st->stysz) { + if ((s = odt_style_add(st)) == NULL) + return 0; + s->type = LOWDOWN_PARAGRAPH; + s->fmt = ODT_STY_LIT; + s->parent = st->list; + s->offs = st->offs; + snprintf(s->name, sizeof(s->name), + "P%zu", st->sty_P++); + } else + s = &st->stys[i]; + + for (i = 0; i < parm->text.size; ) { + if (!hbuf_printf(ob, + "<text:p text:style-name=\"%s\">", s->name)) + return 0; + + /* + * Iterate through each line, printing it in its own + * <text:p>. If we encounter more than one space in a + * row, then use a <text:s text:c> spanner to print the + * literal spaces. + */ + + for (sz = 0, j = i; i < parm->text.size; i++, sz++) { + if (parm->text.data[i] == ' ' && + i < parm->text.size - 1 && + parm->text.data[i + 1] == ' ') { + if (!hesc_html(ob, + &parm->text.data[j], sz, 1, 1, 1)) + return 0; + sz = 0; + for (ssz = 0; i < parm->text.size; + i++, ssz++) + if (parm->text.data[i] != ' ') + break; + j = i; + if (!hbuf_printf(ob, + "<text:s text:c=\"%zu\"/>", ssz)) + return 0; + } + if (i < parm->text.size && + parm->text.data[i] == '\n') + break; + } + if (!hesc_html(ob, &parm->text.data[j], sz, 1, 1, 1)) + return 0; + if (!HBUF_PUTSL(ob, "</text:p>\n")) + return 0; + if (i < parm->text.size) + i++; + } + + return 1; +} + +/* + * Return FALSE on failure, TRUE on success. 
+ */ +static int +rndr_codespan(struct lowdown_buf *ob, + const struct rndr_codespan *param, + struct odt *st) +{ + + if (!HBUF_PUTSL(ob, + "<text:span text:style-name=\"Source_20_Text\">")) + return 0; + if (!escape_htmlb(ob, ¶m->text, st)) + return 0; + return HBUF_PUTSL(ob, "</text:span>"); +} + +/* + * This covers all manner of span types: italic, bold, etc. Return + * FALSE on failure, TRUE on success. + */ +static int +rndr_span(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct lowdown_node *n, struct odt *st) +{ + const char *sty; + + if ((sty = odt_style_add_text(st, n->type)) == NULL) + return 0; + if (!hbuf_printf(ob, + "<text:span text:style-name=\"%s\">", sty)) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</text:span>"); +} + +/* + * Return FALSE on failure, TRUE on success. + */ +static int +rndr_linebreak(struct lowdown_buf *ob) +{ + + return HBUF_PUTSL(ob, "<text:line-break/>\n"); +} + +/* + * Return FALSE on failure, TRUE on success. 
+ */ +static int +rndr_header(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct lowdown_node *n, + struct odt *st) +{ + struct odt_sty *sty; + ssize_t level; + size_t i; + int fl, rc = 0; + const struct lowdown_buf *buf; + struct lowdown_buf *nbuf = NULL; + + level = (ssize_t)n->rndr_header.level + st->headers_offs; + if (level < 1) + level = 1; + else if (level > 3) + level = 3; + + if (level == 1) + fl = ODT_STY_H1; + else if (level == 2) + fl = ODT_STY_H2; + else + fl = ODT_STY_H3; + for (i = 0; i < st->stysz; i++) + if (st->stys[i].type == LOWDOWN_HEADER && + st->stys[i].fmt == fl) + break; + if (i == st->stysz) { + if ((sty = odt_style_add(st)) == NULL) + return 0; + sty->fmt = fl; + sty->type = LOWDOWN_HEADER; + snprintf(sty->name, sizeof(sty->name), + "P%zu", st->sty_P++); + } else + sty = &st->stys[i]; + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (!hbuf_printf(ob, + "<text:h" + " text:outline-level=\"%zu\"" + " text:style-name=\"%s\"", + level, sty->name)) + return 0; + + if (n->rndr_header.attr_cls.size > 0) { + if (!HBUF_PUTSL(ob, " text:class-names=\"")) + return 0; + if (!hbuf_putb(ob, &n->rndr_header.attr_cls)) + return 0; + if (!HBUF_PUTSL(ob, "\"")) + return 0; + } + + if (!HBUF_PUTSL(ob, ">")) + return 0; + + if (n->rndr_header.attr_id.size) { + if ((nbuf = hbuf_new(32)) == NULL) + goto out; + if (!escape_href(nbuf, &n->rndr_header.attr_id, st)) + goto out; + buf = nbuf; + } else + buf = hbuf_id(NULL, n, &st->headers_used); + + if (buf == NULL) + goto out; + if (!HBUF_PUTSL(ob, "<text:bookmark-start text:name=\"")) + goto out; + if (!hbuf_putb(ob, buf)) + goto out; + if (!HBUF_PUTSL(ob, "\" />")) + goto out; + if (!hbuf_putb(ob, content)) + goto out; + if (!HBUF_PUTSL(ob, "<text:bookmark-end text:name=\"")) + goto out; + if (!hbuf_putb(ob, buf)) + goto out; + if (!HBUF_PUTSL(ob, "\" />")) + goto out; + if (!HBUF_PUTSL(ob, "</text:h>\n")) + goto out; + rc = 1; +out: + hbuf_free(nbuf); + return rc; +} + +/* + * 
Return FALSE on failure, TRUE on success. + */ +static int +rndr_link(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct rndr_link *param, + struct odt *st) +{ + + if (param->attr_id.size > 0) { + if (!HBUF_PUTSL(ob, "<text:bookmark-start text:name=\"")) + return 0; + if (!hbuf_putb(ob, ¶m->attr_id)) + return 0; + if (!HBUF_PUTSL(ob, "\" />")) + return 0; + } + + if (!HBUF_PUTSL(ob, + "<text:a" + " xlink:type=\"simple\"" + " text:style-name=\"Internet_20_Link\"")) + return 0; + + if (param->attr_cls.size > 0) { + if (!HBUF_PUTSL(ob, " text:class-names=\"")) + return 0; + if (!hbuf_putb(ob, ¶m->attr_cls)) + return 0; + if (!HBUF_PUTSL(ob, "\"")) + return 0; + } + if (!HBUF_PUTSL(ob, " xlink:href=\"")) + return 0; + if (!escape_href(ob, ¶m->link, st)) + return 0; + if (!HBUF_PUTSL(ob, "\">") || + !hbuf_putb(ob, content) || + !HBUF_PUTSL(ob, "</text:a>")) + return 0; + + if (param->attr_id.size > 0) { + if (!HBUF_PUTSL(ob, "<text:bookmark-end text:name=\"")) + return 0; + if (!hbuf_putb(ob, ¶m->attr_id)) + return 0; + if (!HBUF_PUTSL(ob, "\" />")) + return 0; + } + return 1; +} + +/* + * Return FALSE on failure, TRUE on success. + */ +static int +rndr_list(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct rndr_list *param, + const char *name) +{ + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (!HBUF_PUTSL(ob, "<text:list")) + return 0; + if (name != NULL && !hbuf_printf(ob, + " text:style-name=\"%s\"", name)) + return 0; + if (!HBUF_PUTSL(ob, ">\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</text:list>\n"); +} + +/* + * Return FALSE on failure, TRUE on success. 
+ */ +static int +rndr_listitem(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct lowdown_node *n, + struct odt *st) +{ + size_t i, size; + struct odt_sty *sty; + + if (!(n->rndr_listitem.flags & HLIST_FL_DEF)) { + assert(st->list != (size_t)-1); + if (!HBUF_PUTSL(ob, "<text:list-item>")) + return 0; + } + + /* + * Non-definition, non-block lists have an initial paragraph + * that must link to the root list of the current tree. + */ + + if (!(n->rndr_listitem.flags & HLIST_FL_DEF) && + !(n->rndr_listitem.flags & HLIST_FL_BLOCK)) { + assert(st->list != (size_t)-1); + for (i = 0; i < st->stysz; i++) + if (st->stys[i].type == LOWDOWN_PARAGRAPH && + st->stys[i].fmt == ODT_STY_PARA && + st->stys[i].foot == st->foot && + st->stys[i].parent == st->list) + break; + if (i == st->stysz) { + if ((sty = odt_style_add(st)) == NULL) + return 0; + sty->parent = st->list; + sty->foot = st->foot; + sty->fmt = ODT_STY_PARA; + sty->type = LOWDOWN_PARAGRAPH; + snprintf(sty->name, sizeof(sty->name), + "P%zu", st->sty_P++); + } else + sty = &st->stys[i]; + + if (!hbuf_printf(ob, + "<text:p text:style-name=\"%s\">", sty->name)) + return 0; + } + + if (n->rndr_listitem.flags & HLIST_FL_UNCHECKED) { + if (!HBUF_PUTSL(ob, "☐ ")) + return 0; + } + if (n->rndr_listitem.flags & HLIST_FL_CHECKED) { + if (!HBUF_PUTSL(ob, "☑ ")) + return 0; + } + + /* Cut off any trailing space. */ + + if ((size = content->size) > 0) { + while (size && content->data[size - 1] == '\n') + size--; + if (!hbuf_put(ob, content->data, size)) + return 0; + } + + if (!(n->rndr_listitem.flags & HLIST_FL_DEF) && + !(n->rndr_listitem.flags & HLIST_FL_BLOCK)) + if (!HBUF_PUTSL(ob, "</text:p>")) + return 0; + + if (!(n->rndr_listitem.flags & HLIST_FL_DEF)) + if (!HBUF_PUTSL(ob, "</text:list-item>\n")) + return 0; + + return 1; +} + +/* + * Return FALSE on failure, TRUE on success. 
+ */ +static int +rndr_paragraph(struct lowdown_buf *ob, + const struct lowdown_buf *content, + struct odt *st) +{ + size_t i = 0, j; + struct odt_sty *sty; + + if (content->size == 0) + return 1; + while (i < content->size && + isspace((unsigned char)content->data[i])) + i++; + if (i == content->size) + return 1; + + /* + * Paragraphs need to either set their left margin, if in + * blockquotes, or link to the root list, if applicable. The + * foot bits are because footer paragraphs inherit the footnote + * font. + */ + + for (j = 0; j < st->stysz; j++) + if (st->stys[j].type == LOWDOWN_PARAGRAPH && + st->stys[j].parent == st->list && + st->stys[j].foot == st->foot && + st->stys[j].fmt == ODT_STY_PARA && + st->stys[j].offs == st->offs) + break; + + if (j == st->stysz) { + if ((sty = odt_style_add(st)) == NULL) + return 0; + sty->foot = st->foot; + sty->fmt = ODT_STY_PARA; + sty->type = LOWDOWN_PARAGRAPH; + sty->parent = st->list; + sty->offs = st->offs; + snprintf(sty->name, sizeof(sty->name), + "P%zu", st->sty_P++); + } else + sty = &st->stys[j]; + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + if (!hbuf_printf(ob, + "<text:p text:style-name=\"%s\">", sty->name)) + return 0; + if (!hbuf_put(ob, content->data + i, content->size - i)) + return 0; + return HBUF_PUTSL(ob, "</text:p>\n"); +} + +/* + * Return FALSE on failure, TRUE on success. + */ +static int +rndr_html(struct lowdown_buf *ob, + const struct lowdown_buf *param, + const struct odt *st) +{ + + if (st->flags & LOWDOWN_ODT_SKIP_HTML) + return 1; + return escape_htmlb(ob, param, st); +} + +/* + * Return FALSE on failure, TRUE on success. 
+ */ +static int +rndr_hrule(struct lowdown_buf *ob, struct odt *st) +{ + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + return HBUF_PUTSL(ob, + "<text:p text:style-name=\"Horizontal_20_Line\"/>\n"); +} + +static int +rndr_image(struct lowdown_buf *ob, + const struct rndr_image *param, + const struct odt *st) +{ + unsigned int x = 0, y = 0; + char dimbuf[32]; + + /* + * Scan in our dimensions, if applicable. + * It's unreasonable for them to be over 32 characters, so use + * that as a cap to the size. + */ + + if (param->dims.size && + param->dims.size < sizeof(dimbuf) - 1) { + memset(dimbuf, 0, sizeof(dimbuf)); + memcpy(dimbuf, param->dims.data, param->dims.size); + if (sscanf(dimbuf, "%ux%u", &x, &y) != 2) + x = y = 0; + } + + if (!HBUF_PUTSL(ob, + "<draw:frame" + " draw:name=\"Image1\"" + " text:anchor-type=\"as-char\"" + " draw:z-index=\"0\"" + " draw:style-name=\"Graphics\"")) + return 0; + + if (param->attr_cls.size > 0) { + if (!HBUF_PUTSL(ob, " draw:class-names=\"")) + return 0; + if (!hbuf_putb(ob, ¶m->attr_cls)) + return 0; + if (!HBUF_PUTSL(ob, "\"")) + return 0; + } + + if (param->attr_width.size || param->attr_height.size) { + if (param->attr_width.size) + if (!HBUF_PUTSL(ob, " svg:width=\"") || + !escape_attr(ob, ¶m->attr_width) || + !HBUF_PUTSL(ob, "\"")) + return 0; + if (param->attr_height.size) + if (!HBUF_PUTSL(ob, " svg:height=\"") || + !escape_attr(ob, ¶m->attr_height) || + !HBUF_PUTSL(ob, "\"")) + return 0; + } else if (x > 0 && y > 0) { + if (!hbuf_printf(ob, + " svg:width=\"%u px\"" + " svg:height=\"%u px\"", x, y)) + return 0; + } + + if (!HBUF_PUTSL(ob, "><draw:image xlink:href=\"")) + return 0; + if (!hbuf_putb(ob, ¶m->link)) + return 0; + if (!HBUF_PUTSL(ob, "\"" + " xlink:type=\"simple\"" + " xlink:show=\"embed\"" + " xlink:actuate=\"onLoad\"" + " draw:filter-name=\"<All images>\" />")) + return 0; + if (!HBUF_PUTSL(ob, "<svg:title>")) + return 0; + if (!hbuf_putb(ob, ¶m->alt)) + return 0; + return HBUF_PUTSL(ob, 
"</svg:title></draw:frame>"); +} + +/* + * Return FALSE on failure, TRUE on success. + */ +static int +rndr_table(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct rndr_table *param, + struct odt *st) +{ + size_t i, pid; + struct odt_sty *s; + + /* + * First find the outer paragraph. If we're in the footer, this + * must be linked to the footer; and if in a list, to the list. + * We don't do offset here: that's part of the table itself. + */ + + for (pid = 0; pid < st->stysz; pid++) + if (st->stys[pid].type == LOWDOWN_PARAGRAPH && + st->stys[pid].fmt == ODT_STY_PARA && + st->stys[pid].offs == 0 && + st->stys[pid].foot == st->foot && + st->stys[pid].parent == st->list) + break; + if (pid == st->stysz) { + if ((s = odt_style_add(st)) == NULL) + return 0; + s->parent = st->list; + s->foot = st->foot; + s->fmt = ODT_STY_PARA; + s->type = LOWDOWN_PARAGRAPH; + snprintf(s->name, sizeof(s->name), + "P%zu", st->sty_P++); + } + + /* + * Now the table itself. Tables are only unique insofar as they + * have different offsets and possible are in lists. 
+ */ + + for (i = 0; i < st->stysz; i++) + if (st->stys[i].type == LOWDOWN_TABLE_BLOCK && + st->stys[i].parent == st->list && + st->stys[i].foot == st->foot && + st->stys[i].offs == st->offs) + break; + + if (i == st->stysz) { + if ((s = odt_style_add(st)) == NULL) + return 0; + s->type = LOWDOWN_TABLE_BLOCK; + s->fmt = ODT_STY_TBL; + s->foot = st->foot; + s->parent = st->list; + s->offs = st->offs; + snprintf(s->name, sizeof(s->name), + "Table%zu", st->sty_Table++); + } else + s = &st->stys[i]; + + if (ob->size && !hbuf_putc(ob, '\n')) + return 0; + + if (!hbuf_printf(ob, + "<text:p text:style-name=\"%s\">\n", + st->stys[pid].name)) + return 0; + + if (!hbuf_printf(ob, + "<draw:frame draw:style-name=\"fr1\"" + " draw:name=\"Frame\"" + " draw:z-index=\"0\">\n" + "<draw:text-box" + " fo:min-height=\"0.499cm\"" + " fo:min-width=\"0.34cm\">\n" + "<table:table" + " table:style-name=\"%s\"" + " table:name=\"%s\">\n" + "<table:table-column" + " table:number-columns-repeated=\"%zu\"/>\n", + s->name, s->name, param->columns)) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + if (!HBUF_PUTSL(ob, "</table:table>\n")) + return 0; + if (!hbuf_printf(ob, + "</draw:text-box>\n</draw:frame>\n</text:p>\n")) + return 0; + return 1; +} + +/* + * Return FALSE on failure, TRUE on success. + */ +static int +rndr_tablerow(struct lowdown_buf *ob, + const struct lowdown_buf *content) +{ + + if (!HBUF_PUTSL(ob, "<table:table-row>\n")) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</table:table-row>\n"); +} + +/* + * Return FALSE on failure, TRUE on success. + */ +static int +rndr_tablecell(struct lowdown_buf *ob, + const struct lowdown_buf *content, + const struct rndr_table_cell *param, + struct odt *st) +{ + size_t i; + struct odt_sty *s; + + /* + * Reference if we're in a footnote, as the paragraph will want + * to inherit the Footnote smaller font. 
+ */ + + for (i = 0; i < st->stysz; i++) + if (st->stys[i].type == LOWDOWN_PARAGRAPH && + st->stys[i].foot == st->foot && + st->stys[i].fmt == ODT_STY_TBL_PARA) + break; + + if (i == st->stysz) { + if ((s = odt_style_add(st)) == NULL) + return 0; + s->type = LOWDOWN_PARAGRAPH; + s->foot = st->foot; + s->fmt = ODT_STY_TBL_PARA; + snprintf(s->name, sizeof(s->name), + "P%zu", st->sty_P++); + } else + s = &st->stys[i]; + + if (!hbuf_printf(ob, + "<table:table-cell office:value-type=\"string\">" + "<text:p text:style-name=\"%s\">", s->name)) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + return HBUF_PUTSL(ob, "</text:p></table:table-cell>\n"); +} + +/* + * Return FALSE on failure, TRUE on success. + */ +static int +rndr_footnote_ref(struct lowdown_buf *ob, + const struct lowdown_buf *content, struct odt *st) +{ + struct odt tmp; + + /* Save state values. */ + + tmp = *st; + st->offs = 0; + st->list = (size_t)-1; + st->foot = 1; + st->footcount++; + + if (!hbuf_printf(ob, + "<text:note text:id=\"ftn%zu\"" + " text:note-class=\"footnote\">" + "<text:note-citation>%zu</text:note-citation>" + "<text:note-body>\n", st->footcount, st->footcount)) + return 0; + if (!hbuf_putb(ob, content)) + return 0; + if (!HBUF_PUTSL(ob, + "</text:note-body></text:note>\n")) + return 0; + + /* Restore state values. */ + + st->offs = tmp.offs; + st->list = tmp.list; + st->foot = 0; + return 1; +} + +/* + * Return FALSE on failure, TRUE on success. + */ +static int +rndr_math(struct lowdown_buf *ob, + const struct rndr_math *param, + const struct odt *st) +{ + + if (param->blockmode && !HBUF_PUTSL(ob, "\\[")) + return 0; + else if (!param->blockmode && !HBUF_PUTSL(ob, "\\(")) + return 0; + if (!escape_htmlb(ob, ¶m->text, st)) + return 0; + return param->blockmode ? + HBUF_PUTSL(ob, "\\]") : + HBUF_PUTSL(ob, "\\)"); +} + +/* + * Return FALSE on failure, TRUE on success. 
+ */ +static int +rndr_root(struct lowdown_buf *ob, const struct lowdown_metaq *mq, + const struct lowdown_buf *content, const struct odt *st) +{ + + if ((st->flags & LOWDOWN_STANDALONE) && !HBUF_PUTSL(ob, + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<office:document\n" + " xmlns:calcext=\"urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0\"\n" + " xmlns:chart=\"urn:oasis:names:tc:opendocument:xmlns:chart:1.0\"\n" + " xmlns:config=\"urn:oasis:names:tc:opendocument:xmlns:config:1.0\"\n" + " xmlns:css3t=\"http://www.w3.org/TR/css3-text/\"\n" + " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n" + " xmlns:dom=\"http://www.w3.org/2001/xml-events\"\n" + " xmlns:dr3d=\"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0\"\n" + " xmlns:draw=\"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0\"\n" + " xmlns:drawooo=\"http://openoffice.org/2010/draw\"\n" + " xmlns:field=\"urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0\"\n" + " xmlns:fo=\"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0\"\n" + " xmlns:form=\"urn:oasis:names:tc:opendocument:xmlns:form:1.0\"\n" + " xmlns:formx=\"urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0\"\n" + " xmlns:grddl=\"http://www.w3.org/2003/g/data-view#\"\n" + " xmlns:loext=\"urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0\"\n" + " xmlns:math=\"http://www.w3.org/1998/Math/MathML\"\n" + " xmlns:meta=\"urn:oasis:names:tc:opendocument:xmlns:meta:1.0\"\n" + " xmlns:number=\"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0\"\n" + " xmlns:of=\"urn:oasis:names:tc:opendocument:xmlns:of:1.2\"\n" + " xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\"\n" + " xmlns:officeooo=\"http://openoffice.org/2009/office\"\n" + " xmlns:ooo=\"http://openoffice.org/2004/office\"\n" + " xmlns:oooc=\"http://openoffice.org/2004/calc\"\n" + " xmlns:ooow=\"http://openoffice.org/2004/writer\"\n" + " xmlns:rpt=\"http://openoffice.org/2005/report\"\n" + " 
xmlns:script=\"urn:oasis:names:tc:opendocument:xmlns:script:1.0\"\n" + " xmlns:style=\"urn:oasis:names:tc:opendocument:xmlns:style:1.0\"\n" + " xmlns:svg=\"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0\"\n" + " xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\"\n" + " xmlns:tableooo=\"http://openoffice.org/2009/table\"\n" + " xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\"\n" + " xmlns:xforms=\"http://www.w3.org/2002/xforms\"\n" + " xmlns:xhtml=\"http://www.w3.org/1999/xhtml\"\n" + " xmlns:xlink=\"http://www.w3.org/1999/xlink\"\n" + " xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" + " office:mimetype=\"application/vnd.oasis.opendocument.text\"\n" + " office:version=\"1.3\">\n")) + return 0; + + if ((st->flags & LOWDOWN_STANDALONE) && + !odt_metaq_flush(ob, mq, st)) + return 0; + + if (!odt_styles_flush(ob, st)) + return 0; + + if (!HBUF_PUTSL(ob, "<office:body>\n<office:text>\n")) + return 0; + if (!odt_changes_flush(ob, mq, st)) + return 0; + + if (!hbuf_putb(ob, content)) + return 0; + + if (!HBUF_PUTSL(ob, "</office:text>\n</office:body>\n")) + return 0; + + if ((st->flags & LOWDOWN_STANDALONE) && !HBUF_PUTSL(ob, + "</office:document>\n")) + return 0; + return 1; +} + +/* + * Allocate a meta-data value on the queue "mq". Return FALSE on + * failure, TRUE on success. 
+ */ +static int +rndr_meta(struct lowdown_buf *ob, + const struct lowdown_buf *content, + struct lowdown_metaq *mq, + const struct lowdown_node *n, struct odt *st) +{ + struct lowdown_meta *m; + ssize_t val; + const char *ep; + + m = calloc(1, sizeof(struct lowdown_meta)); + if (m == NULL) + return 0; + TAILQ_INSERT_TAIL(mq, m, entries); + + m->key = strndup(n->rndr_meta.key.data, + n->rndr_meta.key.size); + if (m->key == NULL) + return 0; + m->value = strndup(content->data, content->size); + if (m->value == NULL) + return 0; + + if (strcmp(m->key, "shiftheadinglevelby") == 0) { + val = (ssize_t)strtonum + (m->value, -100, 100, &ep); + if (ep == NULL) + st->headers_offs = val + 1; + } else if (strcmp(m->key, "baseheaderlevel") == 0) { + val = (ssize_t)strtonum + (m->value, 1, 100, &ep); + if (ep == NULL) + st->headers_offs = val; + } + + return 1; +} + +static int +rndr(struct lowdown_buf *ob, + struct lowdown_metaq *mq, void *ref, + const struct lowdown_node *n) +{ + const struct lowdown_node *child; + struct lowdown_buf *tmp; + int32_t ent; + struct odt *st = ref; + struct odt_sty *sty = NULL; + size_t curid = (size_t)-1, curoffs, + chngid = (size_t)-1; + int ret = 1; + void *pp; + + if ((tmp = hbuf_new(64)) == NULL) + return 0; + + /* + * Manage our position in the output. If we're in a blockquote + * and not a list, then increment our indent. If we're in a + * list, we're not allowed to have indents between the list and + * content (OpenDocument limitations), so don't touch the + * indentation. + */ + + /* + * TODO: keep a "real offset" if we have an embedded table and + * want to set the width to be the real offset minus page width. + * Without doing so, list-embedded tables run off the right + * margin for OpenDocument reasons. 
+ */ + + switch (n->type) { + case LOWDOWN_DEFINITION_DATA: + case LOWDOWN_BLOCKQUOTE: + if (st->list == (size_t)-1) + st->offs++; + break; + case LOWDOWN_LIST: + if (st->list != (size_t)-1) + break; + for (st->list = 0; st->list < st->stysz; st->list++) { + if (st->stys[st->list].type != LOWDOWN_LIST) + continue; + if (st->stys[st->list].offs != st->offs) + continue; + if ((n->rndr_list.flags & HLIST_FL_UNORDERED) && + st->stys[st->list].fmt != ODT_STY_UL) + continue; + if ((n->rndr_list.flags & HLIST_FL_ORDERED) && + st->stys[st->list].fmt != ODT_STY_OL) + continue; + break; + } + if (st->list == st->stysz) { + if ((sty = odt_style_add(st)) == NULL) + return 0; + sty->type = LOWDOWN_LIST; + if (n->rndr_list.flags & HLIST_FL_ORDERED) + sty->fmt = ODT_STY_OL; + if (n->rndr_list.flags & HLIST_FL_UNORDERED) + sty->fmt = ODT_STY_UL; + sty->offs = st->offs; + snprintf(sty->name, sizeof(sty->name), + "L%zu", st->sty_L++); + } + curoffs = st->offs; + st->offs = 0; + curid = st->list; + break; + default: + break; + } + + TAILQ_FOREACH(child, &n->children, entries) + if (!rndr(tmp, mq, st, child)) + goto out; + + if (n->chng == LOWDOWN_CHNG_INSERT || + n->chng == LOWDOWN_CHNG_DELETE) { + pp = reallocarray(st->chngs, + st->chngsz + 1, sizeof(struct odt_chng)); + if (pp == NULL) + goto out; + st->chngs = pp; + st->chngs[st->chngsz].ins = + n->chng == LOWDOWN_CHNG_INSERT; + chngid = st->chngsz++; + if (!hbuf_printf(ob, + "<text:change-start" + " text:change-id=\"ct%zu\"/>", chngid)) + goto out; + } + + switch (n->type) { + case LOWDOWN_ROOT: + if (!rndr_root(ob, mq, tmp, st)) + goto out; + break; + case LOWDOWN_BLOCKCODE: + if (!rndr_blockcode(ob, &n->rndr_blockcode, st)) + goto out; + break; + case LOWDOWN_META: + if (n->chng != LOWDOWN_CHNG_DELETE && + !rndr_meta(ob, tmp, mq, n, st)) + goto out; + break; + case LOWDOWN_HEADER: + if (!rndr_header(ob, tmp, n, st)) + goto out; + break; + case LOWDOWN_HRULE: + if (!rndr_hrule(ob, st)) + goto out; + break; + case LOWDOWN_LIST: + 
if (!rndr_list(ob, tmp, &n->rndr_list, + curid == (size_t)-1 ? NULL : st->stys[curid].name)) + goto out; + break; + case LOWDOWN_LISTITEM: + if (!rndr_listitem(ob, tmp, n, st)) + goto out; + break; + case LOWDOWN_DEFINITION_TITLE: + case LOWDOWN_DEFINITION_DATA: + case LOWDOWN_PARAGRAPH: + if (!rndr_paragraph(ob, tmp, st)) + goto out; + break; + case LOWDOWN_TABLE_BLOCK: + if (!rndr_table(ob, tmp, &n->rndr_table, st)) + goto out; + break; + case LOWDOWN_TABLE_ROW: + if (!rndr_tablerow(ob, tmp)) + goto out; + break; + case LOWDOWN_TABLE_CELL: + if (!rndr_tablecell(ob, tmp, &n->rndr_table_cell, st)) + goto out; + break; + case LOWDOWN_BLOCKHTML: + if (!rndr_html(ob, &n->rndr_blockhtml.text, st)) + goto out; + break; + case LOWDOWN_LINK_AUTO: + if (!rndr_autolink(ob, &n->rndr_autolink, st)) + goto out; + break; + case LOWDOWN_CODESPAN: + if (!rndr_codespan(ob, &n->rndr_codespan, st)) + goto out; + break; + case LOWDOWN_TRIPLE_EMPHASIS: + case LOWDOWN_DOUBLE_EMPHASIS: + case LOWDOWN_EMPHASIS: + case LOWDOWN_STRIKETHROUGH: + case LOWDOWN_HIGHLIGHT: + case LOWDOWN_SUPERSCRIPT: + if (!rndr_span(ob, tmp, n, st)) + goto out; + break; + case LOWDOWN_IMAGE: + if (!rndr_image(ob, &n->rndr_image, st)) + goto out; + break; + case LOWDOWN_LINEBREAK: + if (!rndr_linebreak(ob)) + goto out; + break; + case LOWDOWN_LINK: + if (!rndr_link(ob, tmp, &n->rndr_link, st)) + goto out; + break; + case LOWDOWN_FOOTNOTE: + if (!rndr_footnote_ref(ob, tmp, st)) + goto out; + break; + case LOWDOWN_MATH_BLOCK: + if (!rndr_math(ob, &n->rndr_math, st)) + goto out; + break; + case LOWDOWN_RAW_HTML: + if (!rndr_html(ob, &n->rndr_raw_html.text, st)) + goto out; + break; + case LOWDOWN_NORMAL_TEXT: + if (!escape_htmlb(ob, &n->rndr_normal_text.text, st)) + goto out; + break; + case LOWDOWN_ENTITY: + ent = entity_find_iso(&n->rndr_entity.text); + if (ent > 0 && !hbuf_printf(ob, "&#%" PRId32 ";", ent)) + goto out; + if (ent <= 0 && !hbuf_putb(ob, &n->rndr_entity.text)) + goto out; + break; + default: + if 
(!hbuf_putb(ob, tmp)) + goto out; + break; + } + + if (n->chng == LOWDOWN_CHNG_INSERT || + n->chng == LOWDOWN_CHNG_DELETE) { + assert(chngid != (size_t)-1); + if (!hbuf_printf(ob, + "<text:change-end" + " text:change-id=\"ct%zu\"/>", chngid)) + goto out; + } + + switch (n->type) { + case LOWDOWN_DEFINITION_DATA: + case LOWDOWN_BLOCKQUOTE: + if (st->list == (size_t)-1) + st->offs--; + break; + case LOWDOWN_LIST: + if (curid != (size_t)-1) { + st->list = (size_t)-1; + st->offs = curoffs; + } + break; + default: + break; + } + + ret = 1; +out: + hbuf_free(tmp); + return ret; +} + +int +lowdown_odt_rndr(struct lowdown_buf *ob, + void *arg, const struct lowdown_node *n) +{ + struct odt *st = arg; + struct lowdown_metaq metaq; + int rc; + + TAILQ_INIT(&st->headers_used); + TAILQ_INIT(&metaq); + st->headers_offs = 1; + st->stys = NULL; + st->stysz = 0; + st->list = (size_t)-1; + st->foot = 0; + st->footcount = 0; + st->sty_T = st->sty_L = st->sty_P = st->sty_Table = 1; + st->chngs = NULL; + st->chngsz = 0; + + rc = rndr(ob, &metaq, st, n); + + free(st->stys); + free(st->chngs); + lowdown_metaq_free(&metaq); + hentryq_clear(&st->headers_used); + return rc; +} + +void * +lowdown_odt_new(const struct lowdown_opts *opts) +{ + struct odt *p; + + if ((p = calloc(1, sizeof(struct odt))) == NULL) + return NULL; + + p->flags = opts == NULL ? 0 : opts->oflags; + if (opts != NULL && opts->odt.sty != NULL && + (p->sty = strdup(opts->odt.sty)) == NULL) { + free(p); + p = NULL; + } + + return p; +} + +void +lowdown_odt_free(void *arg) +{ + struct odt *p = arg; + + if (p != NULL) + free(p->sty); + + free(p); +} diff --git a/odt.o b/odt.o Binary files differ. diff --git a/regress/bang-before-footnote.gemini b/regress/bang-before-footnote.gemini @@ -0,0 +1,5 @@ +This is a test![1] + +~~~~~~~~ + +[1] footnote text. 
diff --git a/regress/bang-before-footnote.html b/regress/bang-before-footnote.html @@ -0,0 +1,12 @@ +<p>This is a test!<sup id="fnref1"><a href="#fn1" rel="footnote">1</a></sup></p> + +<div class="footnotes"> +<hr/> +<ol> + +<li id="fn1"> +<p>footnote text. <a href="#fnref1" rev="footnote">↩</a></p> +</li> + +</ol> +</div> diff --git a/regress/bang-before-footnote.latex b/regress/bang-before-footnote.latex @@ -0,0 +1,5 @@ + +This is a test!\footnote[1]{ +footnote text. +} + diff --git a/regress/bang-before-footnote.man b/regress/bang-before-footnote.man @@ -0,0 +1,7 @@ +.PP +This is a test!\u\s-31\s+3\d +.LP +.sp 3 +\l'2i' +.LP +\0\fI\u\s-31\s+3\d\fP\0footnote text. diff --git a/regress/bang-before-footnote.md b/regress/bang-before-footnote.md @@ -0,0 +1,4 @@ + +This is a test![^01] + +[^01]: footnote text. diff --git a/regress/bang-before-footnote.ms b/regress/bang-before-footnote.ms @@ -0,0 +1,5 @@ +.PP +This is a test!\** +.FS +footnote text. +.FE diff --git a/regress/bang-before-metadata.gemini b/regress/bang-before-metadata.gemini @@ -0,0 +1 @@ +This is a test!bar diff --git a/regress/bang-before-metadata.html b/regress/bang-before-metadata.html @@ -0,0 +1 @@ +<p>This is a test!bar</p> diff --git a/regress/bang-before-metadata.latex b/regress/bang-before-metadata.latex @@ -0,0 +1,2 @@ + +This is a test!bar diff --git a/regress/bang-before-metadata.man b/regress/bang-before-metadata.man @@ -0,0 +1,2 @@ +.PP +This is a test!bar diff --git a/regress/bang-before-metadata.md b/regress/bang-before-metadata.md @@ -0,0 +1,3 @@ +foo: bar + +This is a test![%foo] diff --git a/regress/bang-before-metadata.ms b/regress/bang-before-metadata.ms @@ -0,0 +1,2 @@ +.PP +This is a test!bar diff --git a/regress/diff/diff.html b/regress/diff/diff.html @@ -0,0 +1,411 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<meta name="creator" content="BSD.lv" /> +<meta name="author" content="Kristaps 
Dzonsons" /> +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" /> +<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Alegreya+Sans:400,400italic,500,700" /> +<link rel="stylesheet" href="diff.css" /> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script src="diff.js"></script> +<title>Lowdown Diffing Engine</title> +</head> +<body> + +<h1 id="lowdown-diffing-engine">Lowdown Diffing Engine</h1> + +<p>In this paper, I briefly describe the “diff” engine used in +<a href="https://kristaps.bsd.lv/lowdown/lowdown.1.html">lowdown-diff(1)</a> tool +in <a href="https://kristaps.bsd.lv/lowdown/index.html">lowdown</a>. The work is +motivated by the need to provide formatted output describing the +difference between two documents—specifically, formatted PDF via the +<strong>-Tms</strong> output, although <strong>-Thtml</strong> and the other output modes are of +course supported.</p> +<ins> +<p>This documents an early work in progress—both source code and +documentation. The source is documented fully in +<a href="https://github.com/kristapsdz/lowdown/blob/master/diff.c">diff.c</a>. +This paper itself is available as +<a href="https://github.com/kristapsdz/lowdown/blob/master/diff.md">diff.md</a>, or downloadable as +<a href="https://kristaps.bsd.lv/lowdown/diff.pdf">diff.pdf</a>. 
+Please direct comments to me by e-mail or just use the <a href="https://github.com/kristapsdz/lowdown">GitHub +interface</a>.</p> +</ins> +<p>For a quick example of this functionality, see +<a href="https://kristaps.bsd.lv/lowdown/diff.diff.html">diff.diff.html</a><ins> +(or </ins><ins><a href="https://kristaps.bsd.lv/lowdown/diff.diff.pdf">diff.diff.pdf</a></ins><ins>), which +shows the difference between this document and a [fabricated] earlier +version.</ins><del>.</del></p> + +<h2 id="introduction">Introduction</h2> + +<p>Let two source files, <del><em>foo.md</em></del><ins><em>old.md</em></ins> and <del><em>bar.md</em></del><ins><em>new.md</em></ins>, refer to the old and new versions of a file respectively.<ins>The</ins> <ins>goal</ins> <ins>is</ins> <ins>to</ins> <ins>establish</ins> <ins>the</ins> <ins>changes</ins> <ins>between</ins> <ins>these</ins> <ins>snippets</ins> <ins>in</ins> <ins>formatted</ins> <ins>output.</ins> <ins>Let</ins>’s <ins>begin</ins> <ins>with</ins> <ins>the</ins> <ins>old</ins> <ins>version,</ins> <ins><em>old.md</em></ins><ins>.</ins></p> + +<pre><code class="language-markdown">*Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do +eiusmod tempor incididunt ut [labore](index.html) et dolore magna +aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco +laboris nisi ut _aliquip_ ex ea commodo consequat. Duis aute irure dolor +in reprehenderit... +</code></pre> + +<p>In the new version, <em>new.md</em>, I add some more links and styles.</p> + +<pre><code class="language-markdown">*Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do +eiusmod tempor incididunt ut [labore](index.html) et dolore [magna +aliqua](index.html). Ut enim ad minim veniam, quis nostrud exercitation +ullamco laboris nisi ut _aliquip_ ex ea commodo consequat. Duis *aute +irure* dolor in reprehenderit... 
+</code></pre> + +<p>The most simple way of viewing changes is with the venerable +<a href="https://man.openbsd.org/diff.1">diff(1)</a> utility. However, this will +only reflect changes in the input document—not the formatted output.</p> + +<pre><code class="language-diff">--- old.md Tue Oct 17 11:25:01 2017 ++++ new.md Tue Oct 17 11:25:01 2017 +@@ -1,5 +1,5 @@ + *Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do +-eiusmod tempor incididunt ut [labore](index.html) et dolore magna +-aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco +-laboris nisi ut _aliquip_ ex ea commodo consequat. Duis aute irure dolor +-in reprehenderit... ++eiusmod tempor incididunt ut [labore](index.html) et dolore [magna ++aliqua](index.html). Ut enim ad minim veniam, quis nostrud exercitation ++ullamco laboris nisi ut _aliquip_ ex ea commodo consequat. Duis *aute ++irure* dolor in reprehenderit... +</code></pre> + +<p>Not very helpful for any but source-level change investigation. And +given that Markdown doesn’t encourage the usual “new sentence, new line” +of some languages (like <a href="https://man.openbsd.org/mdoc.7">mdoc(7)</a>), even +this level of change analysis is difficult: a single change might affect +multiple re-wrapped lines.</p> + +<p>A similar possibility is to use +<a href="https://www.gnu.org/software/wdiff/">wdiff(1)</a>, which produces a set of +word-by-word differences.</p> + +<pre><code class="language-markdown">*Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do +eiusmod tempor incididunt ut [labore](index.html) et dolore [-magna +aliqua.-] {+[magna aliqua](index.html).+} Ut enim ad minim veniam, quis +nostrud exercitation ullamco laboris nisi ut _aliquip_ ex ea commodo +consequat. Duis [-aute irure-] {+*aute irure*+} dolor in +reprehenderit... +</code></pre> + +<p>One could then extend the Markdown language to accept the insertion and +deletion operations and let the output flow from there. 
+(In fact, that was my first approach to solving the problem.)</p> + +<p>Unfortunately, doing so entails extending a language already prone to extension and non-standardisation. <del>Here</del> <del>is</del> <del>some</del> <del>added</del> <del>text.</del> More unfortunately, a word-based diff will not be sensitive to the <del>shmarkdown</del> <ins>Markdown</ins> language itself, and in establishing context-free <del>foo</del> <del>bar</del> <del>baz</del> <ins>sequences</ins> of similar words, will overrun block and span element boundaries.</p> + +<p>On the other end of the spectrum are difference tools specific to the +output media.</p> +<ins> +<p>One can directly analyse PDF output using (for example) the +<a href="https://poppler.freedesktop.org/">poppler</a> tools, which would extract +text, then examine the output with a linear difference engine such as +<a href="https://code.google.com/archive/p/google-diff-match-patch/">Diff, Match, +Patch</a>. +This is not an optimal solution because, as with the word diff above, it +only compares words and cannot distinguish semantic artefacts such as in +italics, links, code blocks, and so on.</p> +</ins> +<p>There are even more <a href="https://www.w3.org/wiki/HtmlDiff">HTML diff</a> tools +available, so it’s tempting to use one of these tools to produce an HTML +file consisting of differences, then further use a converter like +<a href="https://wkhtmltopdf.org/">wkhtmltopdf</a> to generate PDFs.</p> + +<p>Since the HTML difference engines often respect the structure of HTML, +this is much more optimal in handling semantic difference. However, +re-structuring the difference does not easily produce a document of the +same style or readability as the PDFs themselves.</p> + +<p>The most elegant (and reliable) solution is to attack the problem from +the language-level itself. 
Since the +<a href="https://kristaps.bsd.lv/lowdown/lowdown.3.html">lowdown(3)</a> +library is able to produce a full parse tree for analysis, it stands to +reason, given the wealth of literature on tree differences (instead of +the usual linear difference, as in the case of +<a href="https://man.openbsd.org/diff.1">diff(1)</a> and friends), one can work +within the language to produce differences.<del><sup id="fnref1"><a href="#fn1" rel="footnote">1</a></sup></del></p> + +<h2 id="algorithm">Algorithm</h2> + +<p>The algorithm is in effect an ordered tree diff. I began with +well-studied algorithms for a well-studied problem: XML tree +differences. (The HTML difference tools described above inherit from +those algorithms.) For an overview of algorithms, see Change Detection +in XML Trees: a Survey<sup id="fnref2"><a href="#fn2" rel="footnote">2</a></sup>. I base the +<a href="https://kristaps.bsd.lv/lowdown/lowdown.1.html">lowdown-diff(1)</a> algorithm off +of Detecting Changes in XML Documents<sup id="fnref3"><a href="#fn3" rel="footnote">3</a></sup>.</p> + +<p>The reason for this choice instead of another is primarily the ease in +implementation. Moreover, since the programmatic output of the +algorithm is a generic AST, it’s feasible to re-implement the algorithm +in different ways, or augment it at a later date.</p> + +<p>The BULD algorithm described in this paper is straightforward. It +begins with a short sanitisation pass.</p> + +<ol> +<li><p>Annotate each node in the parse tree with a hash of the subtree +rooted at the node, inclusive. +(<a href="https://github.com/kristapsdz/lowdown/blob/master/diff.c">diff.c</a>, +<code>annotate_sigs()</code>)</p></li> +<li><p>Annotate each node with a weight corresponding to the subtree rooted +at the node. +(<a href="https://github.com/kristapsdz/lowdown/blob/master/diff.c">diff.c</a>, +<code>annotate_sigs()</code>)</p></li> +<li><p>Enqueue the new document’s root node in a priority queue ordered by +weight. 
Then, while the priority queue is non-empty: +(<a href="https://github.com/kristapsdz/lowdown/blob/master/diff.c">diff.c</a>, +<code>lowdown_diff()</code>)</p> + +<ol> +<li>Pop the first node of the priority queue.</li> +<li>Look for candidates in the old document whose hash matches the +popped node’s hash. +(<a href="https://github.com/kristapsdz/lowdown/blob/master/diff.c">diff.c</a>, +<code>candidate()</code>)</li> +<li>Choose an optimal candidate and mark it as matched. +(<a href="https://github.com/kristapsdz/lowdown/blob/master/diff.c">diff.c</a>, +<code>optimality()</code>)</li> +<li>If the no candidates were found, enqueue the node’s children into +the priority queue.</li> +<li>A a candidate was selected, mark all of its subtree nodes as +matching the corresponding nodes in the old tree (“propogate +down”), then mark ancestor nodes similarly (“propogate up”). +(<a href="https://github.com/kristapsdz/lowdown/blob/master/diff.c">diff.c</a>, +<code>match_up()</code>, <code>match_down()</code>)</li> +</ol></li> +<li><p>Run an optimisation phase over the new document’s root node. +(<ins><a href="https://github.com/kristapsdz/lowdown/blob/master/diff.c">diff.c</a></ins><del><a href="https://github.com/kristapsdz/lowdown/blob/master/diff.c">diff.c</a></del>, +<del><code>node_optimise()</code></del><ins><code>node_optimise_bottomup()</code></ins><ins> and </ins><ins><code>node_optimise_topdown()</code></ins>)</p></li> +<li><p>Step through both trees and create a new tree with nodes cloned from +both and marked as inserted or deleted. +(<a href="https://github.com/kristapsdz/lowdown/blob/master/diff.c">diff.c</a>, +<code>node_merge()</code>)</p></li> +</ol> + +<p>My implementation changes or extends the BULD algorithm in several small +ways, described in the per-step documentation below.</p> + +<h3 id="sanitise">Sanitise</h3> + +<p>Before the BULD algorithm is run, the input tree is sanitised. This +process merges all adjacent text nodes into a single text node. 
By +doing so, possible differences are pushed into large blocks of +contiguous text—which in this case are managed by the word-difference +algorithm described later in this paper.</p> + +<h3 id="annotation">Annotation</h3> + +<p>Each node in the tree is annotated with a hash and a weight. The hash, +MD5, is computed in all data concerning a node. For example, normal +text nodes (<code>LOWDOWN_NORMAL_TEXT</code>) have the hash produced from the +enclosed text. Autolinks (<code>LOWDOWN_LINK_AUTO</code>) use the link text, link, +and type.</p> + +<p>There are some nodes whose data is not hashed. For example, list +numbers: since these may change when nodes are moved, the numbers are +not part of the hash. In general, all volatile information that may be +inferred from the document structure (column number, list item number, +footnote number, etc.) is disregarded.</p> + +<p>Non-leaf nodes compute their hashes from the node type and the hashes of +all of their children. Thus, this step is a bottom-up search.</p> + +<p>Node weight is computed exactly as noted in the paper.</p> + +<h3 id="optimal-candidacy">Optimal candidacy</h3> + +<p>A node’s candidate in the old tree is one whose hash matches. In most +documents, there are many candidates for certain types of nodes. +(Usually text nodes.)</p> + +<p>Candidate optimality is computed by looking at the number of parent +nodes that match on both sides. The number of parents to consider is +noted in the next sub-section. The distance climbed is bounded by the +weight of the sub-tree as defined in the paper.</p> + +<p>In the event of similar optimality, the node “closest” to the current +node is chosen. Proximity is defined by the node identifier, which is +its prefix order in the parse tree.</p> + +<h3 id="propogate-up">“Propogate up”</h3> + +<p>When propogating a match upward, the distance upward is bound depending +on the matched sub-tree as defined in the paper. 
This makes it so that +“small” similarities (such as text) don’t erroneously match two larger +sub-trees that are otherwise different. Upward matches occur while the +nodes’ labels are the same, including attributes (e.g., link text).</p> + +<p>I did modify the algorithm to propogate upward “for free” through +similar singleton nodes, even if it means going beyond the maximum +number allowed by the sub-tree weight.</p> + +<h3 id="optimisation">Optimisation</h3> + +<p>The <a href="https://kristaps.bsd.lv/lowdown/lowdown.1.html">lowdown-diff(1)</a> +algorithm has two optimisations, both lightly derived from the paper: +top-down and bottom-up propogation.</p> + +<h4 id="top-down">Top-down</h4> + +<p>The top-down optimisation, which is performed first, takes matched nodes and matches un-matched, non-terminal children by label.<ins>The</ins> <ins>children</ins> <ins>examined</ins> <ins>must</ins> <ins>be</ins> <ins>siblings</ins> <ins>of</ins> <ins>adjacent</ins> <ins>matching</ins> <ins>nodes.</ins></p> + +<p>This is useful when, say, a document consists of several paragraphs +where the text has changed within paragraphs. It won’t be able to match +the text content, but it will match the paragraphs, which will push the +difference downward in the tree.</p> + +<h4 id="bottom-up">Bottom-up</h4> + +<p>In the bottom-up propogation, the weight of any given sub-tree is used +to compute how high a match will propogate. I extend the paper’s +version optimisation by looking at the cumulative weight of matching +children.</p> + +<p>This works well for Markdown documents, which are generally quite +shallow and text-heavy.</p> + +<p>For each unmatched non-terminal node with at least one +matched child, the weights of all matched children with the same parents +(where the parent node is equal in label and attributes to the examined +node) are computed. 
If any given parent of the matched children has +greater than 50% of the possible weight, it is matched.</p> + +<h3 id="merging">Merging</h3> + +<p>The merging phase, which is not described in the paper, is very +straightforward. It uses a recursive merge algorithm starting at the +root node of the new tree and the root node of the old tree.</p> + +<ol> +<li>The invariant is that the current node is matched by the corresonding +node in the old tree.</li> +<li>First, step through child nodes in the old tree. Mark as deleted all +nodes not being matched to the new tree.</li> +<li>Next, step through child nodes in the new tree. Mark as inserted all +nodes not having a match in the old tree.</li> +<li>Now, starting with the first node in the new tree having a match in +the old tree, search for that match in the list of old tree children.</li> +<li>If found, mark as deleted all previous old tree nodes up until the +match. Then re-run the merge algorithm starting at the matching +child nodes.</li> +<li>If not found, mark the new node as inserted and return to (2).</li> +<li>Continue (after the recursive step) by moving after both matching +nodes and returning to (2).</li> +</ol> + +<p>If adjacent nodes are un-matched normal text, the normal text nodes are +compared word-by-word to compute a shortest-edit script. This is +enabled by <a href="https://github.com/kristapsdz/libdiff">libdiff</a>, which +implements an algorithm for computing the longest common +subsequence<sup id="fnref4"><a href="#fn4" rel="footnote">4</a></sup>. See <sup id="fnref5"><a href="#fn5" rel="footnote">5</a></sup> for background information.</p> +<ins> +<h3 id="tables">Tables</h3> +</ins><ins> +<p>Tables are currently in the “hacks” state in that they’re considered as +opaque bodies. 
Tables that have changed in any way are simply deleted +and re-added: there’s no attempt to discern actual differences.</p> +</ins><ins> +<p>There are ways to reduce this opacity, such as being able to detect and +account for added or removed rows. However, ultimately there is some +opacity in that changed table headers do not have a representable form +in the output.</p> +</ins><ins> +<h3 id="metadata">Metadata</h3> +</ins><ins> +<p>Like tables, metadata key-value pairs are opaque bodies. Metadata has a +complex relationship with Markdown documents, which leaves how to handle +the “difference” uncertain.</p> +</ins><ins> +<p>The difference engine, after computing differences like any other opaque +nodes, simply passes the difference to front-ends, which determine how +to handle this for themselves. For front-ends that use the metadata in +creating document headers (e.g., HTML, LaTeX, roff), the policy is not +to process deleted metadata.</p> +</ins><ins> +<p>Thus, metadata won’t strictly represent the document differences.</p> +</ins><ins> +<h3 id="footnotes">Footnotes</h3> +</ins><ins> +<p>Footnotes required some consideration because the order in which the +definitions and references are printed must be synchronised. Now a +process keeps track of all references (added, deleted, unchanged) and +renumbers then. When references are emitted, these are emitted in the +correct order.</p> +</ins> +<h2 id="api">API</h2> + +<p>The result of the algorithm is a new tree marked with insertions and +deletions. 
These are specifically noted with the <code>LOWDOWN_CHNG_INSERT</code> +and <code>LOWDOWN_CHNG_DELETE</code> variables.</p> + +<p>The algorithm may be run with the <code>lowdown_diff()</code> function, which +produces the merged tree.</p> + +<p>A set of convenience functions, <code>lowdown_buf_diff()</code> and +<code>lowdown_file_diff()</code>, also provide this functionality.</p> + +<h2 id="future-work">Future work</h2> + +<p>There are many possible improvements to the algorithm.</p> +<del> +<p>Foremost is the issue of normal text nodes. There should be a process +first that merges consecutive text nodes. This happens, for example, +when the <code>w</code> character is encountered at any time and might signify a +link. The parsing algorithm will start a new text node at each such +character. </p> +</del> +<p>The merging algorithm can also take advantage of +<a href="https://github.com/kristapsdz/libdiff">libdiff</a> when ordering the +output of inserted and deleted components. Right now, the algorithm is +simple in the sense of stopping at the earliest matched node without +considering subsequences.</p> +<del> +<p>Lastly, the <strong>-Tms</strong> and <strong>-Tman</strong> output needs work to make sure that +the insert/delete macros don’t disrupt the flow of text.</p> +</del> +<p>Document last updated: $Date$</p> + +<div class="footnotes"> +<hr/> +<ol> + +<li id="fn1"> +<p>This is just to illustrate a removed footnote. <a href="#fnref1" rev="footnote">↩</a></p> +</li> + +<li id="fn2"> +<p><a href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.73.8912&rep=rep1&type=pdf">Change Detection in XML Trees: a +Survey</a> +(2005), Luuk Peters. <a href="#fnref2" rev="footnote">↩</a></p> +</li> + +<li id="fn3"> +<p><a href="https://www.cs.rutgers.edu/~amelie/papers/2002/diff.pdf">Detecting Changes in XML +Documents</a> +(2002), Gregory Cobena, Serge Abiteboul, Amelie Marian. 
<a href="#fnref3" rev="footnote">↩</a></p> +</li> + +<li id="fn4"> +<p><a href="https://publications.mpi-cbg.de/Wu_1990_6334.pdf">An O(NP) sequence comparison +algorithm</a> (1990), +Sun Wu, Udi Manber, Gene Myers. <a href="#fnref4" rev="footnote">↩</a></p> +</li> + +<li id="fn5"> +<p><a href="https://www.cs.dartmouth.edu/~doug/diff.pdf">An Algorithm for Differential File +Comparison</a>(1976), +James W. Hunt, M. Douglas McIlroy. <a href="#fnref5" rev="footnote">↩</a></p> +</li> + +</ol> +</div> +</body> +</html> diff --git a/regress/diff/diff.new.md b/regress/diff/diff.new.md @@ -0,0 +1,386 @@ +javascript: https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js + diff.js +author: Kristaps Dzonsons +affiliation: BSD.lv +rcsdate: $Date$ +title: Lowdown Diffing Engine +css: https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css + https://fonts.googleapis.com/css?family=Alegreya+Sans:400,400italic,500,700 + diff.css + +# Lowdown Diffing Engine + +In this paper, I briefly describe the "diff" engine used in +[lowdown-diff(1)](https://kristaps.bsd.lv/lowdown/lowdown.1.html) tool +in [lowdown](https://kristaps.bsd.lv/lowdown/index.html). The work is +motivated by the need to provide formatted output describing the +difference between two documents---specifically, formatted PDF via the +**-Tms** output, although **-Thtml** and the other output modes are of +course supported. + +This documents an early work in progress---both source code and +documentation. The source is documented fully in +[diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c). +This paper itself is available as +[diff.md](https://github.com/kristapsdz/lowdown/blob/master/diff.md), or downloadable as +[diff.pdf](https://kristaps.bsd.lv/lowdown/diff.pdf). +Please direct comments to me by e-mail or just use the [GitHub +interface](https://github.com/kristapsdz/lowdown). 
+ +For a quick example of this functionality, see +[diff.diff.html](https://kristaps.bsd.lv/lowdown/diff.diff.html) +(or [diff.diff.pdf](https://kristaps.bsd.lv/lowdown/diff.diff.pdf)), which +shows the difference between this document and a [fabricated] earlier +version. + +## Introduction + +Let two source files, *old.md* and *new.md*, refer to the old and new +versions of a file respectively. The goal is to establish the changes +between these snippets in formatted output. Let's begin with the old +version, *old.md*. + +```markdown +*Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do +eiusmod tempor incididunt ut [labore](index.html) et dolore magna +aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco +laboris nisi ut _aliquip_ ex ea commodo consequat. Duis aute irure dolor +in reprehenderit... +``` + +In the new version, *new.md*, I add some more links and styles. + +```markdown +*Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do +eiusmod tempor incididunt ut [labore](index.html) et dolore [magna +aliqua](index.html). Ut enim ad minim veniam, quis nostrud exercitation +ullamco laboris nisi ut _aliquip_ ex ea commodo consequat. Duis *aute +irure* dolor in reprehenderit... +``` + +The most simple way of viewing changes is with the venerable +[diff(1)](https://man.openbsd.org/diff.1) utility. However, this will +only reflect changes in the input document---not the formatted output. + +```diff +--- old.md Tue Oct 17 11:25:01 2017 ++++ new.md Tue Oct 17 11:25:01 2017 +@@ -1,5 +1,5 @@ + *Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do +-eiusmod tempor incididunt ut [labore](index.html) et dolore magna +-aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco +-laboris nisi ut _aliquip_ ex ea commodo consequat. Duis aute irure dolor +-in reprehenderit... ++eiusmod tempor incididunt ut [labore](index.html) et dolore [magna ++aliqua](index.html). 
Ut enim ad minim veniam, quis nostrud exercitation ++ullamco laboris nisi ut _aliquip_ ex ea commodo consequat. Duis *aute ++irure* dolor in reprehenderit... +``` + +Not very helpful for any but source-level change investigation. And +given that Markdown doesn't encourage the usual "new sentence, new line" +of some languages (like [mdoc(7)](https://man.openbsd.org/mdoc.7)), even +this level of change analysis is difficult: a single change might affect +multiple re-wrapped lines. + +A similar possibility is to use +[wdiff(1)](https://www.gnu.org/software/wdiff/), which produces a set of +word-by-word differences. + +```markdown +*Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do +eiusmod tempor incididunt ut [labore](index.html) et dolore [-magna +aliqua.-] {+[magna aliqua](index.html).+} Ut enim ad minim veniam, quis +nostrud exercitation ullamco laboris nisi ut _aliquip_ ex ea commodo +consequat. Duis [-aute irure-] {+*aute irure*+} dolor in +reprehenderit... +``` + +One could then extend the Markdown language to accept the insertion and +deletion operations and let the output flow from there. +(In fact, that was my first approach to solving the problem.) + +Unfortunately, doing so entails extending a language already prone to +extension and non-standardisation. More unfortunately, a word-based +diff will not be sensitive to the Markdown language itself, and in +establishing context-free sequences of similar words, will overrun block +and span element boundaries. + +On the other end of the spectrum are difference tools specific to the +output media. + +One can directly analyse PDF output using (for example) the +[poppler](https://poppler.freedesktop.org/) tools, which would extract +text, then examine the output with a linear difference engine such as +[Diff, Match, +Patch](https://code.google.com/archive/p/google-diff-match-patch/). 
+This is not an optimal solution because, as with the word diff above, it +only compares words and cannot distinguish semantic artefacts such as in +italics, links, code blocks, and so on. + +There are even more [HTML diff](https://www.w3.org/wiki/HtmlDiff) tools +available, so it's tempting to use one of these tools to produce an HTML +file consisting of differences, then further use a converter like +[wkhtmltopdf](https://wkhtmltopdf.org/) to generate PDFs. + +Since the HTML difference engines often respect the structure of HTML, +this is much more optimal in handling semantic difference. However, +re-structuring the difference does not easily produce a document of the +same style or readability as the PDFs themselves. + +The most elegant (and reliable) solution is to attack the problem from +the language-level itself. Since the +[lowdown(3)](https://kristaps.bsd.lv/lowdown/lowdown.3.html) +library is able to produce a full parse tree for analysis, it stands to +reason, given the wealth of literature on tree differences (instead of +the usual linear difference, as in the case of +[diff(1)](https://man.openbsd.org/diff.1) and friends), one can work +within the language to produce differences. + +## Algorithm + +The algorithm is in effect an ordered tree diff. I began with +well-studied algorithms for a well-studied problem: XML tree +differences. (The HTML difference tools described above inherit from +those algorithms.) For an overview of algorithms, see Change Detection +in XML Trees: a Survey[^Peters2005]. I base the +[lowdown-diff(1)](https://kristaps.bsd.lv/lowdown/lowdown.1.html) algorithm off +of Detecting Changes in XML Documents[^Cobena2002]. + +The reason for this choice instead of another is primarily the ease in +implementation. Moreover, since the programmatic output of the +algorithm is a generic AST, it's feasible to re-implement the algorithm +in different ways, or augment it at a later date. 
+ +The BULD algorithm described in this paper is straightforward. It +begins with a short sanitisation pass. + +1. Annotate each node in the parse tree with a hash of the subtree + rooted at the node, inclusive. + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `annotate_sigs()`) + +2. Annotate each node with a weight corresponding to the subtree rooted + at the node. + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `annotate_sigs()`) + +3. Enqueue the new document's root node in a priority queue ordered by + weight. Then, while the priority queue is non-empty: + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `lowdown_diff()`) + + 1. Pop the first node of the priority queue. + 2. Look for candidates in the old document whose hash matches the + popped node's hash. + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `candidate()`) + 3. Choose an optimal candidate and mark it as matched. + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `optimality()`) + 4. If the no candidates were found, enqueue the node's children into + the priority queue. + 5. A a candidate was selected, mark all of its subtree nodes as + matching the corresponding nodes in the old tree ("propogate + down"), then mark ancestor nodes similarly ("propogate up"). + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `match_up()`, `match_down()`) + +4. Run an optimisation phase over the new document's root node. + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `node_optimise_bottomup()` and `node_optimise_topdown()`) + +5. Step through both trees and create a new tree with nodes cloned from + both and marked as inserted or deleted. 
+ ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `node_merge()`) + +My implementation changes or extends the BULD algorithm in several small +ways, described in the per-step documentation below. + +### Sanitise + +Before the BULD algorithm is run, the input tree is sanitised. This +process merges all adjacent text nodes into a single text node. By +doing so, possible differences are pushed into large blocks of +contiguous text---which in this case are managed by the word-difference +algorithm described later in this paper. + +### Annotation + +Each node in the tree is annotated with a hash and a weight. The hash, +MD5, is computed in all data concerning a node. For example, normal +text nodes (`LOWDOWN_NORMAL_TEXT`) have the hash produced from the +enclosed text. Autolinks (`LOWDOWN_LINK_AUTO`) use the link text, link, +and type. + +There are some nodes whose data is not hashed. For example, list +numbers: since these may change when nodes are moved, the numbers are +not part of the hash. In general, all volatile information that may be +inferred from the document structure (column number, list item number, +footnote number, etc.) is disregarded. + +Non-leaf nodes compute their hashes from the node type and the hashes of +all of their children. Thus, this step is a bottom-up search. + +Node weight is computed exactly as noted in the paper. + +### Optimal candidacy + +A node's candidate in the old tree is one whose hash matches. In most +documents, there are many candidates for certain types of nodes. +(Usually text nodes.) + +Candidate optimality is computed by looking at the number of parent +nodes that match on both sides. The number of parents to consider is +noted in the next sub-section. The distance climbed is bounded by the +weight of the sub-tree as defined in the paper. + +In the event of similar optimality, the node "closest" to the current +node is chosen. 
Proximity is defined by the node identifier, which is +its prefix order in the parse tree. + +### "Propogate up" + +When propogating a match upward, the distance upward is bound depending +on the matched sub-tree as defined in the paper. This makes it so that +"small" similarities (such as text) don't erroneously match two larger +sub-trees that are otherwise different. Upward matches occur while the +nodes' labels are the same, including attributes (e.g., link text). + +I did modify the algorithm to propogate upward "for free" through +similar singleton nodes, even if it means going beyond the maximum +number allowed by the sub-tree weight. + +### Optimisation + +The [lowdown-diff(1)](https://kristaps.bsd.lv/lowdown/lowdown.1.html) +algorithm has two optimisations, both lightly derived from the paper: +top-down and bottom-up propogation. + +#### Top-down + +The top-down optimisation, which is performed first, takes matched nodes +and matches un-matched, non-terminal children by label. The children +examined must be siblings of adjacent matching nodes. + +This is useful when, say, a document consists of several paragraphs +where the text has changed within paragraphs. It won't be able to match +the text content, but it will match the paragraphs, which will push the +difference downward in the tree. + +#### Bottom-up + +In the bottom-up propogation, the weight of any given sub-tree is used +to compute how high a match will propogate. I extend the paper's +version optimisation by looking at the cumulative weight of matching +children. + +This works well for Markdown documents, which are generally quite +shallow and text-heavy. + +For each unmatched non-terminal node with at least one +matched child, the weights of all matched children with the same parents +(where the parent node is equal in label and attributes to the examined +node) are computed. If any given parent of the matched children has +greater than 50% of the possible weight, it is matched. 
+ +### Merging + +The merging phase, which is not described in the paper, is very +straightforward. It uses a recursive merge algorithm starting at the +root node of the new tree and the root node of the old tree. + +1. The invariant is that the current node is matched by the corresponding + node in the old tree. +2. First, step through child nodes in the old tree. Mark as deleted all + nodes not being matched to the new tree. +3. Next, step through child nodes in the new tree. Mark as inserted all + nodes not having a match in the old tree. +4. Now, starting with the first node in the new tree having a match in + the old tree, search for that match in the list of old tree children. +5. If found, mark as deleted all previous old tree nodes up until the + match. Then re-run the merge algorithm starting at the matching + child nodes. +6. If not found, mark the new node as inserted and return to (2). +7. Continue (after the recursive step) by moving after both matching + nodes and returning to (2). + +If adjacent nodes are un-matched normal text, the normal text nodes are +compared word-by-word to compute a shortest-edit script. This is +enabled by [libdiff](https://github.com/kristapsdz/libdiff), which +implements an algorithm for computing the longest common +subsequence[^Wu90]. See [^McIlroy1976] for background information. + +### Tables + +Tables are currently in the "hacks" state in that they're considered as +opaque bodies. Tables that have changed in any way are simply deleted +and re-added: there's no attempt to discern actual differences. + +There are ways to reduce this opacity, such as being able to detect and +account for added or removed rows. However, ultimately there is some +opacity in that changed table headers do not have a representable form +in the output. + +### Metadata + +Like tables, metadata key-value pairs are opaque bodies. Metadata has a +complex relationship with Markdown documents, which leaves how to handle +the "difference" uncertain. 
+ +The difference engine, after computing differences like any other opaque +nodes, simply passes the difference to front-ends, which determine how +to handle this for themselves. For front-ends that use the metadata in +creating document headers (e.g., HTML, LaTeX, roff), the policy is not +to process deleted metadata. + +Thus, metadata won't strictly represent the document differences. + +### Footnotes + +Footnotes required some consideration because the order in which the +definitions and references are printed must be synchronised. Now a +process keeps track of all references (added, deleted, unchanged) and +renumbers them. When references are emitted, these are emitted in the +correct order. + +## API + +The result of the algorithm is a new tree marked with insertions and +deletions. These are specifically noted with the `LOWDOWN_CHNG_INSERT` +and `LOWDOWN_CHNG_DELETE` variables. + +The algorithm may be run with the `lowdown_diff()` function, which +produces the merged tree. + +A set of convenience functions, `lowdown_buf_diff()` and +`lowdown_file_diff()`, also provide this functionality. + +## Future work + +There are many possible improvements to the algorithm. + +The merging algorithm can also take advantage of +[libdiff](https://github.com/kristapsdz/libdiff) when ordering the +output of inserted and deleted components. Right now, the algorithm is +simple in the sense of stopping at the earliest matched node without +considering subsequences. + +Document last updated: [%rcsdate] + +[^Cobena2002]: [Detecting Changes in XML + Documents](https://www.cs.rutgers.edu/~amelie/papers/2002/diff.pdf) + (2002), Gregory Cobena, Serge Abiteboul, Amelie Marian. + +[^Peters2005]: [Change Detection in XML Trees: a + Survey](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.73.8912&rep=rep1&type=pdf) + (2005), Luuk Peters. + +[^McIlroy1976]: [An Algorithm for Differential File + Comparison](https://www.cs.dartmouth.edu/~doug/diff.pdf) (1976), + James W. Hunt, M. 
Douglas McIlroy. + +[^Wu90]: [An O(NP) sequence comparison + algorithm](https://publications.mpi-cbg.de/Wu_1990_6334.pdf) (1990), + Sun Wu, Udi Manber, Gene Myers. diff --git a/regress/diff/diff.old.md b/regress/diff/diff.old.md @@ -0,0 +1,340 @@ +javascript: https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js + diff.js +author: Kristaps Dzonsons +affiliation: BSD.lv +rcsdate: $Date$ +title: Lowdown Diffing Engine +css: https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css + https://fonts.googleapis.com/css?family=Alegreya+Sans:400,400italic,500,700 + diff.css + +# Lowdown Diffing Engine + +In this paper, I briefly describe the "diff" engine used in +[lowdown-diff(1)](https://kristaps.bsd.lv/lowdown/lowdown.1.html) tool +in [lowdown](https://kristaps.bsd.lv/lowdown/index.html). The work is +motivated by the need to provide formatted output describing the +difference between two documents---specifically, formatted PDF via the +**-Tms** output, although **-Thtml** and the other output modes are of +course supported. + +For a quick example of this functionality, see +[diff.diff.html](https://kristaps.bsd.lv/lowdown/diff.diff.html). + +## Introduction + +Let two source files, *foo.md* and *bar.md*, refer to the old and new +versions of a file respectively. + +```markdown +*Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do +eiusmod tempor incididunt ut [labore](index.html) et dolore magna +aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco +laboris nisi ut _aliquip_ ex ea commodo consequat. Duis aute irure dolor +in reprehenderit... +``` + +In the new version, *new.md*, I add some more links and styles. + +```markdown +*Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do +eiusmod tempor incididunt ut [labore](index.html) et dolore [magna +aliqua](index.html). Ut enim ad minim veniam, quis nostrud exercitation +ullamco laboris nisi ut _aliquip_ ex ea commodo consequat. 
Duis *aute +irure* dolor in reprehenderit... +``` + +The most simple way of viewing changes is with the venerable +[diff(1)](https://man.openbsd.org/diff.1) utility. However, this will +only reflect changes in the input document---not the formatted output. + +```diff +--- old.md Tue Oct 17 11:25:01 2017 ++++ new.md Tue Oct 17 11:25:01 2017 +@@ -1,5 +1,5 @@ + *Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do +-eiusmod tempor incididunt ut [labore](index.html) et dolore magna +-aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco +-laboris nisi ut _aliquip_ ex ea commodo consequat. Duis aute irure dolor +-in reprehenderit... ++eiusmod tempor incididunt ut [labore](index.html) et dolore [magna ++aliqua](index.html). Ut enim ad minim veniam, quis nostrud exercitation ++ullamco laboris nisi ut _aliquip_ ex ea commodo consequat. Duis *aute ++irure* dolor in reprehenderit... +``` + +Not very helpful for any but source-level change investigation. And +given that Markdown doesn't encourage the usual "new sentence, new line" +of some languages (like [mdoc(7)](https://man.openbsd.org/mdoc.7)), even +this level of change analysis is difficult: a single change might affect +multiple re-wrapped lines. + +A similar possibility is to use +[wdiff(1)](https://www.gnu.org/software/wdiff/), which produces a set of +word-by-word differences. + +```markdown +*Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do +eiusmod tempor incididunt ut [labore](index.html) et dolore [-magna +aliqua.-] {+[magna aliqua](index.html).+} Ut enim ad minim veniam, quis +nostrud exercitation ullamco laboris nisi ut _aliquip_ ex ea commodo +consequat. Duis [-aute irure-] {+*aute irure*+} dolor in +reprehenderit... +``` + +One could then extend the Markdown language to accept the insertion and +deletion operations and let the output flow from there. +(In fact, that was my first approach to solving the problem.) 
+ +Unfortunately, doing so entails extending a language already prone to +extension and non-standardisation. Here is some added text. More unfortunately, a word-based +diff will not be sensitive to the shmarkdown language itself, and in +establishing context-free foo bar baz of similar words, will overrun block +and span element boundaries. + +On the other end of the spectrum are difference tools specific to the +output media. + +There are even more [HTML diff](https://www.w3.org/wiki/HtmlDiff) tools +available, so it's tempting to use one of these tools to produce an HTML +file consisting of differences, then further use a converter like +[wkhtmltopdf](https://wkhtmltopdf.org/) to generate PDFs. + +Since the HTML difference engines often respect the structure of HTML, +this is much more optimal in handling semantic difference. However, +re-structuring the difference does not easily produce a document of the +same style or readability as the PDFs themselves. + +The most elegant (and reliable) solution is to attack the problem from +the language-level itself. Since the +[lowdown(3)](https://kristaps.bsd.lv/lowdown/lowdown.3.html) +library is able to produce a full parse tree for analysis, it stands to +reason, given the wealth of literature on tree differences (instead of +the usual linear difference, as in the case of +[diff(1)](https://man.openbsd.org/diff.1) and friends), one can work +within the language to produce differences.[^oldfootnote] + +[^oldfootnote]: This is just to illustrate a removed footnote. + +## Algorithm + +The algorithm is in effect an ordered tree diff. I began with +well-studied algorithms for a well-studied problem: XML tree +differences. (The HTML difference tools described above inherit from +those algorithms.) For an overview of algorithms, see Change Detection +in XML Trees: a Survey[^Peters2005]. 
I base the +[lowdown-diff(1)](https://kristaps.bsd.lv/lowdown/lowdown.1.html) algorithm off +of Detecting Changes in XML Documents[^Cobena2002]. + +The reason for this choice instead of another is primarily the ease in +implementation. Moreover, since the programmatic output of the +algorithm is a generic AST, it's feasible to re-implement the algorithm +in different ways, or augment it at a later date. + +The BULD algorithm described in this paper is straightforward. It +begins with a short sanitisation pass. + +1. Annotate each node in the parse tree with a hash of the subtree + rooted at the node, inclusive. + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `annotate_sigs()`) + +2. Annotate each node with a weight corresponding to the subtree rooted + at the node. + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `annotate_sigs()`) + +3. Enqueue the new document's root node in a priority queue ordered by + weight. Then, while the priority queue is non-empty: + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `lowdown_diff()`) + + 1. Pop the first node of the priority queue. + 2. Look for candidates in the old document whose hash matches the + popped node's hash. + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `candidate()`) + 3. Choose an optimal candidate and mark it as matched. + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `optimality()`) + 4. If no candidates were found, enqueue the node's children into + the priority queue. + 5. If a candidate was selected, mark all of its subtree nodes as + matching the corresponding nodes in the old tree ("propogate + down"), then mark ancestor nodes similarly ("propogate up"). + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `match_up()`, `match_down()`) + +4. Run an optimisation phase over the new document's root node. 
+ ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `node_optimise()`) + +5. Step through both trees and create a new tree with nodes cloned from + both and marked as inserted or deleted. + ([diff.c](https://github.com/kristapsdz/lowdown/blob/master/diff.c), + `node_merge()`) + +My implementation changes or extends the BULD algorithm in several small +ways, described in the per-step documentation below. + +### Sanitise + +Before the BULD algorithm is run, the input tree is sanitised. This +process merges all adjacent text nodes into a single text node. By +doing so, possible differences are pushed into large blocks of +contiguous text---which in this case are managed by the word-difference +algorithm described later in this paper. + +### Annotation + +Each node in the tree is annotated with a hash and a weight. The hash, +MD5, is computed in all data concerning a node. For example, normal +text nodes (`LOWDOWN_NORMAL_TEXT`) have the hash produced from the +enclosed text. Autolinks (`LOWDOWN_LINK_AUTO`) use the link text, link, +and type. + +There are some nodes whose data is not hashed. For example, list +numbers: since these may change when nodes are moved, the numbers are +not part of the hash. In general, all volatile information that may be +inferred from the document structure (column number, list item number, +footnote number, etc.) is disregarded. + +Non-leaf nodes compute their hashes from the node type and the hashes of +all of their children. Thus, this step is a bottom-up search. + +Node weight is computed exactly as noted in the paper. + +### Optimal candidacy + +A node's candidate in the old tree is one whose hash matches. In most +documents, there are many candidates for certain types of nodes. +(Usually text nodes.) + +Candidate optimality is computed by looking at the number of parent +nodes that match on both sides. The number of parents to consider is +noted in the next sub-section. 
The distance climbed is bounded by the +weight of the sub-tree as defined in the paper. + +In the event of similar optimality, the node "closest" to the current +node is chosen. Proximity is defined by the node identifier, which is +its prefix order in the parse tree. + +### "Propogate up" + +When propogating a match upward, the distance upward is bound depending +on the matched sub-tree as defined in the paper. This makes it so that +"small" similarities (such as text) don't erroneously match two larger +sub-trees that are otherwise different. Upward matches occur while the +nodes' labels are the same, including attributes (e.g., link text). + +I did modify the algorithm to propogate upward "for free" through +similar singleton nodes, even if it means going beyond the maximum +number allowed by the sub-tree weight. + +### Optimisation + +The [lowdown-diff(1)](https://kristaps.bsd.lv/lowdown/lowdown.1.html) +algorithm has two optimisations, both lightly derived from the paper: +top-down and bottom-up propogation. + +#### Top-down + +The top-down optimisation, which is performed first, takes matched nodes +and matches un-matched, non-terminal children by label. + +This is useful when, say, a document consists of several paragraphs +where the text has changed within paragraphs. It won't be able to match +the text content, but it will match the paragraphs, which will push the +difference downward in the tree. + +#### Bottom-up + +In the bottom-up propogation, the weight of any given sub-tree is used +to compute how high a match will propogate. I extend the paper's +version optimisation by looking at the cumulative weight of matching +children. + +This works well for Markdown documents, which are generally quite +shallow and text-heavy. + +For each unmatched non-terminal node with at least one +matched child, the weights of all matched children with the same parents +(where the parent node is equal in label and attributes to the examined +node) are computed. 
If any given parent of the matched children has +greater than 50% of the possible weight, it is matched. + +### Merging + +The merging phase, which is not described in the paper, is very +straightforward. It uses a recursive merge algorithm starting at the +root node of the new tree and the root node of the old tree. + +1. The invariant is that the current node is matched by the corresponding + node in the old tree. +2. First, step through child nodes in the old tree. Mark as deleted all + nodes not being matched to the new tree. +3. Next, step through child nodes in the new tree. Mark as inserted all + nodes not having a match in the old tree. +4. Now, starting with the first node in the new tree having a match in + the old tree, search for that match in the list of old tree children. +5. If found, mark as deleted all previous old tree nodes up until the + match. Then re-run the merge algorithm starting at the matching + child nodes. +6. If not found, mark the new node as inserted and return to (2). +7. Continue (after the recursive step) by moving after both matching + nodes and returning to (2). + +If adjacent nodes are un-matched normal text, the normal text nodes are +compared word-by-word to compute a shortest-edit script. This is +enabled by [libdiff](https://github.com/kristapsdz/libdiff), which +implements an algorithm for computing the longest common +subsequence[^Wu90]. See [^McIlroy1976] for background information. + +## API + +The result of the algorithm is a new tree marked with insertions and +deletions. These are specifically noted with the `LOWDOWN_CHNG_INSERT` +and `LOWDOWN_CHNG_DELETE` variables. + +The algorithm may be run with the `lowdown_diff()` function, which +produces the merged tree. + +A set of convenience functions, `lowdown_buf_diff()` and +`lowdown_file_diff()`, also provide this functionality. + +## Future work + +There are many possible improvements to the algorithm. + +Foremost is the issue of normal text nodes. 
There should be a process +first that merges consecutive text nodes. This happens, for example, +when the `w` character is encountered at any time and might signify a +link. The parsing algorithm will start a new text node at each such +character. + +The merging algorithm can also take advantage of +[libdiff](https://github.com/kristapsdz/libdiff) when ordering the +output of inserted and deleted components. Right now, the algorithm is +simple in the sense of stopping at the earliest matched node without +considering subsequences. + +Lastly, the **-Tms** and **-Tman** output needs work to make sure that +the insert/delete macros don't disrupt the flow of text. + +Document last updated: [%rcsdate] + +[^Cobena2002]: [Detecting Changes in XML + Documents](https://www.cs.rutgers.edu/~amelie/papers/2002/diff.pdf) + (2002), Gregory Cobena, Serge Abiteboul, Amelie Marian. + +[^Peters2005]: [Change Detection in XML Trees: a + Survey](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.73.8912&rep=rep1&type=pdf) + (2005), Luuk Peters. + +[^McIlroy1976]: [An Algorithm for Differential File + Comparison](https://www.cs.dartmouth.edu/~doug/diff.pdf)(1976), + James W. Hunt, M. Douglas McIlroy. + +[^Wu90]: [An O(NP) sequence comparison + algorithm](https://publications.mpi-cbg.de/Wu_1990_6334.pdf) (1990), + Sun Wu, Udi Manber, Gene Myers. 
diff --git a/regress/diff/metadata-add.html b/regress/diff/metadata-add.html @@ -0,0 +1,15 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<meta name="author" content="kristaps" /> +<title>Untitled article</title> +</head> +<body> + +<h1 id="section">section</h1> + +<p>body</p> +</body> +</html> diff --git a/regress/diff/metadata-add.latex b/regress/diff/metadata-add.latex @@ -0,0 +1,31 @@ +% Options for packages loaded elsewhere +\PassOptionsToPackage{unicode}{hyperref} +\PassOptionsToPackage{hyphens}{url} +% +\documentclass[11pt,a4paper]{article} +\usepackage{amsmath,amssymb} +\usepackage{lmodern} +\usepackage{iftex} +\ifPDFTeX + \usepackage[T1]{fontenc} + \usepackage[utf8]{inputenc} + \usepackage{textcomp} % provide euro and other symbols +\else % if luatex or xetex + \usepackage{unicode-math} + \defaultfontfeatures{Scale=MatchLowercase} + \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1} +\fi +\usepackage{xcolor} +\usepackage{graphicx} +\usepackage{longtable} +\usepackage{hyperref} +\begin{document} +\title{Untitled article} +\author{kristaps} +\maketitle + +\hypertarget{section}{% +\section{section}\label{section}} + +body +\end{document} diff --git a/regress/diff/metadata-add.man b/regress/diff/metadata-add.man @@ -0,0 +1,6 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TH "Untitled article" "7" "" +.SH +section +.LP +body diff --git a/regress/diff/metadata-add.ms b/regress/diff/metadata-add.ms @@ -0,0 +1,11 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TL +Untitled article +.AU +kristaps +.NH 1 +section +.pdfhref O 1 section +.pdfhref M section +.LP +body diff --git a/regress/diff/metadata-add.new.md b/regress/diff/metadata-add.new.md @@ -0,0 +1,5 @@ +author: kristaps + +# section + +body diff --git a/regress/diff/metadata-add.old.md b/regress/diff/metadata-add.old.md @@ -0,0 +1,4 @@ + +# section + +body diff --git a/regress/diff/metadata-change.html 
b/regress/diff/metadata-change.html @@ -0,0 +1,15 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<meta name="author" content="shmistaps" /> +<title>Untitled article</title> +</head> +<body> + +<h1 id="section">section</h1> + +<p>body</p> +</body> +</html> diff --git a/regress/diff/metadata-change.latex b/regress/diff/metadata-change.latex @@ -0,0 +1,31 @@ +% Options for packages loaded elsewhere +\PassOptionsToPackage{unicode}{hyperref} +\PassOptionsToPackage{hyphens}{url} +% +\documentclass[11pt,a4paper]{article} +\usepackage{amsmath,amssymb} +\usepackage{lmodern} +\usepackage{iftex} +\ifPDFTeX + \usepackage[T1]{fontenc} + \usepackage[utf8]{inputenc} + \usepackage{textcomp} % provide euro and other symbols +\else % if luatex or xetex + \usepackage{unicode-math} + \defaultfontfeatures{Scale=MatchLowercase} + \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1} +\fi +\usepackage{xcolor} +\usepackage{graphicx} +\usepackage{longtable} +\usepackage{hyperref} +\begin{document} +\title{Untitled article} +\author{shmistaps} +\maketitle + +\hypertarget{section}{% +\section{section}\label{section}} + +body +\end{document} diff --git a/regress/diff/metadata-change.ms b/regress/diff/metadata-change.ms @@ -0,0 +1,11 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TL +Untitled article +.AU +shmistaps +.NH 1 +section +.pdfhref O 1 section +.pdfhref M section +.LP +body diff --git a/regress/diff/metadata-change.new.md b/regress/diff/metadata-change.new.md @@ -0,0 +1,5 @@ +author: shmistaps + +# section + +body diff --git a/regress/diff/metadata-change.old.md b/regress/diff/metadata-change.old.md @@ -0,0 +1,5 @@ +author: kristaps + +# section + +body diff --git a/regress/diff/metadata-keep.html b/regress/diff/metadata-keep.html @@ -0,0 +1,13 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<meta name="author" 
content="kristaps" /> +<title>Untitled article</title> +</head> +<body> + +<p><ins>shmext</ins> <del>text</del> here</p> +</body> +</html> diff --git a/regress/diff/metadata-keep.latex b/regress/diff/metadata-keep.latex @@ -0,0 +1,28 @@ +% Options for packages loaded elsewhere +\PassOptionsToPackage{unicode}{hyperref} +\PassOptionsToPackage{hyphens}{url} +% +\documentclass[11pt,a4paper]{article} +\usepackage{amsmath,amssymb} +\usepackage{lmodern} +\usepackage{iftex} +\ifPDFTeX + \usepackage[T1]{fontenc} + \usepackage[utf8]{inputenc} + \usepackage{textcomp} % provide euro and other symbols +\else % if luatex or xetex + \usepackage{unicode-math} + \defaultfontfeatures{Scale=MatchLowercase} + \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1} +\fi +\usepackage{xcolor} +\usepackage{graphicx} +\usepackage{longtable} +\usepackage{hyperref} +\begin{document} +\title{Untitled article} +\author{kristaps} +\maketitle + +{\color{blue} shmext} {\color{red} text} here +\end{document} diff --git a/regress/diff/metadata-keep.man b/regress/diff/metadata-keep.man @@ -0,0 +1,10 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TH "Untitled article" "7" "" +.PP +.gcolor blue +shmext +.gcolor black +.gcolor red +text +.gcolor black +here diff --git a/regress/diff/metadata-keep.ms b/regress/diff/metadata-keep.ms @@ -0,0 +1,13 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TL +Untitled article +.AU +kristaps +.PP +.gcolor blue +shmext +.gcolor black +.gcolor red +text +.gcolor black +here diff --git a/regress/diff/metadata-keep.new.md b/regress/diff/metadata-keep.new.md @@ -0,0 +1,3 @@ +author: kristaps + +shmext here diff --git a/regress/diff/metadata-keep.old.md b/regress/diff/metadata-keep.old.md @@ -0,0 +1,3 @@ +author: kristaps + +text here diff --git a/regress/diff/metadata-remove.html b/regress/diff/metadata-remove.html @@ -0,0 +1,14 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<title>Untitled 
article</title> +</head> +<body> + +<h1 id="section">section</h1> + +<p>body</p> +</body> +</html> diff --git a/regress/diff/metadata-remove.latex b/regress/diff/metadata-remove.latex @@ -0,0 +1,30 @@ +% Options for packages loaded elsewhere +\PassOptionsToPackage{unicode}{hyperref} +\PassOptionsToPackage{hyphens}{url} +% +\documentclass[11pt,a4paper]{article} +\usepackage{amsmath,amssymb} +\usepackage{lmodern} +\usepackage{iftex} +\ifPDFTeX + \usepackage[T1]{fontenc} + \usepackage[utf8]{inputenc} + \usepackage{textcomp} % provide euro and other symbols +\else % if luatex or xetex + \usepackage{unicode-math} + \defaultfontfeatures{Scale=MatchLowercase} + \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1} +\fi +\usepackage{xcolor} +\usepackage{graphicx} +\usepackage{longtable} +\usepackage{hyperref} +\begin{document} +\title{Untitled article} +\maketitle + +\hypertarget{section}{% +\section{section}\label{section}} + +body +\end{document} diff --git a/regress/diff/metadata-remove.man b/regress/diff/metadata-remove.man @@ -0,0 +1,6 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TH "Untitled article" "7" "" +.SH +section +.LP +body diff --git a/regress/diff/metadata-remove.ms b/regress/diff/metadata-remove.ms @@ -0,0 +1,9 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TL +Untitled article +.NH 1 +section +.pdfhref O 1 section +.pdfhref M section +.LP +body diff --git a/regress/diff/metadata-remove.new.md b/regress/diff/metadata-remove.new.md @@ -0,0 +1,4 @@ + +# section + +body diff --git a/regress/diff/metadata-remove.old.md b/regress/diff/metadata-remove.old.md @@ -0,0 +1,5 @@ +author: kristaps + +# section + +body diff --git a/regress/diff/no-similarity.html b/regress/diff/no-similarity.html @@ -0,0 +1,14 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<title>Untitled article</title> +</head> +<body> +<del> +<p>hi</p> +</del><ins> +<h1 id="soooo">soooo</h1> +</ins></body> +</html> 
diff --git a/regress/diff/no-similarity.new.md b/regress/diff/no-similarity.new.md @@ -0,0 +1 @@ +# soooo diff --git a/regress/diff/no-similarity.old.md b/regress/diff/no-similarity.old.md @@ -0,0 +1,3 @@ +key: value + +hi diff --git a/regress/diff/stripped-attr.html b/regress/diff/stripped-attr.html @@ -0,0 +1,16 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<title>Untitled article</title> +</head> +<body> +<del> +<h1 id="link"><a href="dest">link</a></h1> +</del><ins> +<p>foo <code>bar</code> baz</p> +</ins><ins> +<p><code>a `b` c</code></p> +</ins></body> +</html> diff --git a/regress/diff/stripped-attr.new.md b/regress/diff/stripped-attr.new.md @@ -0,0 +1,4 @@ +foo `bar` baz + +``a `b` c`` + diff --git a/regress/diff/stripped-attr.old.md b/regress/diff/stripped-attr.old.md @@ -0,0 +1,3 @@ + +# [link](dest) { .class #id } + diff --git a/regress/diff/table-badfree.html b/regress/diff/table-badfree.html @@ -0,0 +1,45 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<title>Untitled article</title> +</head> +<body> +<del> +<h1 id="section">section</h1> +</del><del> +<p>text <a href="https://foo.com">link</a> with <em>italics <a href="https://link.com">https://link.com</a></em></p> +</del><del> +<ul> +<li><p>what</p> + +<ul> +<li>what <strong>what</strong>?</li> +</ul></li> +<li><p>what?</p></li> +</ul> +</del><del> +<p>lil</p> +</del><del> +<table> +<thead> +<tr> +<th style="text-align: right">table</th> +<th>column</th> +</tr> +</thead> + +<tbody> +<tr> +<td style="text-align: right"><a href="https://fo.com">https://fo.com</a></td> +<td>b</td> +</tr> +</tbody> +</table> +</del><ins> +<h1 id="header-1">header 1</h1> +</ins><ins> +<p>something</p> +</ins></body> +</html> diff --git a/regress/diff/table-badfree.new.md b/regress/diff/table-badfree.new.md @@ -0,0 +1,3 @@ +# header 1 + +something diff 
--git a/regress/diff/table-badfree.old.md b/regress/diff/table-badfree.old.md @@ -0,0 +1,18 @@ +author: me + +# section + +text [link](https://foo.com) with *italics https://link.com* + +- what + + - what **what**? + +- what? + +lil + +| table | column | +| -------------: | --------------- | +| https://fo.com | b | + diff --git a/regress/diff/text-change-with-link.html b/regress/diff/text-change-with-link.html @@ -0,0 +1,12 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<title>Untitled article</title> +</head> +<body> + +<p>Hello, <del><a href="https://foo.com">world</a></del><ins><a href="https://foo.com">shworld</a></ins>.</p> +</body> +</html> diff --git a/regress/diff/text-change-with-link.new.md b/regress/diff/text-change-with-link.new.md @@ -0,0 +1,2 @@ + +Hello, [shworld](https://foo.com). diff --git a/regress/diff/text-change-with-link.old.md b/regress/diff/text-change-with-link.old.md @@ -0,0 +1,2 @@ + +Hello, [world](https://foo.com). diff --git a/regress/diff/text-change-with-nested.html b/regress/diff/text-change-with-nested.html @@ -0,0 +1,12 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<title>Untitled article</title> +</head> +<body> + +<p>Hello, <em>world <del><a href="https://foo.com">again</a></del><ins><a href="https://foo.com">shmagain</a></ins> cdef</em>.</p> +</body> +</html> diff --git a/regress/diff/text-change-with-nested.new.md b/regress/diff/text-change-with-nested.new.md @@ -0,0 +1,2 @@ + +Hello, *world [shmagain](https://foo.com) cdef*. diff --git a/regress/diff/text-change-with-nested.old.md b/regress/diff/text-change-with-nested.old.md @@ -0,0 +1,2 @@ + +Hello, *world [again](https://foo.com) cdef*. 
diff --git a/regress/diff/text-change.html b/regress/diff/text-change.html @@ -0,0 +1,14 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<title>Untitled article</title> +</head> +<body> +<del> +<p>Hello, world.</p> +</del><ins> +<p>Hello, shmorld.</p> +</ins></body> +</html> diff --git a/regress/diff/text-change.new.md b/regress/diff/text-change.new.md @@ -0,0 +1,2 @@ + +Hello, shmorld. diff --git a/regress/diff/text-change.old.md b/regress/diff/text-change.old.md @@ -0,0 +1,2 @@ + +Hello, world. diff --git a/regress/diff/text-change2.html b/regress/diff/text-change2.html @@ -0,0 +1,18 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<title>Untitled article</title> +</head> +<body> +<del> +<p>not:a link $or maths</p> +</del><del> +<p>not a wwwlink or @email</p> +</del><ins> +<p>not:a link $shor maths</p> +</ins><ins> +<p>not a wwwlink2 or @email</p> +</ins></body> +</html> diff --git a/regress/diff/text-change2.new.md b/regress/diff/text-change2.new.md @@ -0,0 +1,4 @@ + +not:a link $shor maths + +not a wwwlink2 or @email diff --git a/regress/diff/text-change2.old.md b/regress/diff/text-change2.old.md @@ -0,0 +1,4 @@ + +not:a link $or maths + +not a wwwlink or @email diff --git a/regress/footnote-before-ref.md b/regress/footnote-before-ref.md @@ -0,0 +1,9 @@ +Hi. + +[^pt]: + Klingon insult, meaning something like "weirdo," deriving from + the verb "to be weird" (**taQ**), with and [sic] you (plural) + imperative prefix (**pe-**). 
+ +Hi.[^pt] + diff --git a/regress/footnote-in-table.gemini b/regress/footnote-in-table.gemini @@ -0,0 +1,36 @@ +first[1] + +``` +a | c +--|----------- +9 | foo[4][5] +``` + +Now[6] + +~~~~~~~~ + +[1] one + + +[2] two + + +[3] three + +hello + +* world + + +[4] two + + +[5] three + +hello + +* world + + +[6] four diff --git a/regress/footnote-in-table.html b/regress/footnote-in-table.html @@ -0,0 +1,48 @@ +<p>first<sup id="fnref1"><a href="#fn1" rel="footnote">1</a></sup></p> + +<table> +<thead> +<tr> +<th>a</th> +<th>c</th> +</tr> +</thead> + +<tbody> +<tr> +<td>9</td> +<td>foo<sup id="fnref2"><a href="#fn2" rel="footnote">2</a></sup><sup id="fnref3"><a href="#fn3" rel="footnote">3</a></sup></td> +</tr> +</tbody> +</table> + +<p>Now<sup id="fnref4"><a href="#fn4" rel="footnote">4</a></sup></p> + +<div class="footnotes"> +<hr/> +<ol> + +<li id="fn1"> +<p>one <a href="#fnref1" rev="footnote">↩</a></p> +</li> + +<li id="fn2"> +<p>two <a href="#fnref2" rev="footnote">↩</a></p> +</li> + +<li id="fn3"> +<p>three <a href="#fnref3" rev="footnote">↩</a></p> + +<p>hello</p> + +<ul> +<li>world</li> +</ul> +</li> + +<li id="fn4"> +<p>four <a href="#fnref4" rev="footnote">↩</a></p> +</li> + +</ol> +</div> diff --git a/regress/footnote-in-table.latex b/regress/footnote-in-table.latex @@ -0,0 +1,28 @@ + +first\footnote[1]{ +one +} + + +\begin{longtable}[]{ll} +a & c \\ +9 & foo\footnote[2]{ +two +} +\footnote[3]{ +three + +hello + +\begin{itemize} +\itemsep -0.2em +\item world +\end{itemize} +} + \\ +\end{longtable} + +Now\footnote[4]{ +four +} + diff --git a/regress/footnote-in-table.man b/regress/footnote-in-table.man @@ -0,0 +1,34 @@ +.PP +first\u\s-31\s+3\d +.TS +tab(|) expand allbox; +lb lb +l l. 
+T{ +a +T}|T{ +c +T} +T{ +9 +T}|T{ +foo\u\s-32\s+3\d\u\s-33\s+3\d +T} +.TE +.PP +Now\u\s-34\s+3\d +.LP +.sp 3 +\l'2i' +.LP +\0\fI\u\s-31\s+3\d\fP\0one +.LP +\0\fI\u\s-32\s+3\d\fP\0two +.LP +\0\fI\u\s-33\s+3\d\fP\0three +.PP +hello +.IP "\(bu" 2 +world +.LP +\0\fI\u\s-34\s+3\d\fP\0four diff --git a/regress/footnote-in-table.md b/regress/footnote-in-table.md @@ -0,0 +1,20 @@ + +first[^pt0] + +a | c +---|--- +9 | foo[^pt1][^pt2] + +Now[^pt3] + +[^pt0]: one + +[^pt1]: two + +[^pt2]: three + + hello + + - world + +[^pt3]: four diff --git a/regress/footnote-in-table.ms b/regress/footnote-in-table.ms @@ -0,0 +1,37 @@ +.PP +first\** +.FS +one +.FE +.TS H +tab(|) expand allbox; +lb lb +l l. +T{ +a +T}|T{ +c +T} +.TH +T{ +9 +T}|T{ +foo\** +.FS +two +.FE +\** +.FS +three +.PP +hello +.IP "\(bu" 2 +world +.FE +T} +.TE +.PP +Now\** +.FS +four +.FE diff --git a/regress/footnote-multi.html b/regress/footnote-multi.html @@ -0,0 +1,16 @@ +<p>Hi.<sup id="fnref1"><a href="#fn1" rel="footnote">1</a></sup></p> + +<p>Another hi.[^pt]</p> + +<div class="footnotes"> +<hr/> +<ol> + +<li id="fn1"> +<p>Klingon insult, meaning something like “weirdo,” deriving from +the verb “to be weird” (<strong>taQ</strong>), with and [sic] you (plural) +imperative prefix (<strong>pe-</strong>). <a href="#fnref1" rev="footnote">↩</a></p> +</li> + +</ol> +</div> diff --git a/regress/footnote-multi.md b/regress/footnote-multi.md @@ -0,0 +1,9 @@ +Hi.[^pt] + +Another hi.[^pt] + +[^pt]: + Klingon insult, meaning something like "weirdo," deriving from + the verb "to be weird" (**taQ**), with and [sic] you (plural) + imperative prefix (**pe-**). 
+ diff --git a/regress/footnote-nested.html b/regress/footnote-nested.html @@ -0,0 +1,14 @@ +<p>Hi.<sup id="fnref1"><a href="#fn1" rel="footnote">1</a></sup></p> + +<div class="footnotes"> +<hr/> +<ol> + +<li id="fn1"> +<p>Klingon insult, meaning something like “weirdo,” deriving from +the verb “to be weird” (<strong>taQ</strong>), with and [sic] you (plural) +imperative prefix (<strong>pe-</strong>).[^pt2] <a href="#fnref1" rev="footnote">↩</a></p> +</li> + +</ol> +</div> diff --git a/regress/footnote-nested.md b/regress/footnote-nested.md @@ -0,0 +1,11 @@ +Hi.[^pt] + +[^pt]: + Klingon insult, meaning something like "weirdo," deriving from + the verb "to be weird" (**taQ**), with and [sic] you (plural) + imperative prefix (**pe-**).[^pt2] + +[^pt2]: + Klingon insult, meaning something like "weirdo," deriving from + the verb "to be weird" (**taQ**), with and [sic] you (plural) + imperative prefix (**pe-**). diff --git a/regress/footnote-nomatch.html b/regress/footnote-nomatch.html @@ -0,0 +1 @@ +<p>Hi.[^asdf]</p> diff --git a/regress/footnote-nomatch.md b/regress/footnote-nomatch.md @@ -0,0 +1,7 @@ +Hi.[^asdf] + +[^pt]: + Klingon insult, meaning something like "weirdo," deriving from + the verb "to be weird" (**taQ**), with and [sic] you (plural) + imperative prefix (**pe-**). + diff --git a/regress/footnote.html b/regress/footnote.html @@ -0,0 +1,14 @@ +<p>Hi.<sup id="fnref1"><a href="#fn1" rel="footnote">1</a></sup></p> + +<div class="footnotes"> +<hr/> +<ol> + +<li id="fn1"> +<p>Klingon insult, meaning something like “weirdo,” deriving from +the verb “to be weird” (<strong>taQ</strong>), with and [sic] you (plural) +imperative prefix (<strong>pe-</strong>). 
<a href="#fnref1" rev="footnote">↩</a></p> +</li> + +</ol> +</div> diff --git a/regress/footnote.md b/regress/footnote.md @@ -0,0 +1,7 @@ +Hi.[^pt] + +[^pt]: + Klingon insult, meaning something like "weirdo," deriving from + the verb "to be weird" (**taQ**), with and [sic] you (plural) + imperative prefix (**pe-**). + diff --git a/regress/hardbreak.gemini b/regress/hardbreak.gemini @@ -0,0 +1,2 @@ +Darmok and Jalad… +at Tanagra. diff --git a/regress/hardbreak.html b/regress/hardbreak.html @@ -0,0 +1,2 @@ +<p>Darmok and Jalad…<br/> +at Tanagra.</p> diff --git a/regress/hardbreak.man b/regress/hardbreak.man @@ -0,0 +1,4 @@ +.PP +Darmok and Jalad\[u2026] +.br +at Tanagra. diff --git a/regress/hardbreak.md b/regress/hardbreak.md @@ -0,0 +1,2 @@ +Darmok and Jalad...\ +at Tanagra. diff --git a/regress/hardbreak.ms b/regress/hardbreak.ms @@ -0,0 +1,4 @@ +.PP +Darmok and Jalad\[u2026] +.br +at Tanagra. diff --git a/regress/header-attr-class-multi.html b/regress/header-attr-class-multi.html @@ -0,0 +1,7 @@ +<h1 id="a" class="b c">a</h1> + +<p>c</p> + +<h1 id="a-1" class="b e">a</h1> + +<p>c</p> diff --git a/regress/header-attr-class-multi.md b/regress/header-attr-class-multi.md @@ -0,0 +1,8 @@ +# a {.b .c} + +c + +a {.b .e} +======= + +c diff --git a/regress/header-attr-class.html b/regress/header-attr-class.html @@ -0,0 +1,7 @@ +<h1 id="a" class="b">a</h1> + +<p>c</p> + +<h1 id="a-1" class="b">a</h1> + +<p>c</p> diff --git a/regress/header-attr-class.md b/regress/header-attr-class.md @@ -0,0 +1,8 @@ +# a {.b} + +c + +a {.b} +======= + +c diff --git a/regress/header-attr-id-empty.html b/regress/header-attr-id-empty.html @@ -0,0 +1,7 @@ +<h1 id="b"></h1> + +<p>c</p> + +<h1 id="b"></h1> + +<p>c</p> diff --git a/regress/header-attr-id-empty.md b/regress/header-attr-id-empty.md @@ -0,0 +1,9 @@ + +# {#b} + +c + +{#b} +======= + +c diff --git a/regress/header-attr-id-empty.ms b/regress/header-attr-id-empty.ms @@ -0,0 +1,10 @@ +.NH 1 +.pdfhref O 1 +.pdfhref M b +.LP +c +.NH 1 
+.pdfhref O 1 +.pdfhref M b +.LP +c diff --git a/regress/header-attr-id-zerolen.html b/regress/header-attr-id-zerolen.html @@ -0,0 +1,7 @@ +<h1 id="a">a</h1> + +<p>c</p> + +<h1 id="b">b</h1> + +<p>c</p> diff --git a/regress/header-attr-id-zerolen.md b/regress/header-attr-id-zerolen.md @@ -0,0 +1,8 @@ +# a {#} + +c + +b {#} +======= + +c diff --git a/regress/header-attr-id-zerolen.ms b/regress/header-attr-id-zerolen.ms @@ -0,0 +1,12 @@ +.NH 1 +a +.pdfhref O 1 a +.pdfhref M a +.LP +c +.NH 1 +b +.pdfhref O 1 b +.pdfhref M b +.LP +c diff --git a/regress/header-attr-id.fodt b/regress/header-attr-id.fodt @@ -0,0 +1,24 @@ +<office:automatic-styles> +<style:style style:family="paragraph" style:name="P1" style:parent-style-name="Heading_20_1"> +</style:style> +<style:style style:family="paragraph" style:name="P2" style:parent-style-name="Standard"> +</style:style> +<style:page-layout style:name="pm1"> +<style:page-layout-properties fo:page-width="21.001cm" fo:page-height="29.7cm" style:num-format="1" style:print-orientation="portrait" fo:margin-top="2cm" fo:margin-bottom="2cm" fo:margin-left="2cm" fo:margin-right="2cm" style:writing-mode="lr-tb" style:footnote-max-height="0cm"> +</style:page-layout-properties> +</style:page-layout> +</office:automatic-styles> +<office:master-styles> +<style:master-page style:name="Standard" style:page-layout-name="pm1"/> +</office:master-styles> +<office:body> +<office:text> +<text:h text:outline-level="1" text:style-name="P1"><text:bookmark-start text:name="b" />a<text:bookmark-end text:name="b" /></text:h> + +<text:p text:style-name="P2">c</text:p> + +<text:h text:outline-level="1" text:style-name="P1"><text:bookmark-start text:name="b" />a<text:bookmark-end text:name="b" /></text:h> + +<text:p text:style-name="P2">c</text:p> +</office:text> +</office:body> diff --git a/regress/header-attr-id.html b/regress/header-attr-id.html @@ -0,0 +1,7 @@ +<h1 id="b">a</h1> + +<p>c</p> + +<h1 id="b">a</h1> + +<p>c</p> diff --git 
a/regress/header-attr-id.latex b/regress/header-attr-id.latex @@ -0,0 +1,9 @@ +\hypertarget{b}{% +\section{a}\label{b}} + +c + +\hypertarget{b}{% +\section{a}\label{b}} + +c diff --git a/regress/header-attr-id.md b/regress/header-attr-id.md @@ -0,0 +1,9 @@ + +# a {#b} + +c + +a {#b} +======= + +c diff --git a/regress/header-attr-id.ms b/regress/header-attr-id.ms @@ -0,0 +1,12 @@ +.NH 1 +a +.pdfhref O 1 a +.pdfhref M b +.LP +c +.NH 1 +a +.pdfhref O 1 a +.pdfhref M b +.LP +c diff --git a/regress/header-attr-id2.html b/regress/header-attr-id2.html @@ -0,0 +1,7 @@ +<h1 id="wwaaww">a</h1> + +<p>c</p> + +<h1 id="www">a</h1> + +<p>c</p> diff --git a/regress/header-attr-id2.latex b/regress/header-attr-id2.latex @@ -0,0 +1,9 @@ +\hypertarget{wwaaww}{% +\section{a}\label{wwaaww}} + +c + +\hypertarget{www}{% +\section{a}\label{www}} + +c diff --git a/regress/header-attr-id2.md b/regress/header-attr-id2.md @@ -0,0 +1,9 @@ + +# a { #wwaaww } + +c + +a {#www} +======= + +c diff --git a/regress/header-attr-id2.ms b/regress/header-attr-id2.ms @@ -0,0 +1,12 @@ +.NH 1 +a +.pdfhref O 1 a +.pdfhref M wwaaww +.LP +c +.NH 1 +a +.pdfhref O 1 a +.pdfhref M www +.LP +c diff --git a/regress/header-ids.html b/regress/header-ids.html @@ -0,0 +1,15 @@ +<h1 id="section">section</h1> + +<h1 id="section-2">section-2</h1> + +<h1 id="section-1">section</h1> + +<h1 id="section-3">section</h1> + +<h1 id="section-3-1">section-3</h1> + +<h1 id="section-3-2">section 3</h1> + +<h1 id="section3">section()3</h1> + +<h1 id="section-3-3">section () 3</h1> diff --git a/regress/header-ids.md b/regress/header-ids.md @@ -0,0 +1,16 @@ + +# section + +# section-2 + +# section + +# section + +# section-3 + +# section 3 + +# section()3 + +# section () 3 diff --git a/regress/header-with-entities.html b/regress/header-with-entities.html @@ -0,0 +1 @@ +<h1 id="a-bcd">a <em>bc&d</em></h1> diff --git a/regress/header-with-entities.latex b/regress/header-with-entities.latex @@ -0,0 +1,2 @@ +\hypertarget{a-bcd}{% 
+\section{a \emph{bc\&{}d}}\label{a-bcd}} diff --git a/regress/header-with-entities.md b/regress/header-with-entities.md @@ -0,0 +1,2 @@ + +# a *bc&d* diff --git a/regress/header-with-image.html b/regress/header-with-image.html @@ -0,0 +1 @@ +<h1 id="a-abc-nested-image">a <a href="foo.com">abc <img src="foo.jpg" alt="nested image" /></a></h1> diff --git a/regress/header-with-image.md b/regress/header-with-image.md @@ -0,0 +1,2 @@ + +# a [abc ](foo.com) diff --git a/regress/header-with-image.ms b/regress/header-with-image.ms @@ -0,0 +1,5 @@ +.NH 1 +a \c +.pdfhref W -D foo.com -- abc \fBnested image\fR (Image: \fIfoo.jpg\fR) +.pdfhref O 1 a abc nested image +.pdfhref M a-abc-nested-image diff --git a/regress/header-with-links-nospace.man b/regress/header-with-links-nospace.man @@ -0,0 +1,4 @@ +.SH +a \fIhttps://foo.com\fR now\fBworld\fR <\fIhttps://bar.com\fR>bc +.LP +c diff --git a/regress/header-with-links-nospace.md b/regress/header-with-links-nospace.md @@ -0,0 +1,4 @@ + +# a https://foo.com now[world](https://bar.com)bc + +c diff --git a/regress/header-with-links-nospace.ms b/regress/header-with-links-nospace.ms @@ -0,0 +1,10 @@ +.NH 1 +a \c +.pdfhref W -D https://foo.com -- https://foo.com +now\c +.pdfhref W -A "\c" -D https://bar.com -- world +bc +.pdfhref O 1 a https://foo.com nowworldbc +.pdfhref M a-httpsfoo.com-nowworldbc +.LP +c diff --git a/regress/header-with-links.html b/regress/header-with-links.html @@ -0,0 +1,3 @@ +<h1 id="a-httpsfoo.com-now-world-b">a <a href="https://foo.com">https://foo.com</a> now <a href="https://bar.com">world</a> b</h1> + +<p>c</p> diff --git a/regress/header-with-links.latex b/regress/header-with-links.latex @@ -0,0 +1,4 @@ +\hypertarget{a-httpsfoo.com-now-world-b}{% +\section{a \url{https://foo.com} now \href{https://bar.com}{world} b}\label{a-httpsfoo.com-now-world-b}} + +c diff --git a/regress/header-with-links.man b/regress/header-with-links.man @@ -0,0 +1,4 @@ +.SH +a \fIhttps://foo.com\fR now \fBworld\fR 
<\fIhttps://bar.com\fR> b +.LP +c diff --git a/regress/header-with-links.md b/regress/header-with-links.md @@ -0,0 +1,4 @@ + +# a https://foo.com now [world](https://bar.com) b + +c diff --git a/regress/header-with-links.ms b/regress/header-with-links.ms @@ -0,0 +1,10 @@ +.NH 1 +a \c +.pdfhref W -D https://foo.com -- https://foo.com +now \c +.pdfhref W -D https://bar.com -- world +b +.pdfhref O 1 a https://foo.com now world b +.pdfhref M a-httpsfoo.com-now-world-b +.LP +c diff --git a/regress/header-with-nested-elements.html b/regress/header-with-nested-elements.html @@ -0,0 +1 @@ +<h1 id="a-b-c-httpsfoo.com-d-f">a <em>b c <a href="https://foo.com">https://foo.com</a> d</em> f</h1> diff --git a/regress/header-with-nested-elements.latex b/regress/header-with-nested-elements.latex @@ -0,0 +1,2 @@ +\hypertarget{a-b-c-httpsfoo.com-d-f}{% +\section{a \emph{b c \url{https://foo.com} d} f}\label{a-b-c-httpsfoo.com-d-f}} diff --git a/regress/header-with-nested-elements.md b/regress/header-with-nested-elements.md @@ -0,0 +1,2 @@ + +# a *b c https://foo.com d* f diff --git a/regress/header-with-nested-elements.ms b/regress/header-with-nested-elements.ms @@ -0,0 +1,6 @@ +.NH 1 +a \fIb c \c +.pdfhref W -D https://foo.com -- https://foo.com +d\fR f +.pdfhref O 1 a b c https://foo.com d f +.pdfhref M a-b-c-httpsfoo.com-d-f diff --git a/regress/header-with-no-text.html b/regress/header-with-no-text.html @@ -0,0 +1,3 @@ +<h1 id="section"><>><</h1> + +<p>a</p> diff --git a/regress/header-with-no-text.md b/regress/header-with-no-text.md @@ -0,0 +1,4 @@ + +# <>>< + +a diff --git a/regress/header-with-no-text2.html b/regress/header-with-no-text2.html @@ -0,0 +1,3 @@ +<h1 id="section">< >> <</h1> + +<p>a</p> diff --git a/regress/header-with-no-text2.md b/regress/header-with-no-text2.md @@ -0,0 +1,4 @@ + +# < >> < + +a diff --git a/regress/header-with-quoted-contents.html b/regress/header-with-quoted-contents.html @@ -0,0 +1 @@ +<h1 id="a-b-de-fg-hijk">a <em>b</em> d#e f"g “hijk”</h1> 
diff --git a/regress/header-with-quoted-contents.latex b/regress/header-with-quoted-contents.latex @@ -0,0 +1,2 @@ +\hypertarget{a-b-de-fg-hijk}{% +\section{a \emph{b} d\#e f"g ``hijk''}\label{a-b-de-fg-hijk}} diff --git a/regress/header-with-quoted-contents.man b/regress/header-with-quoted-contents.man @@ -0,0 +1,2 @@ +.SH +a \fIb\fR d#e f\(dqg \(lqhijk\(rq diff --git a/regress/header-with-quoted-contents.md b/regress/header-with-quoted-contents.md @@ -0,0 +1,2 @@ + +# a *b* d#e f"g "hijk" diff --git a/regress/header-with-quoted-contents.ms b/regress/header-with-quoted-contents.ms @@ -0,0 +1,4 @@ +.NH 1 +a \fIb\fR d#e f\(dqg \(lqhijk\(rq +.pdfhref O 1 a b d#e f\(dqg hijk +.pdfhref M a-b-de-fg-hijk diff --git a/regress/header-with-self-link.html b/regress/header-with-self-link.html @@ -0,0 +1,3 @@ +<h1 id="header-with-link">Header with <a href="#link">link</a></h1> + +<h1 id="link"><a href="#link">link</a></h1> diff --git a/regress/header-with-self-link.latex b/regress/header-with-self-link.latex @@ -0,0 +1,5 @@ +\hypertarget{header-with-link}{% +\section{Header with \hyperlink{link}{link}}\label{header-with-link}} + +\hypertarget{link}{% +\section{\hyperlink{link}{link}}\label{link}} diff --git a/regress/header-with-self-link.md b/regress/header-with-self-link.md @@ -0,0 +1,4 @@ + +# Header with [link](#link) + +# [link](#link) diff --git a/regress/header-with-self-link.ms b/regress/header-with-self-link.ms @@ -0,0 +1,9 @@ +.NH 1 +Header with \c +.pdfhref L -D link -- link +.pdfhref O 1 Header with link +.pdfhref M header-with-link +.NH 1 +.pdfhref L -D link -- link +.pdfhref O 1 link +.pdfhref M link diff --git a/regress/html-escapes.html b/regress/html-escapes.html @@ -0,0 +1,28 @@ +<p>a b> c d< e f & g h </p> + +<p>- + _ + . + + + ! + * + ‘ + ( + ) + , + % + # + @ + ? 
+ = + ; + : + / + , + + + & + $ + ~ + alphanum</p> + +<p><a href="http://api.plos.org/search?q=title:%22Drosophila%22%20AND%20body:%22RNA%22&fl=id,abstract">link</a></p> diff --git a/regress/html-escapes.md b/regress/html-escapes.md @@ -0,0 +1,29 @@ + +a b> c d< e f & g h + + - + _ + . + + + ! + * + ' + ( + ) + , + % + # + @ + ? + = + ; + : + / + , + + + & + $ + ~ + alphanum + +[link](http://api.plos.org/search?q=title:"Drosophila" AND body:"RNA"&fl=id,abstract) diff --git a/regress/img-attr-class.fodt b/regress/img-attr-class.fodt @@ -0,0 +1,16 @@ +<office:automatic-styles> +<style:style style:family="paragraph" style:name="P1" style:parent-style-name="Standard"> +</style:style> +<style:page-layout style:name="pm1"> +<style:page-layout-properties fo:page-width="21.001cm" fo:page-height="29.7cm" style:num-format="1" style:print-orientation="portrait" fo:margin-top="2cm" fo:margin-bottom="2cm" fo:margin-left="2cm" fo:margin-right="2cm" style:writing-mode="lr-tb" style:footnote-max-height="0cm"> +</style:page-layout-properties> +</style:page-layout> +</office:automatic-styles> +<office:master-styles> +<style:master-page style:name="Standard" style:page-layout-name="pm1"/> +</office:master-styles> +<office:body> +<office:text> +<text:p text:style-name="P1">link <draw:frame draw:name="Image1" text:anchor-type="as-char" draw:z-index="0" draw:style-name="Graphics" draw:class-names="cls"><draw:image xlink:href="address" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad" draw:filter-name="<All images>" /><svg:title>text</svg:title></draw:frame></text:p> +</office:text> +</office:body> diff --git a/regress/img-attr-class.html b/regress/img-attr-class.html @@ -0,0 +1 @@ +<p>link <img src="address" alt="text" class="cls" /></p> diff --git a/regress/img-attr-class.md b/regress/img-attr-class.md @@ -0,0 +1,2 @@ + +link { .cls } diff --git a/regress/img-attr-height.html b/regress/img-attr-height.html @@ -0,0 +1 @@ +<p>link <img src="address" alt="text" class="cls" 
style="height:50%;" /></p> diff --git a/regress/img-attr-height.md b/regress/img-attr-height.md @@ -0,0 +1,2 @@ + +link { .cls height=50% } diff --git a/regress/img-attr-id.html b/regress/img-attr-id.html @@ -0,0 +1 @@ +<p>link <img src="address" alt="text" id="world" /></p> diff --git a/regress/img-attr-id.md b/regress/img-attr-id.md @@ -0,0 +1,2 @@ + +link { #world } diff --git a/regress/img-attr-width.html b/regress/img-attr-width.html @@ -0,0 +1 @@ +<p>link <img src="address" alt="text" class="cls" style="width:50%;" /></p> diff --git a/regress/img-attr-width.latex b/regress/img-attr-width.latex @@ -0,0 +1,2 @@ + +link \includegraphics[width=0.50\linewidth]{address} diff --git a/regress/img-attr-width.md b/regress/img-attr-width.md @@ -0,0 +1,2 @@ + +link { .cls width=50% } diff --git a/regress/img-nested-attr-class.html b/regress/img-nested-attr-class.html @@ -0,0 +1 @@ +<p>link <a href="foo.com" class="cls2"><img src="img.jpg" alt="text" class="cls" /></a></p> diff --git a/regress/img-nested-attr-class.md b/regress/img-nested-attr-class.md @@ -0,0 +1,2 @@ + +link [{ .cls }](foo.com){ .cls2 } diff --git a/regress/img-ref-attr.html b/regress/img-ref-attr.html @@ -0,0 +1 @@ +<p>a <img src="foo.jpg" alt="b" class="class" id="id" style="width:50%;" /></p> diff --git a/regress/img-ref-attr.md b/regress/img-ref-attr.md @@ -0,0 +1,5 @@ + +a ![b][cdef] + +[cdef]: foo.jpg { width=50% .class #id } + diff --git a/regress/img-ref.html b/regress/img-ref.html @@ -0,0 +1 @@ +<p>a <img src="foo.jpg" alt="b" /></p> diff --git a/regress/img-ref.md b/regress/img-ref.md @@ -0,0 +1,5 @@ + +a ![b][cdef] + +[cdef]: foo.jpg + diff --git a/regress/intraemph.html b/regress/intraemph.html @@ -0,0 +1 @@ +<p>The-following<em>should-be-emphasised</em>while-this-is-not.</p> diff --git a/regress/intraemph.latex b/regress/intraemph.latex @@ -0,0 +1,2 @@ + +The-following\emph{should-be-emphasised}while-this-is-not. 
diff --git a/regress/intraemph.man b/regress/intraemph.man @@ -0,0 +1,2 @@ +.PP +The-following\fIshould-be-emphasised\fRwhile-this-is-not. diff --git a/regress/intraemph.md b/regress/intraemph.md @@ -0,0 +1,2 @@ + +The-following_should-be-emphasised_while-this-is-not. diff --git a/regress/intraemph.ms b/regress/intraemph.ms @@ -0,0 +1,2 @@ +.PP +The-following\fIshould-be-emphasised\fRwhile-this-is-not. diff --git a/regress/link-attr-class-eoln.html b/regress/link-attr-class-eoln.html @@ -0,0 +1 @@ +<p>link <a href="address" class="cls">text</a></p> diff --git a/regress/link-attr-class-eoln.md b/regress/link-attr-class-eoln.md @@ -0,0 +1,2 @@ + +link [text](address){ .cls diff --git a/regress/link-attr-class-onechar.html b/regress/link-attr-class-onechar.html @@ -0,0 +1 @@ +<p>link <a href="address">text</a></p> diff --git a/regress/link-attr-class-onechar.md b/regress/link-attr-class-onechar.md @@ -0,0 +1,2 @@ + +link [text](address){ . cls } diff --git a/regress/link-attr-class-spaced.html b/regress/link-attr-class-spaced.html @@ -0,0 +1 @@ +<p>link <a href="address">text</a> { .cls }</p> diff --git a/regress/link-attr-class-spaced.md b/regress/link-attr-class-spaced.md @@ -0,0 +1,2 @@ + +link [text](address) { .cls } diff --git a/regress/link-attr-class.fodt b/regress/link-attr-class.fodt @@ -0,0 +1,16 @@ +<office:automatic-styles> +<style:style style:family="paragraph" style:name="P1" style:parent-style-name="Standard"> +</style:style> +<style:page-layout style:name="pm1"> +<style:page-layout-properties fo:page-width="21.001cm" fo:page-height="29.7cm" style:num-format="1" style:print-orientation="portrait" fo:margin-top="2cm" fo:margin-bottom="2cm" fo:margin-left="2cm" fo:margin-right="2cm" style:writing-mode="lr-tb" style:footnote-max-height="0cm"> +</style:page-layout-properties> +</style:page-layout> +</office:automatic-styles> +<office:master-styles> +<style:master-page style:name="Standard" style:page-layout-name="pm1"/> +</office:master-styles> 
+<office:body> +<office:text> +<text:p text:style-name="P1">link <text:a xlink:type="simple" text:style-name="Internet_20_Link" text:class-names="cls" xlink:href="address">text</text:a></text:p> +</office:text> +</office:body> diff --git a/regress/link-attr-class.html b/regress/link-attr-class.html @@ -0,0 +1 @@ +<p>link <a href="address" class="cls">text</a></p> diff --git a/regress/link-attr-class.md b/regress/link-attr-class.md @@ -0,0 +1,2 @@ + +link [text](address){ .cls } diff --git a/regress/link-attr-id.fodt b/regress/link-attr-id.fodt @@ -0,0 +1,16 @@ +<office:automatic-styles> +<style:style style:family="paragraph" style:name="P1" style:parent-style-name="Standard"> +</style:style> +<style:page-layout style:name="pm1"> +<style:page-layout-properties fo:page-width="21.001cm" fo:page-height="29.7cm" style:num-format="1" style:print-orientation="portrait" fo:margin-top="2cm" fo:margin-bottom="2cm" fo:margin-left="2cm" fo:margin-right="2cm" style:writing-mode="lr-tb" style:footnote-max-height="0cm"> +</style:page-layout-properties> +</style:page-layout> +</office:automatic-styles> +<office:master-styles> +<style:master-page style:name="Standard" style:page-layout-name="pm1"/> +</office:master-styles> +<office:body> +<office:text> +<text:p text:style-name="P1">link<text:bookmark-start text:name="www" /><text:a xlink:type="simple" text:style-name="Internet_20_Link" xlink:href="address">text</text:a><text:bookmark-end text:name="www" />tail</text:p> +</office:text> +</office:body> diff --git a/regress/link-attr-id.html b/regress/link-attr-id.html @@ -0,0 +1 @@ +<p>link<a href="address" id="www">text</a>tail</p> diff --git a/regress/link-attr-id.latex b/regress/link-attr-id.latex @@ -0,0 +1,3 @@ + +link\hypertarget{www}{% +\href{address}{text}}tail diff --git a/regress/link-attr-id.md b/regress/link-attr-id.md @@ -0,0 +1,2 @@ + +link[text](address){ #www }tail diff --git a/regress/link-attr-id.ms b/regress/link-attr-id.ms @@ -0,0 +1,5 @@ +.PP +link\c +.pdfhref M 
www +.pdfhref W -A "\c" -D address -- text +tail diff --git a/regress/link-ref-attr-multi.html b/regress/link-ref-attr-multi.html @@ -0,0 +1 @@ +<p>a <a href="foo.com" class="class" id="id">b</a></p> diff --git a/regress/link-ref-attr-multi.md b/regress/link-ref-attr-multi.md @@ -0,0 +1,4 @@ +a [b][cdef] + +[cdef]: foo.com { .class #id some=key } + diff --git a/regress/link-ref-attr-secondline.html b/regress/link-ref-attr-secondline.html @@ -0,0 +1 @@ +<p>a <a href="foo.com" title="hello, world" class="class">b</a></p> diff --git a/regress/link-ref-attr-secondline.md b/regress/link-ref-attr-secondline.md @@ -0,0 +1,6 @@ + +a [b][cdef] + +[cdef]: foo.com +(hello, world) { .class } + diff --git a/regress/link-ref-attr-title-embedded.html b/regress/link-ref-attr-title-embedded.html @@ -0,0 +1 @@ +<p>a <a href="foo.com" title="title "foobar" class="class">b</a></p> diff --git a/regress/link-ref-attr-title-embedded.md b/regress/link-ref-attr-title-embedded.md @@ -0,0 +1,5 @@ + +a [b][cdef] + +[cdef]: foo.com "title "foobar" { .class } + diff --git a/regress/link-ref-attr-title.html b/regress/link-ref-attr-title.html @@ -0,0 +1 @@ +<p>a <a href="foo.com" title="title" class="class">b</a></p> diff --git a/regress/link-ref-attr-title.md b/regress/link-ref-attr-title.md @@ -0,0 +1,5 @@ + +a [b][cdef] + +[cdef]: foo.com "title" { .class } + diff --git a/regress/link-ref-attr.html b/regress/link-ref-attr.html @@ -0,0 +1 @@ +<p>a <a href="foo.com" class="class">b</a></p> diff --git a/regress/link-ref-attr.md b/regress/link-ref-attr.md @@ -0,0 +1,5 @@ + +a [b][cdef] + +[cdef]: foo.com { .class } + diff --git a/regress/link-ref-garbage.html b/regress/link-ref-garbage.html @@ -0,0 +1,3 @@ +<p>a [b][cdef]</p> + +<p>[cdef]: foo.com garbage</p> diff --git a/regress/link-ref-garbage.md b/regress/link-ref-garbage.md @@ -0,0 +1,5 @@ + +a [b][cdef] + +[cdef]: foo.com garbage + diff --git a/regress/link-ref-title-alt.html b/regress/link-ref-title-alt.html @@ -0,0 +1 @@ +<p>a <a 
href="foo.com" title="this is "a title">b</a></p> diff --git a/regress/link-ref-title-alt.md b/regress/link-ref-title-alt.md @@ -0,0 +1,5 @@ + +a [b][cdef] + +[cdef]: foo.com (this is "a title) + diff --git a/regress/link-ref-title-newline.html b/regress/link-ref-title-newline.html @@ -0,0 +1 @@ +<p>a <a href="foo.com" title="this is a title">b</a></p> diff --git a/regress/link-ref-title-newline.md b/regress/link-ref-title-newline.md @@ -0,0 +1,6 @@ + +a [b][cdef] + +[cdef]: foo.com +"this is a title" + diff --git a/regress/link-ref-title-twoline.html b/regress/link-ref-title-twoline.html @@ -0,0 +1,5 @@ +<p>a [b][cdef]</p> + +<p>[cdef]: foo.com +“this is a bad +title”</p> diff --git a/regress/link-ref-title-twoline.md b/regress/link-ref-title-twoline.md @@ -0,0 +1,7 @@ + +a [b][cdef] + +[cdef]: foo.com +"this is a bad +title" + diff --git a/regress/link-ref-title-withline.html b/regress/link-ref-title-withline.html @@ -0,0 +1,4 @@ +<p>a [b][cdef]</p> + +<p>[cdef]: foo.com “this is +a title”</p> diff --git a/regress/link-ref-title-withline.md b/regress/link-ref-title-withline.md @@ -0,0 +1,6 @@ + +a [b][cdef] + +[cdef]: foo.com "this is +a title" + diff --git a/regress/link-ref-title.html b/regress/link-ref-title.html @@ -0,0 +1 @@ +<p>a <a href="foo.com" title="this is a title">b</a></p> diff --git a/regress/link-ref-title.md b/regress/link-ref-title.md @@ -0,0 +1,5 @@ + +a [b][cdef] + +[cdef]: foo.com "this is a title" + diff --git a/regress/link-ref.html b/regress/link-ref.html @@ -0,0 +1 @@ +<p>a <a href="foo.com">b</a></p> diff --git a/regress/link-ref.md b/regress/link-ref.md @@ -0,0 +1,5 @@ + +a [b][cdef] + +[cdef]: foo.com + diff --git a/regress/link-with-links.md b/regress/link-with-links.md @@ -0,0 +1,4 @@ +a + +- b https://foo.com c +- d e diff --git a/regress/metadata/canonicalise.md b/regress/metadata/canonicalise.md @@ -0,0 +1,3 @@ +t E S t: foo + +Hello, world. 
diff --git a/regress/metadata/canonicalise.txt b/regress/metadata/canonicalise.txt @@ -0,0 +1 @@ +foo diff --git a/regress/metadata/dupes-canonicalise.md b/regress/metadata/dupes-canonicalise.md @@ -0,0 +1,4 @@ +test: 1 2 3 +t e S t: 4 5 6 + +Hello, world. diff --git a/regress/metadata/dupes-canonicalise.txt b/regress/metadata/dupes-canonicalise.txt @@ -0,0 +1 @@ +4 5 6 diff --git a/regress/metadata/dupes.md b/regress/metadata/dupes.md @@ -0,0 +1,4 @@ +test: 1 2 3 +test: 4 5 6 + +Hello, world. diff --git a/regress/metadata/dupes.txt b/regress/metadata/dupes.txt @@ -0,0 +1 @@ +4 5 6 diff --git a/regress/metadata/multiline.md b/regress/metadata/multiline.md @@ -0,0 +1,5 @@ +test: asdf +asdf asdf +fdsa fdsa + +Hello, world. diff --git a/regress/metadata/multiline.txt b/regress/metadata/multiline.txt @@ -0,0 +1,3 @@ +asdf +asdf asdf +fdsa fdsa diff --git a/regress/metadata/simple.md b/regress/metadata/simple.md @@ -0,0 +1,3 @@ +test: foo + +Hello, world. diff --git a/regress/metadata/simple.txt b/regress/metadata/simple.txt @@ -0,0 +1 @@ +foo diff --git a/regress/newline-before-link.md b/regress/newline-before-link.md @@ -0,0 +1,3 @@ + +a bwww wcdef +[g](https://foo.com) diff --git a/regress/newline-before-link.term b/regress/newline-before-link.term @@ -0,0 +1 @@ + a bwww wcdef [1;93mg[0m [4;32mhttps://foo.com[0m diff --git a/regress/original/Amps_and_angle_encoding.html b/regress/original/Amps_and_angle_encoding.html @@ -0,0 +1,17 @@ +<p>AT&T has an ampersand in their name.</p> + +<p>AT&T is another way to write it.</p> + +<p>This & that.</p> + +<p>4 < 5.</p> + +<p>6 > 5.</p> + +<p>Here's a <a href="http://example.com/?foo=1&bar=2">link</a> with an ampersand in the URL.</p> + +<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&T">AT&T</a>.</p> + +<p>Here's an inline <a href="/script?foo=1&bar=2">link</a>.</p> + +<p>Here's an inline <a href="/script?foo=1&bar=2">link</a>.</p> diff --git 
a/regress/original/Amps_and_angle_encoding.text b/regress/original/Amps_and_angle_encoding.text @@ -0,0 +1,21 @@ +AT&T has an ampersand in their name. + +AT&T is another way to write it. + +This & that. + +4 < 5. + +6 > 5. + +Here's a [link] [1] with an ampersand in the URL. + +Here's a link with an amersand in the link text: [AT&T] [2]. + +Here's an inline [link](/script?foo=1&bar=2). + +Here's an inline [link](</script?foo=1&bar=2>). + + +[1]: http://example.com/?foo=1&bar=2 +[2]: http://att.com/ "AT&T" +\ No newline at end of file diff --git a/regress/original/Auto_links.html b/regress/original/Auto_links.html @@ -0,0 +1,18 @@ +<p>Link: <a href="http://example.com/">http://example.com/</a>.</p> + +<p>With an ampersand: <a href="http://example.com/?foo=1&bar=2">http://example.com/?foo=1&bar=2</a></p> + +<ul> +<li>In a list?</li> +<li><a href="http://example.com/">http://example.com/</a></li> +<li>It should.</li> +</ul> + +<blockquote> + <p>Blockquoted: <a href="http://example.com/">http://example.com/</a></p> +</blockquote> + +<p>Auto-links should not occur here: <code><http://example.com/></code></p> + +<pre><code>or here: <http://example.com/> +</code></pre> diff --git a/regress/original/Auto_links.text b/regress/original/Auto_links.text @@ -0,0 +1,13 @@ +Link: <http://example.com/>. + +With an ampersand: <http://example.com/?foo=1&bar=2> + +* In a list? +* <http://example.com/> +* It should. 
+ +> Blockquoted: <http://example.com/> + +Auto-links should not occur here: `<http://example.com/>` + + or here: <http://example.com/> +\ No newline at end of file diff --git a/regress/original/Backslash_escapes.html b/regress/original/Backslash_escapes.html @@ -0,0 +1,118 @@ +<p>These should all get escaped:</p> + +<p>Backslash: \</p> + +<p>Backtick: `</p> + +<p>Asterisk: *</p> + +<p>Underscore: _</p> + +<p>Left brace: {</p> + +<p>Right brace: }</p> + +<p>Left bracket: [</p> + +<p>Right bracket: ]</p> + +<p>Left paren: (</p> + +<p>Right paren: )</p> + +<p>Greater-than: ></p> + +<p>Hash: #</p> + +<p>Period: .</p> + +<p>Bang: !</p> + +<p>Plus: +</p> + +<p>Minus: -</p> + +<p>These should not, because they occur within a code block:</p> + +<pre><code>Backslash: \\ + +Backtick: \` + +Asterisk: \* + +Underscore: \_ + +Left brace: \{ + +Right brace: \} + +Left bracket: \[ + +Right bracket: \] + +Left paren: \( + +Right paren: \) + +Greater-than: \> + +Hash: \# + +Period: \. + +Bang: \! + +Plus: \+ + +Minus: \- +</code></pre> + +<p>Nor should these, which occur in code spans:</p> + +<p>Backslash: <code>\\</code></p> + +<p>Backtick: <code>\`</code></p> + +<p>Asterisk: <code>\*</code></p> + +<p>Underscore: <code>\_</code></p> + +<p>Left brace: <code>\{</code></p> + +<p>Right brace: <code>\}</code></p> + +<p>Left bracket: <code>\[</code></p> + +<p>Right bracket: <code>\]</code></p> + +<p>Left paren: <code>\(</code></p> + +<p>Right paren: <code>\)</code></p> + +<p>Greater-than: <code>\></code></p> + +<p>Hash: <code>\#</code></p> + +<p>Period: <code>\.</code></p> + +<p>Bang: <code>\!</code></p> + +<p>Plus: <code>\+</code></p> + +<p>Minus: <code>\-</code></p> + + +<p>These should get escaped, even though they're matching pairs for +other Markdown constructs:</p> + +<p>*asterisks*</p> + +<p>_underscores_</p> + +<p>`backticks`</p> + +<p>This is a code span with a literal backslash-backtick sequence: <code>\`</code></p> + +<p>This is a tag with unescaped backticks <span 
attr='`ticks`'>bar</span>.</p> + +<p>This is a tag with backslashes <span attr='\\backslashes\\'>bar</span>.</p> diff --git a/regress/original/Backslash_escapes.text b/regress/original/Backslash_escapes.text @@ -0,0 +1,120 @@ +These should all get escaped: + +Backslash: \\ + +Backtick: \` + +Asterisk: \* + +Underscore: \_ + +Left brace: \{ + +Right brace: \} + +Left bracket: \[ + +Right bracket: \] + +Left paren: \( + +Right paren: \) + +Greater-than: \> + +Hash: \# + +Period: \. + +Bang: \! + +Plus: \+ + +Minus: \- + + + +These should not, because they occur within a code block: + + Backslash: \\ + + Backtick: \` + + Asterisk: \* + + Underscore: \_ + + Left brace: \{ + + Right brace: \} + + Left bracket: \[ + + Right bracket: \] + + Left paren: \( + + Right paren: \) + + Greater-than: \> + + Hash: \# + + Period: \. + + Bang: \! + + Plus: \+ + + Minus: \- + + +Nor should these, which occur in code spans: + +Backslash: `\\` + +Backtick: `` \` `` + +Asterisk: `\*` + +Underscore: `\_` + +Left brace: `\{` + +Right brace: `\}` + +Left bracket: `\[` + +Right bracket: `\]` + +Left paren: `\(` + +Right paren: `\)` + +Greater-than: `\>` + +Hash: `\#` + +Period: `\.` + +Bang: `\!` + +Plus: `\+` + +Minus: `\-` + + +These should get escaped, even though they're matching pairs for +other Markdown constructs: + +\*asterisks\* + +\_underscores\_ + +\`backticks\` + +This is a code span with a literal backslash-backtick sequence: `` \` `` + +This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>. + +This is a tag with backslashes <span attr='\\backslashes\\'>bar</span>. 
diff --git a/regress/original/Blockquotes_with_code_blocks.html b/regress/original/Blockquotes_with_code_blocks.html @@ -0,0 +1,15 @@ +<blockquote> + <p>Example:</p> + +<pre><code>sub status { + print "working"; +} +</code></pre> + + <p>Or:</p> + +<pre><code>sub status { + return "working"; +} +</code></pre> +</blockquote> diff --git a/regress/original/Blockquotes_with_code_blocks.text b/regress/original/Blockquotes_with_code_blocks.text @@ -0,0 +1,11 @@ +> Example: +> +> sub status { +> print "working"; +> } +> +> Or: +> +> sub status { +> return "working"; +> } diff --git a/regress/original/Code_Blocks.html b/regress/original/Code_Blocks.html @@ -0,0 +1,18 @@ +<pre><code>code block on the first line +</code></pre> + +<p>Regular text.</p> + +<pre><code>code block indented by spaces +</code></pre> + +<p>Regular text.</p> + +<pre><code>the lines in this block +all contain trailing spaces +</code></pre> + +<p>Regular Text.</p> + +<pre><code>code block on the last line +</code></pre> diff --git a/regress/original/Code_Blocks.text b/regress/original/Code_Blocks.text @@ -0,0 +1,14 @@ + code block on the first line + +Regular text. + + code block indented by spaces + +Regular text. + + the lines in this block + all contain trailing spaces + +Regular Text. + + code block on the last line +\ No newline at end of file diff --git a/regress/original/Code_Spans.html b/regress/original/Code_Spans.html @@ -0,0 +1,5 @@ +<p><code><test a="</code> content of attribute <code>"></code></p> + +<p>Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span></p> + +<p>Here's how you put <code>`backticks`</code> in a code span.</p> diff --git a/regress/original/Code_Spans.text b/regress/original/Code_Spans.text @@ -0,0 +1,5 @@ +`<test a="` content of attribute `">` + +Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span> + +Here's how you put `` `backticks` `` in a code span. 
+\ No newline at end of file diff --git a/regress/original/Hard-wrapped_paragraphs_with_list-like_lines.html b/regress/original/Hard-wrapped_paragraphs_with_list-like_lines.html @@ -0,0 +1,8 @@ +<p>In Markdown 1.0.0 and earlier. Version +8. This line turns into a list item. +Because a hard-wrapped line in the +middle of a paragraph looked like a +list item.</p> + +<p>Here's one with a bullet. +* criminey.</p> diff --git a/regress/original/Hard-wrapped_paragraphs_with_list-like_lines.text b/regress/original/Hard-wrapped_paragraphs_with_list-like_lines.text @@ -0,0 +1,8 @@ +In Markdown 1.0.0 and earlier. Version +8. This line turns into a list item. +Because a hard-wrapped line in the +middle of a paragraph looked like a +list item. + +Here's one with a bullet. +* criminey. diff --git a/regress/original/Horizontal_rules.html b/regress/original/Horizontal_rules.html @@ -0,0 +1,71 @@ +<p>Dashes:</p> + +<hr /> + +<hr /> + +<hr /> + +<hr /> + +<pre><code>--- +</code></pre> + +<hr /> + +<hr /> + +<hr /> + +<hr /> + +<pre><code>- - - +</code></pre> + +<p>Asterisks:</p> + +<hr /> + +<hr /> + +<hr /> + +<hr /> + +<pre><code>*** +</code></pre> + +<hr /> + +<hr /> + +<hr /> + +<hr /> + +<pre><code>* * * +</code></pre> + +<p>Underscores:</p> + +<hr /> + +<hr /> + +<hr /> + +<hr /> + +<pre><code>___ +</code></pre> + +<hr /> + +<hr /> + +<hr /> + +<hr /> + +<pre><code>_ _ _ +</code></pre> diff --git a/regress/original/Horizontal_rules.text b/regress/original/Horizontal_rules.text @@ -0,0 +1,67 @@ +Dashes: + +--- + + --- + + --- + + --- + + --- + +- - - + + - - - + + - - - + + - - - + + - - - + + +Asterisks: + +*** + + *** + + *** + + *** + + *** + +* * * + + * * * + + * * * + + * * * + + * * * + + +Underscores: + +___ + + ___ + + ___ + + ___ + + ___ + +_ _ _ + + _ _ _ + + _ _ _ + + _ _ _ + + _ _ _ diff --git a/regress/original/Inline_HTML_Advanced.html b/regress/original/Inline_HTML_Advanced.html @@ -0,0 +1,15 @@ +<p>Simple block on one line:</p> + +<div>foo</div> + +<p>And 
nested without indentation:</p> + +<div> +<div> +<div> +foo +</div> +<div style=">"/> +</div> +<div>bar</div> +</div> diff --git a/regress/original/Inline_HTML_Advanced.text b/regress/original/Inline_HTML_Advanced.text @@ -0,0 +1,15 @@ +Simple block on one line: + +<div>foo</div> + +And nested without indentation: + +<div> +<div> +<div> +foo +</div> +<div style=">"/> +</div> +<div>bar</div> +</div> diff --git a/regress/original/Inline_HTML_Simple.html b/regress/original/Inline_HTML_Simple.html @@ -0,0 +1,72 @@ +<p>Here's a simple block:</p> + +<div> + foo +</div> + +<p>This should be a code block, though:</p> + +<pre><code><div> + foo +</div> +</code></pre> + +<p>As should this:</p> + +<pre><code><div>foo</div> +</code></pre> + +<p>Now, nested:</p> + +<div> + <div> + <div> + foo + </div> + </div> +</div> + +<p>This should just be an HTML comment:</p> + +<!-- Comment --> + +<p>Multiline:</p> + +<!-- +Blah +Blah +--> + +<p>Code block:</p> + +<pre><code><!-- Comment --> +</code></pre> + +<p>Just plain comment, with trailing spaces on the line:</p> + +<!-- foo --> + +<p>Code:</p> + +<pre><code><hr /> +</code></pre> + +<p>Hr's:</p> + +<hr> + +<hr/> + +<hr /> + +<hr> + +<hr/> + +<hr /> + +<hr class="foo" id="bar" /> + +<hr class="foo" id="bar"/> + +<hr class="foo" id="bar" > diff --git a/regress/original/Inline_HTML_Simple.text b/regress/original/Inline_HTML_Simple.text @@ -0,0 +1,69 @@ +Here's a simple block: + +<div> + foo +</div> + +This should be a code block, though: + + <div> + foo + </div> + +As should this: + + <div>foo</div> + +Now, nested: + +<div> + <div> + <div> + foo + </div> + </div> +</div> + +This should just be an HTML comment: + +<!-- Comment --> + +Multiline: + +<!-- +Blah +Blah +--> + +Code block: + + <!-- Comment --> + +Just plain comment, with trailing spaces on the line: + +<!-- foo --> + +Code: + + <hr /> + +Hr's: + +<hr> + +<hr/> + +<hr /> + +<hr> + +<hr/> + +<hr /> + +<hr class="foo" id="bar" /> + +<hr class="foo" id="bar"/> + +<hr class="foo" 
id="bar" > + diff --git a/regress/original/Inline_HTML_comments.html b/regress/original/Inline_HTML_comments.html @@ -0,0 +1,13 @@ +<p>Paragraph one.</p> + +<!-- This is a simple comment --> + +<!-- + This is another comment. +--> + +<p>Paragraph two.</p> + +<!-- one comment block -- -- with two comments --> + +<p>The end.</p> diff --git a/regress/original/Inline_HTML_comments.text b/regress/original/Inline_HTML_comments.text @@ -0,0 +1,13 @@ +Paragraph one. + +<!-- This is a simple comment --> + +<!-- + This is another comment. +--> + +Paragraph two. + +<!-- one comment block -- -- with two comments --> + +The end. diff --git a/regress/original/Links,_inline_style.html b/regress/original/Links,_inline_style.html @@ -0,0 +1,11 @@ +<p>Just a <a href="/url/">URL</a>.</p> + +<p><a href="/url/" title="title">URL and title</a>.</p> + +<p><a href="/url/" title="title preceded by two spaces">URL and title</a>.</p> + +<p><a href="/url/" title="title preceded by a tab">URL and title</a>.</p> + +<p><a href="/url/" title="title has spaces afterward">URL and title</a>.</p> + +<p><a href="">Empty</a>.</p> diff --git a/regress/original/Links,_inline_style.text b/regress/original/Links,_inline_style.text @@ -0,0 +1,12 @@ +Just a [URL](/url/). + +[URL and title](/url/ "title"). + +[URL and title](/url/ "title preceded by two spaces"). + +[URL and title](/url/ "title preceded by a tab"). + +[URL and title](/url/ "title has spaces afterward" ). + + +[Empty](). 
diff --git a/regress/original/Links,_reference_style.html b/regress/original/Links,_reference_style.html @@ -0,0 +1,52 @@ +<p>Foo <a href="/url/" title="Title">bar</a>.</p> + +<p>Foo <a href="/url/" title="Title">bar</a>.</p> + +<p>Foo <a href="/url/" title="Title">bar</a>.</p> + +<p>With <a href="/url/">embedded [brackets]</a>.</p> + +<p>Indented <a href="/url">once</a>.</p> + +<p>Indented <a href="/url">twice</a>.</p> + +<p>Indented <a href="/url">thrice</a>.</p> + +<p>Indented [four][] times.</p> + +<pre><code>[four]: /url +</code></pre> + +<hr /> + +<p><a href="foo">this</a> should work</p> + +<p>So should <a href="foo">this</a>.</p> + +<p>And <a href="foo">this</a>.</p> + +<p>And <a href="foo">this</a>.</p> + +<p>And <a href="foo">this</a>.</p> + +<p>But not [that] [].</p> + +<p>Nor [that][].</p> + +<p>Nor [that].</p> + +<p>[Something in brackets like <a href="foo">this</a> should work]</p> + +<p>[Same with <a href="foo">this</a>.]</p> + +<p>In this case, <a href="/somethingelse/">this</a> points to something else.</p> + +<p>Backslashing should suppress [this] and [this].</p> + +<hr /> + +<p>Here's one where the <a href="/url/">link +breaks</a> across lines.</p> + +<p>Here's another where the <a href="/url/">link +breaks</a> across lines, but with a line-ending space.</p> diff --git a/regress/original/Links,_reference_style.text b/regress/original/Links,_reference_style.text @@ -0,0 +1,71 @@ +Foo [bar] [1]. + +Foo [bar][1]. + +Foo [bar] +[1]. + +[1]: /url/ "Title" + + +With [embedded [brackets]] [b]. + + +Indented [once][]. + +Indented [twice][]. + +Indented [thrice][]. + +Indented [four][] times. + + [once]: /url + + [twice]: /url + + [thrice]: /url + + [four]: /url + + +[b]: /url/ + +* * * + +[this] [this] should work + +So should [this][this]. + +And [this] []. + +And [this][]. + +And [this]. + +But not [that] []. + +Nor [that][]. + +Nor [that]. + +[Something in brackets like [this][] should work] + +[Same with [this].] 
+ +In this case, [this](/somethingelse/) points to something else. + +Backslashing should suppress \[this] and [this\]. + +[this]: foo + + +* * * + +Here's one where the [link +breaks] across lines. + +Here's another where the [link +breaks] across lines, but with a line-ending space. + + +[link breaks]: /url/ diff --git a/regress/original/Links,_shortcut_references.html b/regress/original/Links,_shortcut_references.html @@ -0,0 +1,9 @@ +<p>This is the <a href="/simple">simple case</a>.</p> + +<p>This one has a <a href="/foo">line +break</a>.</p> + +<p>This one has a <a href="/foo">line +break</a> with a line-ending space.</p> + +<p><a href="/that">this</a> and the <a href="/other">other</a></p> diff --git a/regress/original/Links,_shortcut_references.text b/regress/original/Links,_shortcut_references.text @@ -0,0 +1,20 @@ +This is the [simple case]. + +[simple case]: /simple + + + +This one has a [line +break]. + +This one has a [line +break] with a line-ending space. + +[line break]: /foo + + +[this] [that] and the [other] + +[this]: /this +[that]: /that +[other]: /other diff --git a/regress/original/Literal_quotes_in_titles.html b/regress/original/Literal_quotes_in_titles.html @@ -0,0 +1,3 @@ +<p>Foo <a href="/url/" title="Title with "quotes" inside">bar</a>.</p> + +<p>Foo <a href="/url/" title="Title with "quotes" inside">bar</a>.</p> diff --git a/regress/original/Literal_quotes_in_titles.text b/regress/original/Literal_quotes_in_titles.text @@ -0,0 +1,7 @@ +Foo [bar][]. + +Foo [bar](/url/ "Title with "quotes" inside"). 
+ + + [bar]: /url/ "Title with "quotes" inside" + diff --git a/regress/original/Markdown_Documentation_-_Basics.html b/regress/original/Markdown_Documentation_-_Basics.html @@ -0,0 +1,314 @@ +<h1>Markdown: Basics</h1> + +<ul id="ProjectSubmenu"> + <li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li> + <li><a class="selected" title="Markdown Basics">Basics</a></li> + <li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li> + <li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li> + <li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li> +</ul> + +<h2>Getting the Gist of Markdown's Formatting Syntax</h2> + +<p>This page offers a brief overview of what it's like to use Markdown. +The <a href="/projects/markdown/syntax" title="Markdown Syntax">syntax page</a> provides complete, detailed documentation for +every feature, but Markdown should be very easy to pick up simply by +looking at a few examples of it in action. The examples on this page +are written in a before/after style, showing example syntax and the +HTML output produced by Markdown.</p> + +<p>It's also helpful to simply try Markdown out; the <a href="/projects/markdown/dingus" title="Markdown Dingus">Dingus</a> is a +web application that allows you type your own Markdown-formatted text +and translate it to XHTML.</p> + +<p><strong>Note:</strong> This document is itself written using Markdown; you +can <a href="/projects/markdown/basics.text">see the source for it by adding '.text' to the URL</a>.</p> + +<h2>Paragraphs, Headers, Blockquotes</h2> + +<p>A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. (A blank line is any line that looks like a +blank line -- a line containing nothing spaces or tabs is considered +blank.) 
Normal paragraphs should not be intended with spaces or tabs.</p> + +<p>Markdown offers two styles of headers: <em>Setext</em> and <em>atx</em>. +Setext-style headers for <code><h1></code> and <code><h2></code> are created by +"underlining" with equal signs (<code>=</code>) and hyphens (<code>-</code>), respectively. +To create an atx-style header, you put 1-6 hash marks (<code>#</code>) at the +beginning of the line -- the number of hashes equals the resulting +HTML header level.</p> + +<p>Blockquotes are indicated using email-style '<code>></code>' angle brackets.</p> + +<p>Markdown:</p> + +<pre><code>A First Level Header +==================== + +A Second Level Header +--------------------- + +Now is the time for all good men to come to +the aid of their country. This is just a +regular paragraph. + +The quick brown fox jumped over the lazy +dog's back. + +### Header 3 + +> This is a blockquote. +> +> This is the second paragraph in the blockquote. +> +> ## This is an H2 in a blockquote +</code></pre> + +<p>Output:</p> + +<pre><code><h1>A First Level Header</h1> + +<h2>A Second Level Header</h2> + +<p>Now is the time for all good men to come to +the aid of their country. This is just a +regular paragraph.</p> + +<p>The quick brown fox jumped over the lazy +dog's back.</p> + +<h3>Header 3</h3> + +<blockquote> + <p>This is a blockquote.</p> + + <p>This is the second paragraph in the blockquote.</p> + + <h2>This is an H2 in a blockquote</h2> +</blockquote> +</code></pre> + +<h3>Phrase Emphasis</h3> + +<p>Markdown uses asterisks and underscores to indicate spans of emphasis.</p> + +<p>Markdown:</p> + +<pre><code>Some of these words *are emphasized*. +Some of these words _are emphasized also_. + +Use two asterisks for **strong emphasis**. +Or, if you prefer, __use two underscores instead__. +</code></pre> + +<p>Output:</p> + +<pre><code><p>Some of these words <em>are emphasized</em>. 
+Some of these words <em>are emphasized also</em>.</p> + +<p>Use two asterisks for <strong>strong emphasis</strong>. +Or, if you prefer, <strong>use two underscores instead</strong>.</p> +</code></pre> + +<h2>Lists</h2> + +<p>Unordered (bulleted) lists use asterisks, pluses, and hyphens (<code>*</code>, +<code>+</code>, and <code>-</code>) as list markers. These three markers are +interchangable; this:</p> + +<pre><code>* Candy. +* Gum. +* Booze. +</code></pre> + +<p>this:</p> + +<pre><code>+ Candy. ++ Gum. ++ Booze. +</code></pre> + +<p>and this:</p> + +<pre><code>- Candy. +- Gum. +- Booze. +</code></pre> + +<p>all produce the same output:</p> + +<pre><code><ul> +<li>Candy.</li> +<li>Gum.</li> +<li>Booze.</li> +</ul> +</code></pre> + +<p>Ordered (numbered) lists use regular numbers, followed by periods, as +list markers:</p> + +<pre><code>1. Red +2. Green +3. Blue +</code></pre> + +<p>Output:</p> + +<pre><code><ol> +<li>Red</li> +<li>Green</li> +<li>Blue</li> +</ol> +</code></pre> + +<p>If you put blank lines between items, you'll get <code><p></code> tags for the +list item text. You can create multi-paragraph list items by indenting +the paragraphs by 4 spaces or 1 tab:</p> + +<pre><code>* A list item. + + With multiple paragraphs. + +* Another item in the list. +</code></pre> + +<p>Output:</p> + +<pre><code><ul> +<li><p>A list item.</p> +<p>With multiple paragraphs.</p></li> +<li><p>Another item in the list.</p></li> +</ul> +</code></pre> + +<h3>Links</h3> + +<p>Markdown supports two styles for creating links: <em>inline</em> and +<em>reference</em>. With both styles, you use square brackets to delimit the +text you want to turn into a link.</p> + +<p>Inline-style links use parentheses immediately after the link text. +For example:</p> + +<pre><code>This is an [example link](http://example.com/). 
+</code></pre> + +<p>Output:</p> + +<pre><code><p>This is an <a href="http://example.com/"> +example link</a>.</p> +</code></pre> + +<p>Optionally, you may include a title attribute in the parentheses:</p> + +<pre><code>This is an [example link](http://example.com/ "With a Title"). +</code></pre> + +<p>Output:</p> + +<pre><code><p>This is an <a href="http://example.com/" title="With a Title"> +example link</a>.</p> +</code></pre> + +<p>Reference-style links allow you to refer to your links by names, which +you define elsewhere in your document:</p> + +<pre><code>I get 10 times more traffic from [Google][1] than from +[Yahoo][2] or [MSN][3]. + +[1]: http://google.com/ "Google" +[2]: http://search.yahoo.com/ "Yahoo Search" +[3]: http://search.msn.com/ "MSN Search" +</code></pre> + +<p>Output:</p> + +<pre><code><p>I get 10 times more traffic from <a href="http://google.com/" +title="Google">Google</a> than from <a href="http://search.yahoo.com/" +title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/" +title="MSN Search">MSN</a>.</p> +</code></pre> + +<p>The title attribute is optional. Link names may contain letters, +numbers and spaces, but are <em>not</em> case sensitive:</p> + +<pre><code>I start my morning with a cup of coffee and +[The New York Times][NY Times]. 
+ +[ny times]: http://www.nytimes.com/ +</code></pre> + +<p>Output:</p> + +<pre><code><p>I start my morning with a cup of coffee and +<a href="http://www.nytimes.com/">The New York Times</a>.</p> +</code></pre> + +<h3>Images</h3> + +<p>Image syntax is very much like link syntax.</p> + +<p>Inline (titles are optional):</p> + +<pre><code> +</code></pre> + +<p>Reference-style:</p> + +<pre><code>![alt text][id] + +[id]: /path/to/img.jpg "Title" +</code></pre> + +<p>Both of the above examples produce the same output:</p> + +<pre><code><img src="/path/to/img.jpg" alt="alt text" title="Title" /> +</code></pre> + +<h3>Code</h3> + +<p>In a regular paragraph, you can create code span by wrapping text in +backtick quotes. Any ampersands (<code>&</code>) and angle brackets (<code><</code> or +<code>></code>) will automatically be translated into HTML entities. This makes +it easy to use Markdown to write about HTML example code:</p> + +<pre><code>I strongly recommend against using any `<blink>` tags. + +I wish SmartyPants used named entities like `&mdash;` +instead of decimal-encoded entites like `&#8212;`. +</code></pre> + +<p>Output:</p> + +<pre><code><p>I strongly recommend against using any +<code>&lt;blink&gt;</code> tags.</p> + +<p>I wish SmartyPants used named entities like +<code>&amp;mdash;</code> instead of decimal-encoded +entites like <code>&amp;#8212;</code>.</p> +</code></pre> + +<p>To specify an entire block of pre-formatted code, indent every line of +the block by 4 spaces or 1 tab. 
Just like with code spans, <code>&</code>, <code><</code>, +and <code>></code> characters will be escaped automatically.</p> + +<p>Markdown:</p> + +<pre><code>If you want your page to validate under XHTML 1.0 Strict, +you've got to put paragraph tags in your blockquotes: + + <blockquote> + <p>For example.</p> + </blockquote> +</code></pre> + +<p>Output:</p> + +<pre><code><p>If you want your page to validate under XHTML 1.0 Strict, +you've got to put paragraph tags in your blockquotes:</p> + +<pre><code>&lt;blockquote&gt; + &lt;p&gt;For example.&lt;/p&gt; +&lt;/blockquote&gt; +</code></pre> +</code></pre> diff --git a/regress/original/Markdown_Documentation_-_Basics.text b/regress/original/Markdown_Documentation_-_Basics.text @@ -0,0 +1,306 @@ +Markdown: Basics +================ + +<ul id="ProjectSubmenu"> + <li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li> + <li><a class="selected" title="Markdown Basics">Basics</a></li> + <li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li> + <li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li> + <li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li> +</ul> + + +Getting the Gist of Markdown's Formatting Syntax +------------------------------------------------ + +This page offers a brief overview of what it's like to use Markdown. +The [syntax page] [s] provides complete, detailed documentation for +every feature, but Markdown should be very easy to pick up simply by +looking at a few examples of it in action. The examples on this page +are written in a before/after style, showing example syntax and the +HTML output produced by Markdown. + +It's also helpful to simply try Markdown out; the [Dingus] [d] is a +web application that allows you type your own Markdown-formatted text +and translate it to XHTML. 
+ +**Note:** This document is itself written using Markdown; you +can [see the source for it by adding '.text' to the URL] [src]. + + [s]: /projects/markdown/syntax "Markdown Syntax" + [d]: /projects/markdown/dingus "Markdown Dingus" + [src]: /projects/markdown/basics.text + + +## Paragraphs, Headers, Blockquotes ## + +A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. (A blank line is any line that looks like a +blank line -- a line containing nothing spaces or tabs is considered +blank.) Normal paragraphs should not be intended with spaces or tabs. + +Markdown offers two styles of headers: *Setext* and *atx*. +Setext-style headers for `<h1>` and `<h2>` are created by +"underlining" with equal signs (`=`) and hyphens (`-`), respectively. +To create an atx-style header, you put 1-6 hash marks (`#`) at the +beginning of the line -- the number of hashes equals the resulting +HTML header level. + +Blockquotes are indicated using email-style '`>`' angle brackets. + +Markdown: + + A First Level Header + ==================== + + A Second Level Header + --------------------- + + Now is the time for all good men to come to + the aid of their country. This is just a + regular paragraph. + + The quick brown fox jumped over the lazy + dog's back. + + ### Header 3 + + > This is a blockquote. + > + > This is the second paragraph in the blockquote. + > + > ## This is an H2 in a blockquote + + +Output: + + <h1>A First Level Header</h1> + + <h2>A Second Level Header</h2> + + <p>Now is the time for all good men to come to + the aid of their country. 
This is just a + regular paragraph.</p> + + <p>The quick brown fox jumped over the lazy + dog's back.</p> + + <h3>Header 3</h3> + + <blockquote> + <p>This is a blockquote.</p> + + <p>This is the second paragraph in the blockquote.</p> + + <h2>This is an H2 in a blockquote</h2> + </blockquote> + + + +### Phrase Emphasis ### + +Markdown uses asterisks and underscores to indicate spans of emphasis. + +Markdown: + + Some of these words *are emphasized*. + Some of these words _are emphasized also_. + + Use two asterisks for **strong emphasis**. + Or, if you prefer, __use two underscores instead__. + +Output: + + <p>Some of these words <em>are emphasized</em>. + Some of these words <em>are emphasized also</em>.</p> + + <p>Use two asterisks for <strong>strong emphasis</strong>. + Or, if you prefer, <strong>use two underscores instead</strong>.</p> + + + +## Lists ## + +Unordered (bulleted) lists use asterisks, pluses, and hyphens (`*`, +`+`, and `-`) as list markers. These three markers are +interchangable; this: + + * Candy. + * Gum. + * Booze. + +this: + + + Candy. + + Gum. + + Booze. + +and this: + + - Candy. + - Gum. + - Booze. + +all produce the same output: + + <ul> + <li>Candy.</li> + <li>Gum.</li> + <li>Booze.</li> + </ul> + +Ordered (numbered) lists use regular numbers, followed by periods, as +list markers: + + 1. Red + 2. Green + 3. Blue + +Output: + + <ol> + <li>Red</li> + <li>Green</li> + <li>Blue</li> + </ol> + +If you put blank lines between items, you'll get `<p>` tags for the +list item text. You can create multi-paragraph list items by indenting +the paragraphs by 4 spaces or 1 tab: + + * A list item. + + With multiple paragraphs. + + * Another item in the list. + +Output: + + <ul> + <li><p>A list item.</p> + <p>With multiple paragraphs.</p></li> + <li><p>Another item in the list.</p></li> + </ul> + + + +### Links ### + +Markdown supports two styles for creating links: *inline* and +*reference*. 
With both styles, you use square brackets to delimit the +text you want to turn into a link. + +Inline-style links use parentheses immediately after the link text. +For example: + + This is an [example link](http://example.com/). + +Output: + + <p>This is an <a href="http://example.com/"> + example link</a>.</p> + +Optionally, you may include a title attribute in the parentheses: + + This is an [example link](http://example.com/ "With a Title"). + +Output: + + <p>This is an <a href="http://example.com/" title="With a Title"> + example link</a>.</p> + +Reference-style links allow you to refer to your links by names, which +you define elsewhere in your document: + + I get 10 times more traffic from [Google][1] than from + [Yahoo][2] or [MSN][3]. + + [1]: http://google.com/ "Google" + [2]: http://search.yahoo.com/ "Yahoo Search" + [3]: http://search.msn.com/ "MSN Search" + +Output: + + <p>I get 10 times more traffic from <a href="http://google.com/" + title="Google">Google</a> than from <a href="http://search.yahoo.com/" + title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/" + title="MSN Search">MSN</a>.</p> + +The title attribute is optional. Link names may contain letters, +numbers and spaces, but are *not* case sensitive: + + I start my morning with a cup of coffee and + [The New York Times][NY Times]. + + [ny times]: http://www.nytimes.com/ + +Output: + + <p>I start my morning with a cup of coffee and + <a href="http://www.nytimes.com/">The New York Times</a>.</p> + + +### Images ### + +Image syntax is very much like link syntax. + +Inline (titles are optional): + +  + +Reference-style: + + ![alt text][id] + + [id]: /path/to/img.jpg "Title" + +Both of the above examples produce the same output: + + <img src="/path/to/img.jpg" alt="alt text" title="Title" /> + + + +### Code ### + +In a regular paragraph, you can create code span by wrapping text in +backtick quotes. 
Any ampersands (`&`) and angle brackets (`<` or +`>`) will automatically be translated into HTML entities. This makes +it easy to use Markdown to write about HTML example code: + + I strongly recommend against using any `<blink>` tags. + + I wish SmartyPants used named entities like `—` + instead of decimal-encoded entites like `—`. + +Output: + + <p>I strongly recommend against using any + <code><blink></code> tags.</p> + + <p>I wish SmartyPants used named entities like + <code>&mdash;</code> instead of decimal-encoded + entites like <code>&#8212;</code>.</p> + + +To specify an entire block of pre-formatted code, indent every line of +the block by 4 spaces or 1 tab. Just like with code spans, `&`, `<`, +and `>` characters will be escaped automatically. + +Markdown: + + If you want your page to validate under XHTML 1.0 Strict, + you've got to put paragraph tags in your blockquotes: + + <blockquote> + <p>For example.</p> + </blockquote> + +Output: + + <p>If you want your page to validate under XHTML 1.0 Strict, + you've got to put paragraph tags in your blockquotes:</p> + + <pre><code><blockquote> + <p>For example.</p> + </blockquote> + </code></pre> diff --git a/regress/original/Markdown_Documentation_-_Syntax.html b/regress/original/Markdown_Documentation_-_Syntax.html @@ -0,0 +1,942 @@ +<h1>Markdown: Syntax</h1> + +<ul id="ProjectSubmenu"> + <li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li> + <li><a href="/projects/markdown/basics" title="Markdown Basics">Basics</a></li> + <li><a class="selected" title="Markdown Syntax Documentation">Syntax</a></li> + <li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li> + <li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li> +</ul> + +<ul> +<li><a href="#overview">Overview</a> +<ul> +<li><a href="#philosophy">Philosophy</a></li> +<li><a href="#html">Inline HTML</a></li> +<li><a href="#autoescape">Automatic Escaping for 
Special Characters</a></li> +</ul></li> +<li><a href="#block">Block Elements</a> +<ul> +<li><a href="#p">Paragraphs and Line Breaks</a></li> +<li><a href="#header">Headers</a></li> +<li><a href="#blockquote">Blockquotes</a></li> +<li><a href="#list">Lists</a></li> +<li><a href="#precode">Code Blocks</a></li> +<li><a href="#hr">Horizontal Rules</a></li> +</ul></li> +<li><a href="#span">Span Elements</a> +<ul> +<li><a href="#link">Links</a></li> +<li><a href="#em">Emphasis</a></li> +<li><a href="#code">Code</a></li> +<li><a href="#img">Images</a></li> +</ul></li> +<li><a href="#misc">Miscellaneous</a> +<ul> +<li><a href="#backslash">Backslash Escapes</a></li> +<li><a href="#autolink">Automatic Links</a></li> +</ul></li> +</ul> + +<p><strong>Note:</strong> This document is itself written using Markdown; you +can <a href="/projects/markdown/syntax.text">see the source for it by adding '.text' to the URL</a>.</p> + +<hr /> + +<h2 id="overview">Overview</h2> + +<h3 id="philosophy">Philosophy</h3> + +<p>Markdown is intended to be as easy-to-read and easy-to-write as is feasible.</p> + +<p>Readability, however, is emphasized above all else. A Markdown-formatted +document should be publishable as-is, as plain text, without looking +like it's been marked up with tags or formatting instructions. 
While +Markdown's syntax has been influenced by several existing text-to-HTML +filters -- including <a href="http://docutils.sourceforge.net/mirror/setext.html">Setext</a>, <a href="http://www.aaronsw.com/2002/atx/">atx</a>, <a href="http://textism.com/tools/textile/">Textile</a>, <a href="http://docutils.sourceforge.net/rst.html">reStructuredText</a>, +<a href="http://www.triptico.com/software/grutatxt.html">Grutatext</a>, and <a href="http://ettext.taint.org/doc/">EtText</a> -- the single biggest source of +inspiration for Markdown's syntax is the format of plain text email.</p> + +<p>To this end, Markdown's syntax is comprised entirely of punctuation +characters, which punctuation characters have been carefully chosen so +as to look like what they mean. E.g., asterisks around a word actually +look like *emphasis*. Markdown lists look like, well, lists. Even +blockquotes look like quoted passages of text, assuming you've ever +used email.</p> + +<h3 id="html">Inline HTML</h3> + +<p>Markdown's syntax is intended for one purpose: to be used as a +format for <em>writing</em> for the web.</p> + +<p>Markdown is not a replacement for HTML, or even close to it. Its +syntax is very small, corresponding only to a very small subset of +HTML tags. The idea is <em>not</em> to create a syntax that makes it easier +to insert HTML tags. In my opinion, HTML tags are already easy to +insert. The idea for Markdown is to make it easy to read, write, and +edit prose. HTML is a <em>publishing</em> format; Markdown is a <em>writing</em> +format. Thus, Markdown's formatting syntax only addresses issues that +can be conveyed in plain text.</p> + +<p>For any markup that is not covered by Markdown's syntax, you simply +use HTML itself. There's no need to preface it or delimit it to +indicate that you're switching from Markdown to HTML; you just use +the tags.</p> + +<p>The only restrictions are that block-level HTML elements -- e.g. 
<code><div></code>, +<code><table></code>, <code><pre></code>, <code><p></code>, etc. -- must be separated from surrounding +content by blank lines, and the start and end tags of the block should +not be indented with tabs or spaces. Markdown is smart enough not +to add extra (unwanted) <code><p></code> tags around HTML block-level tags.</p> + +<p>For example, to add an HTML table to a Markdown article:</p> + +<pre><code>This is a regular paragraph. + +<table> + <tr> + <td>Foo</td> + </tr> +</table> + +This is another regular paragraph. +</code></pre> + +<p>Note that Markdown formatting syntax is not processed within block-level +HTML tags. E.g., you can't use Markdown-style <code>*emphasis*</code> inside an +HTML block.</p> + +<p>Span-level HTML tags -- e.g. <code><span></code>, <code><cite></code>, or <code><del></code> -- can be +used anywhere in a Markdown paragraph, list item, or header. If you +want, you can even use HTML tags instead of Markdown formatting; e.g. if +you'd prefer to use HTML <code><a></code> or <code><img></code> tags instead of Markdown's +link or image syntax, go right ahead.</p> + +<p>Unlike block-level HTML tags, Markdown syntax <em>is</em> processed within +span-level tags.</p> + +<h3 id="autoescape">Automatic Escaping for Special Characters</h3> + +<p>In HTML, there are two characters that demand special treatment: <code><</code> +and <code>&</code>. Left angle brackets are used to start tags; ampersands are +used to denote HTML entities. If you want to use them as literal +characters, you must escape them as entities, e.g. <code>&lt;</code>, and +<code>&amp;</code>.</p> + +<p>Ampersands in particular are bedeviling for web writers. If you want to +write about 'AT&T', you need to write '<code>AT&amp;T</code>'. You even need to +escape ampersands within URLs. 
Thus, if you want to link to:</p> + +<pre><code>http://images.google.com/images?num=30&q=larry+bird +</code></pre> + +<p>you need to encode the URL as:</p> + +<pre><code>http://images.google.com/images?num=30&amp;q=larry+bird +</code></pre> + +<p>in your anchor tag <code>href</code> attribute. Needless to say, this is easy to +forget, and is probably the single most common source of HTML validation +errors in otherwise well-marked-up web sites.</p> + +<p>Markdown allows you to use these characters naturally, taking care of +all the necessary escaping for you. If you use an ampersand as part of +an HTML entity, it remains unchanged; otherwise it will be translated +into <code>&amp;</code>.</p> + +<p>So, if you want to include a copyright symbol in your article, you can write:</p> + +<pre><code>&copy; +</code></pre> + +<p>and Markdown will leave it alone. But if you write:</p> + +<pre><code>AT&T +</code></pre> + +<p>Markdown will translate it to:</p> + +<pre><code>AT&amp;T +</code></pre> + +<p>Similarly, because Markdown supports <a href="#html">inline HTML</a>, if you use +angle brackets as delimiters for HTML tags, Markdown will treat them as +such. But if you write:</p> + +<pre><code>4 < 5 +</code></pre> + +<p>Markdown will translate it to:</p> + +<pre><code>4 &lt; 5 +</code></pre> + +<p>However, inside Markdown code spans and blocks, angle brackets and +ampersands are <em>always</em> encoded automatically. This makes it easy to use +Markdown to write about HTML code. (As opposed to raw HTML, which is a +terrible format for writing about HTML syntax, because every single <code><</code> +and <code>&</code> in your example code needs to be escaped.)</p> + +<hr /> + +<h2 id="block">Block Elements</h2> + +<h3 id="p">Paragraphs and Line Breaks</h3> + +<p>A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. 
(A blank line is any line that looks like a +blank line -- a line containing nothing but spaces or tabs is considered +blank.) Normal paragraphs should not be intended with spaces or tabs.</p> + +<p>The implication of the "one or more consecutive lines of text" rule is +that Markdown supports "hard-wrapped" text paragraphs. This differs +significantly from most other text-to-HTML formatters (including Movable +Type's "Convert Line Breaks" option) which translate every line break +character in a paragraph into a <code><br /></code> tag.</p> + +<p>When you <em>do</em> want to insert a <code><br /></code> break tag using Markdown, you +end a line with two or more spaces, then type return.</p> + +<p>Yes, this takes a tad more effort to create a <code><br /></code>, but a simplistic +"every line break is a <code><br /></code>" rule wouldn't work for Markdown. +Markdown's email-style <a href="#blockquote">blockquoting</a> and multi-paragraph <a href="#list">list items</a> +work best -- and look better -- when you format them with hard breaks.</p> + +<h3 id="header">Headers</h3> + +<p>Markdown supports two styles of headers, <a href="http://docutils.sourceforge.net/mirror/setext.html">Setext</a> and <a href="http://www.aaronsw.com/2002/atx/">atx</a>.</p> + +<p>Setext-style headers are "underlined" using equal signs (for first-level +headers) and dashes (for second-level headers). For example:</p> + +<pre><code>This is an H1 +============= + +This is an H2 +------------- +</code></pre> + +<p>Any number of underlining <code>=</code>'s or <code>-</code>'s will work.</p> + +<p>Atx-style headers use 1-6 hash characters at the start of the line, +corresponding to header levels 1-6. For example:</p> + +<pre><code># This is an H1 + +## This is an H2 + +###### This is an H6 +</code></pre> + +<p>Optionally, you may "close" atx-style headers. This is purely +cosmetic -- you can use this if you think it looks better. 
The +closing hashes don't even need to match the number of hashes +used to open the header. (The number of opening hashes +determines the header level.) :</p> + +<pre><code># This is an H1 # + +## This is an H2 ## + +### This is an H3 ###### +</code></pre> + +<h3 id="blockquote">Blockquotes</h3> + +<p>Markdown uses email-style <code>></code> characters for blockquoting. If you're +familiar with quoting passages of text in an email message, then you +know how to create a blockquote in Markdown. It looks best if you hard +wrap the text and put a <code>></code> before every line:</p> + +<pre><code>> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, +> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. +> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. +> +> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse +> id sem consectetuer libero luctus adipiscing. +</code></pre> + +<p>Markdown allows you to be lazy and only put the <code>></code> before the first +line of a hard-wrapped paragraph:</p> + +<pre><code>> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, +consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. +Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. + +> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse +id sem consectetuer libero luctus adipiscing. +</code></pre> + +<p>Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by +adding additional levels of <code>></code>:</p> + +<pre><code>> This is the first level of quoting. +> +> > This is nested blockquote. +> +> Back to the first level. +</code></pre> + +<p>Blockquotes can contain other Markdown elements, including headers, lists, +and code blocks:</p> + +<pre><code>> ## This is a header. +> +> 1. This is the first list item. +> 2. This is the second list item. 
+> +> Here's some example code: +> +> return shell_exec("echo $input | $markdown_script"); +</code></pre> + +<p>Any decent text editor should make email-style quoting easy. For +example, with BBEdit, you can make a selection and choose Increase +Quote Level from the Text menu.</p> + +<h3 id="list">Lists</h3> + +<p>Markdown supports ordered (numbered) and unordered (bulleted) lists.</p> + +<p>Unordered lists use asterisks, pluses, and hyphens -- interchangably +-- as list markers:</p> + +<pre><code>* Red +* Green +* Blue +</code></pre> + +<p>is equivalent to:</p> + +<pre><code>+ Red ++ Green ++ Blue +</code></pre> + +<p>and:</p> + +<pre><code>- Red +- Green +- Blue +</code></pre> + +<p>Ordered lists use numbers followed by periods:</p> + +<pre><code>1. Bird +2. McHale +3. Parish +</code></pre> + +<p>It's important to note that the actual numbers you use to mark the +list have no effect on the HTML output Markdown produces. The HTML +Markdown produces from the above list is:</p> + +<pre><code><ol> +<li>Bird</li> +<li>McHale</li> +<li>Parish</li> +</ol> +</code></pre> + +<p>If you instead wrote the list in Markdown like this:</p> + +<pre><code>1. Bird +1. McHale +1. Parish +</code></pre> + +<p>or even:</p> + +<pre><code>3. Bird +1. McHale +8. Parish +</code></pre> + +<p>you'd get the exact same HTML output. The point is, if you want to, +you can use ordinal numbers in your ordered Markdown lists, so that +the numbers in your source match the numbers in your published HTML. +But if you want to be lazy, you don't have to.</p> + +<p>If you do use lazy list numbering, however, you should still start the +list with the number 1. At some point in the future, Markdown may support +starting ordered lists at an arbitrary number.</p> + +<p>List markers typically start at the left margin, but may be indented by +up to three spaces. 
List markers must be followed by one or more spaces +or a tab.</p> + +<p>To make lists look nice, you can wrap items with hanging indents:</p> + +<pre><code>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit. + Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, + viverra nec, fringilla in, laoreet vitae, risus. +* Donec sit amet nisl. Aliquam semper ipsum sit amet velit. + Suspendisse id sem consectetuer libero luctus adipiscing. +</code></pre> + +<p>But if you want to be lazy, you don't have to:</p> + +<pre><code>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit. +Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, +viverra nec, fringilla in, laoreet vitae, risus. +* Donec sit amet nisl. Aliquam semper ipsum sit amet velit. +Suspendisse id sem consectetuer libero luctus adipiscing. +</code></pre> + +<p>If list items are separated by blank lines, Markdown will wrap the +items in <code><p></code> tags in the HTML output. For example, this input:</p> + +<pre><code>* Bird +* Magic +</code></pre> + +<p>will turn into:</p> + +<pre><code><ul> +<li>Bird</li> +<li>Magic</li> +</ul> +</code></pre> + +<p>But this:</p> + +<pre><code>* Bird + +* Magic +</code></pre> + +<p>will turn into:</p> + +<pre><code><ul> +<li><p>Bird</p></li> +<li><p>Magic</p></li> +</ul> +</code></pre> + +<p>List items may consist of multiple paragraphs. Each subsequent +paragraph in a list item must be intended by either 4 spaces +or one tab:</p> + +<pre><code>1. This is a list item with two paragraphs. Lorem ipsum dolor + sit amet, consectetuer adipiscing elit. Aliquam hendrerit + mi posuere lectus. + + Vestibulum enim wisi, viverra nec, fringilla in, laoreet + vitae, risus. Donec sit amet nisl. Aliquam semper ipsum + sit amet velit. + +2. Suspendisse id sem consectetuer libero luctus adipiscing. 
+</code></pre> + +<p>It looks nice if you indent every line of the subsequent +paragraphs, but here again, Markdown will allow you to be +lazy:</p> + +<pre><code>* This is a list item with two paragraphs. + + This is the second paragraph in the list item. You're +only required to indent the first line. Lorem ipsum dolor +sit amet, consectetuer adipiscing elit. + +* Another item in the same list. +</code></pre> + +<p>To put a blockquote within a list item, the blockquote's <code>></code> +delimiters need to be indented:</p> + +<pre><code>* A list item with a blockquote: + + > This is a blockquote + > inside a list item. +</code></pre> + +<p>To put a code block within a list item, the code block needs +to be indented <em>twice</em> -- 8 spaces or two tabs:</p> + +<pre><code>* A list item with a code block: + + <code goes here> +</code></pre> + +<p>It's worth noting that it's possible to trigger an ordered list by +accident, by writing something like this:</p> + +<pre><code>1986. What a great season. +</code></pre> + +<p>In other words, a <em>number-period-space</em> sequence at the beginning of a +line. To avoid this, you can backslash-escape the period:</p> + +<pre><code>1986\. What a great season. +</code></pre> + +<h3 id="precode">Code Blocks</h3> + +<p>Pre-formatted code blocks are used for writing about programming or +markup source code. Rather than forming normal paragraphs, the lines +of a code block are interpreted literally. Markdown wraps a code block +in both <code><pre></code> and <code><code></code> tags.</p> + +<p>To produce a code block in Markdown, simply indent every line of the +block by at least 4 spaces or 1 tab. For example, given this input:</p> + +<pre><code>This is a normal paragraph: + + This is a code block. +</code></pre> + +<p>Markdown will generate:</p> + +<pre><code><p>This is a normal paragraph:</p> + +<pre><code>This is a code block. 
+</code></pre> +</code></pre> + +<p>One level of indentation -- 4 spaces or 1 tab -- is removed from each +line of the code block. For example, this:</p> + +<pre><code>Here is an example of AppleScript: + + tell application "Foo" + beep + end tell +</code></pre> + +<p>will turn into:</p> + +<pre><code><p>Here is an example of AppleScript:</p> + +<pre><code>tell application "Foo" + beep +end tell +</code></pre> +</code></pre> + +<p>A code block continues until it reaches a line that is not indented +(or the end of the article).</p> + +<p>Within a code block, ampersands (<code>&</code>) and angle brackets (<code><</code> and <code>></code>) +are automatically converted into HTML entities. This makes it very +easy to include example HTML source code using Markdown -- just paste +it and indent it, and Markdown will handle the hassle of encoding the +ampersands and angle brackets. For example, this:</p> + +<pre><code> <div class="footer"> + &copy; 2004 Foo Corporation + </div> +</code></pre> + +<p>will turn into:</p> + +<pre><code><pre><code>&lt;div class="footer"&gt; + &amp;copy; 2004 Foo Corporation +&lt;/div&gt; +</code></pre> +</code></pre> + +<p>Regular Markdown syntax is not processed within code blocks. E.g., +asterisks are just literal asterisks within a code block. This means +it's also easy to use Markdown to write about Markdown's own syntax.</p> + +<h3 id="hr">Horizontal Rules</h3> + +<p>You can produce a horizontal rule tag (<code><hr /></code>) by placing three or +more hyphens, asterisks, or underscores on a line by themselves. If you +wish, you may use spaces between the hyphens or asterisks. 
Each of the +following lines will produce a horizontal rule:</p> + +<pre><code>* * * + +*** + +***** + +- - - + +--------------------------------------- + +_ _ _ +</code></pre> + +<hr /> + +<h2 id="span">Span Elements</h2> + +<h3 id="link">Links</h3> + +<p>Markdown supports two style of links: <em>inline</em> and <em>reference</em>.</p> + +<p>In both styles, the link text is delimited by [square brackets].</p> + +<p>To create an inline link, use a set of regular parentheses immediately +after the link text's closing square bracket. Inside the parentheses, +put the URL where you want the link to point, along with an <em>optional</em> +title for the link, surrounded in quotes. For example:</p> + +<pre><code>This is [an example](http://example.com/ "Title") inline link. + +[This link](http://example.net/) has no title attribute. +</code></pre> + +<p>Will produce:</p> + +<pre><code><p>This is <a href="http://example.com/" title="Title"> +an example</a> inline link.</p> + +<p><a href="http://example.net/">This link</a> has no +title attribute.</p> +</code></pre> + +<p>If you're referring to a local resource on the same server, you can +use relative paths:</p> + +<pre><code>See my [About](/about/) page for details. +</code></pre> + +<p>Reference-style links use a second set of square brackets, inside +which you place a label of your choosing to identify the link:</p> + +<pre><code>This is [an example][id] reference-style link. +</code></pre> + +<p>You can optionally use a space to separate the sets of brackets:</p> + +<pre><code>This is [an example] [id] reference-style link. 
+</code></pre> + +<p>Then, anywhere in the document, you define your link label like this, +on a line by itself:</p> + +<pre><code>[id]: http://example.com/ "Optional Title Here" +</code></pre> + +<p>That is:</p> + +<ul> +<li>Square brackets containing the link identifier (optionally +indented from the left margin using up to three spaces);</li> +<li>followed by a colon;</li> +<li>followed by one or more spaces (or tabs);</li> +<li>followed by the URL for the link;</li> +<li>optionally followed by a title attribute for the link, enclosed +in double or single quotes.</li> +</ul> + +<p>The link URL may, optionally, be surrounded by angle brackets:</p> + +<pre><code>[id]: <http://example.com/> "Optional Title Here" +</code></pre> + +<p>You can put the title attribute on the next line and use extra spaces +or tabs for padding, which tends to look better with longer URLs:</p> + +<pre><code>[id]: http://example.com/longish/path/to/resource/here + "Optional Title Here" +</code></pre> + +<p>Link definitions are only used for creating links during Markdown +processing, and are stripped from your document in the HTML output.</p> + +<p>Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are <em>not</em> case sensitive. E.g. these two links:</p> + +<pre><code>[link text][a] +[link text][A] +</code></pre> + +<p>are equivalent.</p> + +<p>The <em>implicit link name</em> shortcut allows you to omit the name of the +link, in which case the link text itself is used as the name. +Just use an empty set of square brackets -- e.g., to link the word +"Google" to the google.com web site, you could simply write:</p> + +<pre><code>[Google][] +</code></pre> + +<p>And then define the link:</p> + +<pre><code>[Google]: http://google.com/ +</code></pre> + +<p>Because link names may contain spaces, this shortcut even works for +multiple words in the link text:</p> + +<pre><code>Visit [Daring Fireball][] for more information. 
+</code></pre> + +<p>And then define the link:</p> + +<pre><code>[Daring Fireball]: http://daringfireball.net/ +</code></pre> + +<p>Link definitions can be placed anywhere in your Markdown document. I +tend to put them immediately after each paragraph in which they're +used, but if you want, you can put them all at the end of your +document, sort of like footnotes.</p> + +<p>Here's an example of reference links in action:</p> + +<pre><code>I get 10 times more traffic from [Google] [1] than from +[Yahoo] [2] or [MSN] [3]. + + [1]: http://google.com/ "Google" + [2]: http://search.yahoo.com/ "Yahoo Search" + [3]: http://search.msn.com/ "MSN Search" +</code></pre> + +<p>Using the implicit link name shortcut, you could instead write:</p> + +<pre><code>I get 10 times more traffic from [Google][] than from +[Yahoo][] or [MSN][]. + + [google]: http://google.com/ "Google" + [yahoo]: http://search.yahoo.com/ "Yahoo Search" + [msn]: http://search.msn.com/ "MSN Search" +</code></pre> + +<p>Both of the above examples will produce the following HTML output:</p> + +<pre><code><p>I get 10 times more traffic from <a href="http://google.com/" +title="Google">Google</a> than from +<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a> +or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p> +</code></pre> + +<p>For comparison, here is the same paragraph written using +Markdown's inline link style:</p> + +<pre><code>I get 10 times more traffic from [Google](http://google.com/ "Google") +than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or +[MSN](http://search.msn.com/ "MSN Search"). +</code></pre> + +<p>The point of reference-style links is not that they're easier to +write. The point is that with reference-style links, your document +source is vastly more readable. 
Compare the above examples: using +reference-style links, the paragraph itself is only 81 characters +long; with inline-style links, it's 176 characters; and as raw HTML, +it's 234 characters. In the raw HTML, there's more markup than there +is text.</p> + +<p>With Markdown's reference-style links, a source document much more +closely resembles the final output, as rendered in a browser. By +allowing you to move the markup-related metadata out of the paragraph, +you can add links without interrupting the narrative flow of your +prose.</p> + +<h3 id="em">Emphasis</h3> + +<p>Markdown treats asterisks (<code>*</code>) and underscores (<code>_</code>) as indicators of +emphasis. Text wrapped with one <code>*</code> or <code>_</code> will be wrapped with an +HTML <code><em></code> tag; double <code>*</code>'s or <code>_</code>'s will be wrapped with an HTML +<code><strong></code> tag. E.g., this input:</p> + +<pre><code>*single asterisks* + +_single underscores_ + +**double asterisks** + +__double underscores__ +</code></pre> + +<p>will produce:</p> + +<pre><code><em>single asterisks</em> + +<em>single underscores</em> + +<strong>double asterisks</strong> + +<strong>double underscores</strong> +</code></pre> + +<p>You can use whichever style you prefer; the lone restriction is that +the same character must be used to open and close an emphasis span.</p> + +<p>Emphasis can be used in the middle of a word:</p> + +<pre><code>un*fucking*believable +</code></pre> + +<p>But if you surround an <code>*</code> or <code>_</code> with spaces, it'll be treated as a +literal asterisk or underscore.</p> + +<p>To produce a literal asterisk or underscore at a position where it +would otherwise be used as an emphasis delimiter, you can backslash +escape it:</p> + +<pre><code>\*this text is surrounded by literal asterisks\* +</code></pre> + +<h3 id="code">Code</h3> + +<p>To indicate a span of code, wrap it with backtick quotes (<code>`</code>). 
+Unlike a pre-formatted code block, a code span indicates code within a +normal paragraph. For example:</p> + +<pre><code>Use the `printf()` function. +</code></pre> + +<p>will produce:</p> + +<pre><code><p>Use the <code>printf()</code> function.</p> +</code></pre> + +<p>To include a literal backtick character within a code span, you can use +multiple backticks as the opening and closing delimiters:</p> + +<pre><code>``There is a literal backtick (`) here.`` +</code></pre> + +<p>which will produce this:</p> + +<pre><code><p><code>There is a literal backtick (`) here.</code></p> +</code></pre> + +<p>The backtick delimiters surrounding a code span may include spaces -- +one after the opening, one before the closing. This allows you to place +literal backtick characters at the beginning or end of a code span:</p> + +<pre><code>A single backtick in a code span: `` ` `` + +A backtick-delimited string in a code span: `` `foo` `` +</code></pre> + +<p>will produce:</p> + +<pre><code><p>A single backtick in a code span: <code>`</code></p> + +<p>A backtick-delimited string in a code span: <code>`foo`</code></p> +</code></pre> + +<p>With a code span, ampersands and angle brackets are encoded as HTML +entities automatically, which makes it easy to include example HTML +tags. Markdown will turn this:</p> + +<pre><code>Please don't use any `<blink>` tags. +</code></pre> + +<p>into:</p> + +<pre><code><p>Please don't use any <code>&lt;blink&gt;</code> tags.</p> +</code></pre> + +<p>You can write this:</p> + +<pre><code>`&#8212;` is the decimal-encoded equivalent of `&mdash;`. 
+</code></pre> + +<p>to produce:</p> + +<pre><code><p><code>&amp;#8212;</code> is the decimal-encoded +equivalent of <code>&amp;mdash;</code>.</p> +</code></pre> + +<h3 id="img">Images</h3> + +<p>Admittedly, it's fairly difficult to devise a "natural" syntax for +placing images into a plain text document format.</p> + +<p>Markdown uses an image syntax that is intended to resemble the syntax +for links, allowing for two styles: <em>inline</em> and <em>reference</em>.</p> + +<p>Inline image syntax looks like this:</p> + +<pre><code> + + +</code></pre> + +<p>That is:</p> + +<ul> +<li>An exclamation mark: <code>!</code>;</li> +<li>followed by a set of square brackets, containing the <code>alt</code> +attribute text for the image;</li> +<li>followed by a set of parentheses, containing the URL or path to +the image, and an optional <code>title</code> attribute enclosed in double +or single quotes.</li> +</ul> + +<p>Reference-style image syntax looks like this:</p> + +<pre><code>![Alt text][id] +</code></pre> + +<p>Where "id" is the name of a defined image reference. Image references +are defined using syntax identical to link references:</p> + +<pre><code>[id]: url/to/image "Optional title attribute" +</code></pre> + +<p>As of this writing, Markdown has no syntax for specifying the +dimensions of an image; if this is important to you, you can simply +use regular HTML <code><img></code> tags.</p> + +<hr /> + +<h2 id="misc">Miscellaneous</h2> + +<h3 id="autolink">Automatic Links</h3> + +<p>Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. 
What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:</p> + +<pre><code><http://example.com/> +</code></pre> + +<p>Markdown will turn this into:</p> + +<pre><code><a href="http://example.com/">http://example.com/</a> +</code></pre> + +<p>Automatic links for email addresses work similarly, except that +Markdown will also perform a bit of randomized decimal and hex +entity-encoding to help obscure your address from address-harvesting +spambots. For example, Markdown will turn this:</p> + +<pre><code><address@example.com> +</code></pre> + +<p>into something like this:</p> + +<pre><code><a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65; +&#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111; +&#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61; +&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a> +</code></pre> + +<p>which will render in a browser as a clickable link to "address@example.com".</p> + +<p>(This sort of entity-encoding trick will indeed fool many, if not +most, address-harvesting bots, but it definitely won't fool all of +them. It's better than nothing, but an address published in this way +will probably eventually start receiving spam.)</p> + +<h3 id="backslash">Backslash Escapes</h3> + +<p>Markdown allows you to use backslash escapes to generate literal +characters which would otherwise have special meaning in Markdown's +formatting syntax. For example, if you wanted to surround a word with +literal asterisks (instead of an HTML <code><em></code> tag), you can backslashes +before the asterisks, like this:</p> + +<pre><code>\*literal asterisks\* +</code></pre> + +<p>Markdown provides backslash escapes for the following characters:</p> + +<pre><code>\ backslash +` backtick +* asterisk +_ underscore +{} curly braces +[] square brackets +() parentheses +# hash mark ++ plus sign +- minus sign (hyphen) +. dot +! 
exclamation mark +</code></pre> diff --git a/regress/original/Markdown_Documentation_-_Syntax.text b/regress/original/Markdown_Documentation_-_Syntax.text @@ -0,0 +1,888 @@ +Markdown: Syntax +================ + +<ul id="ProjectSubmenu"> + <li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li> + <li><a href="/projects/markdown/basics" title="Markdown Basics">Basics</a></li> + <li><a class="selected" title="Markdown Syntax Documentation">Syntax</a></li> + <li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li> + <li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li> +</ul> + + +* [Overview](#overview) + * [Philosophy](#philosophy) + * [Inline HTML](#html) + * [Automatic Escaping for Special Characters](#autoescape) +* [Block Elements](#block) + * [Paragraphs and Line Breaks](#p) + * [Headers](#header) + * [Blockquotes](#blockquote) + * [Lists](#list) + * [Code Blocks](#precode) + * [Horizontal Rules](#hr) +* [Span Elements](#span) + * [Links](#link) + * [Emphasis](#em) + * [Code](#code) + * [Images](#img) +* [Miscellaneous](#misc) + * [Backslash Escapes](#backslash) + * [Automatic Links](#autolink) + + +**Note:** This document is itself written using Markdown; you +can [see the source for it by adding '.text' to the URL][src]. + + [src]: /projects/markdown/syntax.text + +* * * + +<h2 id="overview">Overview</h2> + +<h3 id="philosophy">Philosophy</h3> + +Markdown is intended to be as easy-to-read and easy-to-write as is feasible. + +Readability, however, is emphasized above all else. A Markdown-formatted +document should be publishable as-is, as plain text, without looking +like it's been marked up with tags or formatting instructions. 
While +Markdown's syntax has been influenced by several existing text-to-HTML +filters -- including [Setext] [1], [atx] [2], [Textile] [3], [reStructuredText] [4], +[Grutatext] [5], and [EtText] [6] -- the single biggest source of +inspiration for Markdown's syntax is the format of plain text email. + + [1]: http://docutils.sourceforge.net/mirror/setext.html + [2]: http://www.aaronsw.com/2002/atx/ + [3]: http://textism.com/tools/textile/ + [4]: http://docutils.sourceforge.net/rst.html + [5]: http://www.triptico.com/software/grutatxt.html + [6]: http://ettext.taint.org/doc/ + +To this end, Markdown's syntax is comprised entirely of punctuation +characters, which punctuation characters have been carefully chosen so +as to look like what they mean. E.g., asterisks around a word actually +look like \*emphasis\*. Markdown lists look like, well, lists. Even +blockquotes look like quoted passages of text, assuming you've ever +used email. + + + +<h3 id="html">Inline HTML</h3> + +Markdown's syntax is intended for one purpose: to be used as a +format for *writing* for the web. + +Markdown is not a replacement for HTML, or even close to it. Its +syntax is very small, corresponding only to a very small subset of +HTML tags. The idea is *not* to create a syntax that makes it easier +to insert HTML tags. In my opinion, HTML tags are already easy to +insert. The idea for Markdown is to make it easy to read, write, and +edit prose. HTML is a *publishing* format; Markdown is a *writing* +format. Thus, Markdown's formatting syntax only addresses issues that +can be conveyed in plain text. + +For any markup that is not covered by Markdown's syntax, you simply +use HTML itself. There's no need to preface it or delimit it to +indicate that you're switching from Markdown to HTML; you just use +the tags. + +The only restrictions are that block-level HTML elements -- e.g. `<div>`, +`<table>`, `<pre>`, `<p>`, etc. 
-- must be separated from surrounding +content by blank lines, and the start and end tags of the block should +not be indented with tabs or spaces. Markdown is smart enough not +to add extra (unwanted) `<p>` tags around HTML block-level tags. + +For example, to add an HTML table to a Markdown article: + + This is a regular paragraph. + + <table> + <tr> + <td>Foo</td> + </tr> + </table> + + This is another regular paragraph. + +Note that Markdown formatting syntax is not processed within block-level +HTML tags. E.g., you can't use Markdown-style `*emphasis*` inside an +HTML block. + +Span-level HTML tags -- e.g. `<span>`, `<cite>`, or `<del>` -- can be +used anywhere in a Markdown paragraph, list item, or header. If you +want, you can even use HTML tags instead of Markdown formatting; e.g. if +you'd prefer to use HTML `<a>` or `<img>` tags instead of Markdown's +link or image syntax, go right ahead. + +Unlike block-level HTML tags, Markdown syntax *is* processed within +span-level tags. + + +<h3 id="autoescape">Automatic Escaping for Special Characters</h3> + +In HTML, there are two characters that demand special treatment: `<` +and `&`. Left angle brackets are used to start tags; ampersands are +used to denote HTML entities. If you want to use them as literal +characters, you must escape them as entities, e.g. `<`, and +`&`. + +Ampersands in particular are bedeviling for web writers. If you want to +write about 'AT&T', you need to write '`AT&T`'. You even need to +escape ampersands within URLs. Thus, if you want to link to: + + http://images.google.com/images?num=30&q=larry+bird + +you need to encode the URL as: + + http://images.google.com/images?num=30&q=larry+bird + +in your anchor tag `href` attribute. Needless to say, this is easy to +forget, and is probably the single most common source of HTML validation +errors in otherwise well-marked-up web sites. + +Markdown allows you to use these characters naturally, taking care of +all the necessary escaping for you. 
If you use an ampersand as part of +an HTML entity, it remains unchanged; otherwise it will be translated +into `&`. + +So, if you want to include a copyright symbol in your article, you can write: + + © + +and Markdown will leave it alone. But if you write: + + AT&T + +Markdown will translate it to: + + AT&T + +Similarly, because Markdown supports [inline HTML](#html), if you use +angle brackets as delimiters for HTML tags, Markdown will treat them as +such. But if you write: + + 4 < 5 + +Markdown will translate it to: + + 4 < 5 + +However, inside Markdown code spans and blocks, angle brackets and +ampersands are *always* encoded automatically. This makes it easy to use +Markdown to write about HTML code. (As opposed to raw HTML, which is a +terrible format for writing about HTML syntax, because every single `<` +and `&` in your example code needs to be escaped.) + + +* * * + + +<h2 id="block">Block Elements</h2> + + +<h3 id="p">Paragraphs and Line Breaks</h3> + +A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. (A blank line is any line that looks like a +blank line -- a line containing nothing but spaces or tabs is considered +blank.) Normal paragraphs should not be intended with spaces or tabs. + +The implication of the "one or more consecutive lines of text" rule is +that Markdown supports "hard-wrapped" text paragraphs. This differs +significantly from most other text-to-HTML formatters (including Movable +Type's "Convert Line Breaks" option) which translate every line break +character in a paragraph into a `<br />` tag. + +When you *do* want to insert a `<br />` break tag using Markdown, you +end a line with two or more spaces, then type return. + +Yes, this takes a tad more effort to create a `<br />`, but a simplistic +"every line break is a `<br />`" rule wouldn't work for Markdown. 
+Markdown's email-style [blockquoting][bq] and multi-paragraph [list items][l] +work best -- and look better -- when you format them with hard breaks. + + [bq]: #blockquote + [l]: #list + + + +<h3 id="header">Headers</h3> + +Markdown supports two styles of headers, [Setext] [1] and [atx] [2]. + +Setext-style headers are "underlined" using equal signs (for first-level +headers) and dashes (for second-level headers). For example: + + This is an H1 + ============= + + This is an H2 + ------------- + +Any number of underlining `=`'s or `-`'s will work. + +Atx-style headers use 1-6 hash characters at the start of the line, +corresponding to header levels 1-6. For example: + + # This is an H1 + + ## This is an H2 + + ###### This is an H6 + +Optionally, you may "close" atx-style headers. This is purely +cosmetic -- you can use this if you think it looks better. The +closing hashes don't even need to match the number of hashes +used to open the header. (The number of opening hashes +determines the header level.) : + + # This is an H1 # + + ## This is an H2 ## + + ### This is an H3 ###### + + +<h3 id="blockquote">Blockquotes</h3> + +Markdown uses email-style `>` characters for blockquoting. If you're +familiar with quoting passages of text in an email message, then you +know how to create a blockquote in Markdown. It looks best if you hard +wrap the text and put a `>` before every line: + + > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, + > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. + > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. + > + > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse + > id sem consectetuer libero luctus adipiscing. + +Markdown allows you to be lazy and only put the `>` before the first +line of a hard-wrapped paragraph: + + > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, + consectetuer adipiscing elit. 
Aliquam hendrerit mi posuere lectus. + Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. + + > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse + id sem consectetuer libero luctus adipiscing. + +Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by +adding additional levels of `>`: + + > This is the first level of quoting. + > + > > This is nested blockquote. + > + > Back to the first level. + +Blockquotes can contain other Markdown elements, including headers, lists, +and code blocks: + + > ## This is a header. + > + > 1. This is the first list item. + > 2. This is the second list item. + > + > Here's some example code: + > + > return shell_exec("echo $input | $markdown_script"); + +Any decent text editor should make email-style quoting easy. For +example, with BBEdit, you can make a selection and choose Increase +Quote Level from the Text menu. + + +<h3 id="list">Lists</h3> + +Markdown supports ordered (numbered) and unordered (bulleted) lists. + +Unordered lists use asterisks, pluses, and hyphens -- interchangably +-- as list markers: + + * Red + * Green + * Blue + +is equivalent to: + + + Red + + Green + + Blue + +and: + + - Red + - Green + - Blue + +Ordered lists use numbers followed by periods: + + 1. Bird + 2. McHale + 3. Parish + +It's important to note that the actual numbers you use to mark the +list have no effect on the HTML output Markdown produces. The HTML +Markdown produces from the above list is: + + <ol> + <li>Bird</li> + <li>McHale</li> + <li>Parish</li> + </ol> + +If you instead wrote the list in Markdown like this: + + 1. Bird + 1. McHale + 1. Parish + +or even: + + 3. Bird + 1. McHale + 8. Parish + +you'd get the exact same HTML output. The point is, if you want to, +you can use ordinal numbers in your ordered Markdown lists, so that +the numbers in your source match the numbers in your published HTML. +But if you want to be lazy, you don't have to. 
+ +If you do use lazy list numbering, however, you should still start the +list with the number 1. At some point in the future, Markdown may support +starting ordered lists at an arbitrary number. + +List markers typically start at the left margin, but may be indented by +up to three spaces. List markers must be followed by one or more spaces +or a tab. + +To make lists look nice, you can wrap items with hanging indents: + + * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. + Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, + viverra nec, fringilla in, laoreet vitae, risus. + * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. + Suspendisse id sem consectetuer libero luctus adipiscing. + +But if you want to be lazy, you don't have to: + + * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. + Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, + viverra nec, fringilla in, laoreet vitae, risus. + * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. + Suspendisse id sem consectetuer libero luctus adipiscing. + +If list items are separated by blank lines, Markdown will wrap the +items in `<p>` tags in the HTML output. For example, this input: + + * Bird + * Magic + +will turn into: + + <ul> + <li>Bird</li> + <li>Magic</li> + </ul> + +But this: + + * Bird + + * Magic + +will turn into: + + <ul> + <li><p>Bird</p></li> + <li><p>Magic</p></li> + </ul> + +List items may consist of multiple paragraphs. Each subsequent +paragraph in a list item must be intended by either 4 spaces +or one tab: + + 1. This is a list item with two paragraphs. Lorem ipsum dolor + sit amet, consectetuer adipiscing elit. Aliquam hendrerit + mi posuere lectus. + + Vestibulum enim wisi, viverra nec, fringilla in, laoreet + vitae, risus. Donec sit amet nisl. Aliquam semper ipsum + sit amet velit. + + 2. Suspendisse id sem consectetuer libero luctus adipiscing. 
+ +It looks nice if you indent every line of the subsequent +paragraphs, but here again, Markdown will allow you to be +lazy: + + * This is a list item with two paragraphs. + + This is the second paragraph in the list item. You're + only required to indent the first line. Lorem ipsum dolor + sit amet, consectetuer adipiscing elit. + + * Another item in the same list. + +To put a blockquote within a list item, the blockquote's `>` +delimiters need to be indented: + + * A list item with a blockquote: + + > This is a blockquote + > inside a list item. + +To put a code block within a list item, the code block needs +to be indented *twice* -- 8 spaces or two tabs: + + * A list item with a code block: + + <code goes here> + + +It's worth noting that it's possible to trigger an ordered list by +accident, by writing something like this: + + 1986. What a great season. + +In other words, a *number-period-space* sequence at the beginning of a +line. To avoid this, you can backslash-escape the period: + + 1986\. What a great season. + + + +<h3 id="precode">Code Blocks</h3> + +Pre-formatted code blocks are used for writing about programming or +markup source code. Rather than forming normal paragraphs, the lines +of a code block are interpreted literally. Markdown wraps a code block +in both `<pre>` and `<code>` tags. + +To produce a code block in Markdown, simply indent every line of the +block by at least 4 spaces or 1 tab. For example, given this input: + + This is a normal paragraph: + + This is a code block. + +Markdown will generate: + + <p>This is a normal paragraph:</p> + + <pre><code>This is a code block. + </code></pre> + +One level of indentation -- 4 spaces or 1 tab -- is removed from each +line of the code block. 
For example, this: + + Here is an example of AppleScript: + + tell application "Foo" + beep + end tell + +will turn into: + + <p>Here is an example of AppleScript:</p> + + <pre><code>tell application "Foo" + beep + end tell + </code></pre> + +A code block continues until it reaches a line that is not indented +(or the end of the article). + +Within a code block, ampersands (`&`) and angle brackets (`<` and `>`) +are automatically converted into HTML entities. This makes it very +easy to include example HTML source code using Markdown -- just paste +it and indent it, and Markdown will handle the hassle of encoding the +ampersands and angle brackets. For example, this: + + <div class="footer"> + © 2004 Foo Corporation + </div> + +will turn into: + + <pre><code><div class="footer"> + &copy; 2004 Foo Corporation + </div> + </code></pre> + +Regular Markdown syntax is not processed within code blocks. E.g., +asterisks are just literal asterisks within a code block. This means +it's also easy to use Markdown to write about Markdown's own syntax. + + + +<h3 id="hr">Horizontal Rules</h3> + +You can produce a horizontal rule tag (`<hr />`) by placing three or +more hyphens, asterisks, or underscores on a line by themselves. If you +wish, you may use spaces between the hyphens or asterisks. Each of the +following lines will produce a horizontal rule: + + * * * + + *** + + ***** + + - - - + + --------------------------------------- + + _ _ _ + + +* * * + +<h2 id="span">Span Elements</h2> + +<h3 id="link">Links</h3> + +Markdown supports two style of links: *inline* and *reference*. + +In both styles, the link text is delimited by [square brackets]. + +To create an inline link, use a set of regular parentheses immediately +after the link text's closing square bracket. Inside the parentheses, +put the URL where you want the link to point, along with an *optional* +title for the link, surrounded in quotes. 
For example: + + This is [an example](http://example.com/ "Title") inline link. + + [This link](http://example.net/) has no title attribute. + +Will produce: + + <p>This is <a href="http://example.com/" title="Title"> + an example</a> inline link.</p> + + <p><a href="http://example.net/">This link</a> has no + title attribute.</p> + +If you're referring to a local resource on the same server, you can +use relative paths: + + See my [About](/about/) page for details. + +Reference-style links use a second set of square brackets, inside +which you place a label of your choosing to identify the link: + + This is [an example][id] reference-style link. + +You can optionally use a space to separate the sets of brackets: + + This is [an example] [id] reference-style link. + +Then, anywhere in the document, you define your link label like this, +on a line by itself: + + [id]: http://example.com/ "Optional Title Here" + +That is: + +* Square brackets containing the link identifier (optionally + indented from the left margin using up to three spaces); +* followed by a colon; +* followed by one or more spaces (or tabs); +* followed by the URL for the link; +* optionally followed by a title attribute for the link, enclosed + in double or single quotes. + +The link URL may, optionally, be surrounded by angle brackets: + + [id]: <http://example.com/> "Optional Title Here" + +You can put the title attribute on the next line and use extra spaces +or tabs for padding, which tends to look better with longer URLs: + + [id]: http://example.com/longish/path/to/resource/here + "Optional Title Here" + +Link definitions are only used for creating links during Markdown +processing, and are stripped from your document in the HTML output. + +Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are *not* case sensitive. E.g. these two links: + + [link text][a] + [link text][A] + +are equivalent. 
+ +The *implicit link name* shortcut allows you to omit the name of the +link, in which case the link text itself is used as the name. +Just use an empty set of square brackets -- e.g., to link the word +"Google" to the google.com web site, you could simply write: + + [Google][] + +And then define the link: + + [Google]: http://google.com/ + +Because link names may contain spaces, this shortcut even works for +multiple words in the link text: + + Visit [Daring Fireball][] for more information. + +And then define the link: + + [Daring Fireball]: http://daringfireball.net/ + +Link definitions can be placed anywhere in your Markdown document. I +tend to put them immediately after each paragraph in which they're +used, but if you want, you can put them all at the end of your +document, sort of like footnotes. + +Here's an example of reference links in action: + + I get 10 times more traffic from [Google] [1] than from + [Yahoo] [2] or [MSN] [3]. + + [1]: http://google.com/ "Google" + [2]: http://search.yahoo.com/ "Yahoo Search" + [3]: http://search.msn.com/ "MSN Search" + +Using the implicit link name shortcut, you could instead write: + + I get 10 times more traffic from [Google][] than from + [Yahoo][] or [MSN][]. + + [google]: http://google.com/ "Google" + [yahoo]: http://search.yahoo.com/ "Yahoo Search" + [msn]: http://search.msn.com/ "MSN Search" + +Both of the above examples will produce the following HTML output: + + <p>I get 10 times more traffic from <a href="http://google.com/" + title="Google">Google</a> than from + <a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a> + or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p> + +For comparison, here is the same paragraph written using +Markdown's inline link style: + + I get 10 times more traffic from [Google](http://google.com/ "Google") + than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or + [MSN](http://search.msn.com/ "MSN Search"). 
+ +The point of reference-style links is not that they're easier to +write. The point is that with reference-style links, your document +source is vastly more readable. Compare the above examples: using +reference-style links, the paragraph itself is only 81 characters +long; with inline-style links, it's 176 characters; and as raw HTML, +it's 234 characters. In the raw HTML, there's more markup than there +is text. + +With Markdown's reference-style links, a source document much more +closely resembles the final output, as rendered in a browser. By +allowing you to move the markup-related metadata out of the paragraph, +you can add links without interrupting the narrative flow of your +prose. + + +<h3 id="em">Emphasis</h3> + +Markdown treats asterisks (`*`) and underscores (`_`) as indicators of +emphasis. Text wrapped with one `*` or `_` will be wrapped with an +HTML `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML +`<strong>` tag. E.g., this input: + + *single asterisks* + + _single underscores_ + + **double asterisks** + + __double underscores__ + +will produce: + + <em>single asterisks</em> + + <em>single underscores</em> + + <strong>double asterisks</strong> + + <strong>double underscores</strong> + +You can use whichever style you prefer; the lone restriction is that +the same character must be used to open and close an emphasis span. + +Emphasis can be used in the middle of a word: + + un*fucking*believable + +But if you surround an `*` or `_` with spaces, it'll be treated as a +literal asterisk or underscore. + +To produce a literal asterisk or underscore at a position where it +would otherwise be used as an emphasis delimiter, you can backslash +escape it: + + \*this text is surrounded by literal asterisks\* + + + +<h3 id="code">Code</h3> + +To indicate a span of code, wrap it with backtick quotes (`` ` ``). +Unlike a pre-formatted code block, a code span indicates code within a +normal paragraph. 
For example: + + Use the `printf()` function. + +will produce: + + <p>Use the <code>printf()</code> function.</p> + +To include a literal backtick character within a code span, you can use +multiple backticks as the opening and closing delimiters: + + ``There is a literal backtick (`) here.`` + +which will produce this: + + <p><code>There is a literal backtick (`) here.</code></p> + +The backtick delimiters surrounding a code span may include spaces -- +one after the opening, one before the closing. This allows you to place +literal backtick characters at the beginning or end of a code span: + + A single backtick in a code span: `` ` `` + + A backtick-delimited string in a code span: `` `foo` `` + +will produce: + + <p>A single backtick in a code span: <code>`</code></p> + + <p>A backtick-delimited string in a code span: <code>`foo`</code></p> + +With a code span, ampersands and angle brackets are encoded as HTML +entities automatically, which makes it easy to include example HTML +tags. Markdown will turn this: + + Please don't use any `<blink>` tags. + +into: + + <p>Please don't use any <code><blink></code> tags.</p> + +You can write this: + + `—` is the decimal-encoded equivalent of `—`. + +to produce: + + <p><code>&#8212;</code> is the decimal-encoded + equivalent of <code>&mdash;</code>.</p> + + + +<h3 id="img">Images</h3> + +Admittedly, it's fairly difficult to devise a "natural" syntax for +placing images into a plain text document format. + +Markdown uses an image syntax that is intended to resemble the syntax +for links, allowing for two styles: *inline* and *reference*. + +Inline image syntax looks like this: + +  + +  + +That is: + +* An exclamation mark: `!`; +* followed by a set of square brackets, containing the `alt` + attribute text for the image; +* followed by a set of parentheses, containing the URL or path to + the image, and an optional `title` attribute enclosed in double + or single quotes. 
+ +Reference-style image syntax looks like this: + + ![Alt text][id] + +Where "id" is the name of a defined image reference. Image references +are defined using syntax identical to link references: + + [id]: url/to/image "Optional title attribute" + +As of this writing, Markdown has no syntax for specifying the +dimensions of an image; if this is important to you, you can simply +use regular HTML `<img>` tags. + + +* * * + + +<h2 id="misc">Miscellaneous</h2> + +<h3 id="autolink">Automatic Links</h3> + +Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this: + + <http://example.com/> + +Markdown will turn this into: + + <a href="http://example.com/">http://example.com/</a> + +Automatic links for email addresses work similarly, except that +Markdown will also perform a bit of randomized decimal and hex +entity-encoding to help obscure your address from address-harvesting +spambots. For example, Markdown will turn this: + + <address@example.com> + +into something like this: + + <a href="mailto:addre + ss@example.co + m">address@exa + mple.com</a> + +which will render in a browser as a clickable link to "address@example.com". + +(This sort of entity-encoding trick will indeed fool many, if not +most, address-harvesting bots, but it definitely won't fool all of +them. It's better than nothing, but an address published in this way +will probably eventually start receiving spam.) + + + +<h3 id="backslash">Backslash Escapes</h3> + +Markdown allows you to use backslash escapes to generate literal +characters which would otherwise have special meaning in Markdown's +formatting syntax. 
For example, if you wanted to surround a word with +literal asterisks (instead of an HTML `<em>` tag), you can backslashes +before the asterisks, like this: + + \*literal asterisks\* + +Markdown provides backslash escapes for the following characters: + + \ backslash + ` backtick + * asterisk + _ underscore + {} curly braces + [] square brackets + () parentheses + # hash mark + + plus sign + - minus sign (hyphen) + . dot + ! exclamation mark + diff --git a/regress/original/Nested_blockquotes.html b/regress/original/Nested_blockquotes.html @@ -0,0 +1,9 @@ +<blockquote> + <p>foo</p> + + <blockquote> + <p>bar</p> + </blockquote> + + <p>foo</p> +</blockquote> diff --git a/regress/original/Nested_blockquotes.text b/regress/original/Nested_blockquotes.text @@ -0,0 +1,5 @@ +> foo +> +> > bar +> +> foo diff --git a/regress/original/Ordered_and_unordered_lists.html b/regress/original/Ordered_and_unordered_lists.html @@ -0,0 +1,150 @@ +<h2>Unordered</h2> + +<p>Asterisks tight:</p> + +<ul> +<li>asterisk 1</li> +<li>asterisk 2</li> +<li>asterisk 3</li> +</ul> + +<p>Asterisks loose:</p> + +<ul> +<li><p>asterisk 1</p></li> +<li><p>asterisk 2</p></li> +<li><p>asterisk 3</p></li> +</ul> + +<hr /> + +<p>Pluses tight:</p> + +<ul> +<li>Plus 1</li> +<li>Plus 2</li> +<li>Plus 3</li> +</ul> + +<p>Pluses loose:</p> + +<ul> +<li><p>Plus 1</p></li> +<li><p>Plus 2</p></li> +<li><p>Plus 3</p></li> +</ul> + +<hr /> + +<p>Minuses tight:</p> + +<ul> +<li>Minus 1</li> +<li>Minus 2</li> +<li>Minus 3</li> +</ul> + +<p>Minuses loose:</p> + +<ul> +<li><p>Minus 1</p></li> +<li><p>Minus 2</p></li> +<li><p>Minus 3</p></li> +</ul> + +<h2>Ordered</h2> + +<p>Tight:</p> + +<ol> +<li>First</li> +<li>Second</li> +<li>Third</li> +</ol> + +<p>and:</p> + +<ol> +<li>One</li> +<li>Two</li> +<li>Three</li> +</ol> + +<p>Loose using tabs:</p> + +<ol> +<li><p>First</p></li> +<li><p>Second</p></li> +<li><p>Third</p></li> +</ol> + +<p>and using spaces:</p> + +<ol> +<li><p>One</p></li> +<li><p>Two</p></li> 
+<li><p>Three</p></li> +</ol> + +<p>Multiple paragraphs:</p> + +<ol> +<li><p>Item 1, graf one.</p> + +<p>Item 2. graf two. The quick brown fox jumped over the lazy dog's +back.</p></li> +<li><p>Item 2.</p></li> +<li><p>Item 3.</p></li> +</ol> + +<h2>Nested</h2> + +<ul> +<li>Tab +<ul> +<li>Tab +<ul> +<li>Tab</li> +</ul></li> +</ul></li> +</ul> + +<p>Here's another:</p> + +<ol> +<li>First</li> +<li>Second: +<ul> +<li>Fee</li> +<li>Fie</li> +<li>Foe</li> +</ul></li> +<li>Third</li> +</ol> + +<p>Same thing but with paragraphs:</p> + +<ol> +<li><p>First</p></li> +<li><p>Second:</p> + +<ul> +<li>Fee</li> +<li>Fie</li> +<li>Foe</li> +</ul></li> +<li><p>Third</p></li> +</ol> + + +<p>This was an error in Markdown 1.0.1:</p> + +<ul> +<li><p>this</p> + +<ul> +<li>sub</li> +</ul> + +<p>that</p></li> +</ul> diff --git a/regress/original/Ordered_and_unordered_lists.text b/regress/original/Ordered_and_unordered_lists.text @@ -0,0 +1,131 @@ +## Unordered + +Asterisks tight: + +* asterisk 1 +* asterisk 2 +* asterisk 3 + + +Asterisks loose: + +* asterisk 1 + +* asterisk 2 + +* asterisk 3 + +* * * + +Pluses tight: + ++ Plus 1 ++ Plus 2 ++ Plus 3 + + +Pluses loose: + ++ Plus 1 + ++ Plus 2 + ++ Plus 3 + +* * * + + +Minuses tight: + +- Minus 1 +- Minus 2 +- Minus 3 + + +Minuses loose: + +- Minus 1 + +- Minus 2 + +- Minus 3 + + +## Ordered + +Tight: + +1. First +2. Second +3. Third + +and: + +1. One +2. Two +3. Three + + +Loose using tabs: + +1. First + +2. Second + +3. Third + +and using spaces: + +1. One + +2. Two + +3. Three + +Multiple paragraphs: + +1. Item 1, graf one. + + Item 2. graf two. The quick brown fox jumped over the lazy dog's + back. + +2. Item 2. + +3. Item 3. + + + +## Nested + +* Tab + * Tab + * Tab + +Here's another: + +1. First +2. Second: + * Fee + * Fie + * Foe +3. Third + +Same thing but with paragraphs: + +1. First + +2. Second: + * Fee + * Fie + * Foe + +3. 
Third + + +This was an error in Markdown 1.0.1: + +* this + + * sub + + that diff --git a/regress/original/README b/regress/original/README @@ -0,0 +1 @@ +From MarkdownTest 1.0.3. diff --git a/regress/original/README.md b/regress/original/README.md @@ -0,0 +1,9 @@ +This contains Markdown testing suites, currently limited to the Markdown +suite v1.0.3. + +It's in its original form except for the following: + +- *Amps and angle encoding.html* has line 9 changed so that \> must be + escaped to \>. + +- *Backslash escapes.html* has the same change on line 23. diff --git a/regress/original/Strong_and_em_together.html b/regress/original/Strong_and_em_together.html @@ -0,0 +1,7 @@ +<p><strong><em>This is strong and em.</em></strong></p> + +<p>So is <strong><em>this</em></strong> word.</p> + +<p><strong><em>This is strong and em.</em></strong></p> + +<p>So is <strong><em>this</em></strong> word.</p> diff --git a/regress/original/Strong_and_em_together.text b/regress/original/Strong_and_em_together.text @@ -0,0 +1,7 @@ +***This is strong and em.*** + +So is ***this*** word. + +___This is strong and em.___ + +So is ___this___ word. 
diff --git a/regress/original/Tabs.html b/regress/original/Tabs.html @@ -0,0 +1,25 @@ +<ul> +<li><p>this is a list item +indented with tabs</p></li> +<li><p>this is a list item +indented with spaces</p></li> +</ul> + +<p>Code:</p> + +<pre><code>this code block is indented by one tab +</code></pre> + +<p>And:</p> + +<pre><code> this code block is indented by two tabs +</code></pre> + +<p>And:</p> + +<pre><code>+ this is an example list item + indented with tabs + ++ this is an example list item + indented with spaces +</code></pre> diff --git a/regress/original/Tabs.text b/regress/original/Tabs.text @@ -0,0 +1,21 @@ ++ this is a list item + indented with tabs + ++ this is a list item + indented with spaces + +Code: + + this code block is indented by one tab + +And: + + this code block is indented by two tabs + +And: + + + this is an example list item + indented with tabs + + + this is an example list item + indented with spaces diff --git a/regress/original/Tidyness.html b/regress/original/Tidyness.html @@ -0,0 +1,8 @@ +<blockquote> +<p>A list within a blockquote:</p> +<ul> +<li>asterisk 1</li> +<li>asterisk 2</li> +<li>asterisk 3</li> +</ul> +</blockquote> diff --git a/regress/original/Tidyness.text b/regress/original/Tidyness.text @@ -0,0 +1,5 @@ +> A list within a blockquote: +> +> * asterisk 1 +> * asterisk 2 +> * asterisk 3 diff --git a/regress/shift-heading-level-by-neg.gemini b/regress/shift-heading-level-by-neg.gemini @@ -0,0 +1,19 @@ +# header 1 + +1 + +# header 2 + +2 + +## header 3 + +3 + +### header 4 + +4 + +#### header 5 + +5 diff --git a/regress/shift-heading-level-by-neg.html b/regress/shift-heading-level-by-neg.html @@ -0,0 +1,19 @@ +<h1 id="header-1">header 1</h1> + +<p>1</p> + +<h1 id="header-2">header 2</h1> + +<p>2</p> + +<h2 id="header-3">header 3</h2> + +<p>3</p> + +<h3 id="header-4">header 4</h3> + +<p>4</p> + +<h4 id="header-5">header 5</h4> + +<p>5</p> diff --git a/regress/shift-heading-level-by-neg.latex 
b/regress/shift-heading-level-by-neg.latex @@ -0,0 +1,24 @@ +\hypertarget{header-1}{% +\section{header 1}\label{header-1}} + +1 + +\hypertarget{header-2}{% +\section{header 2}\label{header-2}} + +2 + +\hypertarget{header-3}{% +\subsection{header 3}\label{header-3}} + +3 + +\hypertarget{header-4}{% +\subsubsection{header 4}\label{header-4}} + +4 + +\hypertarget{header-5}{% +\paragraph{header 5}\label{header-5}} + +5 diff --git a/regress/shift-heading-level-by-neg.man b/regress/shift-heading-level-by-neg.man @@ -0,0 +1,20 @@ +.SH +header 1 +.LP +1 +.SH +header 2 +.LP +2 +.SS +header 3 +.LP +3 +.SS +header 4 +.LP +4 +.SS +header 5 +.LP +5 diff --git a/regress/shift-heading-level-by-neg.md b/regress/shift-heading-level-by-neg.md @@ -0,0 +1,21 @@ +shift heading level by: -1 + +# header 1 + +1 + +## header 2 + +2 + +### header 3 + +3 + +#### header 4 + +4 + +##### header 5 + +5 diff --git a/regress/shift-heading-level-by-neg.ms b/regress/shift-heading-level-by-neg.ms @@ -0,0 +1,30 @@ +.NH 1 +header 1 +.pdfhref O 1 header 1 +.pdfhref M header-1 +.LP +1 +.NH 1 +header 2 +.pdfhref O 1 header 2 +.pdfhref M header-2 +.LP +2 +.NH 2 +header 3 +.pdfhref O 2 header 3 +.pdfhref M header-3 +.LP +3 +.NH 3 +header 4 +.pdfhref O 3 header 4 +.pdfhref M header-4 +.LP +4 +.NH 4 +header 5 +.pdfhref O 4 header 5 +.pdfhref M header-5 +.LP +5 diff --git a/regress/shift-heading-level-by-pos.gemini b/regress/shift-heading-level-by-pos.gemini @@ -0,0 +1,19 @@ +### header 1 + +1 + +#### header 2 + +2 + +##### header 3 + +3 + +###### header 4 + +4 + +####### header 5 + +5 diff --git a/regress/shift-heading-level-by-pos.html b/regress/shift-heading-level-by-pos.html @@ -0,0 +1,19 @@ +<h3 id="header-1">header 1</h3> + +<p>1</p> + +<h4 id="header-2">header 2</h4> + +<p>2</p> + +<h5 id="header-3">header 3</h5> + +<p>3</p> + +<h6 id="header-4">header 4</h6> + +<p>4</p> + +<h6 id="header-5">header 5</h6> + +<p>5</p> diff --git a/regress/shift-heading-level-by-pos.latex 
b/regress/shift-heading-level-by-pos.latex @@ -0,0 +1,24 @@ +\hypertarget{header-1}{% +\subsubsection{header 1}\label{header-1}} + +1 + +\hypertarget{header-2}{% +\paragraph{header 2}\label{header-2}} + +2 + +\hypertarget{header-3}{% +\subparagraph{header 3}\label{header-3}} + +3 + +\hypertarget{header-4}{% +\subparagraph{header 4}\label{header-4}} + +4 + +\hypertarget{header-5}{% +\subparagraph{header 5}\label{header-5}} + +5 diff --git a/regress/shift-heading-level-by-pos.man b/regress/shift-heading-level-by-pos.man @@ -0,0 +1,20 @@ +.SS +header 1 +.LP +1 +.SS +header 2 +.LP +2 +.SS +header 3 +.LP +3 +.SS +header 4 +.LP +4 +.SS +header 5 +.LP +5 diff --git a/regress/shift-heading-level-by-pos.md b/regress/shift-heading-level-by-pos.md @@ -0,0 +1,21 @@ +shift heading level by: 2 + +# header 1 + +1 + +## header 2 + +2 + +### header 3 + +3 + +#### header 4 + +4 + +##### header 5 + +5 diff --git a/regress/shift-heading-level-by-pos.ms b/regress/shift-heading-level-by-pos.ms @@ -0,0 +1,30 @@ +.NH 3 +header 1 +.pdfhref O 3 header 1 +.pdfhref M header-1 +.LP +1 +.NH 4 +header 2 +.pdfhref O 4 header 2 +.pdfhref M header-2 +.LP +2 +.NH 5 +header 3 +.pdfhref O 5 header 3 +.pdfhref M header-3 +.LP +3 +.NH 6 +header 4 +.pdfhref O 6 header 4 +.pdfhref M header-4 +.LP +4 +.NH 7 +header 5 +.pdfhref O 7 header 5 +.pdfhref M header-5 +.LP +5 diff --git a/regress/shift-heading-level-by-zero.gemini b/regress/shift-heading-level-by-zero.gemini @@ -0,0 +1,19 @@ +# header 1 + +1 + +## header 2 + +2 + +### header 3 + +3 + +#### header 4 + +4 + +##### header 5 + +5 diff --git a/regress/shift-heading-level-by-zero.html b/regress/shift-heading-level-by-zero.html @@ -0,0 +1,19 @@ +<h1 id="header-1">header 1</h1> + +<p>1</p> + +<h2 id="header-2">header 2</h2> + +<p>2</p> + +<h3 id="header-3">header 3</h3> + +<p>3</p> + +<h4 id="header-4">header 4</h4> + +<p>4</p> + +<h5 id="header-5">header 5</h5> + +<p>5</p> diff --git a/regress/shift-heading-level-by-zero.latex 
b/regress/shift-heading-level-by-zero.latex @@ -0,0 +1,24 @@ +\hypertarget{header-1}{% +\section{header 1}\label{header-1}} + +1 + +\hypertarget{header-2}{% +\subsection{header 2}\label{header-2}} + +2 + +\hypertarget{header-3}{% +\subsubsection{header 3}\label{header-3}} + +3 + +\hypertarget{header-4}{% +\paragraph{header 4}\label{header-4}} + +4 + +\hypertarget{header-5}{% +\subparagraph{header 5}\label{header-5}} + +5 diff --git a/regress/shift-heading-level-by-zero.man b/regress/shift-heading-level-by-zero.man @@ -0,0 +1,20 @@ +.SH +header 1 +.LP +1 +.SS +header 2 +.LP +2 +.SS +header 3 +.LP +3 +.SS +header 4 +.LP +4 +.SS +header 5 +.LP +5 diff --git a/regress/shift-heading-level-by-zero.md b/regress/shift-heading-level-by-zero.md @@ -0,0 +1,21 @@ +shift heading level by: 0 + +# header 1 + +1 + +## header 2 + +2 + +### header 3 + +3 + +#### header 4 + +4 + +##### header 5 + +5 diff --git a/regress/shift-heading-level-by-zero.ms b/regress/shift-heading-level-by-zero.ms @@ -0,0 +1,30 @@ +.NH 1 +header 1 +.pdfhref O 1 header 1 +.pdfhref M header-1 +.LP +1 +.NH 2 +header 2 +.pdfhref O 2 header 2 +.pdfhref M header-2 +.LP +2 +.NH 3 +header 3 +.pdfhref O 3 header 3 +.pdfhref M header-3 +.LP +3 +.NH 4 +header 4 +.pdfhref O 4 header 4 +.pdfhref M header-4 +.LP +4 +.NH 5 +header 5 +.pdfhref O 5 header 5 +.pdfhref M header-5 +.LP +5 diff --git a/regress/simple.fodt b/regress/simple.fodt @@ -0,0 +1,304 @@ +<office:automatic-styles> +<style:style style:family="paragraph" style:name="P1" style:parent-style-name="Heading_20_1"> +</style:style> +<style:style style:family="paragraph" style:name="P2" style:parent-style-name="Standard"> +</style:style> +<style:style style:family="text" style:name="T1"> +<style:text-properties fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"/> +</style:style> +<style:style style:family="text" style:name="T2"> +<style:text-properties fo:font-weight="bold" style:font-weight-asian="bold" 
style:font-weight-complex="bold"/> +</style:style> +<text:list-style style:name="L1"> +<text:list-level-style-bullet text:level="1" text:style-name="Bullet_20_Symbols" text:bullet-char="•"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1.270cm" fo:text-indent="-0.635cm" fo:margin-left="1.270cm"/> +</style:list-level-properties> +</text:list-level-style-bullet> +<text:list-level-style-bullet text:level="2" text:style-name="Bullet_20_Symbols" text:bullet-char="•"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1.905cm" fo:text-indent="-0.635cm" fo:margin-left="1.905cm"/> +</style:list-level-properties> +</text:list-level-style-bullet> +<text:list-level-style-bullet text:level="3" text:style-name="Bullet_20_Symbols" text:bullet-char="•"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="2.540cm" fo:text-indent="-0.635cm" fo:margin-left="2.540cm"/> +</style:list-level-properties> +</text:list-level-style-bullet> +<text:list-level-style-bullet text:level="4" text:style-name="Bullet_20_Symbols" text:bullet-char="•"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="3.175cm" fo:text-indent="-0.635cm" fo:margin-left="3.175cm"/> +</style:list-level-properties> +</text:list-level-style-bullet> +<text:list-level-style-bullet text:level="5" text:style-name="Bullet_20_Symbols" text:bullet-char="•"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment 
text:label-followed-by="listtab" text:list-tab-stop-position="3.810cm" fo:text-indent="-0.635cm" fo:margin-left="3.810cm"/> +</style:list-level-properties> +</text:list-level-style-bullet> +<text:list-level-style-bullet text:level="6" text:style-name="Bullet_20_Symbols" text:bullet-char="•"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="4.445cm" fo:text-indent="-0.635cm" fo:margin-left="4.445cm"/> +</style:list-level-properties> +</text:list-level-style-bullet> +<text:list-level-style-bullet text:level="7" text:style-name="Bullet_20_Symbols" text:bullet-char="•"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="5.080cm" fo:text-indent="-0.635cm" fo:margin-left="5.080cm"/> +</style:list-level-properties> +</text:list-level-style-bullet> +<text:list-level-style-bullet text:level="8" text:style-name="Bullet_20_Symbols" text:bullet-char="•"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="5.715cm" fo:text-indent="-0.635cm" fo:margin-left="5.715cm"/> +</style:list-level-properties> +</text:list-level-style-bullet> +<text:list-level-style-bullet text:level="9" text:style-name="Bullet_20_Symbols" text:bullet-char="•"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="6.350cm" fo:text-indent="-0.635cm" fo:margin-left="6.350cm"/> +</style:list-level-properties> +</text:list-level-style-bullet> +<text:list-level-style-bullet text:level="10" text:style-name="Bullet_20_Symbols" text:bullet-char="•"> +<style:list-level-properties 
text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="6.985cm" fo:text-indent="-0.635cm" fo:margin-left="6.985cm"/> +</style:list-level-properties> +</text:list-level-style-bullet> +</text:list-style> +<style:style style:family="paragraph" style:name="P3" style:parent-style-name="Standard" style:list-style-name="L1"> +</style:style> +<style:style style:family="paragraph" style:name="P4" style:parent-style-name="Standard"> +<style:paragraph-properties fo:margin-left="1.250cm" fo:margin-right="0cm" fo:text-indent="0cm" style:auto-text-indent="false"/> +</style:style> +<style:style style:family="paragraph" style:name="P5" style:parent-style-name="Heading_20_2"> +</style:style> +<text:list-style style:name="L2"> +<text:list-level-style-number text:level="1" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1.270cm" fo:text-indent="-0.635cm" fo:margin-left="1.270cm"/> +</style:list-level-properties> +</text:list-level-style-number> +<text:list-level-style-number text:level="2" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1.905cm" fo:text-indent="-0.635cm" fo:margin-left="1.905cm"/> +</style:list-level-properties> +</text:list-level-style-number> +<text:list-level-style-number text:level="3" text:style-name="Numbering_20_Symbols" style:num-suffix="." 
style:num-format="1"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="2.540cm" fo:text-indent="-0.635cm" fo:margin-left="2.540cm"/> +</style:list-level-properties> +</text:list-level-style-number> +<text:list-level-style-number text:level="4" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="3.175cm" fo:text-indent="-0.635cm" fo:margin-left="3.175cm"/> +</style:list-level-properties> +</text:list-level-style-number> +<text:list-level-style-number text:level="5" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="3.810cm" fo:text-indent="-0.635cm" fo:margin-left="3.810cm"/> +</style:list-level-properties> +</text:list-level-style-number> +<text:list-level-style-number text:level="6" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="4.445cm" fo:text-indent="-0.635cm" fo:margin-left="4.445cm"/> +</style:list-level-properties> +</text:list-level-style-number> +<text:list-level-style-number text:level="7" text:style-name="Numbering_20_Symbols" style:num-suffix="." 
style:num-format="1"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="5.080cm" fo:text-indent="-0.635cm" fo:margin-left="5.080cm"/> +</style:list-level-properties> +</text:list-level-style-number> +<text:list-level-style-number text:level="8" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="5.715cm" fo:text-indent="-0.635cm" fo:margin-left="5.715cm"/> +</style:list-level-properties> +</text:list-level-style-number> +<text:list-level-style-number text:level="9" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="6.350cm" fo:text-indent="-0.635cm" fo:margin-left="6.350cm"/> +</style:list-level-properties> +</text:list-level-style-number> +<text:list-level-style-number text:level="10" text:style-name="Numbering_20_Symbols" style:num-suffix="." 
style:num-format="1"> +<style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> +<style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="6.985cm" fo:text-indent="-0.635cm" fo:margin-left="6.985cm"/> +</style:list-level-properties> +</text:list-level-style-number> +</text:list-style> +<style:style style:family="paragraph" style:name="P6" style:parent-style-name="Standard" style:list-style-name="L2"> +</style:style> +<style:style style:family="paragraph" style:name="P7" style:parent-style-name="Preformatted_20_Text"> +</style:style> +<style:style style:family="paragraph" style:name="P8" style:parent-style-name="Heading_20_3"> +</style:style> +<style:style style:family="paragraph" style:name="P9" style:parent-style-name="Preformatted_20_Text"> +</style:style> +<style:style style:family="paragraph" style:name="P10" style:parent-style-name="Table_20_Contents"> +</style:style> +<style:style style:family="table" style:name="Table1"> +<style:table-properties fo:margin-left="0.000cm" fo:margin-right="0cm" table:align="margins"/> +</style:style> +<style:page-layout style:name="pm1"> +<style:page-layout-properties fo:page-width="21.001cm" fo:page-height="29.7cm" style:num-format="1" style:print-orientation="portrait" fo:margin-top="2cm" fo:margin-bottom="2cm" fo:margin-left="2cm" fo:margin-right="2cm" style:writing-mode="lr-tb" style:footnote-max-height="0cm"> +</style:page-layout-properties> +</style:page-layout> +</office:automatic-styles> +<office:master-styles> +<style:master-page style:name="Standard" style:page-layout-name="pm1"/> +</office:master-styles> +<office:body> +<office:text> +<text:h text:outline-level="1" text:style-name="P1"><text:bookmark-start text:name="an-h1-header" />An h1 header<text:bookmark-end text:name="an-h1-header" /></text:h> + +<text:p text:style-name="P2">Paragraphs are separated by a blank line.</text:p> + +<text:p text:style-name="P2">2nd paragraph. 
<text:span text:style-name="T1">Italic</text:span>, <text:span text:style-name="T2">bold</text:span>, and <text:span text:style-name="Source_20_Text">monospace</text:span>. Itemized lists +look like:</text:p> + +<text:list text:style-name="L1"> +<text:list-item><text:p text:style-name="P3">this one</text:p></text:list-item> +<text:list-item><text:p text:style-name="P3">that one</text:p></text:list-item> +<text:list-item><text:p text:style-name="P3">the other one</text:p></text:list-item> +</text:list> + +<text:p text:style-name="P2">Note that — not considering the asterisk — the actual text +content starts at 4-columns in.</text:p> +<text:p text:style-name="P4">Block quotes are +written like so.</text:p> + +<text:p text:style-name="P4">They can span multiple paragraphs, +if you like.</text:p> + +<text:p text:style-name="P2">Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., “it’s all +in chapters 12–14”). Three dots … will be converted to an ellipsis. +Unicode is supported. ☺</text:p> + +<text:h text:outline-level="2" text:style-name="P5"><text:bookmark-start text:name="an-h2-header" />An h2 header<text:bookmark-end text:name="an-h2-header" /></text:h> + +<text:p text:style-name="P2">Here’s a numbered list:</text:p> + +<text:list text:style-name="L2"> +<text:list-item><text:p text:style-name="P6">first item</text:p></text:list-item> +<text:list-item><text:p text:style-name="P6">second item</text:p></text:list-item> +<text:list-item><text:p text:style-name="P6">third item</text:p></text:list-item> +</text:list> + +<text:p text:style-name="P2">Note again how the actual text starts at 4 columns in (4 characters +from the left side). Here’s a code sample:</text:p> + +<text:p text:style-name="P7"># Let me re-iterate ...</text:p> +<text:p text:style-name="P7">for i in 1 .. 10 { do-something(i) }</text:p> + +<text:p text:style-name="P2">As you probably guessed, indented 4 spaces. 
By the way, instead of +indenting the block, you can use delimited blocks, if you like:</text:p> + +<text:p text:style-name="P7">define foobar() {</text:p> +<text:p text:style-name="P7"><text:s text:c="4"/>print "Welcome to flavor country!";</text:p> +<text:p text:style-name="P7">}</text:p> + +<text:p text:style-name="P2">(which makes copying & pasting easier). You can optionally mark the +delimited block for Pandoc to syntax highlight it:</text:p> + +<text:p text:style-name="P7">import time</text:p> +<text:p text:style-name="P7"># Quick, count to ten!</text:p> +<text:p text:style-name="P7">for i in range(10):</text:p> +<text:p text:style-name="P7"><text:s text:c="4"/># (but not *too* quick)</text:p> +<text:p text:style-name="P7"><text:s text:c="4"/>time.sleep(0.5)</text:p> +<text:p text:style-name="P7"><text:s text:c="4"/>print i</text:p> + +<text:h text:outline-level="3" text:style-name="P8"><text:bookmark-start text:name="an-h3-header" />An h3 header<text:bookmark-end text:name="an-h3-header" /></text:h> + +<text:p text:style-name="P2">Now a nested list:</text:p> + +<text:list text:style-name="L2"> +<text:list-item><text:p text:style-name="P6">First, get these ingredients:</text:p> + +<text:list> +<text:list-item><text:p text:style-name="P6">carrots</text:p></text:list-item> +<text:list-item><text:p text:style-name="P6">celery</text:p></text:list-item> +<text:list-item><text:p text:style-name="P6">lentils</text:p></text:list-item> +</text:list></text:list-item> +<text:list-item><text:p text:style-name="P6">Boil some water.</text:p></text:list-item> +<text:list-item><text:p text:style-name="P6">Dump everything in the pot and follow +this algorithm:</text:p> + +<text:p text:style-name="P9">find wooden spoon</text:p> +<text:p text:style-name="P9">uncover pot</text:p> +<text:p text:style-name="P9">stir</text:p> +<text:p text:style-name="P9">cover pot</text:p> +<text:p text:style-name="P9">balance wooden spoon precariously on pot handle</text:p> +<text:p 
text:style-name="P9">wait 10 minutes</text:p> +<text:p text:style-name="P9">goto first step (or shut off burner when done)</text:p> + +<text:p text:style-name="P6">Do not bump wooden spoon or it will fall.</text:p></text:list-item> +</text:list> + +<text:p text:style-name="P2">Notice again how text always lines up on 4-space indents (including +that last line which continues item 3 above).</text:p> + +<text:p text:style-name="P2">Here’s a link to <text:a xlink:type="simple" text:style-name="Internet_20_Link" xlink:href="http://foo.bar">a website</text:a>, to a <text:a xlink:type="simple" text:style-name="Internet_20_Link" xlink:href="local-doc.html">local +doc</text:a>, and to a <text:a xlink:type="simple" text:style-name="Internet_20_Link" xlink:href="#an-h2-header">section heading in the current +doc</text:a>. Here’s a footnote <text:note text:id="ftn1" text:note-class="footnote"><text:note-citation>1</text:note-citation><text:note-body> +<text:p text:style-name="P2">Footnote text goes here.</text:p> +</text:note-body></text:note> +.</text:p> + +<text:p text:style-name="P2">Tables can look like this:</text:p> + +<text:p text:style-name="P2"> +<draw:frame draw:style-name="fr1" draw:name="Frame" draw:z-index="0"> +<draw:text-box fo:min-height="0.499cm" fo:min-width="0.34cm"> +<table:table table:style-name="Table1" table:name="Table1"> +<table:table-column table:number-columns-repeated="3"/> +<table:table-row> +<table:table-cell office:value-type="string"><text:p text:style-name="P10">size</text:p></table:table-cell> +<table:table-cell office:value-type="string"><text:p text:style-name="P10">material</text:p></table:table-cell> +<table:table-cell office:value-type="string"><text:p text:style-name="P10">color</text:p></table:table-cell> +</table:table-row> +<table:table-row> +<table:table-cell office:value-type="string"><text:p text:style-name="P10">9</text:p></table:table-cell> +<table:table-cell office:value-type="string"><text:p 
text:style-name="P10">leather</text:p></table:table-cell> +<table:table-cell office:value-type="string"><text:p text:style-name="P10">brown</text:p></table:table-cell> +</table:table-row> +<table:table-row> +<table:table-cell office:value-type="string"><text:p text:style-name="P10">10</text:p></table:table-cell> +<table:table-cell office:value-type="string"><text:p text:style-name="P10">hemp canvas</text:p></table:table-cell> +<table:table-cell office:value-type="string"><text:p text:style-name="P10">natural</text:p></table:table-cell> +</table:table-row> +<table:table-row> +<table:table-cell office:value-type="string"><text:p text:style-name="P10">11</text:p></table:table-cell> +<table:table-cell office:value-type="string"><text:p text:style-name="P10">glass</text:p></table:table-cell> +<table:table-cell office:value-type="string"><text:p text:style-name="P10">transparent</text:p></table:table-cell> +</table:table-row> +</table:table> +</draw:text-box> +</draw:frame> +</text:p> + +<text:p text:style-name="P2">Table: Shoes, their sizes, and what they’re made of</text:p> + +<text:p text:style-name="P2">(The above is the caption for the table.)</text:p> + +<text:p text:style-name="P2">A horizontal rule follows.</text:p> + +<text:p text:style-name="Horizontal_20_Line"/> + +<text:p text:style-name="P2">Here’s a definition list:</text:p> +<text:p text:style-name="P2">apples</text:p> + +<text:p text:style-name="P4">Good for making applesauce.</text:p> + +<text:p text:style-name="P2">oranges</text:p> + +<text:p text:style-name="P4">Citrus!</text:p> + +<text:p text:style-name="P2">tomatoes</text:p> + +<text:p text:style-name="P4">There’s no “e” in tomatoe.</text:p> + +<text:p text:style-name="P2">Again, text is indented 4 spaces. 
(Put a blank line between each +term/definition pair to spread things out more.)</text:p> + +<text:p text:style-name="P2">Images can be specified like so:</text:p> + +<text:p text:style-name="P2"><draw:frame draw:name="Image1" text:anchor-type="as-char" draw:z-index="0" draw:style-name="Graphics"><draw:image xlink:href="example-image.jpg" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad" draw:filter-name="<All images>" /><svg:title>example image</svg:title></draw:frame></text:p> + +<text:p text:style-name="P2">And note that you can backslash-escape any punctuation characters +which you wish to be displayed literally, ex.: `foo`, *bar*, etc.</text:p> +</office:text> +</office:body> diff --git a/regress/simple.gemini b/regress/simple.gemini @@ -0,0 +1,120 @@ +# An h1 header + +Paragraphs are separated by a blank line. + +2nd paragraph. Italic, bold, and monospace. Itemized lists look like: + +* this one +* that one +* the other one + +Note that — not considering the asterisk — the actual text content starts at 4-columns in. + +> Block quotes are written like so. + +They can span multiple paragraphs, if you like. + +Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., “it’s all in chapters 12–14”). Three dots … will be converted to an ellipsis. Unicode is supported. ☺ + +## An h2 header + +Here’s a numbered list: + +1. first item +2. second item +3. third item + +Note again how the actual text starts at 4 columns in (4 characters from the left side). Here’s a code sample: + +```# Let me re-iterate ... +for i in 1 .. 10 { do-something(i) } +``` + +As you probably guessed, indented 4 spaces. By the way, instead of indenting the block, you can use delimited blocks, if you like: + +```define foobar() { + print "Welcome to flavor country!"; +} +``` + +(which makes copying & pasting easier). You can optionally mark the delimited block for Pandoc to syntax highlight it: + +```import time +# Quick, count to ten! 
+for i in range(10): + # (but not *too* quick) + time.sleep(0.5) + print i +``` + +### An h3 header + +Now a nested list: + +1. First, get these ingredients: + +* carrots +* celery +* lentils + +2. Boil some water. + +3. Dump everything in the pot and follow this algorithm: + +```find wooden spoon +uncover pot +stir +cover pot +balance wooden spoon precariously on pot handle +wait 10 minutes +goto first step (or shut off burner when done) +``` + +Do not bump wooden spoon or it will fall. + +Notice again how text always lines up on 4-space indents (including that last line which continues item 3 above). + +Here’s a link to a website[a], to a local doc[b], and to a section heading in the current doc[c]. Here’s a footnote [1]. + +=> http://foo.bar [a] +=> local-doc.html [b] +=> #an-h2-header [c] + +Tables can look like this: + +``` +size | material | color +-----|-------------|------------- +9 | leather | brown +10 | hemp canvas | natural +11 | glass | transparent +``` + +Table: Shoes, their sizes, and what they’re made of + +(The above is the caption for the table.) + +A horizontal rule follows. + +~~~~~~~~ + +Here’s a definition list: + +apples +: Good for making applesauce. +oranges +: Citrus! +tomatoes +: There’s no “e” in tomatoe. + +Again, text is indented 4 spaces. (Put a blank line between each term/definition pair to spread things out more.) + +Images can be specified like so: + +=> example-image.jpg example image + +And note that you can backslash-escape any punctuation characters which you wish to be displayed literally, ex.: `foo`, *bar*, etc. + +~~~~~~~~ + +[1] Footnote text goes here. diff --git a/regress/simple.html b/regress/simple.html @@ -0,0 +1,174 @@ +<h1 id="an-h1-header">An h1 header</h1> + +<p>Paragraphs are separated by a blank line.</p> + +<p>2nd paragraph. <em>Italic</em>, <strong>bold</strong>, and <code>monospace</code>. 
Itemized lists +look like:</p> + +<ul> +<li>this one</li> +<li>that one</li> +<li>the other one</li> +</ul> + +<p>Note that — not considering the asterisk — the actual text +content starts at 4-columns in.</p> + +<blockquote> +<p>Block quotes are +written like so.</p> + +<p>They can span multiple paragraphs, +if you like.</p> +</blockquote> + +<p>Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., “it’s all +in chapters 12–14”). Three dots … will be converted to an ellipsis. +Unicode is supported. ☺</p> + +<h2 id="an-h2-header">An h2 header</h2> + +<p>Here’s a numbered list:</p> + +<ol> +<li>first item</li> +<li>second item</li> +<li>third item</li> +</ol> + +<p>Note again how the actual text starts at 4 columns in (4 characters +from the left side). Here’s a code sample:</p> + +<pre><code># Let me re-iterate ... +for i in 1 .. 10 { do-something(i) } +</code></pre> + +<p>As you probably guessed, indented 4 spaces. By the way, instead of +indenting the block, you can use delimited blocks, if you like:</p> + +<pre><code>define foobar() { + print "Welcome to flavor country!"; +} +</code></pre> + +<p>(which makes copying & pasting easier). You can optionally mark the +delimited block for Pandoc to syntax highlight it:</p> + +<pre><code class="language-python">import time +# Quick, count to ten! 
+for i in range(10): + # (but not *too* quick) + time.sleep(0.5) + print i +</code></pre> + +<h3 id="an-h3-header">An h3 header</h3> + +<p>Now a nested list:</p> + +<ol> +<li><p>First, get these ingredients:</p> + +<ul> +<li>carrots</li> +<li>celery</li> +<li>lentils</li> +</ul></li> +<li><p>Boil some water.</p></li> +<li><p>Dump everything in the pot and follow +this algorithm:</p> + +<pre><code>find wooden spoon +uncover pot +stir +cover pot +balance wooden spoon precariously on pot handle +wait 10 minutes +goto first step (or shut off burner when done) +</code></pre> + +<p>Do not bump wooden spoon or it will fall.</p></li> +</ol> + +<p>Notice again how text always lines up on 4-space indents (including +that last line which continues item 3 above).</p> + +<p>Here’s a link to <a href="http://foo.bar">a website</a>, to a <a href="local-doc.html">local +doc</a>, and to a <a href="#an-h2-header">section heading in the current +doc</a>. Here’s a footnote <sup id="fnref1"><a href="#fn1" rel="footnote">1</a></sup>.</p> + +<p>Tables can look like this:</p> + +<table> +<thead> +<tr> +<th>size</th> +<th>material</th> +<th>color</th> +</tr> +</thead> + +<tbody> +<tr> +<td>9</td> +<td>leather</td> +<td>brown</td> +</tr> +<tr> +<td>10</td> +<td>hemp canvas</td> +<td>natural</td> +</tr> +<tr> +<td>11</td> +<td>glass</td> +<td>transparent</td> +</tr> +</tbody> +</table> + +<p>Table: Shoes, their sizes, and what they’re made of</p> + +<p>(The above is the caption for the table.)</p> + +<p>A horizontal rule follows.</p> + +<hr/> + +<p>Here’s a definition list:</p> + +<dl> +<dt>apples</dt> +<dd> +Good for making applesauce. +</dd> +<dt>oranges</dt> +<dd> +Citrus! +</dd> +<dt>tomatoes</dt> +<dd> +There’s no “e” in tomatoe. +</dd> +</dl> + +<p>Again, text is indented 4 spaces. 
(Put a blank line between each +term/definition pair to spread things out more.)</p> + +<p>Images can be specified like so:</p> + +<p><img src="example-image.jpg" alt="example image" title="An exemplary image" /></p> + +<p>And note that you can backslash-escape any punctuation characters +which you wish to be displayed literally, ex.: `foo`, *bar*, etc.</p> + +<div class="footnotes"> +<hr/> +<ol> + +<li id="fn1"> +<p>Footnote text goes here. <a href="#fnref1" rev="footnote">↩</a></p> +</li> + +</ol> +</div> diff --git a/regress/simple.latex b/regress/simple.latex @@ -0,0 +1,149 @@ +\hypertarget{an-h1-header}{% +\section{An h1 header}\label{an-h1-header}} + +Paragraphs are separated by a blank line. + +2nd paragraph. \emph{Italic}, \textbf{bold}, and \texttt{monospace}. Itemized lists +look like: + +\begin{itemize} +\itemsep -0.2em +\item this one +\item that one +\item the other one +\end{itemize} + +Note that --- not considering the asterisk --- the actual text +content starts at 4-columns in. + +\begin{quotation} + +Block quotes are +written like so. + +They can span multiple paragraphs, +if you like. +\end{quotation} + +Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., ``it's all +in chapters 12--14''). Three dots \ldots{} will be converted to an ellipsis. +Unicode is supported. ☺ + +\hypertarget{an-h2-header}{% +\subsection{An h2 header}\label{an-h2-header}} + +Here's a numbered list: + +\begin{enumerate} +\itemsep -0.2em +\item first item +\item second item +\item third item +\end{enumerate} + +Note again how the actual text starts at 4 columns in (4 characters +from the left side). Here's a code sample: + +\begin{verbatim} +# Let me re-iterate ... +for i in 1 .. 10 { do-something(i) } +\end{verbatim} + +As you probably guessed, indented 4 spaces. 
By the way, instead of +indenting the block, you can use delimited blocks, if you like: + +\begin{verbatim} +define foobar() { + print "Welcome to flavor country!"; +} +\end{verbatim} + +(which makes copying \& pasting easier). You can optionally mark the +delimited block for Pandoc to syntax highlight it: + +\begin{verbatim} +import time +# Quick, count to ten! +for i in range(10): + # (but not *too* quick) + time.sleep(0.5) + print i +\end{verbatim} + +\hypertarget{an-h3-header}{% +\subsubsection{An h3 header}\label{an-h3-header}} + +Now a nested list: + +\begin{enumerate} +\item +First, get these ingredients: + +\begin{itemize} +\itemsep -0.2em +\item carrots +\item celery +\item lentils +\end{itemize} +\item +Boil some water. +\item +Dump everything in the pot and follow +this algorithm: + +\begin{verbatim} +find wooden spoon +uncover pot +stir +cover pot +balance wooden spoon precariously on pot handle +wait 10 minutes +goto first step (or shut off burner when done) +\end{verbatim} + +Do not bump wooden spoon or it will fall. +\end{enumerate} + +Notice again how text always lines up on 4-space indents (including +that last line which continues item 3 above). + +Here's a link to \href{http://foo.bar}{a website}, to a \href{local-doc.html}{local +doc}, and to a \hyperlink{an-h2-header}{section heading in the current +doc}. Here's a footnote \footnote[1]{ +Footnote text goes here. +} +. + +Tables can look like this: + +\begin{longtable}[]{lll} +size & material & color \\ +9 & leather & brown \\ +10 & hemp canvas & natural \\ +11 & glass & transparent \\ +\end{longtable} + +Table: Shoes, their sizes, and what they're made of + +(The above is the caption for the table.) + +A horizontal rule follows. + +\noindent\hrulefill + +Here's a definition list: +\begin{description} +\item [apples] Good for making applesauce. +\item [oranges] Citrus! +\item [tomatoes] There's no ``e'' in tomatoe. +\end{description} + +Again, text is indented 4 spaces. 
(Put a blank line between each +term/definition pair to spread things out more.) + +Images can be specified like so: + +\includegraphics[]{{example-image}.jpg} + +And note that you can backslash-escape any punctuation characters +which you wish to be displayed literally, ex.: `foo`, *bar*, etc. diff --git a/regress/simple.man b/regress/simple.man @@ -0,0 +1,213 @@ +.SH +An h1 header +.LP +Paragraphs are separated by a blank line. +.PP +2nd paragraph. \fIItalic\fR, \fBbold\fR, and \fCmonospace\fR. Itemized lists +look like: +.IP "\(bu" 2 +this one +.if n \ +.sp -1 +.if t \ +.sp -0.25v +.IP "\(bu" 2 +that one +.if n \ +.sp -1 +.if t \ +.sp -0.25v +.IP "\(bu" 2 +the other one +.LP +Note that \(em not considering the asterisk \(em the actual text +content starts at 4-columns in. +.RS +.PP +Block quotes are +written like so. +.PP +They can span multiple paragraphs, +if you like. +.RE +.LP +Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., \(lqit\(cqs all +in chapters 12\(en14\(rq). Three dots \[u2026] will be converted to an ellipsis. +Unicode is supported. ☺ +.SS +An h2 header +.LP +Here\(cqs a numbered list: +.IP "1. " +first item +.if n \ +.sp -1 +.if t \ +.sp -0.25v +.IP "2. " +second item +.if n \ +.sp -1 +.if t \ +.sp -0.25v +.IP "3. " +third item +.LP +Note again how the actual text starts at 4 columns in (4 characters +from the left side). Here\(cqs a code sample: +.LP +.EX +# Let me re-iterate ... +for i in 1 .. 10 { do-something(i) } +.EE +.PP +As you probably guessed, indented 4 spaces. By the way, instead of +indenting the block, you can use delimited blocks, if you like: +.LP +.EX +define foobar() { + print \(dqWelcome to flavor country!\(dq; +} +.EE +.PP +(which makes copying & pasting easier). You can optionally mark the +delimited block for Pandoc to syntax highlight it: +.LP +.EX +import time +# Quick, count to ten! 
+for i in range(10): + # (but not *too* quick) + time.sleep(0.5) + print i +.EE +.SS +An h3 header +.LP +Now a nested list: +.IP "1. " +First, get these ingredients: +.RS +.IP "\(bu" 2 +carrots +.if n \ +.sp -1 +.if t \ +.sp -0.25v +.IP "\(bu" 2 +celery +.if n \ +.sp -1 +.if t \ +.sp -0.25v +.IP "\(bu" 2 +lentils +.RE +.IP "2. " +Boil some water. +.IP "3. " +Dump everything in the pot and follow +this algorithm: +.LP +.EX +find wooden spoon +uncover pot +stir +cover pot +balance wooden spoon precariously on pot handle +wait 10 minutes +goto first step (or shut off burner when done) +.EE +.IP +Do not bump wooden spoon or it will fall. +.LP +Notice again how text always lines up on 4-space indents (including +that last line which continues item 3 above). +.PP +Here\(cqs a link to \fBa website\fR <\fIhttp://foo.bar\fR>, to a \fBlocal +doc\fR <\fIlocal-doc.html\fR>, and to a \fBsection heading in the current +doc\fR <\fI#an-h2-header\fR>. Here\(cqs a footnote \u\s-31\s+3\d. +.PP +Tables can look like this: +.TS +tab(|) expand allbox; +lb lb lb +l l l. +T{ +size +T}|T{ +material +T}|T{ +color +T} +T{ +9 +T}|T{ +leather +T}|T{ +brown +T} +T{ +10 +T}|T{ +hemp canvas +T}|T{ +natural +T} +T{ +11 +T}|T{ +glass +T}|T{ +transparent +T} +.TE +.LP +Table: Shoes, their sizes, and what they\(cqre made of +.PP +(The above is the caption for the table.) +.PP +A horizontal rule follows. +.LP +\l'2i' +.LP +Here\(cqs a definition list: +.LP +apples +.if n \ +.sp -1v +.if t \ +.sp -0.25v +.IP "" \*(PI +Good for making applesauce. +.LP +oranges +.if n \ +.sp -1v +.if t \ +.sp -0.25v +.IP "" \*(PI +Citrus! +.LP +tomatoes +.if n \ +.sp -1v +.if t \ +.sp -0.25v +.IP "" \*(PI +There\(cqs no \(lqe\(rq in tomatoe. +.LP +Again, text is indented 4 spaces. (Put a blank line between each +term/definition pair to spread things out more.) 
+.PP +Images can be specified like so: +.PP +\fBexample image\fR (Image: \fIexample-image.jpg\fR) +.PP +And note that you can backslash-escape any punctuation characters +which you wish to be displayed literally, ex.: \(gafoo\(ga, *bar*, etc. +.LP +.sp 3 +\l'2i' +.LP +\0\fI\u\s-31\s+3\d\fP\0Footnote text goes here. diff --git a/regress/simple.md b/regress/simple.md @@ -0,0 +1,135 @@ +An h1 header +============ + +Paragraphs are separated by a blank line. + +2nd paragraph. *Italic*, **bold**, and `monospace`. Itemized lists +look like: + + * this one + * that one + * the other one + +Note that --- not considering the asterisk --- the actual text +content starts at 4-columns in. + +> Block quotes are +> written like so. +> +> They can span multiple paragraphs, +> if you like. + +Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., "it's all +in chapters 12--14"). Three dots ... will be converted to an ellipsis. +Unicode is supported. ☺ + + + +An h2 header +------------ + +Here's a numbered list: + + 1. first item + 2. second item + 3. third item + +Note again how the actual text starts at 4 columns in (4 characters +from the left side). Here's a code sample: + + # Let me re-iterate ... + for i in 1 .. 10 { do-something(i) } + +As you probably guessed, indented 4 spaces. By the way, instead of +indenting the block, you can use delimited blocks, if you like: + +~~~ +define foobar() { + print "Welcome to flavor country!"; +} +~~~ + +(which makes copying & pasting easier). You can optionally mark the +delimited block for Pandoc to syntax highlight it: + +~~~python +import time +# Quick, count to ten! +for i in range(10): + # (but not *too* quick) + time.sleep(0.5) + print i +~~~ + + + +### An h3 header ### + +Now a nested list: + + 1. First, get these ingredients: + + * carrots + * celery + * lentils + + 2. Boil some water. + + 3. 
Dump everything in the pot and follow + this algorithm: + + find wooden spoon + uncover pot + stir + cover pot + balance wooden spoon precariously on pot handle + wait 10 minutes + goto first step (or shut off burner when done) + + Do not bump wooden spoon or it will fall. + +Notice again how text always lines up on 4-space indents (including +that last line which continues item 3 above). + +Here's a link to [a website](http://foo.bar), to a [local +doc](local-doc.html), and to a [section heading in the current +doc](#an-h2-header). Here's a footnote [^1]. + +[^1]: Footnote text goes here. + +Tables can look like this: + +size | material | color +---- | ------------| ------------ +9 | leather | brown +10 | hemp canvas | natural +11 | glass | transparent + +Table: Shoes, their sizes, and what they're made of + +(The above is the caption for the table.) + +A horizontal rule follows. + +*** + +Here's a definition list: + +apples + : Good for making applesauce. + +oranges + : Citrus! + +tomatoes + : There's no "e" in tomatoe. + +Again, text is indented 4 spaces. (Put a blank line between each +term/definition pair to spread things out more.) + +Images can be specified like so: + + + +And note that you can backslash-escape any punctuation characters +which you wish to be displayed literally, ex.: \`foo\`, \*bar\*, etc. diff --git a/regress/simple.ms b/regress/simple.ms @@ -0,0 +1,238 @@ +.NH 1 +An h1 header +.pdfhref O 1 An h1 header +.pdfhref M an-h1-header +.LP +Paragraphs are separated by a blank line. +.PP +2nd paragraph. \fIItalic\fR, \fBbold\fR, and \fCmonospace\fR. Itemized lists +look like: +.IP "\(bu" 2 +this one +.if n \ +.sp -1 +.if t \ +.sp -0.25v +.IP "\(bu" 2 +that one +.if n \ +.sp -1 +.if t \ +.sp -0.25v +.IP "\(bu" 2 +the other one +.LP +Note that \(em not considering the asterisk \(em the actual text +content starts at 4-columns in. +.RS +.PP +Block quotes are +written like so. +.PP +They can span multiple paragraphs, +if you like. 
+.RE +.LP +Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., \(lqit\(cqs all +in chapters 12\(en14\(rq). Three dots \[u2026] will be converted to an ellipsis. +Unicode is supported. ☺ +.NH 2 +An h2 header +.pdfhref O 2 An h2 header +.pdfhref M an-h2-header +.LP +Here\(cqs a numbered list: +.IP "1. " +first item +.if n \ +.sp -1 +.if t \ +.sp -0.25v +.IP "2. " +second item +.if n \ +.sp -1 +.if t \ +.sp -0.25v +.IP "3. " +third item +.LP +Note again how the actual text starts at 4 columns in (4 characters +from the left side). Here\(cqs a code sample: +.LP +.nf +.ft CR +# Let me re-iterate ... +for i in 1 .. 10 { do-something(i) } +.ft +.fi +.PP +As you probably guessed, indented 4 spaces. By the way, instead of +indenting the block, you can use delimited blocks, if you like: +.LP +.nf +.ft CR +define foobar() { + print \(dqWelcome to flavor country!\(dq; +} +.ft +.fi +.PP +(which makes copying & pasting easier). You can optionally mark the +delimited block for Pandoc to syntax highlight it: +.LP +.nf +.ft CR +import time +# Quick, count to ten! +for i in range(10): + # (but not *too* quick) + time.sleep(0.5) + print i +.ft +.fi +.NH 3 +An h3 header +.pdfhref O 3 An h3 header +.pdfhref M an-h3-header +.LP +Now a nested list: +.IP "1. " +First, get these ingredients: +.RS +.IP "\(bu" 2 +carrots +.if n \ +.sp -1 +.if t \ +.sp -0.25v +.IP "\(bu" 2 +celery +.if n \ +.sp -1 +.if t \ +.sp -0.25v +.IP "\(bu" 2 +lentils +.RE +.IP "2. " +Boil some water. +.IP "3. " +Dump everything in the pot and follow +this algorithm: +.LP +.nf +.ft CR +find wooden spoon +uncover pot +stir +cover pot +balance wooden spoon precariously on pot handle +wait 10 minutes +goto first step (or shut off burner when done) +.ft +.fi +.IP +Do not bump wooden spoon or it will fall. +.LP +Notice again how text always lines up on 4-space indents (including +that last line which continues item 3 above). 
+.PP +Here\(cqs a link to \c +.pdfhref W -A "\c" -D http://foo.bar -- a website +, to a \c +.pdfhref W -A "\c" -D local-doc.html -- local doc +, and to a \c +.pdfhref L -A "\c" -D an-h2-header -- section heading in the current doc +\&. Here\(cqs a footnote \** +.FS +Footnote text goes here. +.FE +\&. +.PP +Tables can look like this: +.TS H +tab(|) expand allbox; +lb lb lb +l l l. +T{ +size +T}|T{ +material +T}|T{ +color +T} +.TH +T{ +9 +T}|T{ +leather +T}|T{ +brown +T} +T{ +10 +T}|T{ +hemp canvas +T}|T{ +natural +T} +T{ +11 +T}|T{ +glass +T}|T{ +transparent +T} +.TE +.LP +Table: Shoes, their sizes, and what they\(cqre made of +.PP +(The above is the caption for the table.) +.PP +A horizontal rule follows. +.LP +.ie d HR \{\ +.HR +\} +.el \{\ +.sp 1v +\l'\n(.lu' +.sp 1v +.\} +.LP +Here\(cqs a definition list: +.LP +apples +.if n \ +.sp -1v +.if t \ +.sp -0.25v +.IP "" \*(PI +Good for making applesauce. +.LP +oranges +.if n \ +.sp -1v +.if t \ +.sp -0.25v +.IP "" \*(PI +Citrus! +.LP +tomatoes +.if n \ +.sp -1v +.if t \ +.sp -0.25v +.IP "" \*(PI +There\(cqs no \(lqe\(rq in tomatoe. +.LP +Again, text is indented 4 spaces. (Put a blank line between each +term/definition pair to spread things out more.) +.PP +Images can be specified like so: +.PP +\fBexample image\fR (Image: \fIexample-image.jpg\fR) +.PP +And note that you can backslash-escape any punctuation characters +which you wish to be displayed literally, ex.: \(gafoo\(ga, *bar*, etc. diff --git a/regress/smarty-copyright-before-punct.html b/regress/smarty-copyright-before-punct.html @@ -0,0 +1 @@ +<p>foo ©.</p> diff --git a/regress/smarty-copyright-before-punct.md b/regress/smarty-copyright-before-punct.md @@ -0,0 +1 @@ +foo (c). 
diff --git a/regress/smarty-copyright-beginning.html b/regress/smarty-copyright-beginning.html @@ -0,0 +1 @@ +<p>© 2020</p> diff --git a/regress/smarty-copyright-beginning.md b/regress/smarty-copyright-beginning.md @@ -0,0 +1 @@ +(c) 2020 diff --git a/regress/smarty-copyright-double.html b/regress/smarty-copyright-double.html @@ -0,0 +1 @@ +<p>©©</p> diff --git a/regress/smarty-copyright-double.md b/regress/smarty-copyright-double.md @@ -0,0 +1 @@ +(c)(c) diff --git a/regress/smarty-copyright.html b/regress/smarty-copyright.html @@ -0,0 +1 @@ +<p>foo © bar</p> diff --git a/regress/smarty-copyright.md b/regress/smarty-copyright.md @@ -0,0 +1 @@ +foo (c) bar diff --git a/regress/smarty-dquotes-around-copyright.html b/regress/smarty-dquotes-around-copyright.html @@ -0,0 +1 @@ +<p>“©”</p> diff --git a/regress/smarty-dquotes-around-copyright.md b/regress/smarty-dquotes-around-copyright.md @@ -0,0 +1 @@ +"(c)" diff --git a/regress/smarty-dquotes-around-ellipsis.html b/regress/smarty-dquotes-around-ellipsis.html @@ -0,0 +1 @@ +<p>“…”</p> diff --git a/regress/smarty-dquotes-around-ellipsis.md b/regress/smarty-dquotes-around-ellipsis.md @@ -0,0 +1 @@ +"..." diff --git a/regress/smarty-dquotes-around-parenthesis.html b/regress/smarty-dquotes-around-parenthesis.html @@ -0,0 +1 @@ +<p>“(Hello, world.)”</p> diff --git a/regress/smarty-dquotes-around-parenthesis.md b/regress/smarty-dquotes-around-parenthesis.md @@ -0,0 +1 @@ +"(Hello, world.)" diff --git a/regress/smarty-dquotes-around-sentence.html b/regress/smarty-dquotes-around-sentence.html @@ -0,0 +1 @@ +<p>“foo bar baz.”</p> diff --git a/regress/smarty-dquotes-around-sentence.md b/regress/smarty-dquotes-around-sentence.md @@ -0,0 +1 @@ +"foo bar baz." 
diff --git a/regress/smarty-dquotes-around-squarebrace.html b/regress/smarty-dquotes-around-squarebrace.html @@ -0,0 +1 @@ +<p>“[Hello, world.]”</p> diff --git a/regress/smarty-dquotes-around-squarebrace.md b/regress/smarty-dquotes-around-squarebrace.md @@ -0,0 +1 @@ +"[Hello, world.]" diff --git a/regress/smarty-dquotes-around-word.html b/regress/smarty-dquotes-around-word.html @@ -0,0 +1 @@ +<p>foo “bar” baz</p> diff --git a/regress/smarty-dquotes-around-word.md b/regress/smarty-dquotes-around-word.md @@ -0,0 +1 @@ +foo "bar" baz diff --git a/regress/smarty-dquotes-within-parenthesis-nopunct.html b/regress/smarty-dquotes-within-parenthesis-nopunct.html @@ -0,0 +1 @@ +<p>(“foo bar”)</p> diff --git a/regress/smarty-dquotes-within-parenthesis-nopunct.md b/regress/smarty-dquotes-within-parenthesis-nopunct.md @@ -0,0 +1 @@ +("foo bar") diff --git a/regress/smarty-dquotes-within-parenthesis.html b/regress/smarty-dquotes-within-parenthesis.html @@ -0,0 +1 @@ +<p>(“Hello, world.”)</p> diff --git a/regress/smarty-dquotes-within-parenthesis.md b/regress/smarty-dquotes-within-parenthesis.md @@ -0,0 +1 @@ +("Hello, world.") diff --git a/regress/smarty-dquotes-within-sentence.html b/regress/smarty-dquotes-within-sentence.html @@ -0,0 +1 @@ +<p>“foo bar baz”.</p> diff --git a/regress/smarty-dquotes-within-sentence.md b/regress/smarty-dquotes-within-sentence.md @@ -0,0 +1 @@ +"foo bar baz". 
diff --git a/regress/smarty-dquotes-within-squarebrace.html b/regress/smarty-dquotes-within-squarebrace.html @@ -0,0 +1 @@ +<p>[“Hello, world.”]</p> diff --git a/regress/smarty-dquotes-within-squarebrace.md b/regress/smarty-dquotes-within-squarebrace.md @@ -0,0 +1 @@ +["Hello, world."] diff --git a/regress/smarty-dquotes-within-squotes.html b/regress/smarty-dquotes-within-squotes.html @@ -0,0 +1 @@ +<p>‘“hello”’ he said</p> diff --git a/regress/smarty-dquotes-within-squotes.md b/regress/smarty-dquotes-within-squotes.md @@ -0,0 +1 @@ +'"hello"' he said diff --git a/regress/smarty-dquotes-word-left.html b/regress/smarty-dquotes-word-left.html @@ -0,0 +1 @@ +<p>Hello “world “stuff.</p> diff --git a/regress/smarty-dquotes-word-left.md b/regress/smarty-dquotes-word-left.md @@ -0,0 +1 @@ +Hello "world "stuff. diff --git a/regress/smarty-dquotes-word-right.html b/regress/smarty-dquotes-word-right.html @@ -0,0 +1 @@ +<p>Hello” world” stuff.</p> diff --git a/regress/smarty-dquotes-word-right.md b/regress/smarty-dquotes-word-right.md @@ -0,0 +1 @@ +Hello" world" stuff. diff --git a/regress/smarty-ellipsis-end.html b/regress/smarty-ellipsis-end.html @@ -0,0 +1 @@ +<p>hello…</p> diff --git a/regress/smarty-ellipsis-end.md b/regress/smarty-ellipsis-end.md @@ -0,0 +1 @@ +hello... diff --git a/regress/smarty-ellipsis-lead-word.html b/regress/smarty-ellipsis-lead-word.html @@ -0,0 +1 @@ +<p>hello …there</p> diff --git a/regress/smarty-ellipsis-lead-word.md b/regress/smarty-ellipsis-lead-word.md @@ -0,0 +1 @@ +hello ...there diff --git a/regress/smarty-ellipsis-only.html b/regress/smarty-ellipsis-only.html @@ -0,0 +1 @@ +<p>…</p> diff --git a/regress/smarty-ellipsis-only.md b/regress/smarty-ellipsis-only.md @@ -0,0 +1 @@ +... diff --git a/regress/smarty-ellipsis-standalone.html b/regress/smarty-ellipsis-standalone.html @@ -0,0 +1 @@ +<p>hello … there</p> diff --git a/regress/smarty-ellipsis-standalone.md b/regress/smarty-ellipsis-standalone.md @@ -0,0 +1 @@ +hello ... 
there diff --git a/regress/smarty-ellipsis-trail-word.html b/regress/smarty-ellipsis-trail-word.html @@ -0,0 +1 @@ +<p>hello… there</p> diff --git a/regress/smarty-ellipsis-trail-word.md b/regress/smarty-ellipsis-trail-word.md @@ -0,0 +1 @@ +hello... there diff --git a/regress/smarty-emdash-in-dquotes.html b/regress/smarty-emdash-in-dquotes.html @@ -0,0 +1 @@ +<p>Hello “—” world.</p> diff --git a/regress/smarty-emdash-in-dquotes.md b/regress/smarty-emdash-in-dquotes.md @@ -0,0 +1 @@ +Hello "---" world. diff --git a/regress/smarty-emdash-only.html b/regress/smarty-emdash-only.html @@ -0,0 +1 @@ +<p>Hello — world.</p> diff --git a/regress/smarty-emdash-only.md b/regress/smarty-emdash-only.md @@ -0,0 +1 @@ +Hello --- world. diff --git a/regress/smarty-emdash.html b/regress/smarty-emdash.html @@ -0,0 +1 @@ +<p>Hello—world.</p> diff --git a/regress/smarty-emdash.md b/regress/smarty-emdash.md @@ -0,0 +1 @@ +Hello---world. diff --git a/regress/smarty-endash-in-dquotes.html b/regress/smarty-endash-in-dquotes.html @@ -0,0 +1 @@ +<p>Hello “–” world.</p> diff --git a/regress/smarty-endash-in-dquotes.md b/regress/smarty-endash-in-dquotes.md @@ -0,0 +1 @@ +Hello "--" world. diff --git a/regress/smarty-endash.html b/regress/smarty-endash.html @@ -0,0 +1 @@ +<p>Hello–world.</p> diff --git a/regress/smarty-endash.md b/regress/smarty-endash.md @@ -0,0 +1 @@ +Hello--world. diff --git a/regress/smarty-quarter-end-sentence.html b/regress/smarty-quarter-end-sentence.html @@ -0,0 +1 @@ +<p>This is ¼.</p> diff --git a/regress/smarty-quarter-end-sentence.md b/regress/smarty-quarter-end-sentence.md @@ -0,0 +1 @@ +This is 1/4. 
diff --git a/regress/smarty-quarter-fortieth.html b/regress/smarty-quarter-fortieth.html @@ -0,0 +1 @@ +<p>1/40 of this</p> diff --git a/regress/smarty-quarter-fortieth.md b/regress/smarty-quarter-fortieth.md @@ -0,0 +1 @@ +1/40 of this diff --git a/regress/smarty-quarter-within-dquotes.html b/regress/smarty-quarter-within-dquotes.html @@ -0,0 +1 @@ +<p>I have “¼” of this.</p> diff --git a/regress/smarty-quarter-within-dquotes.md b/regress/smarty-quarter-within-dquotes.md @@ -0,0 +1 @@ +I have "1/4" of this. diff --git a/regress/smarty-quarter-within-parenthesis.html b/regress/smarty-quarter-within-parenthesis.html @@ -0,0 +1 @@ +<p>This is (¼) blah.</p> diff --git a/regress/smarty-quarter-within-parenthesis.md b/regress/smarty-quarter-within-parenthesis.md @@ -0,0 +1 @@ +This is (1/4) blah. diff --git a/regress/smarty-quarter-zero-padded.html b/regress/smarty-quarter-zero-padded.html @@ -0,0 +1 @@ +<p>01/4 of this</p> diff --git a/regress/smarty-quarter-zero-padded.md b/regress/smarty-quarter-zero-padded.md @@ -0,0 +1 @@ +01/4 of this diff --git a/regress/smarty-quarter.html b/regress/smarty-quarter.html @@ -0,0 +1 @@ +<p>¼ of this</p> diff --git a/regress/smarty-quarter.md b/regress/smarty-quarter.md @@ -0,0 +1 @@ +1/4 of this diff --git a/regress/smarty-quarterth-end-sentence.html b/regress/smarty-quarterth-end-sentence.html @@ -0,0 +1 @@ +<p>This is ¼.</p> diff --git a/regress/smarty-quarterth-end-sentence.md b/regress/smarty-quarterth-end-sentence.md @@ -0,0 +1 @@ +This is 1/4th. diff --git a/regress/smarty-quarterth-within-dquotes.html b/regress/smarty-quarterth-within-dquotes.html @@ -0,0 +1 @@ +<p>I have “¼” of this.</p> diff --git a/regress/smarty-quarterth-within-dquotes.md b/regress/smarty-quarterth-within-dquotes.md @@ -0,0 +1 @@ +I have "1/4th" of this. 
diff --git a/regress/smarty-quarterth-within-parenthesis.html b/regress/smarty-quarterth-within-parenthesis.html @@ -0,0 +1 @@ +<p>This is (¼) of that.</p> diff --git a/regress/smarty-quarterth-within-parenthesis.md b/regress/smarty-quarterth-within-parenthesis.md @@ -0,0 +1 @@ +This is (1/4th) of that. diff --git a/regress/smarty-quarterth-within-squotes.html b/regress/smarty-quarterth-within-squotes.html @@ -0,0 +1 @@ +<p>I have ‘¼’ of this.</p> diff --git a/regress/smarty-quarterth-within-squotes.md b/regress/smarty-quarterth-within-squotes.md @@ -0,0 +1 @@ +I have '1/4th' of this. diff --git a/regress/smarty-squote-within-squotes.html b/regress/smarty-squote-within-squotes.html @@ -0,0 +1 @@ +<p>‘I’m’ me.</p> diff --git a/regress/smarty-squote-within-squotes.md b/regress/smarty-squote-within-squotes.md @@ -0,0 +1 @@ +'I'm' me. diff --git a/regress/smarty-squote-within-word.html b/regress/smarty-squote-within-word.html @@ -0,0 +1 @@ +<p>I’m me.</p> diff --git a/regress/smarty-squote-within-word.md b/regress/smarty-squote-within-word.md @@ -0,0 +1 @@ +I'm me. diff --git a/regress/smarty-squotes-around-copyright.html b/regress/smarty-squotes-around-copyright.html @@ -0,0 +1 @@ +<p>‘©’</p> diff --git a/regress/smarty-squotes-around-copyright.md b/regress/smarty-squotes-around-copyright.md @@ -0,0 +1 @@ +'(c)' diff --git a/regress/smarty-squotes-around-ellipsis.html b/regress/smarty-squotes-around-ellipsis.html @@ -0,0 +1 @@ +<p>‘…’</p> diff --git a/regress/smarty-squotes-around-ellipsis.md b/regress/smarty-squotes-around-ellipsis.md @@ -0,0 +1 @@ +'...' 
diff --git a/regress/smarty-squotes-around-parenthesis.html b/regress/smarty-squotes-around-parenthesis.html @@ -0,0 +1 @@ +<p>‘(Hello, world.)’</p> diff --git a/regress/smarty-squotes-around-parenthesis.md b/regress/smarty-squotes-around-parenthesis.md @@ -0,0 +1 @@ +'(Hello, world.)' diff --git a/regress/smarty-squotes-around-sentence.html b/regress/smarty-squotes-around-sentence.html @@ -0,0 +1 @@ +<p>‘foo bar baz.’</p> diff --git a/regress/smarty-squotes-around-sentence.md b/regress/smarty-squotes-around-sentence.md @@ -0,0 +1 @@ +'foo bar baz.' diff --git a/regress/smarty-squotes-around-squarebrace.html b/regress/smarty-squotes-around-squarebrace.html @@ -0,0 +1 @@ +<p>‘[Hello, world.]’</p> diff --git a/regress/smarty-squotes-around-squarebrace.md b/regress/smarty-squotes-around-squarebrace.md @@ -0,0 +1 @@ +'[Hello, world.]' diff --git a/regress/smarty-squotes-around-word.html b/regress/smarty-squotes-around-word.html @@ -0,0 +1 @@ +<p>foo ‘bar’ baz</p> diff --git a/regress/smarty-squotes-around-word.md b/regress/smarty-squotes-around-word.md @@ -0,0 +1 @@ +foo 'bar' baz diff --git a/regress/smarty-squotes-within-dquotes.html b/regress/smarty-squotes-within-dquotes.html @@ -0,0 +1 @@ +<p>“‘hello’” he said</p> diff --git a/regress/smarty-squotes-within-dquotes.md b/regress/smarty-squotes-within-dquotes.md @@ -0,0 +1 @@ +"'hello'" he said diff --git a/regress/smarty-squotes-within-parenthesis-nopunct.html b/regress/smarty-squotes-within-parenthesis-nopunct.html @@ -0,0 +1 @@ +<p>(‘foo bar’)</p> diff --git a/regress/smarty-squotes-within-parenthesis-nopunct.md b/regress/smarty-squotes-within-parenthesis-nopunct.md @@ -0,0 +1 @@ +('foo bar') diff --git a/regress/smarty-squotes-within-parenthesis.html b/regress/smarty-squotes-within-parenthesis.html @@ -0,0 +1 @@ +<p>(‘Hello, world.’)</p> diff --git a/regress/smarty-squotes-within-parenthesis.md b/regress/smarty-squotes-within-parenthesis.md @@ -0,0 +1 @@ +('Hello, world.') diff --git 
a/regress/smarty-squotes-within-sentence.html b/regress/smarty-squotes-within-sentence.html @@ -0,0 +1 @@ +<p>‘foo bar baz’.</p> diff --git a/regress/smarty-squotes-within-sentence.md b/regress/smarty-squotes-within-sentence.md @@ -0,0 +1 @@ +'foo bar baz'. diff --git a/regress/smarty-squotes-within-squarebrace.html b/regress/smarty-squotes-within-squarebrace.html @@ -0,0 +1 @@ +<p>[‘Hello, world.’]</p> diff --git a/regress/smarty-squotes-within-squarebrace.md b/regress/smarty-squotes-within-squarebrace.md @@ -0,0 +1 @@ +['Hello, world.'] diff --git a/regress/smarty-squotes-word-left.html b/regress/smarty-squotes-word-left.html @@ -0,0 +1 @@ +<p>Hello ‘world ‘stuff.</p> diff --git a/regress/smarty-squotes-word-left.md b/regress/smarty-squotes-word-left.md @@ -0,0 +1 @@ +Hello 'world 'stuff. diff --git a/regress/smarty-squotes-word-right.html b/regress/smarty-squotes-word-right.html @@ -0,0 +1 @@ +<p>Hello’ world’ stuff.</p> diff --git a/regress/smarty-squotes-word-right.md b/regress/smarty-squotes-word-right.md @@ -0,0 +1 @@ +Hello' world' stuff. 
diff --git a/regress/standalone/html-meta-css.html b/regress/standalone/html-meta-css.html @@ -0,0 +1,13 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<link rel="stylesheet" href="foo.css" /> +<title>Untitled article</title> +</head> +<body> + +<p>hello, world</p> +</body> +</html> diff --git a/regress/standalone/html-meta-css.md b/regress/standalone/html-meta-css.md @@ -0,0 +1,3 @@ +css: foo.css + +hello, world diff --git a/regress/standalone/html-meta-escape-attr.html b/regress/standalone/html-meta-escape-attr.html @@ -0,0 +1,13 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<meta name="author" content="foo&bar"foo&#8221;.css<i>foo</i>" /> +<title>Untitled article</title> +</head> +<body> + +<p>hello, world</p> +</body> +</html> diff --git a/regress/standalone/html-meta-escape-attr.md b/regress/standalone/html-meta-escape-attr.md @@ -0,0 +1,3 @@ +author: foo&bar"foo".css<i>foo</i> + +hello, world diff --git a/regress/standalone/html-meta-escape-href.html b/regress/standalone/html-meta-escape-href.html @@ -0,0 +1,13 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<link rel="stylesheet" href="foo&bar%22foo&#8221;.css" /> +<title>Untitled article</title> +</head> +<body> + +<p>hello, world</p> +</body> +</html> diff --git a/regress/standalone/html-meta-escape-href.md b/regress/standalone/html-meta-escape-href.md @@ -0,0 +1,3 @@ +css: foo&bar"foo".css + +hello, world diff --git a/regress/standalone/html-meta-escape-html.html b/regress/standalone/html-meta-escape-html.html @@ -0,0 +1,12 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<title><i>this is a&b title</i></title> +</head> +<body> + +<p>hello, world</p> +</body> 
+</html> diff --git a/regress/standalone/html-meta-escape-html.md b/regress/standalone/html-meta-escape-html.md @@ -0,0 +1,3 @@ +title: <i>this is a&b title</i> + +hello, world diff --git a/regress/standalone/html-meta-multi-css-eoln.html b/regress/standalone/html-meta-multi-css-eoln.html @@ -0,0 +1,14 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<link rel="stylesheet" href="foo.css" /> +<link rel="stylesheet" href="bar.css" /> +<title>Untitled article</title> +</head> +<body> + +<p>hello, world</p> +</body> +</html> diff --git a/regress/standalone/html-meta-multi-css-eoln.md b/regress/standalone/html-meta-multi-css-eoln.md @@ -0,0 +1,4 @@ +css: foo.css +bar.css + +hello, world diff --git a/regress/standalone/html-meta-multi-css.html b/regress/standalone/html-meta-multi-css.html @@ -0,0 +1,14 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8" /> +<meta name="viewport" content="width=device-width,initial-scale=1" /> +<link rel="stylesheet" href="foo.css" /> +<link rel="stylesheet" href="bar.css" /> +<title>Untitled article</title> +</head> +<body> + +<p>hello, world</p> +</body> +</html> diff --git a/regress/standalone/html-meta-multi-css.md b/regress/standalone/html-meta-multi-css.md @@ -0,0 +1,3 @@ +css: foo.css bar.css + +hello, world diff --git a/regress/standalone/latex-meta-escape-title.latex b/regress/standalone/latex-meta-escape-title.latex @@ -0,0 +1,27 @@ +% Options for packages loaded elsewhere +\PassOptionsToPackage{unicode}{hyperref} +\PassOptionsToPackage{hyphens}{url} +% +\documentclass[11pt,a4paper]{article} +\usepackage{amsmath,amssymb} +\usepackage{lmodern} +\usepackage{iftex} +\ifPDFTeX + \usepackage[T1]{fontenc} + \usepackage[utf8]{inputenc} + \usepackage{textcomp} % provide euro and other symbols +\else % if luatex or xetex + \usepackage{unicode-math} + \defaultfontfeatures{Scale=MatchLowercase} + 
\defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1} +\fi +\usepackage{xcolor} +\usepackage{graphicx} +\usepackage{longtable} +\usepackage{hyperref} +\begin{document} +\title{ab\textasciitilde{}cde\textbackslash{}\{foo\}\$bar} +\maketitle + +foo bar +\end{document} diff --git a/regress/standalone/latex-meta-escape-title.md b/regress/standalone/latex-meta-escape-title.md @@ -0,0 +1,3 @@ +title: ab~cde\{foo}$bar + +foo bar diff --git a/regress/standalone/nroff-meta-escape-author.md b/regress/standalone/nroff-meta-escape-author.md @@ -0,0 +1,3 @@ +author: .ab~cde .fgh + +foo bar diff --git a/regress/standalone/nroff-meta-escape-author.ms b/regress/standalone/nroff-meta-escape-author.ms @@ -0,0 +1,7 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TL +Untitled article +.AU +\&.ab\(ticde .fgh +.PP +foo bar diff --git a/regress/standalone/nroff-meta-escape-copy.md b/regress/standalone/nroff-meta-escape-copy.md @@ -0,0 +1,4 @@ +copyright: .ab~cdes*()&)abc +.fgh + +foo bar diff --git a/regress/standalone/nroff-meta-escape-copy.ms b/regress/standalone/nroff-meta-escape-copy.ms @@ -0,0 +1,6 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.ds LF Copyright \(co .ab\(ticdes*()&)abc \&.fgh +.TL +Untitled article +.PP +foo bar diff --git a/regress/standalone/nroff-meta-escape-date.man b/regress/standalone/nroff-meta-escape-date.man @@ -0,0 +1,4 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TH "Untitled article" "7" ".ab\(ticde \&.fgh" +.PP +foo bar diff --git a/regress/standalone/nroff-meta-escape-date.md b/regress/standalone/nroff-meta-escape-date.md @@ -0,0 +1,4 @@ +date: .ab~cde +.fgh + +foo bar diff --git a/regress/standalone/nroff-meta-escape-date.ms b/regress/standalone/nroff-meta-escape-date.ms @@ -0,0 +1,6 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.DA .ab\(ticde \&.fgh +.TL +Untitled article +.PP +foo bar diff --git a/regress/standalone/nroff-meta-escape-multi-author.md b/regress/standalone/nroff-meta-escape-multi-author.md @@ -0,0 +1,4 @@ +author: .ab~cde +.fgh + +foo bar 
diff --git a/regress/standalone/nroff-meta-escape-multi-author.ms b/regress/standalone/nroff-meta-escape-multi-author.ms @@ -0,0 +1,9 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TL +Untitled article +.AU +\&.ab\(ticde +.AU +\&.fgh +.PP +foo bar diff --git a/regress/standalone/nroff-meta-escape-section.man b/regress/standalone/nroff-meta-escape-section.man @@ -0,0 +1,4 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TH "Untitled article" ".ab\(ticde \&.fgh" "" +.PP +foo bar diff --git a/regress/standalone/nroff-meta-escape-section.md b/regress/standalone/nroff-meta-escape-section.md @@ -0,0 +1,4 @@ +section: .ab~cde +.fgh + +foo bar diff --git a/regress/standalone/nroff-meta-escape-source-volume.md b/regress/standalone/nroff-meta-escape-source-volume.md @@ -0,0 +1,6 @@ +source: *&^(*&^asdfsad +volume: .ab~cde +.fgh +.fgh + +foo bar diff --git a/regress/standalone/nroff-meta-escape-source.man b/regress/standalone/nroff-meta-escape-source.man @@ -0,0 +1,4 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TH "Untitled article" "7" "" ".ab\(ticde \&.fgh" "" +.PP +foo bar diff --git a/regress/standalone/nroff-meta-escape-source.md b/regress/standalone/nroff-meta-escape-source.md @@ -0,0 +1,4 @@ +source: .ab~cde +.fgh + +foo bar diff --git a/regress/standalone/nroff-meta-escape-title.man b/regress/standalone/nroff-meta-escape-title.man @@ -0,0 +1,4 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TH ".ab\(ticde \&.fgh" "7" "" +.PP +foo bar diff --git a/regress/standalone/nroff-meta-escape-title.md b/regress/standalone/nroff-meta-escape-title.md @@ -0,0 +1,4 @@ +title: .ab~cde +.fgh + +foo bar diff --git a/regress/standalone/nroff-meta-escape-title.ms b/regress/standalone/nroff-meta-escape-title.ms @@ -0,0 +1,6 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TL +\&.ab\(ticde +\&.fgh +.PP +foo bar diff --git a/regress/standalone/nroff-meta-escape-volume.man b/regress/standalone/nroff-meta-escape-volume.man @@ -0,0 +1,4 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.TH "Untitled article" 
"7" "" "" ".ab\(ticde \&.fgh" +.PP +foo bar diff --git a/regress/standalone/nroff-meta-escape-volume.md b/regress/standalone/nroff-meta-escape-volume.md @@ -0,0 +1,4 @@ +volume: .ab~cde +.fgh + +foo bar diff --git a/regress/table-footnotes.md b/regress/table-footnotes.md @@ -0,0 +1,33 @@ + +This used to crash -Tterm because the footnote size was saved and zeroed +prior to table column-width scanning, then restored. However, this +could result in off-by-ones or -twos because the actual footnotes may +have gone double-digit while the zeroed wouldn't. + +a[^1] +a[^2] +a[^3] +a[^4] +a[^5] +a[^6] +a[^7] +a[^8] + +| Officer | Rank | +| --------------: | -------------------- | +| Jean-Luc Picard | Captain | +| Worf[^9] | Lieutenant Commander | +| Data[^10] | Lieutenant Commander | +| William Riker[^11] | Commander | + +[^1]: foo +[^2]: foo +[^3]: foo +[^4]: foo +[^5]: foo +[^6]: foo +[^7]: foo +[^8]: foo +[^9]: foo +[^10]: foo +[^11]: foo diff --git a/regress/table-footnotes.term b/regress/table-footnotes.term @@ -0,0 +1,27 @@ + This used to crash -Tterm because the footnote size was saved and + zeroed prior to table column-width scanning, then restored. However, + this could result in off-by-ones or -twos because the actual + footnotes may have gone double-digit while the zeroed wouldn’t. + + a[1;93m[1][0m a[1;93m[2][0m a[1;93m[3][0m a[1;93m[4][0m a[1;93m[5][0m a[1;93m[6][0m a[1;93m[7][0m a[1;93m[8][0m + + Officer[93m | [0mRank + [93m------------------|----------------------[0m + Jean-Luc Picard[93m | [0mCaptain + Worf[1;93m[9][0m[93m | [0mLieutenant Commander + Data[1;93m[10][0m[93m | [0mLieutenant Commander + William Riker[1;93m[11][0m[93m | [0mCommander + +[37m ~~~~~~~~[0m + + [92m 1. [0mfoo + [92m 2. [0mfoo + [92m 3. [0mfoo + [92m 4. [0mfoo + [92m 5. [0mfoo + [92m 6. [0mfoo + [92m 7. [0mfoo + [92m 8. [0mfoo + [92m 9. [0mfoo + [92m10. [0mfoo + [92m11. 
[0mfoo diff --git a/regress/table-links.gemini b/regress/table-links.gemini @@ -0,0 +1,101 @@ +``` +a | b | c +---|------|--------- +9 | [a] | foo[b] +10 | [c] | [d] +9 | [e] | foo[f] +10 | [g] | [h] +9 | [i] | foo[j] +10 | [k] | [l] +9 | [m] | foo[n] +10 | [o] | [p] +9 | [q] | foo[r] +10 | [s] | [t] +9 | [u] | foo[v] +10 | [w] | [x] +9 | [y] | foo[z] +10 | [aa] | [ab] +9 | [ac] | foo[ad] +10 | [ae] | [af] +9 | [ag] | foo[ah] +10 | [ai] | [aj] +9 | [ak] | foo[al] +10 | [am] | [an] +9 | [ao] | foo[ap] +10 | [aq] | [ar] +9 | [as] | foo[at] +10 | [au] | [av] +9 | [aw] | foo[ax] +10 | [ay] | [az] +9 | [ba] | foo[bb] +10 | [bc] | [bd] +9 | [be] | foo[bf] +10 | [bg] | [bh] +9 | [bi] | foo[bj] +10 | [bk] | [bl] +``` + +=> http://www.foo.com [a] +=> https://foo.com [b] +=> http://www.bar.com [c] +=> https://baz.com [d] +=> http://www.foo.com [e] +=> https://foo.com [f] +=> http://www.bar.com [g] +=> https://baz.com [h] +=> http://www.foo.com [i] +=> https://foo.com [j] +=> http://www.bar.com [k] +=> https://baz.com [l] +=> http://www.foo.com [m] +=> https://foo.com [n] +=> http://www.bar.com [o] +=> https://baz.com [p] +=> http://www.foo.com [q] +=> https://foo.com [r] +=> http://www.bar.com [s] +=> https://baz.com [t] +=> http://www.foo.com [u] +=> https://foo.com [v] +=> http://www.bar.com [w] +=> https://baz.com [x] +=> http://www.foo.com [y] +=> https://foo.com [z] +=> http://www.bar.com [aa] +=> https://baz.com [ab] +=> http://www.foo.com [ac] +=> https://foo.com [ad] +=> http://www.bar.com [ae] +=> https://baz.com [af] +=> http://www.foo.com [ag] +=> https://foo.com [ah] +=> http://www.bar.com [ai] +=> https://baz.com [aj] +=> http://www.foo.com [ak] +=> https://foo.com [al] +=> http://www.bar.com [am] +=> https://baz.com [an] +=> http://www.foo.com [ao] +=> https://foo.com [ap] +=> http://www.bar.com [aq] +=> https://baz.com [ar] +=> http://www.foo.com [as] +=> https://foo.com [at] +=> http://www.bar.com [au] +=> https://baz.com [av] +=> http://www.foo.com [aw] +=> 
https://foo.com [ax] +=> http://www.bar.com [ay] +=> https://baz.com [az] +=> http://www.foo.com [ba] +=> https://foo.com [bb] +=> http://www.bar.com [bc] +=> https://baz.com [bd] +=> http://www.foo.com [be] +=> https://foo.com [bf] +=> http://www.bar.com [bg] +=> https://baz.com [bh] +=> http://www.foo.com [bi] +=> https://foo.com [bj] +=> http://www.bar.com [bk] +=> https://baz.com [bl] diff --git a/regress/table-links.html b/regress/table-links.html @@ -0,0 +1,172 @@ +<table> +<thead> +<tr> +<th>a</th> +<th>b</th> +<th>c</th> +</tr> +</thead> + +<tbody> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a 
href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a 
href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +<tr> +<td>9</td> +<td><a href="http://www.foo.com">http://www.foo.com</a></td> +<td><a href="https://foo.com">foo</a></td> +</tr> +<tr> +<td>10</td> +<td><a href="http://www.bar.com">http://www.bar.com</a></td> +<td><a href="https://baz.com">https://baz.com</a></td> +</tr> +</tbody> +</table> diff --git a/regress/table-links.latex b/regress/table-links.latex @@ -0,0 +1,35 @@ +\begin{longtable}[]{lll} +a & b & c \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} 
\\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +9 & \url{http://www.foo.com} & \href{https://foo.com}{foo} \\ +10 & \url{http://www.bar.com} & \url{https://baz.com} \\ +\end{longtable} diff --git a/regress/table-links.man b/regress/table-links.man @@ -0,0 +1,236 @@ +.TS +tab(|) expand allbox; +lb lb lb +l l l. 
+T{ +a +T}|T{ +b +T}|T{ +c +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ 
+\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +T{ +9 +T}|T{ +\fIhttp://www.foo.com\fR +T}|T{ +\fBfoo\fR <\fIhttps://foo.com\fR> +T} +T{ +10 +T}|T{ +\fIhttp://www.bar.com\fR +T}|T{ +\fIhttps://baz.com\fR +T} +.TE diff --git a/regress/table-links.md b/regress/table-links.md @@ -0,0 +1,36 @@ + +a | b | c +---| ------------|--- +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com +9 | www.foo.com | [foo](https://foo.com) +10 | www.bar.com | https://baz.com + 
diff --git a/regress/table-links.ms b/regress/table-links.ms @@ -0,0 +1,237 @@ +.TS H +tab(|) expand allbox; +lb lb lb +l l l. +T{ +a +T}|T{ +b +T}|T{ +c +T} +.TH +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ 
+.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +T{ +9 +T}|T{ +.pdfhref W -D http://www.foo.com -- 
http://www.foo.com +T}|T{ +.pdfhref W -D https://foo.com -- foo +T} +T{ +10 +T}|T{ +.pdfhref W -D http://www.bar.com -- http://www.bar.com +T}|T{ +.pdfhref W -D https://baz.com -- https://baz.com +T} +.TE diff --git a/share/odt/styles.xml b/share/odt/styles.xml @@ -0,0 +1,230 @@ +<office:font-face-decls> + <style:font-face style:name="OpenSymbol" + svg:font-family="OpenSymbol" style:font-charset="x-symbol"/> + <style:font-face style:name="Liberation Mono" + svg:font-family="'Liberation Mono'" + style:font-family-generic="modern" style:font-pitch="fixed"/> + <style:font-face style:name="Liberation Serif" + svg:font-family="'Liberation Serif'" + style:font-family-generic="roman" style:font-pitch="variable"/> + <style:font-face style:name="Liberation Sans" + svg:font-family="'Liberation Sans'" + style:font-family-generic="swiss" style:font-pitch="variable"/> +</office:font-face-decls> +<office:scripts> + <office:script script:language="ooo:Basic"> + <ooo:libraries xmlns:ooo="http://openoffice.org/2004/office" + xmlns:xlink="http://www.w3.org/1999/xlink"/> + </office:script> +</office:scripts> +<office:styles> + <style:style style:name="Standard" + style:family="paragraph" style:class="text"/> + <style:style style:name="Text_20_body" + style:display-name="Text body" + style:family="paragraph" + style:parent-style-name="Standard" style:class="text"> + <style:paragraph-properties fo:margin-top="0cm" + fo:margin-bottom="0.247cm" + style:contextual-spacing="false" + fo:line-height="115%"/> + </style:style> + <style:style style:family="paragraph" + style:name="Horizontal_20_Line" + style:parent-style-name="Standard" + style:display-name="Horizontal Line" + style:next-style-name="Text_20_body" style:class="html"> + <style:paragraph-properties + fo:margin-top="0cm" + fo:margin-bottom="0.499cm" + style:contextual-spacing="false" + style:border-line-width-bottom="0.002cm 0.004cm 0.002cm" + fo:padding="0cm" + fo:border-left="none" + fo:border-right="none" + fo:border-top="none" 
+ fo:border-bottom="0.14pt double #808080" + text:number-lines="false" + text:line-number="0" + style:join-border="false"/> + <style:text-properties fo:font-size="6pt" + style:font-size-asian="6pt" + style:font-size-complex="6pt"/> + </style:style> + <style:style style:family="text" + style:name="Internet_20_Link" + style:display-name="Internet Link"> + <style:text-properties fo:color="#000080" + loext:opacity="100%" + fo:language="zxx" + fo:country="none" + style:language-asian="zxx" + style:country-asian="none" + style:language-complex="zxx" + style:country-complex="none" + style:text-underline-style="solid" + style:text-underline-color="font-color" + style:text-underline-width="auto"/> + </style:style> + <style:style style:family="text" + style:name="Source_20_Text" + style:display-name="Source Text"> + <style:text-properties style:font-name="Liberation Mono" + fo:font-family="'Liberation Mono'" + style:font-family-generic="modern" + style:font-pitch="fixed" + style:font-name-asian="Liberation Mono" + style:font-family-asian="'Liberation Mono'" + style:font-family-generic-asian="modern" + style:font-pitch-asian="fixed" + style:font-name-complex="Liberation Mono" + style:font-family-complex="'Liberation Mono'" + style:font-family-generic-complex="modern" + style:font-pitch-complex="fixed"/> + </style:style> + <style:style style:name="Frame" style:family="graphic"> + <style:graphic-properties text:anchor-type="as-char" + svg:x="0cm" + svg:y="0cm" + fo:margin-left="0cm" + fo:margin-right="0cm" + fo:margin-top="0.201cm" + fo:margin-bottom="0.201cm" + style:wrap="parallel" + style:number-wrapped-paragraphs="no-limit" + style:wrap-contour="false" + style:vertical-pos="top" + style:vertical-rel="paragraph-content" + style:horizontal-pos="center" + style:horizontal-rel="paragraph-content" + fo:padding="0cm" + fo:border="0pt solid #000000"/> + </style:style> + <style:style style:name="Preformatted_20_Text" + style:display-name="Preformatted Text" + 
style:family="paragraph" + style:parent-style-name="Standard" style:class="html"> + <style:paragraph-properties fo:margin-top="0cm" + fo:margin-bottom="0cm" + style:contextual-spacing="false"/> + <style:text-properties style:font-name="Liberation Mono" + fo:font-family="'Liberation Mono'" + style:font-family-generic="modern" + style:font-pitch="fixed" + fo:font-size="10pt" + style:font-name-asian="Liberation Mono" + style:font-family-asian="'Liberation Mono'" + style:font-family-generic-asian="modern" + style:font-pitch-asian="fixed" + style:font-size-asian="10pt" + style:font-name-complex="Liberation Mono" + style:font-family-complex="'Liberation Mono'" + style:font-family-generic-complex="modern" + style:font-pitch-complex="fixed" + style:font-size-complex="10pt"/> + </style:style> + <style:style style:name="Table_20_Contents" + style:display-name="Table Contents" + style:family="paragraph" + style:parent-style-name="Standard" style:class="extra"> + <style:paragraph-properties fo:orphans="0" + fo:widows="0" + text:number-lines="false" + text:line-number="0"/> + </style:style> + <style:style style:name="Heading" + style:family="paragraph" + style:parent-style-name="Standard" style:class="text"> + <style:paragraph-properties fo:margin-top="0.423cm" + fo:margin-bottom="0.212cm" + style:contextual-spacing="false" + fo:keep-with-next="always"/> + <style:text-properties style:font-name="Liberation Sans" + fo:font-family="'Liberation Sans'" + style:font-family-generic="swiss" + style:font-pitch="variable" + fo:font-size="14pt" + style:font-name-asian="Liberation Sans" + style:font-family-asian="'Liberation Sans'" + style:font-family-generic-asian="system" + style:font-pitch-asian="variable" + style:font-size-asian="14pt" + style:font-name-complex="Liberation Sans" + style:font-family-complex="'Liberation Sans'" + style:font-family-generic-complex="system" + style:font-pitch-complex="variable" + style:font-size-complex="14pt"/> + </style:style> + <style:style 
style:name="Bullet_20_Symbols" + style:display-name="Bullet Symbols" style:family="text"> + <style:text-properties style:font-name="OpenSymbol" + fo:font-family="OpenSymbol" + style:font-charset="x-symbol" + style:font-name-asian="OpenSymbol" + style:font-family-asian="OpenSymbol" + style:font-charset-asian="x-symbol" + style:font-name-complex="OpenSymbol" + style:font-family-complex="OpenSymbol" + style:font-charset-complex="x-symbol"/> + </style:style> + <style:style style:name="Numbering_20_Symbols" + style:display-name="Numbering Symbols" style:family="text"/> + <style:style style:name="Heading_20_1" + style:display-name="Heading 1" + style:family="paragraph" + style:parent-style-name="Heading" + style:next-style-name="Text_20_body" + style:default-outline-level="1" style:class="text"> + <style:paragraph-properties fo:margin-top="0.423cm" + fo:margin-bottom="0.212cm" + style:contextual-spacing="false"/> + <style:text-properties fo:font-size="130%" + fo:font-weight="bold" + style:font-size-asian="130%" + style:font-weight-asian="bold" + style:font-size-complex="130%" + style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="Heading_20_2" + style:display-name="Heading 2" + style:family="paragraph" + style:parent-style-name="Heading" + style:next-style-name="Text_20_body" + style:default-outline-level="2" style:class="text"> + <style:paragraph-properties fo:margin-top="0.353cm" + fo:margin-bottom="0.212cm" + style:contextual-spacing="false"/> + <style:text-properties fo:font-size="115%" + fo:font-weight="bold" + style:font-size-asian="115%" + style:font-weight-asian="bold" + style:font-size-complex="115%" + style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="Heading_20_3" + style:display-name="Heading 3" + style:family="paragraph" + style:parent-style-name="Heading" + style:next-style-name="Text_20_body" + style:default-outline-level="3" style:class="text"> + <style:paragraph-properties fo:margin-top="0.247cm" + 
fo:margin-bottom="0.212cm" + style:contextual-spacing="false"/> + <style:text-properties fo:font-size="101%" + fo:font-weight="bold" + style:font-size-asian="101%" + style:font-weight-asian="bold" + style:font-size-complex="101%" + style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="fr1" + style:family="graphic" style:parent-style-name="Frame"> + <style:graphic-properties style:run-through="foreground" + style:wrap="parallel" + style:number-wrapped-paragraphs="no-limit" + style:vertical-pos="middle" + style:vertical-rel="baseline" + style:horizontal-pos="center" + style:horizontal-rel="paragraph"/> + </style:style> +</office:styles> diff --git a/smartypants.c b/smartypants.c @@ -0,0 +1,486 @@ +/* $Id$ */ +/* + * Copyright (c) 2020 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "lowdown.h" +#include "extern.h" + +enum entity { + ENT_COPY, + ENT_REG, + ENT_TMARK, + ENT_SMARK, + ENT_ELLIP, + ENT_MDASH, + ENT_NDASH, + ENT_LDQUO, + ENT_RDQUO, + ENT_LSQUO, + ENT_RSQUO, + ENT_FRAC14, + ENT_FRAC12, + ENT_FRAC34, + ENT__MAX +}; + +enum type { + TYPE_ROOT, /* root (LOWDOWN_ROOT) */ + TYPE_BLOCK, /* block-level */ + TYPE_SPAN, /* span-level */ + TYPE_OPAQUE, /* skip */ + TYPE_TEXT /* text (LOWDOWN_NORMAL_TEXT) */ +}; + +struct sym { + const char *key; /* input in markdown */ + enum entity ent; /* output entity */ +}; + +struct smarty { + int left_wb; /* left wordbreak */ +}; + +static const char *ents[ENT__MAX] = { + "©", /* ENT_COPY */ + "®", /* ENT_REG */ + "™", /* ENT_TMARK */ + "℠", /* ENT_SMARK */ + "…", /* ENT_ELLIP */ + "—", /* ENT_MDASH */ + "–", /* ENT_NDASH */ + "“", /* ENT_LDQUO */ + "”", /* ENT_RDQUO */ + "‘", /* ENT_LSQUO */ + "’", /* ENT_RSQUO */ + "¼", /* ENT_FRAC14 */ + "½", /* ENT_FRAC12 */ + "¾", /* ENT_FRAC34 */ +}; + +/* + * Order is important: check the longest subset first. + * (So basically "---" comes before "--".) + */ +static const struct sym syms[] = { + { "(c)", ENT_COPY }, + { "(C)", ENT_COPY }, + { "(r)", ENT_REG }, + { "(R)", ENT_REG }, + { "(tm)", ENT_TMARK }, + { "(TM)", ENT_TMARK }, + { "(sm)", ENT_SMARK }, + { "(SM)", ENT_SMARK }, + { "...", ENT_ELLIP }, + { ". . .", ENT_ELLIP }, + { "---", ENT_MDASH }, + { "--", ENT_NDASH }, + { NULL, ENT__MAX } +}; + +/* + * Symbols that require word-break on both sides. + * Again, order is important: longest-first. 
+ */ +static const struct sym syms2[] = { + { "1/4th", ENT_FRAC14 }, + { "1/4", ENT_FRAC14 }, + { "3/4ths", ENT_FRAC34 }, + { "3/4th", ENT_FRAC34 }, + { "3/4", ENT_FRAC34 }, + { "1/2", ENT_FRAC12 }, + { NULL, ENT__MAX } +}; + +static const enum type types[LOWDOWN__MAX] = { + TYPE_ROOT, /* LOWDOWN_ROOT */ + TYPE_OPAQUE, /* LOWDOWN_BLOCKCODE */ + TYPE_BLOCK, /* LOWDOWN_BLOCKQUOTE */ + TYPE_BLOCK, /* LOWDOWN_DEFINITION */ + TYPE_BLOCK, /* LOWDOWN_DEFINITION_TITLE */ + TYPE_BLOCK, /* LOWDOWN_DEFINITION_DATA */ + TYPE_BLOCK, /* LOWDOWN_HEADER */ + TYPE_BLOCK, /* LOWDOWN_HRULE */ + TYPE_BLOCK, /* LOWDOWN_LIST */ + TYPE_BLOCK, /* LOWDOWN_LISTITEM */ + TYPE_BLOCK, /* LOWDOWN_PARAGRAPH */ + TYPE_BLOCK, /* LOWDOWN_TABLE_BLOCK */ + TYPE_BLOCK, /* LOWDOWN_TABLE_HEADER */ + TYPE_BLOCK, /* LOWDOWN_TABLE_BODY */ + TYPE_BLOCK, /* LOWDOWN_TABLE_ROW */ + TYPE_BLOCK, /* LOWDOWN_TABLE_CELL */ + TYPE_OPAQUE, /* LOWDOWN_BLOCKHTML */ + TYPE_OPAQUE, /* LOWDOWN_LINK_AUTO */ + TYPE_OPAQUE, /* LOWDOWN_CODESPAN */ + TYPE_SPAN, /* LOWDOWN_DOUBLE_EMPHASIS */ + TYPE_SPAN, /* LOWDOWN_EMPHASIS */ + TYPE_SPAN, /* LOWDOWN_HIGHLIGHT */ + TYPE_SPAN, /* LOWDOWN_IMAGE */ + TYPE_SPAN, /* LOWDOWN_LINEBREAK */ + TYPE_SPAN, /* LOWDOWN_LINK */ + TYPE_SPAN, /* LOWDOWN_TRIPLE_EMPHASIS */ + TYPE_SPAN, /* LOWDOWN_STRIKETHROUGH */ + TYPE_SPAN, /* LOWDOWN_SUPERSCRIPT */ + TYPE_BLOCK, /* LOWDOWN_FOOTNOTE */ + TYPE_OPAQUE, /* LOWDOWN_MATH_BLOCK */ + TYPE_OPAQUE, /* LOWDOWN_RAW_HTML */ + TYPE_OPAQUE, /* LOWDOWN_ENTITY */ + TYPE_TEXT, /* LOWDOWN_NORMAL_TEXT */ + TYPE_BLOCK, /* LOWDOWN_DOC_HEADER */ + TYPE_BLOCK, /* LOWDOWN_META */ +}; + +/* + * Given the sequence in "n" starting at "start" and ending at "end", + * split "n" around the sequence and replace it with "entity". + * This behaves properly if the leading or trailing sequence is + * zero-length. + * It may modify the subtree rooted at the parent of "n". + * Return zero on failure (memory), non-zero on success. 
+ */ +static int +smarty_entity(struct lowdown_node *n, size_t *maxn, + size_t start, size_t end, enum entity entity) +{ + struct lowdown_node *nn, *nent; + + assert(n->type == LOWDOWN_NORMAL_TEXT); + + /* Allocate the subsequent entity. */ + + nent = calloc(1, sizeof(struct lowdown_node)); + if (nent == NULL) + return 0; + TAILQ_INSERT_AFTER(&n->parent->children, n, nent, entries); + + nent->id = (*maxn)++; + nent->type = LOWDOWN_ENTITY; + nent->parent = n->parent; + TAILQ_INIT(&nent->children); + nent->rndr_entity.text.data = strdup(ents[entity]); + if (nent->rndr_entity.text.data == NULL) + return 0; + nent->rndr_entity.text.size = strlen(ents[entity]); + + /* Allocate the remaining bits, if applicable. */ + + if (n->rndr_normal_text.text.size - end > 0) { + nn = calloc(1, sizeof(struct lowdown_node)); + if (nn == NULL) + return 0; + TAILQ_INSERT_AFTER(&n->parent->children, + nent, nn, entries); + + nn->id = (*maxn)++; + nn->type = LOWDOWN_NORMAL_TEXT; + nn->parent = n->parent; + TAILQ_INIT(&nn->children); + nn->rndr_normal_text.text.size = + n->rndr_normal_text.text.size - end; + nn->rndr_normal_text.text.data = + malloc(nn->rndr_normal_text.text.size); + if (nn->rndr_normal_text.text.data == NULL) + return 0; + memcpy(nn->rndr_normal_text.text.data, + n->rndr_normal_text.text.data + end, + nn->rndr_normal_text.text.size); + } + + n->rndr_normal_text.text.size = start; + return 1; +} + +/* + * Whether the character to the left of a word constitutes a word break + * on its left side. + * This is any space or opening punctuation. + */ +static int +smarty_is_wb_l(char c) +{ + + return isspace((unsigned char)c) || + '(' == c || '[' == c; +} + +/* + * Whether the character to the right of a word constitutes a word + * break. + * This is any space or punctuation. + */ +static int +smarty_is_wb_r(char c) +{ + + return isspace((unsigned char)c) || + ispunct((unsigned char)c); +} + +/* + * Recursive scan for next white-space. 
+ * If "skip" is set, we're on the starting node and shouldn't do a check
+ * for white-space in ourselves.
+ * Return non-zero if the next thing to the right is a word break (a
+ * block-level node, a space or punctuation character, or the end of
+ * the tree), zero otherwise (including when an opaque node is hit).
+ */
+static int
+smarty_right_wb_r(const struct lowdown_node *n, int skip)
+{
+	const struct lowdown_buf	*b;
+	const struct lowdown_node	*nn;
+
+	/* Check type of node. */
+
+	if (types[n->type] == TYPE_BLOCK)
+		return 1;
+	if (types[n->type] == TYPE_OPAQUE)
+		return 0;
+
+	/* Non-empty text decides by its first character. */
+
+	if (!skip &&
+	    types[n->type] == TYPE_TEXT &&
+	    n->rndr_normal_text.text.size) {
+		assert(n->type == LOWDOWN_NORMAL_TEXT);
+		b = &n->rndr_normal_text.text;
+		return smarty_is_wb_r(b->data[0]);
+	}
+
+	/* First scan down. */
+
+	if ((nn = TAILQ_FIRST(&n->children)) != NULL)
+		return smarty_right_wb_r(nn, 0);
+
+	/* Now scan back up. */
+
+	do {
+		/* FIXME: don't go up to block. */
+		if ((nn = TAILQ_NEXT(n, entries)) != NULL)
+			return smarty_right_wb_r(nn, 0);
+	} while ((n = n->parent) != NULL);
+
+	/* Ran off the end of the tree: treat as a word break. */
+
+	return 1;
+}
+
+/*
+ * See if the character to the right of position "pos" in node "n" marks
+ * the end of a word.
+ * This may require us to traverse the node graph if we're on a node
+ * boundary as well.
+ */
+static int
+smarty_right_wb(const struct lowdown_node *n, size_t pos)
+{
+	const struct lowdown_buf	*b;
+
+	assert(n->type == LOWDOWN_NORMAL_TEXT);
+	b = &n->rndr_normal_text.text;
+
+	/* Still inside our own buffer. */
+
+	if (pos + 1 <= b->size)
+		return smarty_is_wb_r(b->data[pos]);
+
+	return smarty_right_wb_r(n, 1);
+}
+
+/*
+ * Scan the text "b" of node "n" for the first sequence to replace and
+ * split the node around it with smarty_entity().
+ * At most one replacement is made per call: smarty_entity() moves any
+ * text after the sequence into a new sibling node.
+ * FIXME: this can be faster with a table-based lookup instead of the
+ * switch statement.
+ * Returns >0 if a left-quote entity was inserted as the next node
+ * of the parse tree, <0 on failure, otherwise return zero.
+ */
+static int
+smarty_hbuf(struct lowdown_node *n, size_t *maxn,
+	struct lowdown_buf *b, struct smarty *s)
+{
+	size_t	 i = 0, j, sz;
+
+	assert(n->type == LOWDOWN_NORMAL_TEXT);
+
+	for (i = 0; i < b->size; i++) {
+		switch (b->data[i]) {
+		case '.':
+		case '(':
+		case '-':
+			/* Symbols that don't need wordbreak. */
+
+			for (j = 0; syms[j].key != NULL; j++) {
+				sz = strlen(syms[j].key);
+				/* Key would run past the buffer. */
+				if (i + sz - 1 >= b->size)
+					continue;
+				if (memcmp(syms[j].key,
+				    &b->data[i], sz))
+					continue;
+				if (!smarty_entity(n, maxn,
+				    i, i + sz, syms[j].ent))
+					return -1;
+				return 0;
+			}
+			break;
+		case '"':
+			/* Left-wb and right-wb differ. */
+
+			if (!s->left_wb) {
+				/* Closing quote needs a break after it. */
+				if (!smarty_right_wb(n, i + 1))
+					break;
+				if (!smarty_entity(n, maxn,
+				    i, i + 1, ENT_RDQUO))
+					return -1;
+				return 0;
+			}
+			if (!smarty_entity
+			    (n, maxn, i, i + 1, ENT_LDQUO))
+				return -1;
+			return 1;
+		case '\'':
+			/* Left-wb and right-wb differ. */
+
+			if (!s->left_wb) {
+				if (!smarty_entity(n, maxn,
+				    i, i + 1, ENT_RSQUO))
+					return -1;
+				return 0;
+			}
+			if (!smarty_entity
+			    (n, maxn, i, i + 1, ENT_LSQUO))
+				return -1;
+			return 1;
+		case '1':
+		case '3':
+			/* Symbols that require wb. */
+
+			if (!s->left_wb)
+				break;
+			for (j = 0; syms2[j].key != NULL; j++) {
+				sz = strlen(syms2[j].key);
+				if (i + sz - 1 >= b->size)
+					continue;
+				if (memcmp(syms2[j].key,
+				    &b->data[i], sz))
+					continue;
+				if (!smarty_right_wb(n, i + sz))
+					continue;
+				if (!smarty_entity(n, maxn, i,
+				    i + sz, syms2[j].ent))
+					return -1;
+				return 0;
+			}
+			break;
+		default:
+			break;
+		}
+
+		/* Remember whether this character opens a word. */
+
+		s->left_wb = smarty_is_wb_l(b->data[i]);
+	}
+
+	return 0;
+}
+
+static int
+smarty_block(struct lowdown_node *, size_t *, enum lowdown_type);
+
+/*
+ * Process the children of the span-level node "root", carrying the
+ * word-break state "s" across them.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+smarty_span(struct lowdown_node *root, size_t *maxn,
+	struct smarty *s, enum lowdown_type type)
+{
+	struct lowdown_node	*n;
+	int			 c;
+
+	TAILQ_FOREACH(n, &root->children, entries)
+		switch (types[n->type]) {
+		case TYPE_TEXT:
+			assert(n->type == LOWDOWN_NORMAL_TEXT);
+			c = smarty_hbuf(n, maxn,
+				&n->rndr_normal_text.text, s);
+			if (c < 0)
+				return 0;
+			/* Step over the left-quote entity just added. */
+			if (c > 0)
+				n = TAILQ_NEXT(n, entries);
+			break;
+		case TYPE_SPAN:
+			if (!smarty_span(n, maxn, s, type))
+				return 0;
+			break;
+		case TYPE_OPAQUE:
+			s->left_wb = 0;
+			break;
+		case TYPE_BLOCK:
+			if (!smarty_block(n, maxn, type))
+				return 0;
+			break;
+		case TYPE_ROOT:
+			abort();
+		}
+
+	return 1;
+}
+
+/*
+ * Process the children of the block-level (or root) node "root".
+ * Each block starts with fresh word-break state, with a word break
+ * assumed on the left.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+smarty_block(struct lowdown_node *root,
+	size_t *maxn, enum lowdown_type type)
+{
+	struct smarty		 s;
+	struct lowdown_node	*n;
+	int			 c;
+
+	s.left_wb = 1;
+
+	TAILQ_FOREACH(n, &root->children, entries)
+		switch (types[n->type]) {
+		case TYPE_ROOT:
+		case TYPE_BLOCK:
+			if (!smarty_block(n, maxn, type))
+				return 0;
+			break;
+		case TYPE_TEXT:
+			assert(n->type == LOWDOWN_NORMAL_TEXT);
+			c = smarty_hbuf(n, maxn,
+				&n->rndr_normal_text.text, &s);
+			if (c < 0)
+				return 0;
+			/* Step over the left-quote entity just added. */
+			if (c > 0)
+				n = TAILQ_NEXT(n, entries);
+			break;
+		case TYPE_SPAN:
+			if (!smarty_span(n, maxn, &s, type))
+				return 0;
+			break;
+		case TYPE_OPAQUE:
+			s.left_wb = 0;
+			break;
+		default:
+			break;
+		}
+
+	s.left_wb = 1;
+	return 1;
+}
+
+/*
+ * Entry point: run the substitutions over the tree rooted at "n",
+ * which must be a root node; "maxn" seeds the identifiers given to
+ * newly-created nodes.
+ * A NULL tree is accepted and does nothing.
+ * Return zero on failure, non-zero on success.
+ */
+int
+smarty(struct lowdown_node *n, size_t maxn, enum lowdown_type type)
+{
+
+	if (n == NULL)
+		return 1;
+	assert(types[n->type] == TYPE_ROOT);
+	return smarty_block(n, &maxn, type);
+}
diff --git a/smartypants.o b/smartypants.o
Binary files differ.
diff --git a/term.c b/term.c
@@ -0,0 +1,1541 @@
+/*	$Id$ */
+/*
+ * Copyright (c) 2020--2021 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ +#include "config.h" + +#if HAVE_SYS_QUEUE +# include <sys/queue.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <wchar.h> + +#include "lowdown.h" +#include "extern.h" + +struct tstack { + const struct lowdown_node *n; /* node in question */ + size_t lines; /* times emitted */ +}; + +struct term { + unsigned int opts; /* oflags from lowdown_cfg */ + size_t col; /* output column from zero */ + ssize_t last_blank; /* line breaks or -1 (start) */ + struct tstack *stack; /* stack of nodes */ + size_t stackmax; /* size of stack */ + size_t stackpos; /* position in stack */ + size_t maxcol; /* soft limit */ + size_t hmargin; /* left of content */ + size_t vmargin; /* before/after content */ + struct lowdown_buf *tmp; /* for temporary allocations */ + wchar_t *buf; /* buffer for counting wchar */ + size_t bufsz; /* size of buf */ + struct lowdown_buf **foots; /* footnotes */ + size_t footsz; /* footnotes size */ + int footoff; /* don't collect (tables) */ +}; + +/* + * How to style the output on the screen. + */ +struct sty { + int italic; /* italic */ + int strike; /* strikethrough */ + int bold; /* bold */ + int under; /* underline */ + size_t bcolour; /* not inherited */ + size_t colour; /* not inherited */ + int override; /* don't inherit... */ +#define OSTY_UNDER 0x01 /* underlining */ +#define OSTY_BOLD 0x02 /* bold */ +}; + +/* + * Prefixes to put before each line. This only applies to very specific + * circumstances. 
+ */ +struct pfx { + const char *text; + size_t cols; +}; + +#include "term.h" + +static const struct sty *stys[LOWDOWN__MAX] = { + NULL, /* LOWDOWN_ROOT */ + &sty_blockcode, /* LOWDOWN_BLOCKCODE */ + NULL, /* LOWDOWN_BLOCKQUOTE */ + NULL, /* LOWDOWN_DEFINITION */ + NULL, /* LOWDOWN_DEFINITION_TITLE */ + NULL, /* LOWDOWN_DEFINITION_DATA */ + &sty_header, /* LOWDOWN_HEADER */ + &sty_hrule, /* LOWDOWN_HRULE */ + NULL, /* LOWDOWN_LIST */ + NULL, /* LOWDOWN_LISTITEM */ + NULL, /* LOWDOWN_PARAGRAPH */ + NULL, /* LOWDOWN_TABLE_BLOCK */ + NULL, /* LOWDOWN_TABLE_HEADER */ + NULL, /* LOWDOWN_TABLE_BODY */ + NULL, /* LOWDOWN_TABLE_ROW */ + NULL, /* LOWDOWN_TABLE_CELL */ + &sty_blockhtml, /* LOWDOWN_BLOCKHTML */ + &sty_autolink, /* LOWDOWN_LINK_AUTO */ + &sty_codespan, /* LOWDOWN_CODESPAN */ + &sty_d_emph, /* LOWDOWN_DOUBLE_EMPHASIS */ + &sty_emph, /* LOWDOWN_EMPHASIS */ + &sty_highlight, /* LOWDOWN_HIGHLIGHT */ + &sty_img, /* LOWDOWN_IMAGE */ + NULL, /* LOWDOWN_LINEBREAK */ + &sty_link, /* LOWDOWN_LINK */ + &sty_t_emph, /* LOWDOWN_TRIPLE_EMPHASIS */ + &sty_strike, /* LOWDOWN_STRIKETHROUGH */ + NULL, /* LOWDOWN_SUPERSCRIPT */ + NULL, /* LOWDOWN_FOOTNOTE */ + NULL, /* LOWDOWN_MATH_BLOCK */ + &sty_rawhtml, /* LOWDOWN_RAW_HTML */ + NULL, /* LOWDOWN_ENTITY */ + NULL, /* LOWDOWN_NORMAL_TEXT */ + NULL, /* LOWDOWN_DOC_HEADER */ + NULL, /* LOWDOWN_META */ +}; + +/* + * Whether the style is not empty (i.e., has style attributes). + */ +#define STY_NONEMPTY(_s) \ + ((_s)->colour || (_s)->bold || (_s)->italic || \ + (_s)->under || (_s)->strike || (_s)->bcolour || \ + (_s)->override) + +/* Forward declaration. */ + +static int +rndr(struct lowdown_buf *, struct term *, const struct lowdown_node *); + +/* + * Get the column width of a multi-byte sequence. 
The sequence should + * be self-contained, i.e., not straddle multi-byte borders, because the + * calculation for UTF-8 columns is local to this function: a split + * multi-byte sequence will fail to return the correct number of + * printable columns. If the sequence is bad, return the number of raw + * bytes to print. Return <0 on failure (memory), >=0 otherwise with + * the number of printable columns. + */ +static ssize_t +rndr_mbswidth(struct term *term, const char *buf, size_t sz) +{ + size_t wsz, csz; + const char *cp; + void *pp; + mbstate_t mbs; + + memset(&mbs, 0, sizeof(mbstate_t)); + cp = buf; + wsz = mbsnrtowcs(NULL, &cp, sz, 0, &mbs); + if (wsz == (size_t)-1) + return sz; + + if (term->bufsz < wsz) { + term->bufsz = wsz; + pp = reallocarray(term->buf, wsz, sizeof(wchar_t)); + if (pp == NULL) + return -1; + term->buf = pp; + } + + memset(&mbs, 0, sizeof(mbstate_t)); + cp = buf; + mbsnrtowcs(term->buf, &cp, sz, wsz, &mbs); + csz = wcswidth(term->buf, wsz); + return csz == (size_t)-1 ? sz : csz; +} + +/* + * Copy the buffer into "out", escaping along the width. + * Returns the number of actual printed columns, which in the case of + * multi-byte glyphs, may be less than the given bytes. + * Return <0 on failure (memory), >= 0 otherwise. + */ +static ssize_t +rndr_escape(struct term *term, struct lowdown_buf *out, + const char *buf, size_t sz) +{ + size_t i, start = 0, cols = 0; + ssize_t ret; + + /* Don't allow control characters through. */ + + for (i = 0; i < sz; i++) + if (iscntrl((unsigned char)buf[i])) { + ret = rndr_mbswidth + (term, buf + start, i - start); + if (ret < 0) + return -1; + cols += ret; + if (!hbuf_put(out, buf + start, i - start)) + return -1; + start = i + 1; + } + + /* Remaining bytes. 
*/ + + if (start < sz) { + ret = rndr_mbswidth(term, buf + start, sz - start); + if (ret < 0) + return -1; + cols += ret; + if (!hbuf_put(out, buf + start, sz - start)) + return -1; + } + + return cols; +} + +static void +rndr_free_footnotes(struct term *st) +{ + size_t i; + + for (i = 0; i < st->footsz; i++) + hbuf_free(st->foots[i]); + + free(st->foots); + st->foots = NULL; + st->footsz = 0; + st->footoff = 0; +} + +/* + * If there's an active style in "s" or s is NULL), then emit an + * unstyling escape sequence. Return zero on failure (memory), non-zero + * on success. + */ +static int +rndr_buf_unstyle(const struct term *term, + struct lowdown_buf *out, const struct sty *s) +{ + + if (term->opts & LOWDOWN_TERM_NOANSI) + return 1; + if (s != NULL && !STY_NONEMPTY(s)) + return 1; + return HBUF_PUTSL(out, "\033[0m"); +} + +/* + * Output style "s" into "out" as an ANSI escape. If "s" does not have + * any style information or is NULL, output nothing. Return zero on + * failure (memory), non-zero on success. 
+ */
+static int
+rndr_buf_style(const struct term *term,
+	struct lowdown_buf *out, const struct sty *s)
+{
+	size_t	 codes[6];
+	size_t	 ncodes = 0, i;
+	int	 usecolour;
+
+	/* Nothing to do without ANSI or without any style set. */
+
+	if (term->opts & LOWDOWN_TERM_NOANSI)
+		return 1;
+	if (s == NULL || !STY_NONEMPTY(s))
+		return 1;
+
+	usecolour = !(term->opts & LOWDOWN_TERM_NOCOLOUR);
+
+	/*
+	 * Collect the numeric codes in output order: bold, underline,
+	 * italic, strikethrough, background colour, foreground colour.
+	 * Colours outside of the accepted ranges are ignored.
+	 */
+
+	if (s->bold)
+		codes[ncodes++] = 1;
+	if (s->under)
+		codes[ncodes++] = 4;
+	if (s->italic)
+		codes[ncodes++] = 3;
+	if (s->strike)
+		codes[ncodes++] = 9;
+	if (s->bcolour && usecolour &&
+	    ((s->bcolour >= 40 && s->bcolour <= 47) ||
+	     (s->bcolour >= 100 && s->bcolour <= 107)))
+		codes[ncodes++] = s->bcolour;
+	if (s->colour && usecolour &&
+	    ((s->colour >= 30 && s->colour <= 37) ||
+	     (s->colour >= 90 && s->colour <= 97)))
+		codes[ncodes++] = s->colour;
+
+	/* Emit the escape with the codes separated by semicolons. */
+
+	if (!HBUF_PUTSL(out, "\033["))
+		return 0;
+	for (i = 0; i < ncodes; i++) {
+		if (i > 0 && !HBUF_PUTSL(out, ";"))
+			return 0;
+		if (!hbuf_printf(out, "%zu", codes[i]))
+			return 0;
+	}
+	return HBUF_PUTSL(out, "m");
+}
+
+/*
+ * Take the given style "from" and apply it to "to".
+ * This accumulates styles: unless an override has been set, it adds to
+ * the existing style in "to" instead of overriding it.
+ * The one exception is TODO colours, which override each other.
+ */ +static void +rndr_node_style_apply(struct sty *to, const struct sty *from) +{ + + if (from->italic) + to->italic = 1; + if (from->strike) + to->strike = 1; + if (from->bold) + to->bold = 1; + else if ((from->override & OSTY_BOLD)) + to->bold = 0; + if (from->under) + to->under = 1; + else if ((from->override & OSTY_UNDER)) + to->under = 0; + if (from->bcolour) + to->bcolour = from->bcolour; + if (from->colour) + to->colour = from->colour; +} + +/* + * Apply the style for only the given node to the current style. + * This *augments* the current style: see rndr_node_style_apply(). + * (This does not ascend to the parent node.) + */ +static void +rndr_node_style(struct sty *s, const struct lowdown_node *n) +{ + + /* The basic node itself. */ + + if (stys[n->type] != NULL) + rndr_node_style_apply(s, stys[n->type]); + + /* Any special node situation that overrides. */ + + switch (n->type) { + case LOWDOWN_HEADER: + if (n->rndr_header.level > 0) + rndr_node_style_apply(s, &sty_header_n); + else + rndr_node_style_apply(s, &sty_header_1); + break; + default: + /* FIXME: crawl up nested? */ + if (n->parent != NULL && + n->parent->type == LOWDOWN_LINK) + rndr_node_style_apply(s, &sty_linkalt); + break; + } + + if (n->chng == LOWDOWN_CHNG_INSERT) + rndr_node_style_apply(s, &sty_chng_ins); + if (n->chng == LOWDOWN_CHNG_DELETE) + rndr_node_style_apply(s, &sty_chng_del); +} + +/* + * Bookkeep that we've put "len" characters into the current line. + */ +static void +rndr_buf_advance(struct term *term, size_t len) +{ + + term->col += len; + if (term->col && term->last_blank != 0) + term->last_blank = 0; +} + +/* + * Return non-zero if "n" or any of its ancestors require resetting the + * output line mode, otherwise return zero. + * This applies to both block and inline styles. 
+ */ +static int +rndr_buf_endstyle(const struct lowdown_node *n) +{ + struct sty s; + + if (n->parent != NULL) + if (rndr_buf_endstyle(n->parent)) + return 1; + + memset(&s, 0, sizeof(struct sty)); + rndr_node_style(&s, n); + return STY_NONEMPTY(&s); +} + +/* + * Unsets the current style context given "n" and an optional terminal + * style "osty", if applies. Return zero on failure (memory), non-zero + * on success. + */ +static int +rndr_buf_endwords(struct term *term, struct lowdown_buf *out, + const struct lowdown_node *n, const struct sty *osty) +{ + + if (rndr_buf_endstyle(n)) + return rndr_buf_unstyle(term, out, NULL); + if (osty != NULL) + return rndr_buf_unstyle(term, out, osty); + return 1; +} + +/* + * Like rndr_buf_endwords(), but also terminating the line itself. + * Return zero on failure (memory), non-zero on success. + */ +static int +rndr_buf_endline(struct term *term, struct lowdown_buf *out, + const struct lowdown_node *n, const struct sty *osty) +{ + + if (!rndr_buf_endwords(term, out, n, osty)) + return 0; + + /* + * We can legit be at col == 0 if, for example, we're in a + * literal context with a blank line. + * assert(term->col > 0); + * assert(term->last_blank == 0); + */ + + term->col = 0; + term->last_blank = 1; + return HBUF_PUTSL(out, "\n"); +} + +/* + * Return the printed width of the number up to six digits (we're + * probably not going to have more list items than that). + */ +static size_t +rndr_numlen(size_t sz) +{ + + if (sz > 100000) + return 6; + if (sz > 10000) + return 5; + if (sz > 1000) + return 4; + if (sz > 100) + return 3; + if (sz > 10) + return 2; + return 1; +} + +/* + * Output prefixes of the given node in the style further accumulated + * from the parent nodes. "Depth" is set to how deep we are, starting + * at -1 (the root). + * Return zero on failure (memory), non-zero on success. 
+ */
+static int
+rndr_buf_startline_prefixes(struct term *term,
+	struct sty *s, const struct lowdown_node *n,
+	struct lowdown_buf *out, size_t *depth)
+{
+	struct sty	 	 sinner;
+	const struct pfx	*pfx;
+	size_t	 		 i, emit, len;
+	int	 		 pstyle = 0;
+	enum hlist_fl		 fl;
+
+	/* Ancestors emit their prefixes first (root outward). */
+
+	if (n->parent != NULL &&
+	    !rndr_buf_startline_prefixes(term, s, n->parent, out, depth))
+		return 0;
+
+	if (n->parent == NULL) {
+		assert(n->type == LOWDOWN_ROOT);
+		*depth = -1;
+	}
+
+	/*
+	 * The "sinner" value is temporary for only this function.
+	 * This allows us to set a temporary style mask that only
+	 * applies to the prefix data.
+	 * Otherwise "s" propagates to the subsequent line.
+	 */
+
+	rndr_node_style(s, n);
+	sinner = *s;
+
+	/*
+	 * Look up the current node in the list of nodes we're
+	 * servicing so we can get how many times we've output the
+	 * prefix.  This is used for (e.g.) lists, where we only output
+	 * the list prefix once.  XXX: read backwards for faster perf?
+	 */
+
+	for (i = 0; i <= term->stackpos; i++)
+		if (term->stack[i].n == n)
+			break;
+
+	/*
+	 * If we can't find the node, then we're in a "faked" context
+	 * like footnotes within a table.  Ignore this.  XXX: is there a
+	 * non-hacky way for this?
+	 */
+
+	if (i > term->stackpos)
+		return 1;
+
+	/* Number of earlier emissions; the counter is bumped here. */
+
+	emit = term->stack[i].lines++;
+
+	/*
+	 * If we're below the document root and not a header, that means
+	 * we're in a body part.  Emit the general body indentation.
+	 */
+
+	if (*depth == 0 && n->type != LOWDOWN_HEADER) {
+		if (!hbuf_puts(out, pfx_body.text))
+			return 0;
+		rndr_buf_advance(term, pfx_body.cols);
+	} else if (*depth == 0) {
+		if (!hbuf_puts(out, pfx_header.text))
+			return 0;
+		rndr_buf_advance(term, pfx_header.cols);
+	}
+
+	/*
+	 * Output any prefixes.
+	 * Any output must have rndr_buf_style() and set pstyle so that
+	 * we close out the style afterward.
+	 */
+
+	switch (n->type) {
+	case LOWDOWN_BLOCKCODE:
+		rndr_node_style_apply(&sinner, &sty_bkcd_pfx);
+		if (!rndr_buf_style(term, out, &sinner))
+			return 0;
+		pstyle = 1;
+		if (!hbuf_puts(out, pfx_bkcd.text))
+			return 0;
+		rndr_buf_advance(term, pfx_bkcd.cols);
+		break;
+	case LOWDOWN_ROOT:
+		/* Left margin: "hmargin" spaces. */
+		if (!rndr_buf_style(term, out, &sinner))
+			return 0;
+		pstyle = 1;
+		for (i = 0; i < term->hmargin; i++)
+			if (!HBUF_PUTSL(out, " "))
+				return 0;
+		break;
+	case LOWDOWN_BLOCKQUOTE:
+		rndr_node_style_apply(&sinner, &sty_bkqt_pfx);
+		if (!rndr_buf_style(term, out, &sinner))
+			return 0;
+		pstyle = 1;
+		if (!hbuf_puts(out, pfx_bkqt.text))
+			return 0;
+		rndr_buf_advance(term, pfx_bkqt.cols);
+		break;
+	case LOWDOWN_DEFINITION_DATA:
+		rndr_node_style_apply(&sinner, &sty_dli_pfx);
+		if (!rndr_buf_style(term, out, &sinner))
+			return 0;
+		pstyle = 1;
+		/* First line and later lines use different prefixes. */
+		if (emit == 0) {
+			if (!hbuf_puts(out, pfx_dli_1.text))
+				return 0;
+			rndr_buf_advance(term, pfx_dli_1.cols);
+		} else {
+			if (!hbuf_puts(out, pfx_dli_n.text))
+				return 0;
+			rndr_buf_advance(term, pfx_dli_n.cols);
+		}
+		break;
+	case LOWDOWN_FOOTNOTE:
+		rndr_node_style_apply(&sinner, &sty_fdef_pfx);
+		if (!rndr_buf_style(term, out, &sinner))
+			return 0;
+		pstyle = 1;
+		if (emit == 0) {
+			if (!hbuf_printf(out, "%2zu. ",
+			    term->footsz + 1))
+				return 0;
+			/* Digits plus ". ", or the prefix width if wider. */
+			len = rndr_numlen(term->footsz + 1);
+			if (len + 2 > pfx_fdef_1.cols)
+				len += 2;
+			else
+				len = pfx_fdef_1.cols;
+			rndr_buf_advance(term, len);
+		} else {
+			if (!hbuf_puts(out, pfx_fdef_n.text))
+				return 0;
+			rndr_buf_advance(term, pfx_fdef_n.cols);
+		}
+		break;
+	case LOWDOWN_HEADER:
+		if (n->rndr_header.level == 0)
+			pfx = &pfx_header_1;
+		else
+			pfx = &pfx_header_n;
+		if (!rndr_buf_style(term, out, &sinner))
+			return 0;
+		pstyle = 1;
+		/* The prefix is repeated "level + 1" times. */
+		for (i = 0; i < n->rndr_header.level + 1; i++) {
+			if (!hbuf_puts(out, pfx->text))
+				return 0;
+			rndr_buf_advance(term, pfx->cols);
+		}
+		if (pfx->cols) {
+			if (!HBUF_PUTSL(out, " "))
+				return 0;
+			rndr_buf_advance(term, 1);
+		}
+		break;
+	case LOWDOWN_LISTITEM:
+		if (n->parent == NULL ||
+		    n->parent->type == LOWDOWN_DEFINITION_DATA)
+			break;
+
+		/* Don't print list item prefix after first. */
+
+		if (emit) {
+			if (!hbuf_puts(out, pfx_li_n.text))
+				return 0;
+			rndr_buf_advance(term, pfx_li_n.cols);
+			break;
+		}
+
+		/* List item prefix depends upon type. */
+
+		fl = n->rndr_list.flags;
+		rndr_node_style_apply(&sinner, &sty_li_pfx);
+		if (!rndr_buf_style(term, out, &sinner))
+			return 0;
+		pstyle = 1;
+
+		if (fl & HLIST_FL_CHECKED)
+			pfx = &pfx_uli_c1;
+		else if (fl & HLIST_FL_UNCHECKED)
+			pfx = &pfx_uli_nc1;
+		else if (fl & HLIST_FL_UNORDERED)
+			pfx = &pfx_uli_1;
+		else
+			pfx = &pfx_oli_1;
+
+		if (pfx == &pfx_oli_1) {
+			/* Ordered: print the item number itself. */
+			if (!hbuf_printf(out, "%2zu. ",
+			    n->rndr_listitem.num))
+				return 0;
+			len = rndr_numlen(n->rndr_listitem.num);
+			if (len + 2 > pfx->cols)
+				len += 2;
+			else
+				len = pfx->cols;
+		} else {
+			if (pfx->text != NULL &&
+			    !hbuf_puts(out, pfx->text))
+				return 0;
+			len = pfx->cols;
+		}
+		rndr_buf_advance(term, len);
+		break;
+	default:
+		break;
+	}
+
+	/* Close the prefix-only style, if one was opened. */
+
+	if (pstyle && !rndr_buf_unstyle(term, out, &sinner))
+		return 0;
+
+	(*depth)++;
+	return 1;
+}
+
+/*
+ * Like rndr_buf_startwords(), but at the start of a line.
+ * This also outputs all line prefixes of the block context.
+ * Return zero on failure (memory), non-zero on success.
+ */
+static int
+rndr_buf_startline(struct term *term, struct lowdown_buf *out,
+	const struct lowdown_node *n, const struct sty *osty)
+{
+	struct sty	 s;
+	size_t		 depth = 0;
+
+	/* Must only be called on a fresh, empty line. */
+
+	assert(term->last_blank);
+	assert(term->col == 0);
+
+	/* Prefixes accumulate the ancestry's style into "s". */
+
+	memset(&s, 0, sizeof(struct sty));
+	if (!rndr_buf_startline_prefixes(term, &s, n, out, &depth))
+		return 0;
+	if (osty != NULL)
+		rndr_node_style_apply(&s, osty);
+	return rndr_buf_style(term, out, &s);
+}
+
+/*
+ * Output optional number of newlines before or after content.
+ * Lines are added until "last_blank" reaches "sz", which must be
+ * greater than zero; nothing is emitted at the very start of output
+ * ("last_blank" of -1).
+ * Return zero on failure, non-zero on success.
+ */
+static int
+rndr_buf_vspace(struct term *term, struct lowdown_buf *out,
+	const struct lowdown_node *n, size_t sz)
+{
+	const struct lowdown_node	*prev;
+
+	if (term->last_blank == -1)
+		return 1;
+
+	prev = n->parent == NULL ? NULL :
+		TAILQ_PREV(n, lowdown_nodeq, entries);
+
+	assert(sz > 0);
+	while ((size_t)term->last_blank < sz) {
+		if (term->col || prev == NULL) {
+			/* Mid-line or first child: a bare newline. */
+			if (!HBUF_PUTSL(out, "\n"))
+				return 0;
+		} else {
+			/*
+			 * Otherwise emit an empty line carrying the
+			 * parent's line prefixes.
+			 */
+			if (!rndr_buf_startline
+			    (term, out, n->parent, NULL))
+				return 0;
+			if (!rndr_buf_endline
+			    (term, out, n->parent, NULL))
+				return 0;
+		}
+		term->last_blank++;
+		term->col = 0;
+	}
+	return 1;
+}
+
+/*
+ * Ascend to the root of the parse tree from rndr_buf_startwords(),
+ * accumulating styles as we do so.
+ * Styles are applied from the root downward, so "n" itself is applied
+ * last.
+ */
+static void
+rndr_buf_startwords_style(const struct lowdown_node *n, struct sty *s)
+{
+
+	if (n->parent != NULL)
+		rndr_buf_startwords_style(n->parent, s);
+	rndr_node_style(s, n);
+}
+
+/*
+ * Accumulate and output the style at the start of one or more words.
+ * Should *not* be called on the start of a new line, which calls for
+ * rndr_buf_startline().
+ * Return zero on failure, non-zero on success.
+ */
+static int
+rndr_buf_startwords(struct term *term, struct lowdown_buf *out,
+	const struct lowdown_node *n, const struct sty *osty)
+{
+	struct sty	 s;
+
+	/* Must be called in the middle of a line. */
+
+	assert(!term->last_blank);
+	assert(term->col > 0);
+
+	memset(&s, 0, sizeof(struct sty));
+	rndr_buf_startwords_style(n, &s);
+	if (osty != NULL)
+		rndr_node_style_apply(&s, osty);
+	return rndr_buf_style(term, out, &s);
+}
+
+/*
+ * Emit "in" verbatim, one output line per newline-separated input
+ * line, without word wrapping.  Each line gets its line prefixes and
+ * style and is then terminated.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+rndr_buf_literal(struct term *term, struct lowdown_buf *out,
+	const struct lowdown_node *n, const struct lowdown_buf *in,
+	const struct sty *osty)
+{
+	size_t		 i = 0, len;
+	const char	*start;
+
+	while (i < in->size) {
+		/* Find the end of the current input line. */
+		start = &in->data[i];
+		while (i < in->size && in->data[i] != '\n')
+			i++;
+		len = &in->data[i] - start;
+		/* Step over the newline itself. */
+		i++;
+		if (!rndr_buf_startline(term, out, n, osty))
+			return 0;
+
+		/*
+		 * No need to record the column width here because we're
+		 * going to reset to zero anyway.
+		 */
+
+		if (rndr_escape(term, out, start, len) < 0)
+			return 0;
+		rndr_buf_advance(term, len);
+		if (!rndr_buf_endline(term, out, n, osty))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Emit text in "in" the current line with output "out".
+ * Use "n" and its ancestry to determine our context.
+ * Text is emitted word by word, breaking the line when the next word
+ * would cross the maximum column; text within block code or block
+ * HTML is passed to rndr_buf_literal() instead.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+rndr_buf(struct term *term, struct lowdown_buf *out,
+	const struct lowdown_node *n, const struct lowdown_buf *in,
+	const struct sty *osty)
+{
+	size_t	 			 i = 0, len, cols;
+	ssize_t				 ret;
+	int				 needspace, begin = 1, end = 0;
+	const char			*start;
+	const struct lowdown_node	*nn;
+
+	/* Literal contexts anywhere in the ancestry bypass wrapping. */
+
+	for (nn = n; nn != NULL; nn = nn->parent)
+		if (nn->type == LOWDOWN_BLOCKCODE ||
+		    nn->type == LOWDOWN_BLOCKHTML)
+			return rndr_buf_literal(term, out, n, in, osty);
+
+	/* Start each word by seeing if it has leading space. */
+
+	while (i < in->size) {
+		needspace = isspace((unsigned char)in->data[i]);
+
+		while (i < in->size &&
+		    isspace((unsigned char)in->data[i]))
+			i++;
+
+		/* See how long the coming word is (may be 0). */
+
+		start = &in->data[i];
+		while (i < in->size &&
+		    !isspace((unsigned char)in->data[i]))
+			i++;
+		len = &in->data[i] - start;
+
+		/*
+		 * If we cross our maximum width and are preceded by a
+		 * space, then break.
+		 * (Leaving out the check for a space will cause
+		 * adjacent text or punctuation to have a preceding
+		 * newline.)
+		 * This will also unset the current style.
+		 */
+
+		if ((needspace ||
+		    (out->size && isspace
+		     ((unsigned char)out->data[out->size - 1]))) &&
+		    term->col && term->col + len > term->maxcol) {
+			if (!rndr_buf_endline(term, out, n, osty))
+				return 0;
+			end = 0;
+		}
+
+		/*
+		 * Either emit our new line prefix (only if we have a
+		 * word that will follow!) or, if we need space, emit
+		 * the spacing.  In the first case, or if we have
+		 * following text and are starting this node, emit our
+		 * current style.
+		 */
+
+		if (term->last_blank && len) {
+			if (!rndr_buf_startline(term, out, n, osty))
+				return 0;
+			begin = 0;
+			end = 1;
+		} else if (!term->last_blank) {
+			if (begin && len) {
+				if (!rndr_buf_startwords
+				    (term, out, n, osty))
+					return 0;
+				begin = 0;
+				end = 1;
+			}
+			if (needspace) {
+				if (!HBUF_PUTSL(out, " "))
+					return 0;
+				rndr_buf_advance(term, 1);
+			}
+		}
+
+		/* Emit the word itself. */
+
+		if ((ret = rndr_escape(term, out, start, len)) < 0)
+			return 0;
+		cols = ret;
+		rndr_buf_advance(term, cols);
+	}
+
+	/* Close the style if one is still open. */
+
+	if (end) {
+		assert(begin == 0);
+		if (!rndr_buf_endwords(term, out, n, osty))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Output the unicode entry "val", which must be strictly greater than
+ * zero, as a UTF-8 sequence.
+ * This does no error checking.
+ * Return zero on failure (memory), non-zero on success.
+ */ +static int +rndr_entity(struct lowdown_buf *buf, int32_t val) +{ + + assert(val > 0); + + if (val < 0x80) + return hbuf_putc(buf, val); + + if (val < 0x800) + return hbuf_putc(buf, 192 + val / 64) && + hbuf_putc(buf, 128 + val % 64); + + if (val - 0xd800u < 0x800) + return 1; + + if (val < 0x10000) + return hbuf_putc(buf, 224 + val / 4096) && + hbuf_putc(buf, 128 + val / 64 % 64) && + hbuf_putc(buf, 128 + val % 64); + + if (val < 0x110000) + return hbuf_putc(buf, 240 + val / 262144) && + hbuf_putc(buf, 128 + val / 4096 % 64) && + hbuf_putc(buf, 128 + val / 64 % 64) && + hbuf_putc(buf, 128 + val % 64); + + return 1; +} + +/* + * Adjust the stack of current nodes we're looking at. + */ +static int +rndr_stackpos_init(struct term *p, const struct lowdown_node *n) +{ + void *pp; + + if (p->stackpos >= p->stackmax) { + p->stackmax += 256; + pp = reallocarray(p->stack, + p->stackmax, sizeof(struct tstack)); + if (pp == NULL) + return 0; + p->stack = pp; + } + + memset(&p->stack[p->stackpos], 0, sizeof(struct tstack)); + p->stack[p->stackpos].n = n; + return 1; +} + +/* + * Return zero on failure (memory), non-zero on success. + */ +static int +rndr_table(struct lowdown_buf *ob, struct term *p, + const struct lowdown_node *n) +{ + size_t *widths = NULL; + const struct lowdown_node *row, *top, *cell; + struct lowdown_buf *celltmp = NULL, *rowtmp = NULL; + size_t col, i, j, maxcol, sz, footsz; + ssize_t last_blank; + unsigned int flags; + int rc = 0; + + assert(n->type == LOWDOWN_TABLE_BLOCK); + + widths = calloc(n->rndr_table.columns, sizeof(size_t)); + if (widths == NULL) + goto out; + + if ((rowtmp = hbuf_new(128)) == NULL || + (celltmp = hbuf_new(128)) == NULL) + goto out; + + /* + * Begin by counting the number of printable columns in each + * column in each row. We don't want to collect additional + * footnotes, as we're going to do so in the next iteration, and + * keep the current size (which will otherwise advance). 
+ */ + + assert(!p->footoff); + p->footoff = 1; + footsz = p->footsz; + + TAILQ_FOREACH(top, &n->children, entries) { + assert(top->type == LOWDOWN_TABLE_HEADER || + top->type == LOWDOWN_TABLE_BODY); + TAILQ_FOREACH(row, &top->children, entries) + TAILQ_FOREACH(cell, &row->children, entries) { + i = cell->rndr_table_cell.col; + assert(i < n->rndr_table.columns); + hbuf_truncate(celltmp); + + /* + * Simulate that we're starting within + * the line by unsetting last_blank, + * having a non-zero column, and an + * infinite maximum column to prevent + * line wrapping. + */ + + maxcol = p->maxcol; + last_blank = p->last_blank; + col = p->col; + + p->last_blank = 0; + p->maxcol = SIZE_MAX; + p->col = 1; + if (!rndr(celltmp, p, cell)) + goto out; + if (widths[i] < p->col) + widths[i] = p->col; + p->last_blank = last_blank; + p->col = col; + p->maxcol = maxcol; + } + } + + /* Restore footnotes. */ + + p->footsz = footsz; + assert(p->footoff); + p->footoff = 0; + + /* Now actually print, row-by-row into the output. */ + + TAILQ_FOREACH(top, &n->children, entries) { + assert(top->type == LOWDOWN_TABLE_HEADER || + top->type == LOWDOWN_TABLE_BODY); + TAILQ_FOREACH(row, &top->children, entries) { + hbuf_truncate(rowtmp); + TAILQ_FOREACH(cell, &row->children, entries) { + i = cell->rndr_table_cell.col; + hbuf_truncate(celltmp); + maxcol = p->maxcol; + last_blank = p->last_blank; + col = p->col; + + p->last_blank = 0; + p->maxcol = SIZE_MAX; + p->col = 1; + if (!rndr(celltmp, p, cell)) + goto out; + assert(widths[i] >= p->col); + sz = widths[i] - p->col; + + /* + * Alignment is either beginning, + * ending, or splitting the remaining + * spaces around the word. + * Be careful about uneven splitting in + * the case of centre. 
+ */ + + flags = cell->rndr_table_cell.flags & + HTBL_FL_ALIGNMASK; + if (flags == HTBL_FL_ALIGN_RIGHT) + for (j = 0; j < sz; j++) + if (!HBUF_PUTSL(rowtmp, " ")) + goto out; + if (flags == HTBL_FL_ALIGN_CENTER) + for (j = 0; j < sz / 2; j++) + if (!HBUF_PUTSL(rowtmp, " ")) + goto out; + if (!hbuf_putb(rowtmp, celltmp)) + goto out; + if (flags == 0 || + flags == HTBL_FL_ALIGN_LEFT) + for (j = 0; j < sz; j++) + if (!HBUF_PUTSL(rowtmp, " ")) + goto out; + if (flags == HTBL_FL_ALIGN_CENTER) { + sz = (sz % 2) ? + (sz / 2) + 1 : (sz / 2); + for (j = 0; j < sz; j++) + if (!HBUF_PUTSL(rowtmp, " ")) + goto out; + } + + p->last_blank = last_blank; + p->col = col; + p->maxcol = maxcol; + + if (TAILQ_NEXT(cell, entries) == NULL) + continue; + + if (!rndr_buf_style(p, rowtmp, &sty_table) || + !hbuf_printf(rowtmp, " %s ", ifx_table_col) || + !rndr_buf_unstyle(p, rowtmp, &sty_table)) + goto out; + } + + /* + * Some magic here. + * First, emulate rndr() by setting the + * stackpos to the table, which is required for + * checking the line start. + * Then directly print, as we've already escaped + * all characters, and have embedded escapes of + * our own. Then end the line. 
+ */ + + p->stackpos++; + if (!rndr_stackpos_init(p, n)) + goto out; + if (!rndr_buf_startline(p, ob, n, NULL)) + goto out; + if (!hbuf_putb(ob, rowtmp)) + goto out; + rndr_buf_advance(p, 1); + if (!rndr_buf_endline(p, ob, n, NULL)) + goto out; + if (!rndr_buf_vspace(p, ob, n, 1)) + goto out; + p->stackpos--; + } + + if (top->type == LOWDOWN_TABLE_HEADER) { + p->stackpos++; + if (!rndr_stackpos_init(p, n)) + goto out; + if (!rndr_buf_startline(p, ob, n, &sty_table)) + goto out; + for (i = 0; i < n->rndr_table.columns; i++) { + for (j = 0; j < widths[i]; j++) + if (!hbuf_puts(ob, ifx_table_row)) + goto out; + if (i < n->rndr_table.columns - 1 && + !hbuf_printf(ob, "%s%s", + ifx_table_col, ifx_table_row)) + goto out; + } + rndr_buf_advance(p, 1); + if (!rndr_buf_endline(p, ob, n, &sty_table)) + goto out; + if (!rndr_buf_vspace(p, ob, n, 1)) + goto out; + p->stackpos--; + } + } + + rc = 1; +out: + hbuf_free(celltmp); + hbuf_free(rowtmp); + free(widths); + return rc; +} + +static int +rndr(struct lowdown_buf *ob, struct term *p, + const struct lowdown_node *n) +{ + const struct lowdown_node *child, *nn; + struct lowdown_buf *metatmp; + void *pp; + int32_t entity; + size_t i, col, vs; + ssize_t last_blank; + + /* Current nodes we're servicing. */ + + if (!rndr_stackpos_init(p, n)) + return 0; + + /* + * Vertical space before content. Vertical space (>1 space) is + * suppressed for normal blocks when in a non-block list, as the + * list item handles any spacing. Furthermore, definition list + * data also has its spaces suppressed because this is relegated + * to the title. The root gets the vertical margin as well. 
+ */ + + vs = 0; + switch (n->type) { + case LOWDOWN_ROOT: + for (i = 0; i < p->vmargin; i++) + if (!HBUF_PUTSL(ob, "\n")) + return 0; + p->last_blank = -1; + break; + case LOWDOWN_BLOCKCODE: + case LOWDOWN_BLOCKHTML: + case LOWDOWN_BLOCKQUOTE: + case LOWDOWN_DEFINITION: + case LOWDOWN_DEFINITION_TITLE: + case LOWDOWN_HEADER: + case LOWDOWN_LIST: + case LOWDOWN_TABLE_BLOCK: + case LOWDOWN_PARAGRAPH: + vs = 2; + for (nn = n->parent; nn != NULL; nn = nn->parent) { + if (nn->type != LOWDOWN_LISTITEM) + continue; + vs = (nn->rndr_listitem.flags & HLIST_FL_BLOCK) ? 2 : 1; + break; + } + break; + case LOWDOWN_MATH_BLOCK: + vs = n->rndr_math.blockmode ? 1 : 0; + break; + case LOWDOWN_DEFINITION_DATA: + case LOWDOWN_HRULE: + case LOWDOWN_LINEBREAK: + case LOWDOWN_META: + vs = 1; + break; + case LOWDOWN_LISTITEM: + vs = 1; + if (n->rndr_listitem.flags & HLIST_FL_BLOCK) { + for (nn = n->parent; nn != NULL; nn = nn->parent) + if (nn->type == LOWDOWN_LISTITEM || + nn->type == LOWDOWN_DEFINITION_DATA) + break; + vs = nn == NULL ? 2 : 1; + } + break; + default: + break; + } + + if (vs > 0 && !rndr_buf_vspace(p, ob, n, vs)) + return 0; + + /* Output leading content. */ + + switch (n->type) { + case LOWDOWN_SUPERSCRIPT: + hbuf_truncate(p->tmp); + if (!hbuf_puts(p->tmp, ifx_super) || + !rndr_buf(p, ob, n, p->tmp, NULL)) + return 0; + break; + case LOWDOWN_META: + if (!rndr_buf(p, ob, n, + &n->rndr_meta.key, &sty_meta_key)) + return 0; + hbuf_truncate(p->tmp); + if (!hbuf_puts(p->tmp, ifx_meta_key) || + !rndr_buf(p, ob, n, p->tmp, &sty_meta_key)) + return 0; + break; + default: + break; + } + + /* Descend into children. 
*/ + + switch (n->type) { + case LOWDOWN_FOOTNOTE: + if (p->footoff) { + p->footsz++; + break; + } + last_blank = p->last_blank; + p->last_blank = -1; + col = p->col; + p->col = 0; + if ((metatmp = hbuf_new(128)) == NULL) + return 0; + TAILQ_FOREACH(child, &n->children, entries) { + p->stackpos++; + if (!rndr(metatmp, p, child)) { + /* Not yet owned by p->foots: release it here. */ + hbuf_free(metatmp); + return 0; + } + p->stackpos--; + } + p->last_blank = last_blank; + p->col = col; + pp = recallocarray(p->foots, p->footsz, + p->footsz + 1, sizeof(struct lowdown_buf *)); + if (pp == NULL) { + /* p->foots is untouched on failure; drop the orphan buffer. */ + hbuf_free(metatmp); + return 0; + } + p->foots = pp; + p->foots[p->footsz++] = metatmp; + break; + case LOWDOWN_TABLE_BLOCK: + if (!rndr_table(ob, p, n)) + return 0; + break; + default: + TAILQ_FOREACH(child, &n->children, entries) { + p->stackpos++; + if (!rndr(ob, p, child)) + return 0; + p->stackpos--; + } + break; + } + + /* Output content. */ + + switch (n->type) { + case LOWDOWN_HRULE: + hbuf_truncate(p->tmp); + if (!hbuf_puts(p->tmp, ifx_hrule)) + return 0; + if (!rndr_buf(p, ob, n, p->tmp, NULL)) + return 0; + break; + case LOWDOWN_FOOTNOTE: + hbuf_truncate(p->tmp); + if (!hbuf_printf(p->tmp, "%s%zu%s", ifx_fref_left, + p->footsz, ifx_fref_right)) + return 0; + if (!rndr_buf(p, ob, n, p->tmp, &sty_fref)) + return 0; + break; + case LOWDOWN_RAW_HTML: + if (!rndr_buf(p, ob, n, &n->rndr_raw_html.text, NULL)) + return 0; + break; + case LOWDOWN_MATH_BLOCK: + if (!rndr_buf(p, ob, n, &n->rndr_math.text, NULL)) + return 0; + break; + case LOWDOWN_ENTITY: + entity = entity_find_iso(&n->rndr_entity.text); + if (entity > 0) { + hbuf_truncate(p->tmp); + if (!rndr_entity(p->tmp, entity)) + return 0; + if (!rndr_buf(p, ob, n, p->tmp, NULL)) + return 0; + } else { + if (!rndr_buf(p, ob, n, + &n->rndr_entity.text, &sty_bad_ent)) + return 0; + } + break; + case LOWDOWN_BLOCKCODE: + if (!rndr_buf(p, ob, n, &n->rndr_blockcode.text, NULL)) + return 0; + break; + case LOWDOWN_BLOCKHTML: + if (!rndr_buf(p, ob, n, &n->rndr_blockhtml.text, NULL)) + return 0; + break; + case LOWDOWN_CODESPAN: +
if (!rndr_buf(p, ob, n, &n->rndr_codespan.text, NULL)) + return 0; + break; + case LOWDOWN_LINK_AUTO: + if (p->opts & LOWDOWN_TERM_SHORTLINK) { + hbuf_truncate(p->tmp); + if (!hbuf_shortlink + (p->tmp, &n->rndr_autolink.link)) + return 0; + if (!rndr_buf(p, ob, n, p->tmp, NULL)) + return 0; + } else { + if (!rndr_buf(p, ob, n, + &n->rndr_autolink.link, NULL)) + return 0; + } + break; + case LOWDOWN_LINK: + if (p->opts & LOWDOWN_TERM_NOLINK) + break; + hbuf_truncate(p->tmp); + if (!HBUF_PUTSL(p->tmp, " ")) + return 0; + if (!rndr_buf(p, ob, n, p->tmp, NULL)) + return 0; + if (p->opts & LOWDOWN_TERM_SHORTLINK) { + hbuf_truncate(p->tmp); + if (!hbuf_shortlink + (p->tmp, &n->rndr_link.link)) + return 0; + if (!rndr_buf(p, ob, n, p->tmp, NULL)) + return 0; + } else { + if (!rndr_buf(p, ob, n, + &n->rndr_link.link, NULL)) + return 0; + } + break; + case LOWDOWN_IMAGE: + if (!rndr_buf(p, ob, n, &n->rndr_image.alt, NULL)) + return 0; + if (n->rndr_image.alt.size) { + hbuf_truncate(p->tmp); + if (!HBUF_PUTSL(p->tmp, " ")) + return 0; + if (!rndr_buf(p, ob, n, p->tmp, NULL)) + return 0; + } + if (p->opts & LOWDOWN_TERM_NOLINK) { + hbuf_truncate(p->tmp); + if (!hbuf_puts(p->tmp, ifx_imgbox_left)) + return 0; + if (!hbuf_puts(p->tmp, ifx_imgbox_right)) + return 0; + if (!rndr_buf(p, ob, n, p->tmp, &sty_imgbox)) + return 0; + break; + } + hbuf_truncate(p->tmp); + if (!hbuf_puts(p->tmp, ifx_imgbox_left)) + return 0; + if (!hbuf_puts(p->tmp, ifx_imgbox_sep)) + return 0; + if (!rndr_buf(p, ob, n, p->tmp, &sty_imgbox)) + return 0; + if (p->opts & LOWDOWN_TERM_SHORTLINK) { + hbuf_truncate(p->tmp); + if (!hbuf_shortlink + (p->tmp, &n->rndr_image.link)) + return 0; + if (!rndr_buf(p, ob, n, p->tmp, &sty_imgurl)) + return 0; + } else + if (!rndr_buf(p, ob, n, + &n->rndr_image.link, &sty_imgurl)) + return 0; + hbuf_truncate(p->tmp); + if (!hbuf_puts(p->tmp, ifx_imgbox_right)) + return 0; + if (!rndr_buf(p, ob, n, p->tmp, &sty_imgbox)) + return 0; + break; + case LOWDOWN_NORMAL_TEXT: + 
if (!rndr_buf(p, ob, n, + &n->rndr_normal_text.text, NULL)) + return 0; + break; + default: + break; + } + + /* Trailing block spaces. */ + + if (n->type == LOWDOWN_ROOT) { + if (p->footsz) { + if (!rndr_buf_vspace(p, ob, n, 2)) + return 0; + hbuf_truncate(p->tmp); + if (!hbuf_puts(p->tmp, pfx_body.text)) + return 0; + if (!hbuf_puts(p->tmp, ifx_foot)) + return 0; + if (!rndr_buf_literal(p, ob, n, p->tmp, &sty_foot)) + return 0; + if (!rndr_buf_vspace(p, ob, n, 2)) + return 0; + for (i = 0; i < p->footsz; i++) { + if (!hbuf_putb(ob, p->foots[i])) + return 0; + if (!HBUF_PUTSL(ob, "\n")) + return 0; + } + } + if (!rndr_buf_vspace(p, ob, n, 1)) + return 0; + while (ob->size && ob->data[ob->size - 1] == '\n') + ob->size--; + if (!HBUF_PUTSL(ob, "\n")) + return 0; + + /* Strip breaks but for the vmargin. */ + + for (i = 0; i < p->vmargin; i++) + if (!HBUF_PUTSL(ob, "\n")) + return 0; + } + + return 1; +} + +int +lowdown_term_rndr(struct lowdown_buf *ob, + void *arg, const struct lowdown_node *n) +{ + struct term *st = arg; + int rc; + + st->stackpos = 0; + + rc = rndr(ob, st, n); + rndr_free_footnotes(st); + return rc; +} + +void * +lowdown_term_new(const struct lowdown_opts *opts) +{ + struct term *p; + + if ((p = calloc(1, sizeof(struct term))) == NULL) + return NULL; + + /* Give us 80 columns by default. */ + + if (opts != NULL) { + p->maxcol = opts->cols == 0 ? 
80 : opts->cols; + p->hmargin = opts->hmargin; + p->vmargin = opts->vmargin; + p->opts = opts->oflags; + } else + p->maxcol = 80; + + if ((p->tmp = hbuf_new(32)) == NULL) { + free(p); + return NULL; + } + return p; +} + +void +lowdown_term_free(void *arg) +{ + struct term *p = arg; + + if (p == NULL) + return; + + hbuf_free(p->tmp); + free(p->buf); + free(p->stack); + free(p); +} diff --git a/term.h b/term.h @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2021 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This file is for direct inclusion into term.c. It allows an easy + * place to make compile-time overrides of default styles. + */ + +/* + * Styles + * ====== + * + * Begin with text styles. Each style should be formatted as follows: + * + * static const struct sty sty_STYLE = { + * italic?, strike?, bold?, under?, bgcolour, colour, override? + * }; + * + * Italic, strike, bold, and under may be zero or non-zero numbers. If + * non-zero, the given style is applied and is inherited by all child + * styles. + * + * Override is a bit-mask of styles that are overridden. If 1 is set, + * the underline is overridden; if 2, the bold.
+ * + * Bgcolour and colour may be zero or an 8-bit ANSI colour escape code + * for standard or high-intensity colours, e.g., 30 = black, 36 = cyan, + * 91 = bright red. Non-conforming values are ignored. See + * <https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit>. These + * are not inherited by child styles. + * + * Please note that if NO_COLOR is specified during run-time, all of the + * colour codes will be stripped. When customising this, please make + * sure that your style will work both with colours and without. + */ + +/* For inserted content. Common parent style. */ +static const struct sty sty_chng_ins = { 0, 0, 0, 0, 44, 0, 0 }; + +/* For deleted content. Common parent style. */ +static const struct sty sty_chng_del = { 0, 0, 0, 0, 41, 0, 0 }; + +/* Image: >< */ +static const struct sty sty_img = { 0, 0, 1, 0, 0, 93, 1 }; + +/* Box around image link (in sty_img):  */ +static const struct sty sty_imgbox = { 0, 0, 0, 0, 0, 37, 2 }; + +/* Image link text (in sty_img):  */ +static const struct sty sty_imgurl = { 0, 0, 0, 1, 0, 32, 2 }; + +/* Footnote reference (as a number): >[^ref]< */ +static const struct sty sty_fref = { 0, 0, 1, 0, 0, 93, 1 }; + +/* In-line code: >`foo(void)`< */ +static const struct sty sty_codespan = { 0, 0, 1, 0, 0, 94, 0 }; + +/* Block code: ```foo(void)```< */ +static const struct sty sty_blockcode = { 0, 0, 1, 0, 0, 0, 0 }; + +/* Horizontal line: >***< */ +static const struct sty sty_hrule = { 0, 0, 0, 0, 0, 37, 0 }; + +/* Block HTML: ><html></html>< */ +static const struct sty sty_blockhtml = { 0, 0, 0, 0, 0, 37, 0 }; + +/* In-line HTML: ><span>< */ +static const struct sty sty_rawhtml = { 0, 0, 0, 0, 0, 37, 0 }; + +/* Strike-through: >~~foo~~< */ +static const struct sty sty_strike = { 0, 1, 0, 0, 0, 0, 0 }; + +/* Emphasis: >*foo*< */ +static const struct sty sty_emph = { 1, 0, 0, 0, 0, 0, 0 }; + +/* Highlight: >==foo==< */ +static const struct sty sty_highlight = { 0, 0, 1, 0, 0, 0, 0 }; + +/* Double-emphasis: >**foo**< */ 
+static const struct sty sty_d_emph = { 0, 0, 1, 0, 0, 0, 0 }; + +/* Triple emphasis: >***foo***< */ +static const struct sty sty_t_emph = { 1, 0, 1, 0, 0, 0, 0 }; + +/* Link: >[text](link)< */ +static const struct sty sty_link = { 0, 0, 0, 1, 0, 32, 0 }; + +/* Link text (in sty_link): [>text<](link) */ +static const struct sty sty_linkalt = { 0, 0, 1, 0, 0, 93, 1|2 }; + +/* Standalone link: >https://link< */ +static const struct sty sty_autolink = { 0, 0, 0, 1, 0, 32, 0 }; + +/* Header: >## Header< */ +static const struct sty sty_header = { 0, 0, 1, 0, 0, 0, 0 }; + +/* First header (in sty_header): ># Header< */ +static const struct sty sty_header_1 = { 0, 0, 0, 0, 0, 91, 0 }; + +/* Non-first header (in sty_header): >### Header< */ +static const struct sty sty_header_n = { 0, 0, 0, 0, 0, 36, 0 }; + +/* Footnote block: >[^ref]: foo bar< */ +static const struct sty sty_foot = { 0, 0, 0, 0, 0, 37, 0 }; + +/* Footnote prefix (in sty_foot, as a number): >[^ref]<: foo bar */ +static const struct sty sty_fdef_pfx = { 0, 0, 0, 0, 0, 92, 1 }; + +/* Metadata key: >key:< val */ +static const struct sty sty_meta_key = { 0, 0, 0, 0, 0, 37, 0 }; + +/* Entity (if not valid): >&#badent;< */ +static const struct sty sty_bad_ent = { 0, 0, 0, 0, 0, 37, 0 }; + +/* Definition list data prefix (see pfx_dli_1): foo \n >:< bar */ +static const struct sty sty_dli_pfx = { 0, 0, 0, 0, 0, 93, 0 }; + +/* List prefix (see pfx_oli_1, pfx_uli_1): >1.< foo */ +static const struct sty sty_li_pfx = { 0, 0, 0, 0, 0, 93, 0 }; + +/* Block quote prefix (see pfx_bkqt): >|< foo */ +static const struct sty sty_bkqt_pfx = { 0, 0, 0, 0, 0, 93, 0 }; + +/* Block code prefix (see pfx_bkcd): ``` >|< void \n >|< main``` */ +static const struct sty sty_bkcd_pfx = { 0, 0, 0, 0, 0, 94, 0 }; + +/* Table separator (see ifx_table_col, ifx_table_row). */ +static const struct sty sty_table = { 0, 0, 0, 0, 0, 93, 0 }; + +/* + * Prefixes + * ======== + * + * What follows are hard-coded prefixes.
These appear on the left of + * the output. Each prefix is arranged as: + * + * static const struct pfx pfx_STYLE = { text, columns }; + * + * The text is a quoted string that will be inserted as-is. It may + * contain UTF-8 values. It may be NULL *only* if the documentation + * specifically says that the value is ignored. + * + * Columns is the number of terminal columns that the prefix fills. If + * this is wrong, it will throw off line wrapping. XXX: this may be + * dynamically computed in later versions of lowdown. + * + * If the prefix text is ignored, the columns are the minimum: for + * example, an ordered list may have numbers >100, which with the ". " + * would be greater than 4, if 4 is given as the columns. However, at + * least 4 spaces would be printed, even if it were "1. " -> " 1. ". + */ + +/* All non-header (child to the document root) content (no style). */ +static const struct pfx pfx_body = { " ", 4 }; + +/* All header (child to the document root) content (no style). */ +static const struct pfx pfx_header = { "", 0 }; + +/* Block code (see sty_bkcd_pfx). */ +static const struct pfx pfx_bkcd = { " | ", 4 }; + +/* Block quote (see sty_bkqt_pfx). */ +static const struct pfx pfx_bkqt = { " | ", 4 }; + +/* Definition list data, first line (see sty_dli_pfx). */ +static const struct pfx pfx_dli_1 = { " : ", 4 }; + +/* Definition list data, subsequent line (see sty_dli_pfx). */ +static const struct pfx pfx_dli_n = { " ", 4 }; + +/* Minimum ordered list item, first line (see sty_li_pfx). Text ignored. */ +static const struct pfx pfx_oli_1 = { NULL, 4 }; + +/* Unordered list item, first line (see sty_li_pfx). */ +static const struct pfx pfx_uli_1 = { " · ", 4 }; + +/* Unordered, checked list data, first line (see sty_li_pfx). */ +static const struct pfx pfx_uli_c1 = { " ☑ ", 4 }; + +/* Unordered, unchecked list data, first line (see sty_li_pfx). */ +static const struct pfx pfx_uli_nc1 = { " ☐ ", 4 }; + +/* List items, subsequent lines (see sty_li_pfx).
*/ +static const struct pfx pfx_li_n = { " ", 4 }; + +/* Minimum footnote prefix, first line (see sty_fdef_pfx). Text ignored. */ +static const struct pfx pfx_fdef_1 = { NULL, 4 }; + +/* Footnote prefix, subsequent lines (see sty_fdef_pfx). */ +static const struct pfx pfx_fdef_n = { " ", 4 }; + +/* Header first prefix (see sty_header_1). */ +static const struct pfx pfx_header_1 = { "", 0 }; + +/* Header non-first prefix, one per head level (see sty_header_n). */ +static const struct pfx pfx_header_n = { "#", 1 }; + +/* + * Infixes + * ======= + * + * These are character strings that appear throughout text. They may + * consist of UTF-8 characters. NULL is not allowed. + */ + +/* Footnote block header (see sty_foot). */ +static const char *ifx_foot = "~~~~~~~~"; + +/* Superscript. */ +static const char *ifx_super = "^"; + +/* Metadata key (see sty_meta_key). */ +static const char *ifx_meta_key = ": "; + +/* Horizontal rule. */ +static const char *ifx_hrule = "~~~~~~~~"; + +/* Image link box left-box (see sty_imgbox). */ +static const char *ifx_imgbox_left = "[Image"; + +/* Image link box right-box (see sty_imgbox). */ +static const char *ifx_imgbox_right = "]"; + +/* Image link box separator (see sty_imgbox). */ +static const char *ifx_imgbox_sep = ": "; + +/* Footnote reference left-box (see sty_fref). */ +static const char *ifx_fref_left = "["; + +/* Footnote reference right-box (see sty_fref). */ +static const char *ifx_fref_right = "]"; + +/* Table column separator (see sty_table). */ +static const char *ifx_table_col = "|"; + +/* Table header row separator (see sty_table). */ +static const char *ifx_table_row = "-"; diff --git a/term.o b/term.o Binary files differ. diff --git a/tests.c b/tests.c @@ -0,0 +1,685 @@ +#if TEST___PROGNAME +int +main(void) +{ + extern char *__progname; + + return !__progname; +} +#endif /* TEST___PROGNAME */ +#if TEST_ARC4RANDOM +#include <stdlib.h> + +int +main(void) +{ + return (arc4random() + 1) ? 
0 : 1; +} +#endif /* TEST_ARC4RANDOM */ +#if TEST_B64_NTOP +#include <netinet/in.h> +#include <resolv.h> + +int +main(void) +{ + const char *src = "hello world"; + char output[1024]; + + return b64_ntop((const unsigned char *)src, 11, output, sizeof(output)) > 0 ? 0 : 1; +} +#endif /* TEST_B64_NTOP */ +#if TEST_CAPSICUM +#include <sys/capsicum.h> + +int +main(void) +{ + cap_enter(); + return(0); +} +#endif /* TEST_CAPSICUM */ +#if TEST_CRYPT +#if defined(__linux__) +# define _GNU_SOURCE /* old glibc */ +# define _DEFAULT_SOURCE /* new glibc */ +#endif +#if defined(__sun) +# ifndef _XOPEN_SOURCE /* SunOS already defines */ +# define _XOPEN_SOURCE /* XPGx */ +# endif +# define _XOPEN_SOURCE_EXTENDED 1 /* XPG4v2 */ +# ifndef __EXTENSIONS__ /* SunOS already defines */ +# define __EXTENSIONS__ /* reallocarray, etc. */ +# endif +#endif +#include <unistd.h> + +int main(void) +{ + char *v; + + v = crypt("this_is_a_key", "123455"); + return v == NULL; +} +#endif /* TEST_CRYPT */ +#if TEST_ENDIAN_H +#ifdef __linux__ +# define _DEFAULT_SOURCE +#endif +#include <endian.h> + +int +main(void) +{ + return !htole32(23); +} +#endif /* TEST_ENDIAN_H */ +#if TEST_ERR +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <err.h> +#include <errno.h> + +int +main(void) +{ + warnx("%d. warnx", 1); + warnc(ENOENT, "%d. warn", ENOENT); + warn("%d. warn", 2); + err(0, "%d. err", 3); + errx(0, "%d. err", 3); + errc(0, ENOENT, "%d. err", 3); + /* NOTREACHED */ + return 1; +} +#endif /* TEST_ERR */ +#if TEST_EXPLICIT_BZERO +#include <string.h> + +int +main(void) +{ + char foo[10]; + + explicit_bzero(foo, sizeof(foo)); + return(0); +} +#endif /* TEST_EXPLICIT_BZERO */ +#if TEST_FTS +#include <stddef.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fts.h> + +int +main(void) +{ + const char *argv[2]; + FTS *ftsp; + FTSENT *entry; + + argv[0] = "."; + argv[1] = (char *)NULL; + + ftsp = fts_open((char * const *)argv, + FTS_PHYSICAL | FTS_NOCHDIR, NULL); + + if (ftsp == NULL) + return 1; + + entry = fts_read(ftsp); + + if (entry == NULL) + return 1; + + if (fts_set(ftsp, entry, FTS_SKIP) != 0) + return 1; + + if (fts_close(ftsp) != 0) + return 1; + + return 0; +} +#endif /* TEST_FTS */ +#if TEST_GETEXECNAME +#include <stdlib.h> + +int +main(void) +{ + const char * progname; + + progname = getexecname(); + return progname == NULL; +} +#endif /* TEST_GETEXECNAME */ +#if TEST_GETPROGNAME +#include <stdlib.h> + +int +main(void) +{ + const char * progname; + + progname = getprogname(); + return progname == NULL; +} +#endif /* TEST_GETPROGNAME */ +#if TEST_INFTIM +/* + * Linux doesn't (always?) have this. 
+ */ + +#include <poll.h> +#include <stdio.h> + +int +main(void) +{ + printf("INFTIM is defined to be %ld\n", (long)INFTIM); + return 0; +} +#endif /* TEST_INFTIM */ +#if TEST_LIB_SOCKET +#include <sys/socket.h> + +int +main(void) +{ + int fds[2], c; + + c = socketpair(AF_UNIX, SOCK_STREAM, 0, fds); + return c == -1; +} +#endif /* TEST_LIB_SOCKET */ +#if TEST_MD5 +#include <sys/types.h> +#include <md5.h> + +int main(void) +{ + MD5_CTX ctx; + char result[MD5_DIGEST_STRING_LENGTH]; + + MD5Init(&ctx); + MD5Update(&ctx, (const unsigned char *)"abcd", 4); + MD5End(&ctx, result); + + return 0; +} +#endif /* TEST_MD5 */ +#if TEST_MEMMEM +#define _GNU_SOURCE +#include <string.h> + +int +main(void) +{ + char *a = memmem("hello, world", strlen("hello, world"), "world", strlen("world")); + return(NULL == a); +} +#endif /* TEST_MEMMEM */ +#if TEST_MEMRCHR +#if defined(__linux__) || defined(__MINT__) +#define _GNU_SOURCE /* See test-*.c what needs this. */ +#endif +#include <string.h> + +int +main(void) +{ + const char *buf = "abcdef"; + void *res; + + res = memrchr(buf, 'a', strlen(buf)); + return(NULL == res ? 
1 : 0); +} +#endif /* TEST_MEMRCHR */ +#if TEST_MEMSET_S +#include <string.h> + +int main(void) +{ + char buf[10]; + memset_s(buf, 0, 'c', sizeof(buf)); + return 0; +} +#endif /* TEST_MEMSET_S */ +#if TEST_MKFIFOAT +#include <sys/stat.h> +#include <fcntl.h> + +int main(void) { + mkfifoat(AT_FDCWD, "this/path/should/not/exist", 0600); + return 0; +} +#endif /* TEST_MKFIFOAT */ +#if TEST_MKNODAT +#include <sys/stat.h> +#include <fcntl.h> + +int main(void) { + mknodat(AT_FDCWD, "this/path/should/not/exist", S_IFIFO | 0600, 0); + return 0; +} +#endif /* TEST_MKNODAT */ +#if TEST_OSBYTEORDER_H +#include <libkern/OSByteOrder.h> + +int +main(void) +{ + return !OSSwapHostToLittleInt32(23); +} +#endif /* TEST_OSBYTEORDER_H */ +#if TEST_PATH_MAX +/* + * POSIX allows PATH_MAX to not be defined, see + * http://pubs.opengroup.org/onlinepubs/9699919799/functions/sysconf.html; + * the GNU Hurd is an example of a system not having it. + * + * Arguably, it would be better to test sysconf(_SC_PATH_MAX), + * but since the individual *.c files include "config.h" before + * <limits.h>, overriding an excessive value of PATH_MAX from + * "config.h" is impossible anyway, so for now, the simplest + * fix is to provide a value only on systems not having any. + * So far, we encountered no system defining PATH_MAX to an + * impractically large value, even though POSIX explicitly + * allows that. + * + * The real fix would be to replace all static buffers of size + * PATH_MAX by dynamically allocated buffers. But that is + * somewhat intrusive because it touches several files and + * because it requires changing struct mlink in mandocdb.c. + * So i'm postponing that for now. 
+ */ + +#include <limits.h> +#include <stdio.h> + +int +main(void) +{ + printf("PATH_MAX is defined to be %ld\n", (long)PATH_MAX); + return 0; +} +#endif /* TEST_PATH_MAX */ +#if TEST_PLEDGE +#include <unistd.h> + +int +main(void) +{ + return !!pledge("stdio", NULL); +} +#endif /* TEST_PLEDGE */ +#if TEST_PROGRAM_INVOCATION_SHORT_NAME +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <errno.h> + +int +main(void) +{ + + return !program_invocation_short_name; +} +#endif /* TEST_PROGRAM_INVOCATION_SHORT_NAME */ +#if TEST_READPASSPHRASE +#include <stddef.h> +#include <readpassphrase.h> + +int +main(void) +{ + return !!readpassphrase("prompt: ", NULL, 0, 0); +} +#endif /* TEST_READPASSPHRASE */ +#if TEST_REALLOCARRAY +#ifdef __NetBSD__ +# define _OPENBSD_SOURCE +#endif +#include <stdlib.h> + +int +main(void) +{ + return !reallocarray(NULL, 2, 2); +} +#endif /* TEST_REALLOCARRAY */ +#if TEST_RECALLOCARRAY +#include <stdlib.h> + +int +main(void) +{ + return !recallocarray(NULL, 0, 2, 2); +} +#endif /* TEST_RECALLOCARRAY */ +#if TEST_SANDBOX_INIT +#include <sandbox.h> + +int +main(void) +{ + char *ep; + int rc; + + rc = sandbox_init(kSBXProfileNoInternet, SANDBOX_NAMED, &ep); + if (-1 == rc) + sandbox_free_error(ep); + return(-1 == rc); +} +#endif /* TEST_SANDBOX_INIT */ +#if TEST_SECCOMP_FILTER +#include <sys/prctl.h> +#include <linux/seccomp.h> +#include <errno.h> + +int +main(void) +{ + + prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 0); + return(EFAULT == errno ? 
0 : 1); +} +#endif /* TEST_SECCOMP_FILTER */ +#if TEST_SETRESGID +#define _GNU_SOURCE /* linux */ +#include <sys/types.h> +#include <unistd.h> + +int +main(void) +{ + return setresgid(-1, -1, -1) == -1; +} +#endif /* TEST_SETRESGID */ +#if TEST_SETRESUID +#define _GNU_SOURCE /* linux */ +#include <sys/types.h> +#include <unistd.h> + +int +main(void) +{ + return setresuid(-1, -1, -1) == -1; +} +#endif /* TEST_SETRESUID */ +#if TEST_SHA2 +#include <sys/types.h> +#include <sha2.h> + +int main(void) +{ + SHA2_CTX ctx; + char result[SHA256_DIGEST_STRING_LENGTH]; + + SHA256Init(&ctx); + SHA256Update(&ctx, (const unsigned char *)"abcd", 4); + SHA256End(&ctx, result); + + return 0; +} +#endif /* TEST_SHA2 */ +#if TEST_SOCK_NONBLOCK +/* + * Linux doesn't (always?) have this. + */ + +#include <sys/socket.h> + +int +main(void) +{ + int fd[2]; + socketpair(AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fd); + return 0; +} +#endif /* TEST_SOCK_NONBLOCK */ +#if TEST_STATIC +int +main(void) +{ + return 0; /* not meant to do anything */ +} +#endif /* TEST_STATIC */ +#if TEST_STRLCAT +#include <string.h> + +int +main(void) +{ + char buf[3] = "a"; + return ! (strlcat(buf, "b", sizeof(buf)) == 2 && + buf[0] == 'a' && buf[1] == 'b' && buf[2] == '\0'); +} +#endif /* TEST_STRLCAT */ +#if TEST_STRLCPY +#include <string.h> + +int +main(void) +{ + char buf[2] = ""; + return ! 
(strlcpy(buf, "a", sizeof(buf)) == 1 && + buf[0] == 'a' && buf[1] == '\0'); +} +#endif /* TEST_STRLCPY */ +#if TEST_STRNDUP +#include <string.h> + +int +main(void) +{ + const char *foo = "bar"; + char *baz; + + baz = strndup(foo, 1); + return(0 != strcmp(baz, "b")); +} +#endif /* TEST_STRNDUP */ +#if TEST_STRNLEN +#include <string.h> + +int +main(void) +{ + const char *foo = "bar"; + size_t sz; + + sz = strnlen(foo, 1); + return(1 != sz); +} +#endif /* TEST_STRNLEN */ +#if TEST_STRTONUM +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef __NetBSD__ +# define _OPENBSD_SOURCE +#endif +#include <stdlib.h> + +int +main(void) +{ + const char *errstr; + + if (strtonum("1", 0, 2, &errstr) != 1) + return 1; + if (errstr != NULL) + return 2; + if (strtonum("1x", 0, 2, &errstr) != 0) + return 3; + if (errstr == NULL) + return 4; + if (strtonum("2", 0, 1, &errstr) != 0) + return 5; + if (errstr == NULL) + return 6; + if (strtonum("0", 1, 2, &errstr) != 0) + return 7; + if (errstr == NULL) + return 8; + return 0; +} +#endif /* TEST_STRTONUM */ +#if TEST_SYS_BYTEORDER_H +#include <sys/byteorder.h> + +int +main(void) +{ + return !LE_32(23); +} +#endif /* TEST_SYS_BYTEORDER_H */ +#if TEST_SYS_ENDIAN_H +#include <sys/endian.h> + +int +main(void) +{ + return !htole32(23); +} +#endif /* TEST_SYS_ENDIAN_H */ +#if TEST_SYS_MKDEV_H +#include <sys/types.h> +#include <sys/mkdev.h> + +int +main(void) +{ + return !minor(0); +} +#endif /* TEST_SYS_MKDEV_H */ +#if TEST_SYS_QUEUE +#include <sys/queue.h> +#include <stddef.h> + +struct foo { + int bar; + TAILQ_ENTRY(foo) entries; +}; + +TAILQ_HEAD(fooq, foo); + +int +main(void) +{ + struct fooq foo_q, bar_q; + struct foo *p, *tmp; + int i = 0; + + TAILQ_INIT(&foo_q); + TAILQ_INIT(&bar_q); + + /* + * Use TAILQ_FOREACH_SAFE because some systems (e.g., Linux) + * have TAILQ_FOREACH but not the safe variant. + */ + + TAILQ_FOREACH_SAFE(p, &foo_q, entries, tmp) + p->bar = i++; + + /* Test for newer macros as well. */ + + TAILQ_CONCAT(&foo_q, &bar_q, entries); + return 0; +} +#endif /* TEST_SYS_QUEUE */ +#if TEST_SYS_SYSMACROS_H +#include <sys/sysmacros.h> + +int +main(void) +{ + return !minor(0); +} +#endif /* TEST_SYS_SYSMACROS_H */ +#if TEST_SYS_TREE +#include <sys/tree.h> +#include <stdlib.h> + +struct node { + RB_ENTRY(node) entry; + int i; +}; + +static int +intcmp(struct node *e1, struct node *e2) +{ + return (e1->i < e2->i ? 
-1 : e1->i > e2->i); +} + +RB_HEAD(inttree, node) head = RB_INITIALIZER(&head); +RB_PROTOTYPE(inttree, node, entry, intcmp) +RB_GENERATE(inttree, node, entry, intcmp) + +int testdata[] = { + 20, 16, 17, 13, 3, 6, 1, 8, 2, 4 +}; + +int +main(void) +{ + size_t i; + struct node *n; + + for (i = 0; i < sizeof(testdata) / sizeof(testdata[0]); i++) { + if ((n = malloc(sizeof(struct node))) == NULL) + return 1; + n->i = testdata[i]; + RB_INSERT(inttree, &head, n); + } + + return 0; +} + +#endif /* TEST_SYS_TREE */ +#if TEST_UNVEIL +#include <unistd.h> + +int +main(void) +{ + return -1 != unveil(NULL, NULL); +} +#endif /* TEST_UNVEIL */ +#if TEST_WAIT_ANY +#include <sys/wait.h> + +int +main(void) +{ + int st; + + return waitpid(WAIT_ANY, &st, WNOHANG) != -1; +} +#endif /* TEST_WAIT_ANY */ diff --git a/tree.c b/tree.c @@ -0,0 +1,458 @@ +/* $Id$ */ +/* + * Copyright (c) 2017--2021 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+#include "config.h"
+
+#if HAVE_SYS_QUEUE
+# include <sys/queue.h>
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lowdown.h"
+#include "extern.h"
+
+/*
+ * Printable name of each node type, indexed by the node's type.
+ */
+static const char *const names[LOWDOWN__MAX] = {
+	"LOWDOWN_ROOT",			/* LOWDOWN_ROOT */
+	"LOWDOWN_BLOCKCODE",		/* LOWDOWN_BLOCKCODE */
+	"LOWDOWN_BLOCKQUOTE",		/* LOWDOWN_BLOCKQUOTE */
+	"LOWDOWN_DEFINITION",		/* LOWDOWN_DEFINITION */
+	"LOWDOWN_DEFINITION_TITLE",	/* LOWDOWN_DEFINITION_TITLE */
+	"LOWDOWN_DEFINITION_DATA",	/* LOWDOWN_DEFINITION_DATA */
+	"LOWDOWN_HEADER",		/* LOWDOWN_HEADER */
+	"LOWDOWN_HRULE",		/* LOWDOWN_HRULE */
+	"LOWDOWN_LIST",			/* LOWDOWN_LIST */
+	"LOWDOWN_LISTITEM",		/* LOWDOWN_LISTITEM */
+	"LOWDOWN_PARAGRAPH",		/* LOWDOWN_PARAGRAPH */
+	"LOWDOWN_TABLE_BLOCK",		/* LOWDOWN_TABLE_BLOCK */
+	"LOWDOWN_TABLE_HEADER",		/* LOWDOWN_TABLE_HEADER */
+	"LOWDOWN_TABLE_BODY",		/* LOWDOWN_TABLE_BODY */
+	"LOWDOWN_TABLE_ROW",		/* LOWDOWN_TABLE_ROW */
+	"LOWDOWN_TABLE_CELL",		/* LOWDOWN_TABLE_CELL */
+	"LOWDOWN_BLOCKHTML",		/* LOWDOWN_BLOCKHTML */
+	"LOWDOWN_LINK_AUTO",		/* LOWDOWN_LINK_AUTO */
+	"LOWDOWN_CODESPAN",		/* LOWDOWN_CODESPAN */
+	"LOWDOWN_DOUBLE_EMPHASIS",	/* LOWDOWN_DOUBLE_EMPHASIS */
+	"LOWDOWN_EMPHASIS",		/* LOWDOWN_EMPHASIS */
+	"LOWDOWN_HIGHLIGHT",		/* LOWDOWN_HIGHLIGHT */
+	"LOWDOWN_IMAGE",		/* LOWDOWN_IMAGE */
+	"LOWDOWN_LINEBREAK",		/* LOWDOWN_LINEBREAK */
+	"LOWDOWN_LINK",			/* LOWDOWN_LINK */
+	"LOWDOWN_TRIPLE_EMPHASIS",	/* LOWDOWN_TRIPLE_EMPHASIS */
+	"LOWDOWN_STRIKETHROUGH",	/* LOWDOWN_STRIKETHROUGH */
+	"LOWDOWN_SUPERSCRIPT",		/* LOWDOWN_SUPERSCRIPT */
+	"LOWDOWN_FOOTNOTE",		/* LOWDOWN_FOOTNOTE */
+	"LOWDOWN_MATH_BLOCK",		/* LOWDOWN_MATH_BLOCK */
+	"LOWDOWN_RAW_HTML",		/* LOWDOWN_RAW_HTML */
+	"LOWDOWN_ENTITY",		/* LOWDOWN_ENTITY */
+	"LOWDOWN_NORMAL_TEXT",		/* LOWDOWN_NORMAL_TEXT */
+	"LOWDOWN_DOC_HEADER",		/* LOWDOWN_DOC_HEADER */
+	"LOWDOWN_META",			/* LOWDOWN_META */
+};
+
+/*
+ * Append one indentation unit to "ob" for each of "indent" levels.
+ * NOTE(review): the unit reads as a single space in this
+ * whitespace-collapsed listing; confirm the intended width against
+ * the real tree.c.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+rndr_indent(struct lowdown_buf *ob, size_t indent)
+{
+	size_t	 i;
+
+	for (i = 0; i < indent; i++)
+		if (!HBUF_PUTSL(ob, " "))
+			return 0;
+	return 1;
+}
+
+/*
+ * Append an abbreviated, single-line rendering of "b" to "ob".
+ * At most the first 20 bytes are shown: newlines and tabs are written
+ * as the two-character escapes "\n" and "\t", any other control
+ * character as '?'.
+ * An ellipsis follows if "b" holds more than was shown.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+rndr_short(struct lowdown_buf *ob, const struct lowdown_buf *b)
+{
+	size_t	 i;
+
+	for (i = 0; i < 20 && i < b->size; i++) {
+		if (b->data[i] == '\n') {
+			if (!HBUF_PUTSL(ob, "\\n"))
+				return 0;
+		} else if (b->data[i] == '\t') {
+			if (!HBUF_PUTSL(ob, "\\t"))
+				return 0;
+		} else if (iscntrl((unsigned char)b->data[i])) {
+			if (!hbuf_putc(ob, '?'))
+				return 0;
+		} else {
+			if (!hbuf_putc(ob, b->data[i]))
+				return 0;
+		}
+	}
+
+	if (i < b->size && !HBUF_PUTSL(ob, "..."))
+		return 0;
+	return 1;
+}
+
+/*
+ * Emit one attribute line, one level deeper than "indent": the label,
+ * the abbreviated content of "b" (see rndr_short()) and a newline.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+rndr_attr_short(struct lowdown_buf *ob, size_t indent,
+	const char *label, const struct lowdown_buf *b)
+{
+
+	return rndr_indent(ob, indent + 1) &&
+	    hbuf_printf(ob, "%s", label) &&
+	    rndr_short(ob, b) &&
+	    HBUF_PUTSL(ob, "\n");
+}
+
+/*
+ * Like rndr_attr_short(), but the content of "b" is copied in full
+ * and without any escaping.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+rndr_attr_full(struct lowdown_buf *ob, size_t indent,
+	const char *label, const struct lowdown_buf *b)
+{
+
+	return rndr_indent(ob, indent + 1) &&
+	    hbuf_printf(ob, "%s", label) &&
+	    hbuf_putb(ob, b) &&
+	    HBUF_PUTSL(ob, "\n");
+}
+
+/*
+ * Emit the "data: N Bytes: ..." line shared by all nodes that carry
+ * a text payload: the payload size followed by its abbreviated
+ * content.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+rndr_data(struct lowdown_buf *ob, size_t indent,
+	const struct lowdown_buf *b)
+{
+
+	return rndr_indent(ob, indent + 1) &&
+	    hbuf_printf(ob, "data: %zu Bytes: ", b->size) &&
+	    rndr_short(ob, b) &&
+	    HBUF_PUTSL(ob, "\n");
+}
+
+/*
+ * Emit the "class: " and "id: " attribute lines, each only if the
+ * corresponding buffer is non-empty.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+rndr_attr_cls_id(struct lowdown_buf *ob, size_t indent,
+	const struct lowdown_buf *cls, const struct lowdown_buf *id)
+{
+
+	if (cls->size > 0 &&
+	    !rndr_attr_full(ob, indent, "class: ", cls))
+		return 0;
+	if (id->size > 0 &&
+	    !rndr_attr_full(ob, indent, "id: ", id))
+		return 0;
+	return 1;
+}
+
+/*
+ * Emit the type-specific attribute lines of "root", one level deeper
+ * than the node's own line.
+ * Node types without attributes emit nothing.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+rndr_attrs(struct lowdown_buf *ob,
+	const struct lowdown_node *root, size_t indent)
+{
+
+	switch (root->type) {
+	case LOWDOWN_PARAGRAPH:
+		return rndr_indent(ob, indent + 1) &&
+		    hbuf_printf(ob, "lines: %zu, blank-after: %d\n",
+			root->rndr_paragraph.lines,
+			root->rndr_paragraph.beoln);
+	case LOWDOWN_IMAGE:
+		/* Source line, with the dimensions appended if set. */
+		if (!rndr_indent(ob, indent + 1) ||
+		    !HBUF_PUTSL(ob, "source: ") ||
+		    !rndr_short(ob, &root->rndr_image.link))
+			return 0;
+		if (root->rndr_image.dims.size &&
+		    !(HBUF_PUTSL(ob, "(") &&
+		      rndr_short(ob, &root->rndr_image.dims) &&
+		      HBUF_PUTSL(ob, ")")))
+			return 0;
+		if (!HBUF_PUTSL(ob, "\n"))
+			return 0;
+
+		/* Optional attributes, each on its own line. */
+		if (root->rndr_image.title.size &&
+		    !rndr_attr_short(ob, indent, "title: ",
+		     &root->rndr_image.title))
+			return 0;
+		if (root->rndr_image.alt.size &&
+		    !rndr_attr_short(ob, indent, "alt: ",
+		     &root->rndr_image.alt))
+			return 0;
+		if (root->rndr_image.dims.size &&
+		    !rndr_attr_short(ob, indent, "dims: ",
+		     &root->rndr_image.dims))
+			return 0;
+		if (root->rndr_image.attr_width.size &&
+		    !rndr_attr_short(ob, indent, "width (extended): ",
+		     &root->rndr_image.attr_width))
+			return 0;
+		if (root->rndr_image.attr_height.size &&
+		    !rndr_attr_short(ob, indent, "height (extended): ",
+		     &root->rndr_image.attr_height))
+			return 0;
+		return rndr_attr_cls_id(ob, indent,
+		    &root->rndr_image.attr_cls,
+		    &root->rndr_image.attr_id);
+	case LOWDOWN_HEADER:
+		if (!rndr_indent(ob, indent + 1) ||
+		    !hbuf_printf(ob, "level: %zu\n",
+		     root->rndr_header.level))
+			return 0;
+		return rndr_attr_cls_id(ob, indent,
+		    &root->rndr_header.attr_cls,
+		    &root->rndr_header.attr_id);
+	case LOWDOWN_RAW_HTML:
+		return rndr_data(ob, indent, &root->rndr_raw_html.text);
+	case LOWDOWN_BLOCKHTML:
+		return rndr_data(ob, indent, &root->rndr_blockhtml.text);
+	case LOWDOWN_BLOCKCODE:
+		return rndr_data(ob, indent, &root->rndr_blockcode.text);
+	case LOWDOWN_DEFINITION:
+		return rndr_indent(ob, indent + 1) &&
+		    hbuf_printf(ob, "scope: %s\n",
+			(root->rndr_definition.flags & HLIST_FL_BLOCK) ?
+			"block" : "span");
+	case LOWDOWN_TABLE_BLOCK:
+		return rndr_indent(ob, indent + 1) &&
+		    hbuf_printf(ob, "columns: %zu\n",
+			root->rndr_table.columns);
+	case LOWDOWN_TABLE_CELL:
+		return rndr_indent(ob, indent + 1) &&
+		    hbuf_printf(ob, "current: %zu\n",
+			root->rndr_table_cell.col);
+	case LOWDOWN_LISTITEM:
+		if (!rndr_indent(ob, indent + 1) ||
+		    !hbuf_printf(ob, "scope: %s\n",
+		     (root->rndr_listitem.flags & HLIST_FL_BLOCK) ?
+		     "block" : "span"))
+			return 0;
+
+		/* The check status is shown only for check-box items. */
+		if (!(root->rndr_listitem.flags &
+		    (HLIST_FL_CHECKED | HLIST_FL_UNCHECKED)))
+			return 1;
+		return rndr_indent(ob, indent + 1) &&
+		    hbuf_printf(ob, "check status: %s\n",
+			(root->rndr_listitem.flags & HLIST_FL_CHECKED) ?
+			"checked" : "unchecked");
+	case LOWDOWN_LIST:
+		return rndr_indent(ob, indent + 1) &&
+		    hbuf_printf(ob, "list type: %s\n",
+			(root->rndr_list.flags & HLIST_FL_ORDERED) ?
+			"ordered" : "unordered");
+	case LOWDOWN_META:
+		return rndr_attr_short(ob, indent, "key: ",
+		    &root->rndr_meta.key);
+	case LOWDOWN_MATH_BLOCK:
+		if (!rndr_indent(ob, indent + 1) ||
+		    !hbuf_printf(ob, "blockmode: %s\n",
+		     root->rndr_math.blockmode ? "block" : "inline"))
+			return 0;
+		return rndr_data(ob, indent, &root->rndr_math.text);
+	case LOWDOWN_ENTITY:
+		return rndr_attr_short(ob, indent, "value: ",
+		    &root->rndr_entity.text);
+	case LOWDOWN_LINK_AUTO:
+		if (root->rndr_autolink.link.size &&
+		    !rndr_attr_short(ob, indent, "link: ",
+		     &root->rndr_autolink.link))
+			return 0;
+		return 1;
+	case LOWDOWN_LINK:
+		if (root->rndr_link.title.size &&
+		    !rndr_attr_short(ob, indent, "title: ",
+		     &root->rndr_link.title))
+			return 0;
+		if (root->rndr_link.link.size &&
+		    !rndr_attr_short(ob, indent, "link: ",
+		     &root->rndr_link.link))
+			return 0;
+		return rndr_attr_cls_id(ob, indent,
+		    &root->rndr_link.attr_cls,
+		    &root->rndr_link.attr_id);
+	case LOWDOWN_NORMAL_TEXT:
+		return rndr_data(ob, indent, &root->rndr_normal_text.text);
+	default:
+		break;
+	}
+
+	return 1;
+}
+
+/*
+ * Render "root" and, recursively, all of its children into "ob".
+ * Each node produces a line with its change marker (if inserted or
+ * deleted), type name and identifier at depth "indent", followed by
+ * its type-specific attribute lines and then its children one level
+ * deeper.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+rndr(struct lowdown_buf *ob,
+	const struct lowdown_node *root, size_t indent)
+{
+	const struct lowdown_node	*n;
+	struct lowdown_buf		*tmp;
+	int				 rc;
+
+	if (!rndr_indent(ob, indent))
+		return 0;
+	if (root->chng == LOWDOWN_CHNG_INSERT &&
+	    !HBUF_PUTSL(ob, "INSERT: "))
+		return 0;
+	if (root->chng == LOWDOWN_CHNG_DELETE &&
+	    !HBUF_PUTSL(ob, "DELETE: "))
+		return 0;
+	if (!hbuf_printf(ob, "%s (%zu)", names[root->type], root->id))
+		return 0;
+	if (!hbuf_putc(ob, '\n'))
+		return 0;
+	if (!rndr_attrs(ob, root, indent))
+		return 0;
+
+	/*
+	 * Children are rendered into a scratch buffer that is appended
+	 * to the output only once all of them have succeeded.
+	 */
+
+	if ((tmp = hbuf_new(64)) == NULL)
+		return 0;
+
+	TAILQ_FOREACH(n, &root->children, entries)
+		if (!rndr(tmp, n, indent + 1)) {
+			hbuf_free(tmp);
+			return 0;
+		}
+
+	/* The append itself may fail: do not report that as success. */
+
+	rc = hbuf_putb(ob, tmp);
+	hbuf_free(tmp);
+	return rc != 0;
+}
+
+/*
+ * Render the tree rooted at "root" into "ob" as an indented,
+ * human-readable listing, starting at indentation depth zero.
+ * Return zero on failure, non-zero on success.
+ */
+int
+lowdown_tree_rndr(struct lowdown_buf *ob,
+	const struct lowdown_node *root)
+{
+
+	return rndr(ob, root, 0);
+}
+
diff --git a/tree.o b/tree.o
Binary files differ.
diff --git a/util.c b/util.c
@@ -0,0 +1,143 @@
+/* $Id$ */
+/*
+ * Copyright (c) 2017 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include "config.h"
+
+#if HAVE_SYS_QUEUE
+# include <sys/queue.h>
+#endif
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "lowdown.h"
+#include "extern.h"
+
+/*
+ * Convert the "$Date$" (or perforce "$DateTime$") string to a simple
+ * ISO date (yyyy-mm-dd) in a static buffer.
+ * The keyword may be preceded by a backslash escaping the dollar sign.
+ * The returned buffer is overwritten by the next call, so the
+ * function is not reentrant.
+ * Returns NULL if the string is malformed at all (or the formatted
+ * date would not fit the buffer) or the date otherwise.
+ */
+char *
+rcsdate2str(const char *v)
+{
+	unsigned int	 y, m, d, h, min, s;
+	int		 rc;
+	static char	 buf[32];
+
+	if (v == NULL || strlen(v) < 12)
+		return NULL;
+
+	/* Escaped dollar sign. */
+
+	if (v[0] == '\\')
+		v++;
+
+	/* Date and perforce datetime. */
+
+	if (strncmp(v, "$Date: ", 7) == 0)
+		v += 7;
+	else if (strncmp(v, "$DateTime: ", 11) == 0)
+		v += 11;
+	else
+		return NULL;
+
+	/*
+	 * Try for long and short format dates.
+	 * Each is tried with a regular forward slash first and then
+	 * with the HTML-escaped slash ("&#47;").
+	 * The time of day is parsed only to recognise the long format;
+	 * it is not part of the result.
+	 */
+
+	rc = sscanf(v, "%u/%u/%u %u:%u:%u",
+		&y, &m, &d, &h, &min, &s);
+	if (rc != 6)
+		rc = sscanf(v, "%u&#47;%u&#47;%u %u:%u:%u",
+			&y, &m, &d, &h, &min, &s);
+	if (rc != 6) {
+		rc = sscanf(v, "%u/%u/%u", &y, &m, &d);
+		if (rc != 3)
+			rc = sscanf(v, "%u&#47;%u&#47;%u", &y, &m, &d);
+		if (rc != 3)
+			return NULL;
+	}
+
+	/* Fail instead of handing back a silently truncated date. */
+
+	rc = snprintf(buf, sizeof(buf), "%u-%.2u-%.2u", y, m, d);
+	if (rc < 0 || (size_t)rc >= sizeof(buf))
+		return NULL;
+	return buf;
+}
+
+/*
+ * Convert the "$Author$" string to just the author in a static
+ * buffer of a fixed length.
+ * The returned buffer is overwritten by the next call, so the
+ * function is not reentrant.
+ * Returns NULL if the string is malformed (too long, too short, etc.)
+ * at all or the author name otherwise.
+ */
+char *
+rcsauthor2str(const char *v)
+{
+	static char	 buf[1024];
+	size_t		 sz;
+
+	if (v == NULL || strlen(v) < 12)
+		return NULL;
+
+	/* Check for LaTeX (escaped dollar sign). */
+
+	if (v[0] == '\\')
+		v++;
+
+	if (strncmp(v, "$Author: ", 9) != 0)
+		return NULL;
+
+	/* A result of at least the buffer size means truncation. */
+
+	if ((sz = strlcpy(buf, v + 9, sizeof(buf))) >= sizeof(buf))
+		return NULL;
+
+	/*
+	 * Strip end (with LaTeX): the closing dollar sign, then a
+	 * backslash escaping it and a space before that, if present.
+	 */
+
+	if (sz && buf[sz - 1] == '$') {
+		buf[--sz] = '\0';
+		if (sz && buf[sz - 1] == '\\')
+			buf[--sz] = '\0';
+		if (sz && buf[sz - 1] == ' ')
+			buf[--sz] = '\0';
+	}
+
+	return buf;
+}
+
+/*
+ * Convert an ISO date (y/m/d or y-m-d) to a canonical form
+ * (yyyy-mm-dd) in a static buffer.
+ * The returned buffer is overwritten by the next call, so the
+ * function is not reentrant.
+ * Returns NULL if the string is malformed at all (or the formatted
+ * date would not fit the buffer) or the date otherwise.
+ */
+char *
+date2str(const char *v)
+{
+	unsigned int	 y, m, d;
+	int		 rc;
+	static char	 buf[32];
+
+	if (v == NULL)
+		return NULL;
+
+	rc = sscanf(v, "%u/%u/%u", &y, &m, &d);
+	if (rc != 3)
+		rc = sscanf(v, "%u-%u-%u", &y, &m, &d);
+	if (rc != 3)
+		return NULL;
+
+	/* Fail instead of handing back a silently truncated date. */
+
+	rc = snprintf(buf, sizeof(buf), "%u-%.2u-%.2u", y, m, d);
+	if (rc < 0 || (size_t)rc >= sizeof(buf))
+		return NULL;
+	return buf;
+}
+
diff --git a/util.o b/util.o
Binary files differ.