-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- -*- coding: raw-text-unix -*-
-- escripts.lua - convert ascii files (e-scripts) to html.
-- This is part of blogme3.
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
-- Version: 2013jan25
-- <http://anggtwu.net/blogme3/escripts.lua>
-- <http://anggtwu.net/blogme3/escripts.lua.html>
-- License: GPL.
--
-- BUG: UTF-8 characters are not converted correctly (yet).
--
--
-- Here's a very brief explanation of what this file does:
-- When we run this,
--
-- lua51 blogme3.lua -o foo.html -a2html foo
--
-- blogme3.lua processes the options according to the entries in the
-- table "_O"; "-o foo.html" sets the output file, and "-a2html foo"
-- says to read the file "foo", process it with the default htmlizer,
-- add an HTML header and a footer, and write the result to the output
-- file. The default htmlizer is the function "htmlizelines", defined
-- below. It applies "htmlizeline" on each line, and "htmlizeline"
-- handles these cases:
--
-- * Glyphs, non-ascii characters, and characters that need to be
-- sgml-quoted. Examples: "&<>*«»áº×".
-- See: (find-blogme3 "charset.lua")
--
-- * Sexp hyperlinks. See:
-- (find-eevarticlesection "hyperlinks")
-- (find-eevarticlesection "shorter-hyperlinks")
-- (find-eevarticlesection "e-scripts")
-- (find-blogme3 "elisp.lua")
-- (find-blogme3 "angglisp.lua")
--
-- * Anchors, and "to" links pointing to anchors. An example:
-- «here» (to "there")
-- «there» (to "here")
-- See: (find-eevarticlesection "anchors")
--
-- * Urls, and a few special strings: |&, $S/, <<'%%%'.
-- (find-blogme3 "options.lua" "basic-options")
-- (find-blogme3 "options.lua" "htmlizefile")
-- (find-blogme3 "escripts.lua" "htmlizelines")
-- (find-blogme3grep "grep -nH -e htmlizer *")
-- (find-blogme3grep "grep -nH -e htmlizefile *")
-- (find-es "lua5" "sheadsymbol-roberto")
-- (find-angg "LUA/lua50init.lua")
-- (find-angg "LUA/lua50init.lua" "loadlpeg")
-- (find-lpegw3m "doc.html" "function anywhere (p)")
-- (find-blogmefile "blogme2-outer.lua" "entities and quoting (Q)")
-- (find-anggfile "TH/Generate")
-- (find-anggfile "TH/Generate" "txt2html")
-- (ascstr 33 126)
-- !"#$%&'()*+,-./ :;<=>?@ [\]^_` {|}~
-- 0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz
-- «.anchor» (to "anchor")
-- «.assexplink» (to "assexplink")
-- «.htmlizeline» (to "htmlizeline")
-- «.htmlizelines» (to "htmlizelines")
-- Debug hook. If something truthy is already stored in the global DBG
-- it is kept; otherwise DBG becomes a function that does nothing.
if not DBG then
  DBG = function () end
end
-- (find-blogme3file "anggdefs.lua" "Q_table =")
-- Sgml-quoting.
-- Q_re is a Lua pattern matching each character that must be quoted in
-- HTML text, and Q_table maps each of those characters to its entity.
-- Q(text) hands both to translatechars, which is defined outside this
-- file (presumably it replaces every match of Q_re in `text` by the
-- corresponding Q_table entry - confirm against its definition).
Q_re    = "([&<>])"
Q_table = { ["&"]="&amp;", ["<"]="&lt;", [">"]="&gt;" }
Q = function (text) return translatechars(text, Q_re, Q_table) end
--------[ Basics ]--------
-- Load LPeg (loadlpeg is defined outside this file; the header points
-- to "LUA/lua50init.lua") and define the primitive patterns on which
-- the rest of the grammar is built.
loadlpeg()
Eos          = -lpeg.P(1)                -- succeeds only when no character is left
Eol          = #lpeg.P("\n") + Eos       -- a newline ahead (not consumed), or Eos
Pos          = lpeg.Cp()                 -- captures the current position
AlphaNumeric = lpeg.R("09", "AZ", "az")  -- one ASCII digit or letter
-- OptU8 = lpeg.P("\195")^-1
--------[ Anchor ]--------
-- «anchor» (to ".anchor")
-- An anchor is a name between the bytes \171 and \187 (the Latin-1
-- codes for the opening and closing guillemets). The name is one or
-- more AnchorChars, and it is the only thing that Anchor captures.
AnchorChar = lpeg.S("!#$%()*+,-./:;=?@^_{|}~") + AlphaNumeric
Anchor     = lpeg.P("\171") * lpeg.C(AnchorChar^1) * lpeg.P("\187")
-- Anchor = OptU8*"\171" * lpeg.C(AnchorChar^1) * OptU8*"\187"
--------[ Url ]--------
-- Url matches "<protocol>://<domain>/<path>".
--   * "https" is tried before "http", so the longer protocol wins;
--   * domain labels are made of lowercase letters, digits and "-",
--     and are separated by dots;
--   * the "/" after the domain is mandatory, the path may be empty.
-- Url itself produces no captures.
UrlProtocol   = lpeg.P("https") + "http" + "ftp"
UrlDomainChar = lpeg.R("az", "09") + "-"
do
  local label = UrlDomainChar^1
  UrlDomain   = label * (lpeg.P(".") * label)^0
end
UrlPathChar   = AlphaNumeric + lpeg.S("!#$%&()*+,-./:;=?@[]^_{|}~")
UrlPath       = UrlPathChar^0
Url           = UrlProtocol * lpeg.P("://") * UrlDomain * lpeg.P("/") * UrlPath
--------[ SString, SNumber, SSymbol ]--------
-- Atoms of the sexps that we recognize.
--   SString: a double quote, any characters other than double quotes
--            and newlines, and a closing double quote;
--   SNumber: an optional "-" followed by one or more digits;
--   SSymbol: one or more characters in the range "!".."~" that are
--            neither in SNonSymbolChar nor "{" or "}".
-- NOTE(review): SStringBsl (a backslash followed by any non-newline
-- character) is defined here but SString does not use it, so a \"
-- inside a string ends the string. Confirm whether that is intended.
SStringChar    = lpeg.P(1) - lpeg.S("\"\n")
SStringBsl     = lpeg.P("\\") * (lpeg.P(1) - lpeg.S("\n"))
SString        = lpeg.P("\"") * SStringChar^0 * lpeg.P("\"")
SNumber        = lpeg.P("-")^-1 * lpeg.R("09")^1
SNonSymbolChar = lpeg.S("\"#'(),.[\\]`")
SSymbolChar    = (lpeg.R("!~") - SNonSymbolChar) - lpeg.S("{}")
SSymbol        = SSymbolChar^1
--------[ SHeadSymbol, SSexpLink ]--------
-- SHeadSymbol is like SSymbol but more strict.
-- Its logic is: if SSymbol matches at pos, then check if the symbol
-- (as a string) has an entry in the table _E; if yes, then return its
-- ending position. lpeg.P is used to convert that function - that has
-- signature subj,pos|->endposornil - into a pattern.
--
-- The table _E contains the "code to htmlize elisp hyperlinks"...
-- Its entries - one for each htmlizable elisp hyperlink function -
-- are functions with signatures like
-- "all,funname,qarg1,qarg2|->html", like this:
--
-- _E["to"] = function (all, funname, qarg1, qarg2)
-- return href("#"..dequote(qarg1), Q(all))
-- end
--
-- SexpLink matches sexps that are lists formed by a SHeadSymbol
-- followed by zero or more atoms. This covers most of the kinds of
-- hyperlink sexps that I want to htmlize, but not all. For
-- example, this is not recognized:
-- (find-iconbookpage (+ 22 143))
-- _E holds the htmlizers for elisp hyperlink functions, indexed by
-- function name. It is created here only if it does not exist yet.
if not _E then _E = {} end

-- SHeadSymbol: a function pattern. It succeeds at `pos` when an SSymbol
-- starts there and the text of that symbol is a key with a true value
-- in _E; in that case it returns the position right after the symbol.
SHeadSymbol = lpeg.P(function (subj, pos)
    local endpos = lpeg.match(SSymbol, subj, pos)
    if not endpos then return nil end
    local name = string.sub(subj, pos, endpos - 1)
    return _E[name] and endpos
  end)

SAtom   = SString + SNumber + SSymbol
SSpace  = lpeg.S(" \t")
SSpaces = SSpace^1

-- SexpLink: "(", a head symbol, zero or more atoms (each preceded by
-- spaces or tabs), and ")". It captures the head symbol and each atom.
do
  local head = lpeg.C(SHeadSymbol)
  local arg  = SSpaces * lpeg.C(SAtom)
  SexpLink = lpeg.P("(") * head * arg^0 * lpeg.P(")")
end
--------[ Translators ]--------
-- dequote(sstr): drop the first and the last character of sstr (the
-- surrounding quotes of a quoted string). A nil or false argument is
-- returned unchanged.
dequote = function (sstr)
  if not sstr then return sstr end
  return (string.sub(sstr, 2, -2))
end
-- href(target, text): wrap `text` in a link to `target`.
-- When `target` is nil or false, `text` is returned as is.
-- (`format` is a global defined outside this file.)
href = function (target, text)
  if not target then return text end
  return format("<a href=\"%s\">%s</a>", target, text)
end
-- (find-blogme3 "anggdefs.lua" "asurl_hack")
-- (find-THgrep "grep -nH -e gsub *.blogme | grep angg")
-- asurl_hack is a hook applied to each url before it is used as a link
-- target. If nobody has set it, it defaults to the global `id`
-- (defined outside this file).
if not asurl_hack then asurl_hack = id end

-- asurl(url): the HTML for a link whose visible text is the url itself
-- and whose target is asurl_hack(url).
-- (`format` is a global defined outside this file.)
asurl = function (url)
  local target = asurl_hack(url)
  return format("<a href=\"%s\">%s</a>", target, url)
end
-- asanchor(anchor): the HTML for an anchor. The result is an
-- <a name="..."> target whose visible text is the anchor name, in
-- green, between italic guillemets.
-- The guillemets are written as the entities &laquo; and &raquo;
-- instead of as literal non-ascii characters, so the output does not
-- depend on the encoding in which this source file is stored.
-- (`format` is a global defined outside this file.)
asanchor = function (anchor)
  return format("<a name=\"%s\"><font color=\"green\"><i>&laquo;</i>" ..
                "%s<i>&raquo;</i></font></a>", anchor, anchor)
end
-- «assexplink» (to ".assexplink")
-- Used by the lpeg pattern `SexpLink' above.
-- This is very old - from 2007, I think.
-- The table _E is defined here:
-- (find-blogme3 "angglisp.lua")
--
-- assexplink(all, funname, qarg1, qarg2): htmlize one sexp hyperlink.
--   all:     the full text of the sexp, parentheses included;
--   funname: its head symbol;
--   qarg1, qarg2: its first two arguments, as they appear in the text.
-- If _E has an htmlizer for funname, the result is whatever that
-- htmlizer returns for the same four arguments; otherwise it is Q(all).
assexplink = function (all, funname, qarg1, qarg2)
  local htmlizer = _E[funname]
  if not htmlizer then
    return Q(all)
  end
  return htmlizer(all, funname, qarg1, qarg2)
end
-- Translators: each one matches one kind of special text and produces
-- its HTML by calling the corresponding "as..." function.
--   UrlT:      asurl receives the matched url;
--   SexpLinkT: assexplink receives the whole sexp followed by the
--              captures of SexpLink (head symbol, then atoms);
--   AnchorT:   asanchor receives the anchor name captured by Anchor.
do
  local C = lpeg.C
  UrlT      = C(Url)      / asurl
  SexpLinkT = C(SexpLink) / assexplink
  AnchorT   = Anchor      / asanchor
end
--------[ Some extra translators ]--------
-- Each of these translators matches one literal string and replaces it
-- by a link whose visible text is that same string.
-- The replacement is an LPeg string capture, in which "%%" stands for
-- one literal "%". Since the replacement goes into the HTML as is, the
-- "&" and "<" in the visible text are written as entities.
PipeAmpT = lpeg.P("|&") /
  '<a href="http://anggtwu.net/e/bash.e.html#pipe_stdout_stderr">|&amp;</a>'
-- PipeSnarfDirT = lpeg.P("$S/") /
--   '<a href="http://anggtwu.net/eev-article.html#local-copies">$S/</a>'
PipeSnarfDirT = lpeg.P("$S/") /
  '<a href="http://anggtwu.net/eev-intros/find-psne-intro.html">$S/</a>'
HereDocT = lpeg.P("<<'%%%'") /
  "<a href=\"http://en.wikipedia.org/wiki/Here-document\">&lt;&lt;'%%%%%%'</a>"
--------[ Parsing and translating lines ]--------
-- SpecialT matches anything that has its own HTML translation: an url,
-- an anchor, one of the special strings "|&", "$S/", "<<'%%%'", or a
-- sexp hyperlink. As "*" binds tighter than "+", the Eol lookahead
-- applies to SexpLinkT only: a sexp is translated only when it is the
-- last thing in its line.
SpecialT = UrlT + AnchorT + PipeAmpT + PipeSnarfDirT + HereDocT + SexpLinkT * Eol

-- EtcChar: any character except a newline.
EtcChar = 1 - lpeg.S "\n"

-- EtcSpecial: skip EtcChars, one at a time, until a SpecialT matches.
-- It yields three captures: the position where the skipping started,
-- the position where the special text starts, and the translation
-- produced by SpecialT. It fails if no SpecialT matches from here up
-- to the end of the line.
EtcSpecial = Pos * lpeg.P {
    [1] = Pos * SpecialT
        + EtcChar * lpeg.V(1)
  }
EtcSpecials = EtcSpecial^0

-- EtcSpecialsEtc: all the EtcSpecials of a line, followed by the
-- positions where the remaining non-special text starts and ends.
EtcSpecialsEtc = EtcSpecials * Pos * EtcChar^0 * Pos
-- «htmlizeline» (to ".htmlizeline")
-- htmlizeline(str): htmlize a single line.
-- The line is matched against EtcSpecialsEtc, whose captures are
-- collected in a table; that table is then handed to the function
-- returned by sbeconcat(str, Q), and its results are returned.
-- sbeconcat is defined outside this file - presumably it rebuilds the
-- line from the positions and translations in the table, applying Q to
-- the stretches of ordinary text (TODO: confirm).
htmlizeline = function (str)
  DBG("e208")
  local collected = lpeg.Ct(EtcSpecialsEtc)
  local rebuild   = sbeconcat(str, Q)
  return lpeg.match(collected / rebuild, str)
end
-- «htmlizelines» (to ".htmlizelines")
-- htmlizelines(bigstr): htmlize a multi-line string.
-- Every maximal run of non-newline characters - i.e., every non-empty
-- line - is replaced by the result of htmlizeline on it. Newlines and
-- empty lines are kept as they are, and, as usual with string.gsub, a
-- line for which htmlizeline returns nil or false is left unchanged.
-- Only the resulting string is returned (the extra parentheses drop
-- the substitution count).
-- This is the definition from 2008jan03; the previous one was built on
-- map and splitlines.
htmlizelines = function (bigstr)
  DBG("e216")
  return (bigstr:gsub("[^\n]+", htmlizeline))
end
--------[ Functions for tests: M and M2 ]--------
-- Default subjects for the test functions below. The head symbol is
-- "find-angg" in the first and "find-ongg" in the second.
esubj  = '(find-angg ".emacs" "foo")'
esubj2 = '(find-ongg ".emacs" "foo")'

-- M(pat, s): match the pattern `pat` against `s` (default: esubj),
-- pass all the results to the function returned by
-- PPP("lpeg.match:"), and then print an empty line.
-- PPP is defined outside this file.
M = function (pat, s)
  local subject = s or esubj
  local show = PPP("lpeg.match:")
  show(lpeg.match(pat, subject))
  print()
end

-- M2(pat, s): like M, but the default subject is esubj2.
M2 = function (pat, s)
  local subject = s or esubj2
  local show = PPP("lpeg.match:")
  show(lpeg.match(pat, subject))
  print()
end
--[[
-- Tests
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "escripts.lua"
str = "\195\171Hello\195\187"
str = "\171Hello\187"
= AnchorT:match(str)
--]]