-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- elisp.lua: parse and interpret sexp hyperlinks.
-- This file:
-- http://angg.twu.net/blogme4/elisp.lua.html
-- http://angg.twu.net/blogme4/elisp.lua
-- (find-blogme4 "elisp.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
-- Version: 2011aug01
-- License: GPL3
--
-- The docs below are a mess!!!
--
-- Let me start by supposing that you know what a sexp is. Then you
-- know what a "sexp one-liner" is, and I will say that a line "has an
-- elink" if it is made of some "prefix characters" (possibly zero),
-- then a sexp (a list), then optional spaces.
-- The "sexp hyperlinks" used by eev are elinks.
-- See: (find-eevarticlesection "hyperlinks")
-- http://en.wikipedia.org/wiki/S-expression
--
-- One of the hardest parts of htmlizing the material in
-- http://angg.twu.net/ is that many of the files there require
-- htmlizing "sexp hyperlinks", like this one:
--
-- (find-blogme4 "def.lua" "undollar")
--
-- the htmlization makes the "find-blogme4" into a link to a section
-- of the documentation about eev, and makes the two last chars of the
-- sexp, '")', behave somehow like what the sexp would do when run in
-- Emacs...
-- (find-blogme4 "hyperlinks")
-- Here is a rough sketch of what we need to do on each line that may
-- end with a sexp ("rough sketch" means "the details are below,
-- scattered around")... We need to:
--
-- 1) detect whether that line ends with a sexp,
-- 2) split each line that ends with a sexp into what comes before
-- the sexp (we call that the "pre"), the hyperlink itself (the
-- "sexp") and the optional spaces after the sexp ("the spaces"),
-- 3) split the sexp into its "elements",
-- 4) check whether the first "element" (the "head") is a symbol,
-- 5) check whether the "head" has an entry in the table "ewords",
-- 6) if it has, we need to run sexp:sexphtml(), that usually:
-- 7) splits the sexp into an "opening parenthesis" (the "o"),
-- the "word" (the "w"), the "rest" ("r") and the "close"
-- (usually the two last chars - '")'),
-- 8) determines the "help url" that will be associated to the
-- "word" and the "target url" that will be associated to the
-- "close",
-- 9) compose "o", "w", "r", "c" and the help url and the target
-- url to build an htmlization of the sexp,
--
-- I found a nice hackish way to detect if a line "has an elink".
-- The algorithm is non-recursive, does not backtrack, runs very
-- quickly, and can be implemented in Lua using just string.gsub,
-- string.reverse and string.match. And it doesn't need Lpeg!...
--
-- The rough idea is:
-- 1) first simplify all literal strings - like "foo bar" - by:
-- 1a) replacing all backslash-char pairs by "__"s, and then
-- 1b) replacing all chars inside double-quotes by "_"s;
-- 2) then, starting from the right, use Lua's "%b" pattern to find
-- matching "()"s.
--
-- Part of the trick is that we use string.reverse judiciously at the
-- right points of the algorithm to perform pattern matches "starting
-- from the right". Also, we produce a "simplified string" and work on
-- it, but we keep the original string (that has the same length as
-- the simplified one), and after doing all the parsing and
-- discovering where the sexp and all its "elements" start and end we
-- go back to the original string.
--
-- Here's an example that illustrates how the algorithm works.
-- line = [[ # (foo "a") (bar "plic: \"ploc\"") ]]
--
-- skel = [[ # (foo "a") (bar "plic: __ploc__") ]]
-- leks = [[ # (foo "_") (bar "______________") ]]:reverse()
-- secaps = [[ ]]:reverse()
-- lekspxes = [[(bar "______________")]]:reverse()
-- sexpskel = [[(bar "______________")]]
-- erp = [[ # (foo "_") ]]:reverse()
--
-- pre = [[ # (foo "a") ]]
-- sexp = [[(bar "plic: \"ploc\"")]]
-- spaces = [[ ]]
-- 1 = {0=[[bar]], 2, 5},
-- 2 = {0=[["plic: \"ploc\""]], 6, 22}
--
-- o = [[(]]
-- w = [[bar]]
-- r = [[ "plic: \"ploc\"]]
-- c = [[")]]
require "eoo" -- (find-blogme4 "eoo.lua")
require "common" -- (find-blogme4 "common.lua")
-- Q is the default quoting/htmlizing function used by Sexpline below
-- (sexphtml__ and linehtml__). Keep an existing global Q if there is
-- one; otherwise fall back to `id` (presumably the identity function
-- from one of the required modules - TODO confirm).
Q = Q or id -- (find-blogme4 "anggdefs.lua" "Q")
-- Some utility functions
-- Return the last character of `str` when that character is not a "/",
-- and nil otherwise. The empty string yields nil, so "" is treated as
-- a directory.
notdir = function (str)
    local lastchar = str:match("[^/]$")
    return lastchar
  end
-- Append `ext` to `fname` when both are given and `fname` does not
-- name a directory (see notdir); in every other case return `fname`
-- untouched.
addfileext = function (fname, ext)
    local appendable = fname and ext and notdir(fname)
    if not appendable then
      return fname
    end
    return fname..ext
  end
-- Return `url` followed by "#" and `anchor` when both are given;
-- otherwise return `url` as it is (possibly nil).
addanchor = function (url, anchor)
    local complete = url and anchor
    if not complete then
      return url
    end
    return url.."#"..anchor
  end
-- Compose addfileext and addanchor: first add the extension `ext` to
-- `fname`, then add the anchor `anchor` to the result.
addextanchor = function (fname, ext, anchor)
    local withext = addfileext(fname, ext)
    return addanchor(withext, anchor)
  end
-- Wrap `text` in an <a href="..."> tag pointing to `url`.
-- When `url` is nil or false, return `text` alone.
-- Neither `url` nor `text` is escaped here.
href_ = function (url, text)
    if not url then
      return text
    end
    local opentag = "<a href=\""..url.."\">"
    return opentag..text.."</a>"
  end
-- Build a url from `base` and `offset`.
-- Returns nothing when `offset` is nil or false. Otherwise the result
-- is base..offset, followed by `ext` when that concatenation does not
-- end in "/" (see notdir) and `ext` is given, followed by "#"..anchor
-- when `anchor` is given.
buildurl_ = function (base, offset, ext, anchor)
    if not offset then return end
    local url = base..offset
    local isfile = notdir(url)
    if isfile and ext then
      url = url..ext
    end
    if anchor then
      url = url.."#"..anchor
    end
    return url
  end
-- Sexpline: a line of text that may end with a sexp hyperlink.
-- An object is created from a table with a "line" field (for example
-- Sexpline {line=li}); the methods below compute new fields and store
-- them in the same object:
--   presexpspaces_ -> pre, sexp, spaces, and the elements [1], [2], ...
--   eword_         -> word, eword
--   sexpurls_      -> helpurl, targeturl
--   owrc_          -> o, w, r, c
--   sexphtml__     -> sexphtml
--   linehtml__     -> linehtml
Sexpline = Class {
  type    = "Sexpline",
  __index = {
    -- Two functions to split fields, calculating new fields.
    -- This one splits "line" into "pre", "sexp", and "spaces",
    -- and, as a bonus, it obtains the "elements" of the sexp
    -- (stored in integer-indexed positions).
    -- Returns true when the line ends with a balanced "(...)" followed
    -- only by spaces/tabs; returns nothing (and sets no fields)
    -- otherwise.
    presexpspaces_ = function (sexpline)
        local line = sexpline.line
        -- "skel" has the same length as "line", with each
        -- backslash-char pair replaced by "__".
        local skel = line:gsub("\\.", "__")
        local leks = skel:reverse()
        -- Replace the contents of each double-quoted string by "_"s, so
        -- that parentheses inside strings do not confuse "%b" below.
        local blank = function (s) return '"'..("_"):rep(#s)..'"' end
        leks = leks:gsub('"([^"]-)"', blank)
        -- We are matching on the reversed string, so the balanced pair
        -- is "%b)(" and the captures come out reversed too.
        local secaps, lekspxes, erp = leks:match("^([ \t]*)(%b)()(.*)")
        if not erp then return end
        local pre      = line:sub(1, #erp)
        local sexpskel = lekspxes:reverse()
        local sexp     = line:sub(1+#pre, #pre+#sexpskel)
        local spaces   = secaps:reverse()
        -- bonus: split the "sexp" into its "elements" and store them
        -- as tables in integer-indexed fields in the sexpline structure.
        -- Each element is {s, e, [0]=text}, where s and e are positions
        -- inside "sexp" (e is one past the last char of the element).
        -- "pos" starts at 2 to skip the opening parenthesis.
        local n, pos = 0, 2
        local parseelement = function (pat)
            local s, e = sexpskel:match(pat, pos)
            if s then
              n = n + 1
              sexpline[n] = {s, e, [0]=sexp:sub(s, e-1)}
              pos = e
              return true
            end
          end
        while parseelement "^[ \t]*()[^ \t\"()]+()"   -- symbol or number
           or parseelement "^[ \t]*()\"_*\"()"        -- string
           or parseelement "^[ \t]*()%b()()" do       -- list
        end
        sexpline.pre    = pre
        sexpline.sexp   = sexp
        sexpline.spaces = spaces
        return true
      end,
    -- This one splits the "sexp" field into "o", "w", "r", "c"
    -- (for the standard way of htmlizing sexp hyperlinks).
    -- Returns true only when the sexp has the expected shape, i.e., an
    -- open parenthesis immediately followed by a word. When the match
    -- fails "o", "w", "r" and "c" are set to nil and the result is
    -- false, so that sexphtml_ stops before trying to concatenate nils.
    -- Returns nothing when there is no "sexp" field.
    owrc_ = function (sexpline)
        if sexpline.sexp then
          local pat = "^(%()([-!$%&*+,/:<=>?@^_0-9A-Za-z]+)(.-)(\"?%))$"
          local o, w, r, c = sexpline.sexp:match(pat) -- open, word, rest, close
          sexpline.o = o
          sexpline.w = w
          sexpline.r = r
          sexpline.c = c
          return o ~= nil
        end
      end,
    -- Notice the logical gap here! "sexphtml__" uses the fields
    -- "helpurl" and "targeturl", that are set by "sexpurls_" (below),
    -- and the fields "o", "w", "r", "c", that are set by "owrc_".
    -- About specials (like images): they're not supported yet!
    -- Sets "sexphtml" and returns true when "o" is set and at least one
    -- of the two urls is available; returns nothing otherwise.
    sexphtml__ = function (sexpline)
        if sexpline.o and (sexpline.helpurl or sexpline.targeturl) then
          sexpline.sexphtml = sexpline.o ..
            href_(sexpline.helpurl, Q(sexpline.w)) ..
            Q(sexpline.r) ..
            href_(sexpline.targeturl, sexpline.c)
          return true
        end
      end,
    -- Set "linehtml": when "sexphtml" is available only "pre" goes
    -- through the htmlizer, otherwise the whole line does.
    -- "htmlizer" defaults to Q. Returns the sexpline itself.
    linehtml__ = function (sexpline, htmlizer)
        htmlizer = htmlizer or Q
        if sexpline.sexphtml then
          sexpline.linehtml = htmlizer(sexpline.pre) ..
            sexpline.sexphtml ..
            sexpline.spaces
        else
          sexpline.linehtml = htmlizer(sexpline.line)
        end
        return sexpline
      end,
    --
    -- Run all the steps needed to set "sexphtml", stopping at the first
    -- step that returns a false value. Returns the result of the last
    -- step that was run.
    sexphtml_ = function (sexpline, htmlizer)
        return sexpline:presexpspaces_()
           and sexpline:eword_()
           and sexpline:sexpurls_()    -- defined below
           and sexpline:owrc_()
           and sexpline:sexphtml__(htmlizer)
      end,
    -- Compute "sexphtml" (when possible) and "linehtml", and return the
    -- sexpline. "htmlizer" is passed on to linehtml__, which falls back
    -- to Q when it is nil.
    linehtml_ = function (sexpline, htmlizer)
        sexpline:sexphtml_(htmlizer)
        sexpline:linehtml__(htmlizer)
        return sexpline
      end,
    --
    -- Two functions to extract the "elements" of the sexp, as strings.
    -- Like this, but 1-based and typed: (find-elnode "List Elements" "nth")
    -- "symbol" returns the text of the n-th element when it does not
    -- start with a parenthesis or a double quote.
    symbol = function (sexpline, n)
        return sexpline[n] and sexpline[n][0]:match"^([^()\"].*)$"
      end,
    -- "string" returns the text between the double quotes when the
    -- n-th element starts and ends with one.
    string = function (sexpline, n)
        return sexpline[n] and sexpline[n][0]:match"^\"(.*)\"$"
      end,
    --
    -- Set "word" to the first element (when it is a symbol) and "eword"
    -- to its entry in the global table "ewords". Returns the eword.
    eword_ = function (sexpline)
        sexpline.word  = sexpline:symbol(1)
        sexpline.eword = ewords[sexpline.word]
        return sexpline.eword
      end,
    -- Ask the eword for the "helpurl" and for the "targeturl"; the
    -- target url receives the second and third elements of the sexp
    -- when they are strings. Returns true when there is an eword.
    sexpurls_ = function (sexpline)
        local eword = sexpline.eword
        if eword then
          local a, b = sexpline:string(2), sexpline:string(3)
          -- no specials yet
          sexpline.helpurl   = eword:helpurl_()
          sexpline.targeturl = eword:targeturl_(a, b)
          -- return sexpline.helpurl, sexpline.targeturl
          return true
        end
      end,
  },
}
-- Eword: the data associated to the head symbol of a sexp hyperlink.
-- The methods below read the fields "help", "base" and "ext".
Eword = Class {
  type    = "Eword",
  __index = {
    -- The help url is just the "help" field.
    helpurl_ = function (eword)
        return eword.help
      end,
    -- The default target url: without a "base" the result is the
    -- (false or nil) "base" itself, otherwise it is computed by "f".
    targeturl_ = function (eword, a, b)
        local base = eword.base
        if not base then
          return base
        end
        return (eword:f(a, b))
      end,
    -- base..a (when `a` is given), plus the extension "ext" when that
    -- is not a directory, plus the anchor `b`; see addextanchor.
    f = function (eword, a, b)
        local fname = a and eword.base..a
        return addextanchor(fname, eword.ext, b)
      end,
  },
}
-- The global table of ewords, indexed by the head symbol of the sexp
-- (for example "to" or "find-angg"). Sexpline's eword_ method looks up
-- the first element of each sexp here.
ewords = {}
-- Create a Sexpline for `line`, run linehtml_ on it, and return what
-- linehtml_ returns (the Sexpline object with all its fields).
htmlizeline_ = function (line, htmlizer)
    local sexpline = Sexpline {line=line}
    return sexpline:linehtml_(htmlizer)
  end
-- Like htmlizeline_, but return only the "linehtml" field of the
-- resulting object.
htmlizeline = function (line, htmlizer)
    local sexpline = Sexpline {line=line}
    local result = sexpline:linehtml_(htmlizer)
    return result.linehtml
  end
-- Apply htmlizeline (with the given htmlizer) to each run of
-- non-newline characters of `bigstr`. The results of string.gsub are
-- returned as they are, so the second return value is the number of
-- substitutions.
htmlizelines = function (bigstr, htmlizer)
    local convert = function (line)
        return htmlizeline(line, htmlizer)
      end
    return bigstr:gsub("[^\n]*", convert)
  end
--
-- __ _ _ __ __ _ __ _
-- / _` | '_ \ / _` |/ _` |
-- | (_| | | | | (_| | (_| |
-- \__,_|_| |_|\__, |\__, |
-- |___/ |___/
-- A "targeturl_" method that uses just the `a`: the result is
-- eword.base..a, and `b` is ignored. Without an `a` the (nil or
-- false) `a` itself is returned.
targeturl_base_a = function (eword, a, b)
    if not a then
      return a
    end
    return eword.base..a
  end
-- A "targeturl_" method for links to anchors in the same page: the
-- result is "#"..a, and both `eword` and `b` are ignored. Without an
-- `a` the (nil or false) `a` itself is returned.
targeturl_to = function (eword, a, b)
    if not a then
      return a
    end
    return "#"..a
  end
-- The url of the eev article, used as the prefix of the default help
-- urls below. A value set before this file is loaded is preserved.
eevarticle = eevarticle or "http://angg.twu.net/eev-article.html"
--[[
ewords["to"] = Eword {
help = eevarticle.."#anchors",
-- base = "",
-- targeturl = function (eword, sexp)
-- local anchor = sexp:string(2)
-- if anchor then return "#"..anchor end
-- end,
}
--]]
-- Build an Eword from the table `ew`, filling in the default help url
-- (the "shorter-hyperlinks" section of the eev article) when `ew` has
-- no "help" field. Note that `ew` itself is modified.
Ew = function (ew)
    if not ew.help then
      ew.help = eevarticle.."#shorter-hyperlinks"
    end
    return Eword(ew)
  end
-- Like Ew, but the resulting eword builds its target url from the
-- base and the first string only (see targeturl_base_a), ignoring
-- the second string instead of treating it as an anchor.
-- The override is stored in the "targeturl_" field because that is
-- the method that Sexpline's sexpurls_ calls; the "targeturl" field
-- (without the underscore) is also set, as before, for compatibility
-- with any code that reads it.
Ewa = function (ew)
    ew.targeturl_ = targeturl_base_a
    ew.targeturl  = targeturl_base_a
    return Ew(ew)
  end
-- The eword for sexps like (to "anchor"): its help url points to the
-- "anchors" section of the eev article, and its target url is "#"
-- followed by the first string of the sexp (see targeturl_to).
ewords["to"] = Ew {
help = eevarticle.."#anchors",
targeturl_ = targeturl_to,
}
-- Like code_c_d_remote, but `d` is first converted by pathto.
-- Returns nothing.
code_c_d_angg = function (c, d)
    local base = pathto(d)
    code_c_d_remote(c, base)
  end
-- Register three ewords for the code `c`, all with base `d`:
--   "find-<c>file" and "find-<c>w3m" are created with Ewa,
--   "find-<c>" is created with Ew and has the extension ".html".
code_c_d_remote = function (c, d)
    local head = "find-"..c
    ewords[head.."file"] = Ewa {base = d}
    ewords[head]         = Ew  {base = d, ext = ".html"}
    ewords[head.."w3m"]  = Ewa {base = d}
  end
-- Register the ewords for each code: every call below defines
-- "find-<c>file", "find-<c>" and "find-<c>w3m" (see code_c_d_remote),
-- using pathto(d) as the base.
code_c_d_angg("angg", "") -- (find-angg "blogme4/")
code_c_d_angg("es", "e/") -- (find-es "lua5")
code_c_d_angg("dednat4", "dednat4/") -- (find-dednat4 "")
code_c_d_angg("dn4", "dednat4/")
code_c_d_angg("dn4ex", "dednat4/examples/")
code_c_d_angg("dn5", "dednat5/")
code_c_d_angg("blogme", "blogme/")
code_c_d_angg("blogme3", "blogme3/")
code_c_d_angg("blogme4", "blogme4/")
code_c_d_angg("eev", "eev-current/")
code_c_d_angg("flua", "flua/")
code_c_d_angg("rubyforth", "rubyforth/")
code_c_d_angg("vtutil", "vtutil/")
code_c_d_angg("vtutil4", "vtutil4/")
code_c_d_angg("RETRO", "RETRO/")
-- "find-es" uses the extension ".e.html" instead of the ".html" that
-- code_c_d_remote sets. This must run after the "es" call above.
ewords["find-es"].ext = ".e.html"
-- dump-to: tests
-- (find-blogme4 "angglisp.lua")
-- _____ _ __ _ _
-- |_ _|__ ___| |_ / _|_ _ _ __ ___| |_(_) ___ _ __ ___
-- | |/ _ \/ __| __| | |_| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
-- | | __/\__ \ |_ | _| |_| | | | | (__| |_| | (_) | | | \__ \
-- |_|\___||___/\__| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
--
-- Return a "range dump" for `line`: a string with one char for each
-- char of the line, where "p" marks the "pre", "-" marks the chars of
-- the sexp that are not in any element, the number i marks the chars
-- of the i-th element of the sexp, and "s" marks the trailing spaces.
-- Returns nothing when the line does not end with a sexp.
-- The splitting is done by Sexpline's presexpspaces_ method, which
-- stores the elements as integer-indexed fields of the Sexpline.
elinksplittest1 = function (line)
    local sexpline = Sexpline {line=line}
    if not sexpline:presexpspaces_() then return end
    local pre, sexp, spaces = sexpline.pre, sexpline.sexp, sexpline.spaces
    local chars = {}
    -- absrange uses positions in the line, range uses positions in the
    -- sexp; in both the end position `e` is exclusive.
    local absrange = function (s, e, char) for i=s,e-1 do chars[i]=char end end
    local range    = function (s, e, char) absrange(#pre+s, #pre+e, char) end
    absrange(1,            1+#pre,               "p")
    absrange(1+#pre,       1+#pre+#sexp,         "-")
    absrange(1+#pre+#sexp, 1+#pre+#sexp+#spaces, "s")
    for i,elt in ipairs(sexpline) do range(elt[1], elt[2], i) end
    return table.concat(chars)
  end
-- Print each line of `bigstr` inside "[[" and "]]", and, for the lines
-- for which elinksplittest1 returns something, print that result
-- (the "range dump") right below the line.
elinksplittest = function (bigstr)
    local lines = splitlines(bigstr)
    for _,line in ipairs(lines) do
      print(" -- [["..line.."]]")
      local dump = elinksplittest1(line)
      if dump then
        print(" -- "..dump)
      end
    end
  end
-- Return a new table in which the keys and the values of `T` are
-- swapped: each pair key->val of `T` becomes val->key in the result.
transpose = function (T)
    local swapped = {}
    for key,val in pairs(T) do
      swapped[val] = key
    end
    return swapped
  end
-- Return a list of {key=..., val=...} tables for the table `T`.
-- The keys listed in `K` come first, in the order of `K` (even when
-- they are not in `T`); then come the remaining keys of `T`, in the
-- order given by tos_sorted_pairs. `K` can be a list, a string (that
-- is converted to a list by split), or nil.
sortedpairs = function (T, K)
    local keys
    if type(K) == "string" then
      keys = split(K) or K
    else
      keys = K or {}
    end
    local listed = transpose(keys)
    local result = {}
    local add = function (k)
        table.insert(result, {key=k, val=T[k]})
      end
    for _,k in ipairs(keys) do
      add(k)
    end
    for _,pair in ipairs(tos_sorted_pairs(T)) do
      local k = pair.key
      if not listed[k] then
        add(k)
      end
    end
    return result
  end
-- Return the sortedpairs of `sexp`, with the standard fields of a
-- Sexpline first, in the order in which they are usually computed.
sexppairs = function (sexp)
    local fieldnames = [[ line pre sexp spaces word eword o w r c
helpurl targeturl sexphtml linehtml ]]
    return sortedpairs(sexp, fieldnames)
  end
-- Return an ipairs iterator over sexppairs(sexp).
isexppairs_ = function (sexp)
    local pairlist = sexppairs(sexp)
    return ipairs(pairlist)
  end
-- Htmlize `line` with htmlizeline_ and return an ipairs iterator over
-- the sexppairs of the resulting object.
isexppairs = function (line)
    local sexpline = htmlizeline_(line)
    local pairlist = sexppairs(sexpline)
    return ipairs(pairlist)
  end
-- Htmlize `line` and print the resulting fields, one "key = value"
-- per output line, skipping the fields whose values are nil or false.
-- The values are converted to strings with tos.
isp = function (line)
    for _,kv in isexppairs(line) do
      local val = kv.val
      if val then
        print(kv.key.." = "..tos(val))
      end
    end
  end
-- _____ _
-- |_ _|__ ___| |_ ___
-- | |/ _ \/ __| __/ __|
-- | | __/\__ \ |_\__ \
-- |_|\___||___/\__|___/
--
--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
eevarticle = "eev-article.html"
dofile "elisp.lua"
li = [[ foo (to "plic") ]]
PP(htmlizeline_(li))
se = Sexpline {line = li}
PP(se:presexpspaces_()); PP(se)
PP(se:eword_() ); PP(se)
PP(se:sexpurls_() ); PP(se)
PP(se:owrc_() ); PP(se)
PP(se:sexphtml_(htmlizer)); PP(se)
isp [[ foo (to "plic") ]]
for _,kv in isexppairs(" foo (bar plic) ") do
PP(kv)
end
for _,kv in isexppairs([[ foo (to "bar") ]]) do
if kv.val then print(kv.key.." = "..tos(kv.val)) end
end
PP(elinksplit " foo (bar plic) ") --> {
-- line =" foo (bar plic) ",
-- pre =" foo " ,
-- sexp = "(bar plic)" ,
-- spaces= " ",
-- 1={0="bar", 2, 5},
-- 2={0="plic", 6, 10},
-- }
elinksplittest [[
For lines with elinks, like the one below,
# (foo "a") (bar "plic: \"ploc\"")
the test function shows a "range dump".
]]
--> [[ For lines with elinks, like the one below,]]
-- [[ # (foo "a") (bar "plic: \"ploc\"")]]
-- pppppppppppppp-111-2222222222222222-
-- [[ the test function shows a "range dump".]]
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
require "elisp"
for li in io.lines("build.lua") do
local sexp = elinksplit(li)
local symbol = sexp:symbol(1)
if symbol then
local a, b = sexp:string(2), sexp:string(3)
print(sexp.sexp)
PP(a, b)
end
end
-- (find-fline "build.lua")
-- for _,li in ipairs(splitlines(readfile "dednat5/README")) do tt(li) end
-- map(tt, splitlines(readfile "dednat5/README"))
-- tt [[ # (find-fline "foo") (find-fline "bar") ]]
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "elisp.lua"
el = elinksplit [[# (find-angg "blogme3/elisp.lua")]]
PP(el)
PP(el:sexpsplit())
el = elinksplit [[# (find-angg "blogme3/")]]
print(el:sexphtml())
el = elinksplit [[# (find-angg "blogme3/elisp.lua")]]
print(el:sexphtml())
el = elinksplit [[# (find-angg "blogme3/elisp.lua" "foo")]]
print(el:sexphtml())
el = elinksplit [[# (find-image "foo.jpg")]]
print(el:sexphtml())
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
require "elisp"
s = Sexpline { line = [[ # (to "targ") ]] }
= s:get_linehtml()
PP(s)
mykeys = split [[ line pre sexp spaces word eword o w r c
helpurl targeturl
sexphtml linehtml ]]
mykeyst = transpose(mykeys)
for _,k in ipairs(mykeys) do
print(" "..k.."="..tos(s[k])..",")
end
for _,p in ipairs(tos_sorted_pairs(s)) do
if not mykeyst[p.key] then
print(" "..p.key.."="..tos(p.val)..",")
end
end
PP(s.eword)
PP(s.eword:get_targeturl("targ", "b"))
line=" # (to \"targ\") ",
pre =" # ",
sexp= "(to \"targ\")",
spaces= " ",
1= {0="to", 1=2, 2=4},
2= {0="\"targ\"", 1=5, 2=11},
word= "to",
eword={"help"="http://angg.twu.net/eev-article.html#anchors", "targeturl"=<function: 0x8f45f78>},
helpurl="http://angg.twu.net/eev-article.html#anchors",
o="(",
w="to",
r=" \"targ",
c="\")",
sexphtml="(<a href=\"http://angg.twu.net/eev-article.html#anchors\">to</a> \"targ\")",
linehtml=" # (<a href=\"http://angg.twu.net/eev-article.html#anchors\">to</a> \"targ\") ",
--]==]
-- Local Variables:
-- coding: raw-text-unix
-- ee-anchor-format: "«%s»"
-- End: