|
Warning: this is an htmlized version!
The original is here, and the conversion rules are here. |
-- eval.lua: functions for parsing and evaluating blogme code.
-- This file:
-- http://angg.twu.net/blogme4/eval.lua.html
-- http://angg.twu.net/blogme4/eval.lua
-- (find-blogme4file "eval.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
-- Version: 2011feb17
-- License: GPL3
--
-- «.control-flow» (to "control-flow")
-- «.parse_pattern» (to "parse_pattern")
-- «.tlongwords» (to "tlongwords")
-- «.qlongwords» (to "qlongwords")
-- «.tlongword_mapconcat» (to "tlongword_mapconcat")
-- «.longwords» (to "longwords")
-- «.readvword» (to "readvword")
-- «.readvrest» (to "readvrest")
-- «.readqword» (to "readqword")
-- «.readqrest» (to "readqrest")
-- «._A» (to "_A")
-- «.with_subj» (to "with_subj")
-- «.blogme_eval» (to "blogme_eval")
-- «.test-tlongword» (to "test-tlongword")
-- «.test-parse» (to "test-parse")
-- «.test-read» (to "test-read")
-- «control-flow» (to ".control-flow")
-- The difficult part of evaluation is dealing with the [] "blocks".
-- The execution flow involves this very nasty recursion:
--
--   blogme_eval(subj)
--     v
--   with_subj(subj, readverylongword)
--     v
--   readverylongword() <----------------------------\
--     v                                             |
--   parse_verylongword(blogme_evalblock)            |
--     v                                             |
-- /-> tlongword_mapconcat(blogme_evalblock, T, "")  |
-- |     :                                           |
-- |     v                                           |
-- |   blogme_evalblock(s, e)                        |
-- |     v                                           |
-- |   with_pos_endpos(s+1, e-1, blogme__eval)       |
-- |     v                                           |
-- |   blogme__eval()                                |
-- |     v                                           |
-- |   _A[argp]()                                    |
-- |     :                                           |
-- |     v                                           |
-- |   readvvvrest()                                 |
-- |     |  |       \                                |
-- |     v  v          v                             |
-- |   readvword()   readvrest()                     |
-- |     v               |                           |
-- |   readlongword()    \---------------------------/
-- |     v
-- \-- parse_longword(blogme_evalblock)
-- «parse_pattern» (to ".parse_pattern")
-- Conventions for the "parse*" functions
-- ======================================
-- They operate on these four globals:
-- subj (read-only),
-- pos (advanced when parsing succeeds, unchanged when not),
-- oldpos (gets the old value of pos on success, garbage on failure),
-- result (discussed below; relevant on success, garbage on failure),
-- and they return true if they succeed, nil if they fail.
-- They are all called either "parse__blah" or "parse_blah".
-- The "parse__blah" functions just store "oldpos" in "result".
-- The "parse_blah" functions are more complex, and they produce less
-- trivial "result"s. In most cases - mainly the cases implemented
-- with "parse_pattern" - their results are the substring of subj
-- between oldpos and pos; the cases involving "longwords" will be
-- described below.
--
-- The "blah" in the names of the "parse__blah" and "parse_blah"
-- functions indicates what those functions try to parse, and
-- (sometimes) how the result is calculated from the parsed region:
-- "spaces": one or more whitespace chars
-- "block": a region enclosed in balanced "[]"s
-- "wchars": one or more "word chars", i.e., which are neither
-- whitespace nor "[]"s
-- "rchars": one or more "regular chars", i.e., those which are
-- not "[]"s.
--
-- Match the Lua pattern "pat" against the global "subj", starting at the
-- global "pos". The pattern is expected to produce two captures: the value
-- to be stored in "result" and the new value for "pos".
-- On success: oldpos gets the previous pos, result gets the first capture,
-- pos gets the second capture, and true is returned.
-- On failure: pos keeps its value, oldpos gets that same value, result
-- becomes nil, and nothing is returned.
parse_pattern = function (pat)
  local start = pos
  local captured, newpos = subj:match(pat, start)
  oldpos, result = start, captured
  if newpos then
    pos = newpos
    return true
  end
end
-- Elementary parsers, all built on parse_pattern. In each pair the
-- "parse__X" variant captures the starting position as its first capture
-- and the "parse_X" variant captures the matched text.

-- One or more whitespace characters (space, tab, newline).
function parse__spaces()
  return parse_pattern("^()[ \t\n]+()")
end
function parse_spaces()
  return parse_pattern("^([ \t\n]+)()")
end

-- One or more "word chars": neither whitespace nor "[" / "]".
function parse__wchars()
  return parse_pattern("^()[^ \t\n%[%]]+()")
end
function parse_wchars()
  return parse_pattern("^([^ \t\n%[%]]+)()")
end

-- One or more "regular chars": anything except "[" / "]".
function parse__rchars()
  return parse_pattern("^()[^%[%]]+()")
end
function parse_rchars()
  return parse_pattern("^([^%[%]]+)()")
end

-- A region enclosed in balanced "[]"s (noted as slow by the author).
function parse__block()
  return parse_pattern("^()%b[]()")
end
function parse_block()
  return parse_pattern("^(%b[])()")
end
-- «tlongwords» (to ".tlongwords")
-- A "longword" is something of the form "(wchars | block)+", and a
-- "verylongword" is something of the form "(rchars | block)+". A
-- string like "a[+ 1 2][+ 3 4]b c[+ 5 6]d" is two longwords,
-- separated by a space, and is a single verylongword; verylongwords
-- end at "]"s or the end of the string, while longwords can also end
-- at whitespace.
-- Usually we want the "value" of a longword/verylongword; the "value"
-- is calculated by replacing each "[]" in the {very}longword by its
-- result - for example, the "value" of "a[+ 1 2][+ 3 4]b" is "a37b".
-- To calculate these "values" we need a nasty recursion, so here we
-- start with something simpler.
--
-- A "tlongword" is an array of strings and {begpos, endpos} pairs.
-- For example, the value "as a tlongword" of the string
-- -- 11111111112222
-- -- 12345678901234567890123
-- subj = "e[+ 1 [+ 2 3]]f[+ 4 5]"
-- is:
-- {"e", {2, 15}, "f", {16, 23}}
-- Note that each endpos is the position just after the closing "]":
-- subj:sub(2, 15-1) == "[+ 1 [+ 2 3]]"
--
-- Parse a maximal sequence of "(xchars | block)+" starting at "pos".
-- "parse_xchars" is the parser used for the non-block pieces; it must
-- follow the parse_* conventions and leave the matched text in "result".
-- On return "result" is a table holding, in order, the matched strings
-- and one {begpos, endpos} pair per block (the values of oldpos and pos
-- right after parse__block succeeds), and "oldpos" is the position where
-- the whole sequence started.
-- Returns true when at least one piece was parsed, false otherwise.
parse__xcharsandblocks = function (parse_xchars)
  local startpos = pos
  local pieces = {}
  -- Append a {begpos, endpos} pair for every block found at "pos".
  local function collect_blocks()
    while parse__block() do
      pieces[#pieces + 1] = {oldpos, pos}
    end
  end
  collect_blocks()
  while parse_xchars() do
    pieces[#pieces + 1] = result
    collect_blocks()
  end
  result = pieces
  oldpos = startpos
  return #pieces > 0
end
-- Parse a tlongword: "(wchars | block)+".
function parse_tlongword()
  local piece_parser = parse_wchars
  return parse__xcharsandblocks(piece_parser)
end

-- Parse a tverylongword: "(rchars | block)+".
function parse_tverylongword()
  local piece_parser = parse_rchars
  return parse__xcharsandblocks(piece_parser)
end
-- «qlongwords» (to ".qlongwords")
-- Quoted longwords.
-- These are used by blogme words like "#" and "lua:".
-- Parse a longword without evaluating it; on success "result" is the
-- position where the longword starts (the value of "oldpos") and true is
-- returned. Returns nothing on failure.
parse__qlongword = function ()
  if not parse_tlongword() then return end
  result = oldpos
  return true
end
-- Parse a longword without evaluating it; on success "result" is the raw
-- text of the longword, i.e. subj from oldpos up to pos-1, and true is
-- returned. Returns nothing on failure.
parse_qlongword = function ()
  if not parse_tlongword() then return end
  result = subj:sub(oldpos, pos - 1)
  return true
end
-- Consume everything from "pos" up to "endpos" without evaluating it.
-- Succeeds only when pos < endpos; then oldpos and result both get the
-- old pos, pos jumps to endpos, and true is returned.
-- Returns nothing (and changes nothing) otherwise.
parse__qverylongword = function ()
  if not (pos < endpos) then return end
  local start = pos
  oldpos, pos, result = start, endpos, start
  return true
end
-- Consume everything from "pos" up to "endpos" without evaluating it.
-- Succeeds only when pos < endpos; then result gets the raw text
-- subj:sub(pos, endpos-1), oldpos gets the old pos, pos jumps to endpos,
-- and true is returned. Returns nothing (and changes nothing) otherwise.
parse_qverylongword = function ()
  if not (pos < endpos) then return end
  local from, upto = pos, endpos
  oldpos, pos, result = from, upto, subj:sub(from, upto - 1)
  return true
end
-- «tlongword_mapconcat» (to ".tlongword_mapconcat")
-- This is the function that we use to evaluate tlongwords.
-- The function "f" is usually "blogme_evalblock", so this ends up
-- being recursive.
-- See: (find-blogme4 "eval.lua" "blogme_eval")
-- (find-elnode "Mapping Functions" "Function: mapconcat")
-- In blogme3 I implemented a special behavior for tlongwords of
-- length 1 - I skipped the concatenation step. This should be done
-- here too, I think (for HLIST and friends?).
--
-- Evaluate the tlongword T: every table entry {begpos, endpos} is replaced
-- *in place* by f(begpos, endpos), or by "" when f returns nil/false.
-- A one-element T yields that element as it is (no concatenation, so a
-- non-string value from f survives); otherwise the entries are joined
-- with "sep". Returns nil when T is nil/false.
tlongword_mapconcat = function (f, T, sep)
  if not T then return nil end
  for i = 1, #T do
    local piece = T[i]
    if type(piece) == "table" then
      local begpos, endpos = piece[1], piece[2]
      T[i] = f(begpos, endpos) or ""   -- note: T itself is modified
    end
  end
  if #T == 1 then
    return T[1]
  end
  return table.concat(T, sep)
end
-- «longwords» (to ".longwords")
-- These functions are similar to the ones that return tlongwords, but
-- here we run tlongword_mapconcat to return the "values" of these
-- tlongwords.
-- (I think that they destroy endpos... is that important?)
-- Parse a longword and store its *value* in "result": each "[]" block in
-- it is replaced by eval_block(begpos, endpos) and the pieces are joined.
-- Returns true on success, nil when there is no longword at "pos".
-- eval_block usually re-enters the parser (readlongword passes
-- blogme_evalblock), and every nested parse overwrites the global
-- "oldpos"; it is saved and restored here so that on success "oldpos"
-- still holds the position where this longword started.
parse_longword = function (eval_block)
  if not parse_tlongword() then return nil end
  local start = oldpos
  result = tlongword_mapconcat(eval_block, result, "")
  oldpos = start
  return true
end
-- Parse a verylongword and store its *value* in "result": each "[]" block
-- in it is replaced by eval_block(begpos, endpos) and the pieces are
-- joined. Returns true on success, nil when nothing could be parsed.
-- eval_block usually re-enters the parser (readverylongword passes
-- blogme_evalblock), and every nested parse overwrites the global
-- "oldpos"; it is saved and restored here so that on success "oldpos"
-- still holds the position where this verylongword started.
parse_verylongword = function (eval_block)
  if not parse_tverylongword() then return nil end
  local start = oldpos
  result = tlongword_mapconcat(eval_block, result, "")
  oldpos = start
  return true
end
-- «readvword» (to ".readvword")
-- «readvrest» (to ".readvrest")
-- The "read*" functions are high-level functions used to parse
-- arguments for blogme "calls"; they follow conventions that are
-- quite different from the "parse*" functions.
-- For example, the argparser for "HREF" has to parse a longword
-- and a verylongword; in the evaluation process for
-- "[HREF http://foo/ bar plic]"
-- we get:
-- HREF(readvvrest())
-- that becomes:
-- HREF("http://foo/", "bar plic")
-- but
-- "[HREF http://foo/]"
-- becomes:
-- HREF("http://foo/", "")
--
-- Shorthands: a "vword" is the value of a longword; a "vrest" (used
-- to obtain the "rest of the arguments", as &rest in Lisp) is the
-- value of a verylongword. Additional "v"s in the prefix mean vwords;
-- for example, a "vvvrest" is a vword, then another vword, then a
-- vrest.
--
-- Remember that the "parse*" functions returned a flag, and stored
-- the "result" of the parsed region in the global variable "result".
-- The "read*" functions return their "results" straight away, and in
-- the case of failure (i.e., of parsing nothing) they return the
-- empty string. Also, they parse (and discard) spaces before each
-- vword and vrest.
-- Read the value of a longword at "pos", evaluating its blocks with
-- blogme_evalblock; returns "" when no longword could be parsed.
readlongword = function ()
  if not parse_longword(blogme_evalblock) then
    return ""
  end
  return result
end
-- Read the value of a verylongword at "pos", evaluating its blocks with
-- blogme_evalblock; returns "" when nothing could be parsed.
readverylongword = function ()
  if not parse_verylongword(blogme_evalblock) then
    return ""
  end
  return result
end
-- Skip leading whitespace, then read the value of one longword.
function readvword()
  parse__spaces()
  return readlongword()
end

-- Skip leading whitespace, then read the value of one verylongword.
function readvrest()
  parse__spaces()
  return readverylongword()
end

-- Readers for several arguments: one vword for each extra "v" in the name
-- (read first, left to right), followed by a final vrest.
function readvvrest()
  local word = readvword()
  return word, readvrest()
end
function readvvvrest()
  local word = readvword()
  return word, readvvrest()
end
function readvvvvrest()
  local word = readvword()
  return word, readvvvrest()
end
function readvvvvvrest()
  local word = readvword()
  return word, readvvvvrest()
end
-- «readqword» (to ".readqword")
-- «readqrest» (to ".readqrest")
-- Read the raw (unevaluated) text of a longword at "pos"; returns ""
-- when no longword could be parsed.
readqlongword = function ()
  if not parse_qlongword() then
    return ""
  end
  return result
end
-- Read the raw (unevaluated) text from "pos" up to "endpos"; returns ""
-- when parse_qverylongword fails.
readqverylongword = function ()
  if not parse_qverylongword() then
    return ""
  end
  return result
end
-- Skip leading whitespace, then read one quoted (raw) longword.
function readqword()
  parse__spaces()
  return readqlongword()
end

-- Skip leading whitespace, then read the quoted (raw) rest.
function readqrest()
  parse__spaces()
  return readqverylongword()
end

-- Readers for several quoted arguments: one qword for each extra "q" in
-- the name (read first, left to right), followed by a final qrest.
function readqqrest()
  local word = readqword()
  return word, readqrest()
end
function readqqqrest()
  local word = readqword()
  return word, readqqrest()
end
function readqqqqrest()
  local word = readqword()
  return word, readqqqrest()
end
function readqqqqqrest()
  local word = readqword()
  return word, readqqqqrest()
end
-- «_A» (to "._A")
-- (find-blogme3 "definers.lua" "_AA")
-- (find-blogme3 "brackets.lua" "readvword")
-- (find-blogme3 "anggdefs.lua" "basic-special-words" "lua:")
-- The table of argument-parser codes: each code names the reader that
-- blogme__eval runs to obtain the arguments of a blogme word.
-- An existing _A is reused, so entries registered elsewhere survive.
do
  local codes = _A or {}
  _A = codes
  -- "0": `nop` is not defined in this file. NOTE(review): presumably a
  -- function that reads no arguments; confirm where it is defined.
  codes["0"] = nop
  -- "N": N-1 evaluated words followed by the evaluated rest.
  codes["1"], codes["2"] = readvrest, readvvrest
  codes["3"], codes["4"] = readvvvrest, readvvvvrest
  codes["5"] = readvvvvvrest
  -- "NQ": the same shapes, but quoted (unevaluated).
  codes["1Q"], codes["2Q"] = readqrest, readqqrest
  codes["3Q"], codes["4Q"] = readqqqrest, readqqqqrest
  codes["5Q"] = readqqqqqrest
end
-- «with_subj» (to ".with_subj")
-- Run f(pos_, endpos_) with the globals "pos" and "endpos" temporarily
-- set to pos_ and endpos_; afterwards both globals get their previous
-- values back. Only the first value returned by f is passed on.
-- The globals are not restored if f raises an error.
with_pos_endpos = function (pos_, endpos_, f)
  local saved_pos, saved_endpos = pos, endpos
  pos, endpos = pos_, endpos_
  local r = f(pos_, endpos_)
  pos, endpos = saved_pos, saved_endpos
  return r
end
-- Run f(1, #subj_+1) with the globals set up for parsing the whole of
-- subj_: subj = subj_, pos = 1, endpos = #subj_+1. Afterwards the three
-- globals get their previous values back. Only the first value returned
-- by f is passed on. The globals are not restored if f raises an error.
with_subj = function (subj_, f)
  local saved_subj, saved_pos, saved_endpos = subj, pos, endpos
  local first, past_end = 1, #subj_ + 1
  subj, pos, endpos = subj_, first, past_end
  local r = f(first, past_end)
  subj, pos, endpos = saved_subj, saved_pos, saved_endpos
  return r
end
-- «blogme_eval» (to ".blogme_eval")
-- "blogme__eval" (with a double "__") is a very low-level function,
-- that does the heavy work for both "blogme_evalblock" and
-- "blogme_eval". It takes as its "input" the global variables subj,
-- pos and endpos, parses a word, and then returns the result of
-- wordf(argpf()). Here is a typical example of how it runs. If:
-- subj = "ab [HREF http://foo/ bar] cd"
-- pos = 5
-- endpos = 25
-- then "word" is "HREF", "argp" is "2", and the result of argpf()
-- is the sequence "http://foo/", "bar"; then blogme__eval will
-- return the result of HREF("http://foo/", "bar").
-- Note that blogme__eval uses the table _B of blogmewords and
-- the table _A of argparser codes. See:
-- (find-blogme4 "def.lua" "BlogmeWord")
--
-- "blogme_evalblock" is used to run a blogme "call" inside "[]s" (as
-- in the example above).
--
-- "blogme_eval" is used to evaluate all the blogme calls inside a
-- string, replacing each one by its result; for example,
-- blogme_eval "ab [HREF http://foo/ bar] cd"
-- returns
-- "ab " .. HREF("http://foo/", "bar") .. " cd".
--
-- Evaluate one blogme "call" at the global position "pos" of "subj":
-- skip spaces, read a word, look it up in _B, run its argument parser
-- and return the result of wordf(argpf()).
-- The "argp" field of the blogme word is either a string - a code looked
-- up in _A first and then used as a global name in _G - or the parser
-- itself.
-- Raises an error when there is no word at "pos", when the word is not
-- in _B, or when no argument parser can be found for it. (An "or" chain
-- here would let an unknown string code through as the parser itself,
-- and a nil "argp" would break the concatenation building the message.)
blogme__eval = function ()
  parse__spaces()
  if not parse_wchars() then error("Empty word!") end
  local word = result
  local bword = _B[word] or error("Unknown blogme word: "..word)
  local wordf = bword.fun
  local argp = bword.argp
  local argpf = argp
  if type(argp) == "string" then
    argpf = _A[argp] or _G[argp]
  end
  if not argpf then
    -- tostring() keeps the message well-formed when argp is nil.
    error("Unknown arglist parser: "..tostring(argp))
  end
  return wordf(argpf())
end
-- Evaluate the block that starts at position s (its "[") and ends just
-- before position e: blogme__eval runs with pos = s+1 and endpos = e-1,
-- i.e. with the '[' and the ']' skipped.
function blogme_evalblock(s, e)
  local inner_pos, inner_endpos = s + 1, e - 1
  return with_pos_endpos(inner_pos, inner_endpos, blogme__eval)
end

-- Evaluate the string subj_ as a verylongword, by running
-- readverylongword with subj_ as the parsing subject.
function blogme_eval(subj_)
  return with_subj(subj_, readverylongword)
end
-- dump-to: tests
--[===[
-- «test-tlongword» (to ".test-tlongword")
-- (find-blogme4 "argparsers.lua")
-- (find-blogme4 "brackets.lua")
-- (find-blogme4 "def.lua")
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
userocks()
ee_dofile "~/blogme4/blogme4-all.lua"
-- These tests should make clear what a tlongword is.
-- /-----------------------------------------------------------\
-- | /-----\ /-----------------\/-------\ |
str = "[HREF http://[+ 1 2]/ bar [* [+ 1 2] [+ 3 4]][* 10 10]plic]"
-- ^ ^ ^ ^ ^ ^^
-- 1 7 14 20 ^24 29 4748 ^56 ^61
angf = function (s, e) return "<"..subj:sub(s, e-1)..">" end
test = function (p, f, ...)
subj, pos = str, p
PP(f(...), oldpos, pos, result)
end
test(7, parse_tlongword)
--> <true> 7 22 {"http://", {14, 21}, "/"}
-- bool oldpos pos result
test(24, parse_tverylongword)
--> <true> 24 61 {"bar ", {29, 48}, {48, 57}, "plic"}
-- bool oldpos pos result
test(24, parse_tverylongword) -- reset "result"
PP(tlongword_mapconcat(angf, result, "..")) -- this changes "result"
--> "bar ..<[* [+ 1 2] [+ 3 4]]>..<[* 10 10]>..plic"
PP(result)
--> {1="bar ", 2="<[* [+ 1 2] [+ 3 4]]>", 3="<[* 10 10]>", 4="plic"}
test(7, parse_tlongword)
--> <true> 7 22 {1="http://", 2={1=14, 2=21}, 3="/"}
test(7, parse_longword, angf)
--> <true> 7 22 "http://<[+ 1 2]>/"
test(24, parse_verylongword, angf)
--> <true> 24 61 "bar <[* [+ 1 2] [+ 3 4]]><[* 10 10]>plic"
def [[ HREF 2 url,str "<a href=\"$url\">$str</a>" ]]
def [[ * 2 a,b a*b ]]
def [[ + 2 a,b a+b ]]
-- /-----------------------------------------------------------\
-- | /-----\ /-----------------\/-------\ |
str = "[HREF http://[+ 1 2]/ bar [* [+ 1 2] [+ 3 4]][* 10 10]plic]"
-- ^ ^ ^ ^ ^ ^^
-- 1 7 14 20 ^24 29 4748 ^56 ^61
subj = str
= blogme_evalblock(14, 20)
= blogme_evalblock(29, 48)
= blogme_evalblock( 1, 61)
= blogme_eval(str)
-- «test-parse» (to ".test-parse")
-- «test-read» (to ".test-read")
-- High-level tests.
-- "be" tests evaluating a string using blogme_eval,
-- "tp" tests a "parse_*" word,
-- "tr" tests a "read*" word.
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
-- (find-blogme4 "blogme4-all.lua")
ee_dofile "~/blogme4/blogme4-all.lua"
def [[ pp 1 body "<"..body..">", print("<"..body..">") ]]
def [[ pq 1Q body "<"..body..">", print("<"..body..">") ]]
comp = function (f, g) return function (...) return f(g(...)) end end
be = function (str) PP(blogme_eval(str)) end
tp = function (f) return function (subj) PP(with_subj(subj, f), result) end end
tr = function (f) return function (subj) with_subj(subj, comp(PP, f)) end end
tp(parse_spaces) [==[ ab cd ef ]==]
tp(parse_rchars) [==[ ab cd ef gh ]==]
tp(parse_rchars) [==[ ab cd [pp ef] gh ]==]
tp(parse_qverylongword) [==[ ab cd [pp ef] gh ]==]
tp(parse_tverylongword) [==[ ab cd [pp ef] gh ]==]
tp(parse_tlongword) [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tp(parse__qlongword) [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tp(parse_qlongword) [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tp(parse_qlongword) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readlongword) [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readverylongword) [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvword) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvvrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvvvrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvvvvrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvvvvvrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqlongword) [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqlongword) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqverylongword) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqword) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqqrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqqqrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqqqqrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqqqqqrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
be [==[ ab[ pp cd ]ef [pq [pp gh] ij] kl ]==]
--]===]
-- Local Variables:
-- coding: raw-text-unix
-- ee-anchor-format: "«%s»"
-- End: