-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- Taken from:
-- (find-dn5 "gab.lua" "old-parser")
--
-- _ __ __ _ _ __ ___ ___
-- | '_ \ / _` | '__/ __|/ _ \
-- | |_) | (_| | | \__ \ __/
-- | .__/ \__,_|_| |___/\___|
-- |_|
-- «precedence-table» (to ".precedence-table")
-- «precedence» (to ".precedence")
-- (find-dn5 "gab-tests.lua" "parse-2")
-- (find-dn5 "gab-tests.lua" "parse-3")
-- Global operator table: maps an operator token (a string) to the record
-- created for it by binop() or prefix() below.
ops = {}
-- Register `op` as a binary operator in the global `ops` table.
-- The three numbers are stored unchanged in the fields `top`, `l` and `r`
-- of the new record, next to kind="binop". Returns nothing.
function binop(op, top, l, r)
  local entry = {kind = "binop"}
  entry.top, entry.l, entry.r = top, l, r
  ops[op] = entry
end
-- Register `op` as a prefix operator in the global `ops` table.
-- The record has kind="prefix" plus the fields `top` and `r`; unlike the
-- records made by binop() it has no `l` field. Returns nothing.
function prefix(op, top, r)
  local entry = {kind = "prefix"}
  entry.top, entry.r = top, r
  ops[op] = entry
end
-- Precedence table.
-- Each row is {register, token, numbers...}: rows that use `binop` carry
-- (top, l, r) and rows that use `prefix` carry (top, r). Rows are
-- registered in the order listed.
do
  local rows = {
    {binop,  "*",  8, 7, 8},
    {binop,  "/",  8, 7, 8},
    {binop,  "+",  7, 6, 7},
    {binop,  "-",  7, 6, 7},
    {binop,  "==", 6, 6, 6},
    {binop,  "<=", 6, 6, 6},
    {binop,  ">=", 6, 6, 6},
    {binop,  "<",  6, 6, 6},
    {binop,  ">",  6, 6, 6},
    {binop,  "!=", 6, 6, 6},
    {binop,  "&",  4, 3, 4},
    {binop,  "|",  3, 2, 3},
    {binop,  "->", 2, 2, 2},
    {prefix, "u-", 9, 100},
    {prefix, "nt", 5, 4},
    {binop,  "in", 6, 6, 6},
    {binop,  "<-", 6, 6, 6},
  }
  for _, row in ipairs(rows) do
    -- For prefix rows row[5] is nil; prefix() takes only three arguments.
    row[1](row[2], row[3], row[4], row[5])
  end
end
-- «recursive-descent» (to ".recursive-descent")
-- Global parser state: `subj` is the token array and `pos` is the index of
-- the next token to look at. They start out holding a sample expression;
-- parse() overwrites both.
subj, pos = split("2 + 3 * 4 + 5 * 6 * 7 (eof)"), 1
-- Parse the string `str` and return whatever pa_expr(b) returns.
-- Resets the global parser state first: `subj` becomes split(str)
-- (tokens are separated by spaces) and `pos` becomes 1.
-- `b` is passed through unchanged to pa_expr.
function parse(str, b)
  subj, pos = split(str), 1
  return pa_expr(b)
end
-- Parse `str` (see parse) and print the result twice: first what its
-- :tolisp() method returns, then what its :torect() method returns.
-- Returns the parsed object so the caller can keep using it.
function pparse(str, b)
  local tree = parse(str, b)
  print(tree:tolisp())
  print(tree:torect())
  return tree
end
-- Parse and print `str` with pparse, then call :peval() on the result.
-- `b` is forwarded to pparse (the previous version accepted `b` but
-- silently dropped it). Returns nothing.
function pparsee(str, b)
  pparse(str, b):peval()
end
-- Abbreviations:
-- "la" = "looking at"
-- "pa" = "parse"
-- "wos" = "without suffixes"
-- «parser-grammar» (to ".parser-grammar")
-- We use a very simple recursive-descent parser with one token of lookahead.
-- It operates on the array "subj", which at the moment is just an
-- array of strings obtained by running "split" on a given string
-- (this avoids writing a lexer). Our grammar is:
--
-- parenexpr ::= ( expr commaexpr^* )
-- commaexpr ::= , expr
-- setexpr ::= { }
-- | { expr commaexpr^* }
-- | { expr | expr commaexpr^* }
-- qlexpr ::= Fa expr . expr
-- | Ex expr . expr
-- | \\ expr . expr
-- exprwos ::= parenexpr
-- | setexpr
-- | qlexpr
-- | prefixop expr
-- | number
-- | var
-- complement1 ::= suffixop
-- | parenexpr
-- | binop expr
-- expr ::= exprwos complement1^*
--
-- Note that each alternative starts with a different token - we use
-- the "la_*" functions to choose which alternative to follow.
-- Raise a parse error. The message is prefixed with the current lookahead
-- token, and error level 2 attributes the error to perror's caller.
-- The lookahead goes through tostring() so that running out of tokens
-- (la() == nil) still produces the intended message rather than an
-- "attempt to concatenate a nil value" error.
function perror(str)
  error("(la = `" .. tostring(la()) .. "') " .. str, 2)
end
-- "Looking at": return the token at the current position without
-- consuming it (nil when `pos` is outside `subj`).
function la()
  return subj[pos]
end
-- Consume one token: advance `pos` by one and return the token that was
-- at the old position.
function eat()
  local tok = subj[pos]
  pos = pos + 1
  return tok
end
-- Require the lookahead to be exactly `token`.
-- On a mismatch perror() is called with an "Expected ..." message;
-- otherwise (and also if perror returns) the token is consumed and
-- returned.
function pa_forced(token)
  local found = la()
  if found ~= token then
    perror("Expected `" .. token .. "'")
  end
  return eat()
end
-- Precedence test: is operator `op` acceptable under the bound `b`?
-- With no bound (b is nil or false) every operator is acceptable and
-- `ops` is not consulted; otherwise the answer is b <= ops[op].top.
function under(b, op)
  if not b then return true end
  return b <= ops[op].top
end
-- Is the lookahead a registered operator of kind `k`?
-- Returns nil when the lookahead has no entry in `ops`, otherwise a
-- boolean telling whether the entry's `kind` equals `k`.
function la_op(k)
  local entry = ops[la()]
  return entry and entry.kind == k
end
-- Is the lookahead an operator of kind `k` that is also acceptable under
-- the bound `b`? The falsy result of la_op(k) is returned as-is when
-- the kind test fails; otherwise the result of under(b, la()).
function la_op_under(k, b)
  local is_kind = la_op(k)
  if not is_kind then return is_kind end
  return (under(b, la()))
end
-- Kind-specific lookahead tests; each one delegates to la_op_under with a
-- fixed kind string and the caller's bound `b`.

-- Lookahead is a binary operator acceptable under `b`?
function la_binop(b)
  return la_op_under("binop", b)
end

-- Lookahead is a prefix operator acceptable under `b`?
function la_prefix(b)
  return la_op_under("prefix", b)
end

-- Lookahead is a suffix operator acceptable under `b`?
-- NOTE(review): no operator of kind "suffix" is registered in the part of
-- the file visible here, so this presumably never succeeds -- confirm.
function la_suffix(b)
  return la_op_under("suffix", b)
end
--
-- Is the lookahead a number token (one or more decimal digits)?
-- Returns the token itself when it matches, nil otherwise.
-- A missing lookahead (la() == nil, i.e. the input ran out) counts as
-- "no" instead of raising an "attempt to index a nil value" error, so
-- callers can go on to report a proper parse error.
function la_number()
  local tok = la()
  if type(tok) ~= "string" then return nil end
  return (tok:match("^[0-9]+$"))
end
-- Consume one token, coerce it to a number by adding 0, and return the
-- result of passing that number to Num.
function pa_number()
  local digits = eat()
  return Num(digits + 0)
end
-- Is the lookahead a variable token (one or more ASCII letters)?
-- Returns the token itself when it matches, nil otherwise.
-- A missing lookahead (la() == nil, i.e. the input ran out) counts as
-- "no" instead of raising an "attempt to index a nil value" error.
function la_var()
  local tok = la()
  if type(tok) ~= "string" then return nil end
  return (tok:match("^[A-Za-z]+$"))
end
-- Consume one token and return the result of passing it to Var.
function pa_var()
  local name = eat()
  return Var(name)
end
-- Is the lookahead an opening parenthesis? Always returns a boolean.
function la_paren()
  local tok = la()
  return tok == "("
end
-- Require and consume a closing parenthesis (see pa_forced).
function pa_parenclose()
  local closing = ")"
  return pa_forced(closing)
end
-- Parse a parenthesized expression or tuple: "(" expr {"," expr} ")".
-- The callers visible in this file invoke it only when la_paren() holds;
-- the first token is consumed without being checked here.
-- Returns the inner expression itself when there are no comma-separated
-- extras, otherwise Tuple(first, extra1, extra2, ...).
--
-- This replaces two consecutive definitions: an earlier, non-tuple version
-- was overwritten immediately and therefore never used. The parsing steps
-- are written as separate statements so that the order in which tokens
-- are consumed does not depend on how an expression list is evaluated.
function pa_parenexpr()
  local unpack = unpack or table.unpack -- Lua 5.1 vs 5.2+
  eat()                                 -- the opening "("
  local e = pa_expr()
  local es = pa_commaexprs()
  pa_parenclose()
  if #es == 0 then return e end
  return Tuple(e, unpack(es))
end
-- Can the lookahead start an expression? Returns a truthy value when one
-- of the individual lookahead tests succeeds, a falsy value otherwise.
-- la_ql() is included so that this test agrees with pa_exprwos, which
-- also accepts quantifier/lambda expressions; without it a token such
-- as "\\" was rejected here even though pa_exprwos can parse it.
function la_expr()
  return la_number() or la_var() or la_paren() or la_set()
      or la_ql() or la_prefix()
end
-- Parse an "expression without suffixes": the first alternative whose
-- lookahead test succeeds is taken, tested in this order:
-- parenthesized expr, set, quantifier/lambda, prefix operator, number,
-- variable. If none applies, perror() is called.
-- For a prefix operator the operand is parsed with the bound ops[op].r
-- and the result is Expr {[0]=op, operand}.
function pa_exprwos()
  -- PP("pa_exprwos with pos = "..pos)
  if la_paren() then return pa_parenexpr() end
  if la_set() then return pa_set() end
  if la_ql() then return pa_qlexpr() end
  if la_prefix() then
    local op = eat()
    local operand = pa_expr(ops[op].r)
    return Expr {[0] = op, operand}
  end
  if la_number() then return pa_number() end
  if la_var() then return pa_var() end
  perror("Not an expr (in exprwos)")
end
-- Can the lookahead continue an expression that has already been parsed?
-- Tries, in order: a suffix operator acceptable under `b`, an opening
-- parenthesis, a binary operator acceptable under `b`. Returns the first
-- truthy answer, or the (falsy) answer of the last test.
function la_complement(b)
  local hit = la_suffix(b)
  if hit then return hit end
  hit = la_paren()
  if hit then return hit end
  return (la_binop(b))
end
-- Parse an expression: one pa_exprwos() followed by as many complements
-- as la_complement(b) allows.
--   * suffix operator  -> Expr {[0]=op, left}
--   * "(" ...          -> App(left, pa_parenexpr())
--   * binary operator  -> Expr {[0]=op, left, right}, where the right
--                         operand is parsed with the bound ops[op].r
-- `b` is the optional bound handed to the lookahead tests; operators that
-- fail those tests are left unconsumed. Returns the accumulated tree.
function pa_expr(b)
  -- PP("pa_expr with pos = "..pos)
  local left = pa_exprwos()
  while la_complement(b) do
    if la_suffix(b) then
      left = Expr {[0] = eat(), left}
    elseif la_paren() then
      left = App(left, pa_parenexpr())
    elseif la_binop(b) then
      local op = eat()
      local right = pa_expr(ops[op].r)
      left = Expr {[0] = op, left, right}
    else
      perror("Not an expr")
    end
  end
  return left
end
-- (find-angg "LUA/lua50init.lua" "pack-and-unpack")
-- Is the lookahead an opening brace? Always returns a boolean.
function la_set()
  local tok = la()
  return tok == "{"
end
-- Require and consume a closing brace (see pa_forced).
function pa_setclose()
  local closing = "}"
  return pa_forced(closing)
end
-- Parse zero or more ", expr" items.
-- While the lookahead is a comma, the comma is consumed and one
-- expression is parsed with pa_expr() (no bound). Returns the parsed
-- expressions as an array, which is empty when no comma follows.
function pa_commaexprs()
  local items = {}
  while la() == "," do
    eat()
    items[#items + 1] = pa_expr()
  end
  return items
end
-- Parse a set expression; the first token (the "{") is consumed unchecked.
-- Three forms are recognized:
--   { }                      -> Set()
--   { e , e2 , ... }         -> Set(e, e2, ...)
--   { e | er , er2 , ... }   -> if e[0] == "<-":
--                                 Subset(e, er, er2, ...) with e[1] appended
--                               otherwise:
--                                 Setof(er, er2, ...) with e appended
-- The first expression is parsed with bound 4 so that a following "|"
-- (registered with top = 3) is not consumed as a binary operator.
-- Calls perror() when the token after "{" cannot start an expression.
--
-- The parsing steps are separate statements so that the order in which
-- tokens are consumed does not depend on how an expression list is
-- evaluated.
function pa_set()
  local unpack = unpack or table.unpack -- Lua 5.1 vs 5.2+
  eat()                                 -- the opening "{"
  if la() == "}" then
    eat()
    return Set()
  end
  if la_expr() then
    local e = pa_expr(4)
    if la() == "|" then
      eat()                             -- the "|"
      local er = pa_expr()
      local ers = pa_commaexprs()
      pa_setclose()
      local s
      if e[0] == "<-" then
        s = Subset(e, er, unpack(ers))
        table.insert(s, e[1])
      else
        s = Setof(er, unpack(ers))
        table.insert(s, e)
      end
      return s
    end
    local es = pa_commaexprs()
    pa_setclose()
    return Set(e, unpack(es))
  end
  perror("Not a set")
end
-- Is the lookahead one of the tokens that start a quantifier or lambda
-- expression ("Fa", "Ex" or a backslash)? Always returns a boolean.
function la_ql()
  local tok = la()
  return tok == "Fa" or tok == "Ex" or tok == "\\"
end
-- Require and consume a "." token (see pa_forced).
function pa_dot()
  local separator = "."
  return pa_forced(separator)
end
-- Parse a quantifier or lambda expression: <ql> expr "." expr, where the
-- first token <ql> is consumed unchecked (the caller visible in this file
-- invokes this only when la_ql() holds).
--   * <ql> is a backslash -> Lambda(varexpr, body)
--   * otherwise the variable expression must have "<-" or "in" in slot 0,
--     and the result is Expr {[0]=ql, varexpr[1], varexpr[2], body};
--     any other variable expression is reported through perror().
-- The check on the variable expression happens after the body is parsed.
--
-- The parsing steps are separate statements so that the order in which
-- tokens are consumed does not depend on how an expression list is
-- evaluated.
function pa_qlexpr()
  local ql = eat()
  local vare = pa_expr()
  pa_dot()
  local bodye = pa_expr()
  if ql == "\\" then return Lambda(vare, bodye) end
  if vare[0] == "<-" or vare[0] == "in" then
    return Expr {[0] = ql, vare[1], vare[2], bodye}
  end
  perror("varexpr must be `_ <- _' or `_ in _'")
end
-- Local Variables:
-- coding: raw-text-unix
-- ee-anchor-format: "«%s»"
-- End: