|
Warning: this is an htmlized version!
The original is here, and the conversion rules are here. |
-- gab-lpeg.lua: an explanation for the LPeg tricks used in gab.lua.
-- Latest version of this file:
-- http://angg.twu.net/gab/gab-lpeg.lua.html
-- (find-gab "gab-lpeg.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
-- Varsion: 2012may15
-- Public domain.
--
-- I sent the code below to lua-l in 2012may14. See:
-- http://article.gmane.org/gmane.comp.lang.lua.general/91233
-- See also:
-- http://angg.twu.net/gab/README.html
--
--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
loadlpeg()
--]==]
require "lpeg"
-- A nicer syntax for local variables in LPeg patterns.
set = function (name, o)
if getmetatable(o) == getmetatable(lpeg.P"") -- if o is a pattern
then return o:Cg(name) -- then name := eval(o)
else return lpeg.Cc(o):Cg(name) -- else name := o (constant)
end
end
get = function (name) return lpeg.Cb(name) end
group = function (cmds) return lpeg.Cg(cmds) end
-- A trivial example:
patt = set("x", "2") *
get("x") *
group(set("x", "3") *
get("x")
) *
get("x")
print(patt:match"") --> 2 3 2
-- A bigger example:
-- binary operators, with precedence.
--
-- Suppose that E is an expression in tree form - for example, this:
--
-- +
-- / \
-- E = * <
-- / \ / \
-- 2 3 4 5
--
-- Then to find out how to print E in infix notation we need to
-- determine, for each of its "wires", whether we need or not to add
-- parentheses around the subexpression under it. The best way to do
-- that is to associate to each operator three numbers, that we will
-- call "top", "left" and "right" (op.t, op.l, or.r). We get his:
-- ___
-- /19 \
-- / + \
-- |_18_20_|
-- / \()
-- ___ ___
-- /21 \ /17 \
-- / * \ / < \
-- |_20_22_| |_18_18_|
-- / \ / \
-- 2 3 4 5
--
-- The wires that need parentheses are the ones in which the number
-- above is higher than the number below, and we get:
--
-- 2 * 3 + (4 < 5)
--
-- This idea can be easily adapted to parsing. After reading the "*"
-- we need to parse its second argument - the longest "expression
-- under the `*'" possible. Let's look at a bigger example:
--
-- &______________.
-- | |
-- <=_. >__.
-- | | | |
-- 2 +__. 7 8
-- /------------------------\ | |
-- /----------------\ 3 *_____.
-- /-----------\ | |
-- /-------\ *__. 6
-- /---\ /---\ | |
-- 2 <= 3 + 4 * 5 * 6 & 7 > 8 4 5
--
-- The second argument for the `+' in that expression is the longest
-- "expression under the `+'" possible, starting at the "4". In the
-- grammar below, this will be an "expression without complements" -
-- just the "4" - followed by two "complements", "* 5" and "* 6",
-- which we will be added to the "4" using lpeg.Cf:
--
-- http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html#cap-f
--
-- Note: I took the idea of op.t, op.l and op.r from Andrej Bauer's
-- PLZoo (but he uses that only for printing, not for parsing)...
-- See:
--
-- http://andrej.com/plzoo/
-- http://andrej.com/plzoo/html/calc.html
-- http://www.gnu.org/software/bison/manual/html_node/Infix-Calc.html
ops = {}
newbinop = function (l, t, r, str)
for op in str:gmatch("[^ ]+") do ops[op] = {l=l, t=t, r=r} end
end
newbinop(10, 11, 12, "->")
newbinop(12, 13, 14, "or")
newbinop(14, 15, 16, "&")
newbinop(18, 17, 18, "== <= >= < > != <-")
newbinop(18, 19, 20, "+ -")
newbinop(20, 21, 22, "* /")
pack_bin = function (e1, op, e2) return "("..op.." "..e1.." "..e2..")" end
apply_all = function (e, T) return T[#T](e, unpack(T, 1, #T-1)) end
above = function (lop, rop) return lop == "" or ops[lop].r < ops[rop].t end
is_binop = function (subj, pos, c) if ops[c] then return true, c end end
is_under = function (subj, pos, lop, op)
if above(lop, op) then return true, op end
end
P = lpeg.P
V = lpeg.V
Cc = lpeg.Cc
sp = lpeg.S" \t\n"^0
sP = function (str) return sp * P(str) end
Expr_grammar = {
"Expr", -- initial rule name
Num = sp * (lpeg.R"09"^1):C(),
Parenexpr = sP"(" * V"Expr" * sP")",
--
Binop_any = sp * ((lpeg.P(2):Cmt(is_binop) +
lpeg.P(1):Cmt(is_binop))),
Binop_under = (get"lop" * V"Binop_any"):Cmt(is_under),
Binop = set("lop", V"Binop_under") * get("lop"),
--
Complement = group(V"Binop" * V"Expr_under" * Cc(pack_bin)):Ct(),
--
Expr_woc = V"Num" + V"Parenexpr",
Expr_under = (V"Expr_woc" * V"Complement"^0):Cf(apply_all),
Expr = group(set("lop", "") * V"Expr_under"),
}
Expr_pattern = lpeg.P(Expr_grammar)
print(Expr_pattern:match"2 <= 3 + 4 * 5 * 6 & 7 > 8")
--> (& (<= 2 (+ 3 (* (* 4 5) 6))) (> 7 8))
-- Local Variables:
-- coding: raw-text-unix
-- ee-anchor-format: "«%s»"
-- End: