|
Warning: this is an htmlized version!
The original is here, and the conversion rules are here. |
-- Scratch code that is not being used but that I do not want to
-- delete (yet).
--- Low-level parsing functions for blogme4 (for parsing arglists).
-- To explain these functions I need to explain what they are designed
-- to parse, and introduce some terminology. Here it goes.
--
-- The Language
-- ============
-- In the blogme "language" we have only three character classes:
-- "brackets" ('[' and ']'),
-- "space chars" (SPC, TAB, CR, NL, maybe FF), and
-- "word char" (everything else).
-- The "regular chars" are all those that are not brackets; i.e.,
-- the word chars plus the space chars.
-- A "block" is a sequence of chars that matches the lua pattern
-- "%b[]".
-- A "word" is a sequence of one or more word chars.
-- A "spaces" is a sequence of one or more space chars.
-- A "regulars" is a sequence of one or more regular chars.
-- A "long word" is a sequence of one or more (word | block)s.
-- A "very long word" is a sequence of one or more (regulars | block)s.
--
-- Evaluation
-- ==========
-- Let's start with an example.
-- If we define the blogme word HREF as this,
-- def [[ HREF 2 url,body "<a href=\"$url\">$body</a>" ]]
-- then evaluating this
-- [HREF http://foo/ bar plic]
-- calls
-- HREF("http://foo/", "bar plic"),
-- which returns:
-- '<a href="http://foo/">bar plic</a>'.
-- What matters to us now is that the "2" in "def [[ HREF 2 ... ]]"
-- says that "HREF", as a blogme word, is a function of two arguments,
-- parsed in the default way; this means that to build an arglist from
-- " http://foo/ bar plic"
-- we first skip spaces, then we parse a long word, then skip spaces
-- again, then we parse a "very long word" - because the default is
-- that the last argument is always a very long word.
--
-- Here is a more realistic example. Evaluating
-- [HREF http://[+ 1 2]/ bar [* [+ 1 2] [+ 3 4]][* 10 10]plic]
-- calls, after all the "+"s and "*"s, this:
-- HREF("http://3/", "bar 21100plic")
-- That is, apparently
-- [HREF http://[+ 1 2]/ bar [* [+ 1 2] [+ 3 4]][* 10 10]plic]
-- gets replaced by
-- [HREF http://3/ bar [* 3 7]100plic]
-- and that by:
-- [HREF http://3/ bar 21100plic]
-- More precisely: to obtain the "result" of parsing a long word, like
-- "http://[+ 1 2]/"
-- or a very long word, like
-- "bar [* [+ 1 2] [+ 3 4]][* 10 10]plic"
-- we leave the "words" and the "regulars" in them unchanged, we
-- evaluate the blocks in them, and we concatenate all the results...
--
-- Doing that fully involves a nasty recursion, and the best way that
-- I have found to factor the complexity out is to parse "tlongwords"
-- and "tverylongwords". The result of parsing this:
-- "http://[+ 1 2]/"
--  ^      ^     ^
--  1      8     14
-- as a tlongword is:
-- {"http://", {8, 15}, "/"}
-- The "literal" parts that do not require evaluation are stored as
-- strings, the other ones are stored as pairs - the position of the
-- '[' and the position after the corresponding ']'.
-- See: (find-angg "LUA/canvas2.lua" "Class")
-- (find-blogme3file "definers.lua")
-- require "eoo" -- (find-blogme4 "eoo.lua")
-- (find-blogme3 "brackets.lua" "parsers")
-- (find-blogme4 "brackets.lua")
-- (find-es "lua5" "lpeg-quickref")
-- (find-luamanualw3m "#5.4.1" "Patterns")
-- (find-es "lua5" "lpeg-quickref")
-- (find-luamanualw3m "#5.4.1" "Patterns")
-- Scratch test: load the bracket helpers and dump the bracket structure
-- of a string, first pretty-printed and then as a raw value via PP.
-- NOTE(review): `str` is not defined anywhere in the visible part of this
-- file; it has to be set (as a global) before these lines are run.
-- NOTE(review): printbracketstructure and bracketstructure are presumably
-- defined by the "brackets" module, and PP elsewhere - confirm.
require "brackets" -- (find-blogme4 "brackets.lua")
printbracketstructure(str)
PP(bracketstructure(str))
-- (find-angg "LUA/canvas2.lua")
--- Overwrite part of a string, starting at a given position.
-- The chars of `str` from position `pos` onwards are replaced by `other`
-- (as many chars as `other` has); what comes after them is kept.
-- When `str` has fewer than `pos - 1` chars it is first padded with
-- spaces, so that `other` always starts exactly at position `pos`.
-- @param str   the string to write onto
-- @param other the string to write
-- @param pos   1-based position in `str` where `other` will start
-- @return the new string (`str` itself is not modified)
string.replace = function (str, other, pos)
  local prefixlen = pos - 1
  local prefix = str:sub(1, prefixlen)
  local missing = prefixlen - #prefix
  if missing > 0 then
    -- `str` ends before `pos`: fill the gap with spaces.
    prefix = prefix .. string.rep(" ", missing)
  end
  local suffix = str:sub(pos + #other)
  return prefix .. other .. suffix
end
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
-- Environment setup for the lpeg experiments below.
-- NOTE(review): userocks and loadlpeg are not defined in this file;
-- presumably they set up the LuaRocks search paths and load LPeg into the
-- global `lpeg` that the following definitions use - confirm.
userocks()
loadlpeg()
--- Turn a Lua string pattern into a matcher function.
-- The returned function takes a subject string and a start position and
-- returns whatever `subj:match(pat, pos)` returns (nil when the pattern
-- does not match).
-- @param pat a Lua string pattern (not an lpeg pattern)
-- @return a function (subj, pos) -> results of string matching
lpeg.pattern = function (pat)
  local function matcher (subj, pos)
    return subj:match(pat, pos)
  end
  return matcher
end
-- Character classes: brackets, space chars, and the two classes derived
-- from them (regular = not a bracket; word = neither bracket nor space).
BracketChar  = lpeg.S("[]")
SpaceChar    = lpeg.S(" \t\n\r")
RegularChar  = 1 - BracketChar
WordChar     = 1 - BracketChar - SpaceChar

-- Runs of chars from a single class.
Spaces       = SpaceChar^1
OptSpaces    = SpaceChar^0
Word         = WordChar^1
RegularChars = RegularChar^1

-- A balanced "[...]" block, matched with the Lua pattern "%b[]"; the "()"
-- position capture makes the matcher return the position after the "]".
-- NOTE(review): Block is a plain function; the definitions below rely on
-- lpeg coercing it into a pattern when it is combined with "+" - confirm.
Block        = lpeg.pattern("^%b[]()")

-- Sequences of one or more (block | word) and (block | regulars).
LongWord     = (Block + Word)^1
VeryLongWord = (Block + RegularChars)^1
-- Local Variables:
-- coding: raw-text-unix
-- ee-anchor-format: "«%s»"
-- End: