/* Warning: this is an htmlized version!
 * The original is here, and the conversion rules are here. */
/* lrexlib.c - POSIX regular expression library */
/* can use Spencer extensions for matching NULs if available */
/* Reimplement strfind and gsub; test with Spencer's lib */
/*
* Modified by Edrx; version 2001oct15.
* <http://angg.twu.net/lua-4.0/src/libdllua/lrexlib-new.c>
* Added (on top of rtt's version):
* «.regmatch» (to "regmatch")
* Some ways to produce a Lua that includes this lib:
* (find-angg "lua-4.0/")
* (find-angg "lua-4.0/README.yada")
* (find-angg "lua-4.0/README.other")
* (find-es "lua" "lua_4.0-0.3")
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include "lua.h"
#include "lauxlib.h"
#include "../lmem.h"
/*
 * regex(pattern) -> compiled regexp
 *
 * Compiles `pattern` as a POSIX extended regular expression and returns it
 * as a userdata carrying the library's regexp tag (received as the
 * closure's upvalue, which sits at index -1 on entry).
 *
 * Raises a Lua error carrying the regerror() text if compilation fails.
 * The compiled regex_t is allocated with luaM_new and is owned by the
 * returned userdata; it is released by the "gc" tag method.
 */
static int regex(lua_State *L) {
  int tag = lua_tonumber(L, -1);
  size_t l;
  const char *pattern;
  int res;
  regex_t *rp;

  lua_pop(L, 1); /* remove upvalue */

  /* Check the argument before allocating anything: a failed check raises
   * a Lua error and would otherwise leak the regex_t. */
  pattern = luaL_check_lstr(L, 1, &l);
  rp = luaM_new(L, regex_t);
#ifdef REG_BASIC
  /* With REG_PEND the pattern ends just before re_endp, which is a
   * pointer into the pattern, not a length. */
  rp->re_endp = pattern + l;
  res = regcomp(rp, pattern, REG_EXTENDED | REG_PEND);
#else
  res = regcomp(rp, pattern, REG_EXTENDED);
#endif
  if (res) {
    size_t sz = regerror(res, rp, NULL, 0);
    char *errbuf = luaM_newvector(L, sz, char);
    regerror(res, rp, errbuf, sz);
    /* lua_error does not return, so move the message into a Lua string
     * (kept alive by the stack) and release both C allocations first. */
    lua_pushstring(L, errbuf);
    luaM_free(L, errbuf);
    luaM_free(L, rp);
    lua_error(L, lua_tostring(L, -1));
  }
  lua_pushuserdata(L, rp);
  lua_settag(L, tag);
  return 1;
}
/*
* (find-node "(libc)POSIX Regexp Compilation")
* (find-node "(libc)POSIX Regexp Compilation" "Function: int regcomp")
* (find-man "7 regex")
* (find-man "7 regex" "Henry Spencer's")
* (find-fline "/usr/include/regex.h")
* (find-fline "/usr/include/regexp.h")
* (find-fline "/usr/include/regex.h" "#define RE_SYNTAX_EMACS 0")
* (find-fline "/usr/include/regex.h" "extern int re_match")
* REG_BASIC seems to be a BSDism.
* (find-luatag "lua_strlen")
* (find-luatag "str_sub")
* (find-luanode "Calling Lua Functions")
* (find-luafile "README.rttpatch" "match(s, r)")
*
* «regmatch» (to ".regmatch")
* regmatch(re, subj, [offset]) -> start, matchedstuff, match1, match2...
* Inputs:
* re: a compiled regexp. Example: re = regex("^([^\n]*)(\n?)")
* subj: the string to be searched.
* offset: where to start the search; 0 means from the beginning of
* the string. If nil, it is treated as 0.
* Outputs:
* If the search failed, then none. If it succeeded, then:
* start: where the match started (in # of chars after offset)
* matchedstuff: a string with the portion matched.
* match1: contents of the first ()
* match2: contents of the second ()
* etc.
*/
/*
 * regmatch(re, subj, [offset]) -> start, matchedstuff, match1, match2...
 *
 * Runs the compiled regexp `re` against `subj`, starting `offset` bytes
 * into the string (default 0). On failure returns nothing. On success
 * returns the start of the match relative to `offset`, the whole matched
 * text, and then one string per parenthesised group. A group that did not
 * take part in the match is returned as an empty string.
 *
 * Raises an argument error if `re` does not carry the regexp tag or if
 * `offset` lies outside the subject string.
 */
static int regmatch(lua_State *L) { /* edrx; experimental (but working ok) */
  int re_tag = lua_tonumber(L, -1); /* take it from the upvalue */
  regex_t *re;
  const char *subj;
  size_t subj_len;
  long subj_offset;
  int result;
  int nmatch, i;
  regmatch_t *pmatch;

  /* pop the upvalue: */
  lua_pop(L, 1);

  /* Get the arguments: */
  luaL_checkany(L, 1);
  re = (regex_t *)lua_touserdata(L, 1);
  if (lua_tag(L, 1) != re_tag)
    luaL_argerror(L, 1, "regular expression expected");
  subj = luaL_check_lstr(L, 2, &subj_len);
  subj_offset = luaL_opt_long(L, 3, 0);
  /* Reject offsets that would move subj outside the string. An offset
   * equal to the length is allowed: it searches the empty tail. */
  if (subj_offset < 0 || (size_t)subj_offset > subj_len)
    luaL_argerror(L, 3, "offset out of range");
  subj += subj_offset;

  /* Don't run if there isn't enough space in the Lua stack: */
  nmatch = re->re_nsub;
  luaL_checkstack(L, nmatch + 2, "too many captures");

  /* alloca is not ANSI, so the match vector comes from malloc; every exit
   * path below must free it. One slot for the whole match plus one per
   * group. */
  pmatch = malloc(sizeof *pmatch * ((size_t)nmatch + 1));
  if (pmatch == NULL)
    lua_error(L, "regmatch: not enough memory");

  result = regexec(re, subj, nmatch + 1, pmatch, 0);
  if (result) {
    free(pmatch);
    return 0; /* failed */
  }

  /* return the offset of the match, the full string matched, and the subs */
  lua_pushnumber(L, pmatch[0].rm_so);
  for (i = 0; i <= nmatch; i++) {
    if (pmatch[i].rm_so < 0) {
      /* group did not participate; avoid forming the pointer subj - 1 */
      lua_pushlstring(L, "", 0);
    } else {
      lua_pushlstring(L, subj + pmatch[i].rm_so,
                      pmatch[i].rm_eo - pmatch[i].rm_so);
    }
  }
  free(pmatch);
  return nmatch + 2;
}
/*
 * match(text, re) -> start, end, captures
 *
 * Runs the compiled regexp `re` (argument 2) against `text` (argument 1).
 * On success returns the 1-based start position of the match, its end
 * position, and a table holding the text of each participating group at
 * its group index plus the group count under key "n". On failure returns
 * three nils.
 *
 * Raises an argument error if `re` does not carry the regexp tag.
 */
static int match(lua_State *L) {
  int res, tag = lua_tonumber(L, -1);
  size_t l, nmatch, i;
  const char *text;
  regex_t *rp;
  regmatch_t *caps;

  lua_pop(L, 1); /* pop upvalue */
  text = luaL_check_lstr(L, 1, &l);
  luaL_checkany(L, 2);
  rp = (regex_t *)lua_touserdata(L, 2);
  if (lua_tag(L, 2) != tag)
    luaL_argerror(L, 2, "regular expression expected");
  nmatch = rp->re_nsub;
  luaL_checkstack(L, nmatch + 2, "too many captures");
  caps = luaM_newvector(L, nmatch + 1, regmatch_t);
#ifdef REG_BASIC
  /* REG_STARTEND: the subject is delimited by caps[0] instead of a NUL,
   * so the full Lua string length is passed in. */
  caps[0].rm_so = 0;
  caps[0].rm_eo = (regoff_t)l;
  res = regexec(rp, text, nmatch + 1, caps, REG_STARTEND);
#else
  res = regexec(rp, text, nmatch + 1, caps, 0);
#endif
  if (!res) {
    lua_pushnumber(L, caps[0].rm_so + 1);
    lua_pushnumber(L, caps[0].rm_eo);
    lua_newtable(L);
    for (i = 1; i <= nmatch; i++) {
      /* groups that did not participate are left out of the table */
      if (caps[i].rm_so >= 0) {
        lua_pushlstring(L, text + caps[i].rm_so,
                        caps[i].rm_eo - caps[i].rm_so);
        lua_rawseti(L, -2, i);
      }
    }
    lua_pushstring(L, "n");
    lua_pushnumber(L, nmatch);
    lua_rawset(L, -3);
  } else {
    lua_pushnil(L);
    lua_pushnil(L);
    lua_pushnil(L);
  }
  /* The match vector is scratch space for this call only; without this
   * release every call leaked it. */
  luaM_free(L, caps);
  return 3;
}
/*
 * "gc" tag method for compiled regexps: releases the regex_t behind a
 * userdata created by regex().
 *
 * The userdata is read from index 1. This closure is registered with one
 * upvalue, and the other closures in this file find their upvalue at index
 * -1, so index -1 here would be the tag number rather than the regexp.
 * NOTE(review): assumes the "gc" tag method receives the collected object
 * as its first argument -- confirm against the Lua 4.0 manual.
 */
static int rex_collect (lua_State *L) {
  regex_t *rp = (regex_t *)lua_touserdata(L, 1);
  if (rp != NULL) {
    regfree(rp);
    /* allocated with luaM_new in regex(), so release it the same way */
    luaM_free(L, rp);
  }
  return 0;
}
/* Functions exported to Lua as globals, as (name, C function) pairs.
 * lua_rexlibopen registers each entry as a closure whose single upvalue
 * is the regexp tag. */
static const struct luaL_reg rexlib[] = {
{"regex", regex},
{"regmatch", regmatch}, /* edrx */
{"match", match},
};
/*
 * Opens the regexp library in state L.
 *
 * Creates a fresh tag for compiled regexps, installs every function listed
 * in rexlib as a global closure carrying that tag as its upvalue, and sets
 * rex_collect as the tag's "gc" tag method.
 */
LUALIB_API void lua_rexlibopen(lua_State *L) {
  const struct luaL_reg *entry;
  const struct luaL_reg *const last = rexlib + sizeof(rexlib)/sizeof(rexlib[0]);
  const int regex_tag = lua_newtag(L);

  /* Keep one copy of the tag on the stack; each closure gets a duplicate. */
  lua_pushnumber(L, regex_tag);
  for (entry = rexlib; entry != last; ++entry) {
    lua_pushvalue(L, -1);                 /* duplicate the tag ... */
    lua_pushcclosure(L, entry->func, 1);  /* ... and bind it as the upvalue */
    lua_setglobal(L, entry->name);
  }

  /* The remaining copy of the tag is consumed by the collector closure,
   * which frees compiled regexps when they are collected. */
  lua_pushcclosure(L, rex_collect, 1);
  lua_settagmethod(L, regex_tag, "gc");
}