Replace wxRegex bindings for lua with boost::regex bindings

API is mostly unchanged other than the addition of a lot more flags.
Should be less buggy since it has an actual test suite, and generally
has a more powerful regex syntax with better support for Unicode.

The bindings are written in MoonScript. For now the compiled form is
store in the repo for convenince.
This commit is contained in:
Thomas Goyne 2013-04-09 20:46:59 -07:00
parent 44188267d6
commit c556a47509
8 changed files with 753 additions and 401 deletions

View file

@ -1,103 +1,128 @@
-- Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
--
-- Permission to use, copy, modify, and distribute this software for any
-- purpose with or without fee is hereby granted, provided that the above
-- copyright notice and this permission notice appear in all copies.
--
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-- Get the wxRegex binding
local regex = aegisub.__init_regex()
-- Compiled regular expression type protoype
local re_proto = {}
local re_proto_mt = { __index = re_proto }
-- Convert an iterator to an array
local function to_table(...)
local arr = {}
local i = 1
for v in ... do
arr[i] = v
i = i + 1
local select_first
select_first = function(n, a, ...)
if n == 0 then
return
end
return arr
end
-- Return the first n elements from ...
local function select_first(n, a, ...)
if n == 0 then return end
return a, select_first(n - 1, ...)
end
-- Extract the flags from ..., bitwise OR them together, and move them to the
-- front of ...
local function unpack_args(...)
local n = select('#', ...)
local unpack_args
unpack_args = function(...)
local userdata_start = nil
for i = 1, n do
for i = 1, select('#', ...) do
local v = select(i, ...)
if type(v) == "userdata" then
if type(v) == 'userdata' then
userdata_start = i
break
end
end
if not userdata_start then
if not (userdata_start) then
return 0, ...
end
flags = regex.process_flags(select(userdata_start, ...))
if type(flags) == "string" then
local flags = regex.process_flags(select(userdata_start, ...))
if type(flags) == 'string' then
error(flags, 3)
end
return flags, select_first(userdata_start - 1, ...)
end
-- Verify that a valid value was passed for self
local function check_self(self)
if getmetatable(self) ~= re_proto_mt then
error("re method called with invalid self. You probably used . when : is needed.", 3)
end
end
-- Typecheck a variable and throw an error if it fails
local function check_arg(arg, expected_type, argn, func_name, level)
local check_arg
check_arg = function(arg, expected_type, argn, func_name, level)
if type(arg) ~= expected_type then
error(
string.format("Argument %d to %s should be a '%s', is '%s' (%s)",
argn, func_name, expected_type, type(arg), tostring(arg)),
level + 1)
return error("Argument " .. tostring(argn) .. " to " .. tostring(func_name) .. " should be a '" .. tostring(expected_type) .. "', is '" .. tostring(type(arg)) .. "' (" .. tostring(arg) .. ")", level + 1)
end
end
function re_proto.gsplit(self, str, skip_empty, max_split)
check_self(self)
check_arg(str, "string", 2, "gsplit", self._level)
if not max_split or max_split <= 0 then max_split = str:len() end
local function do_split()
local replace_match
replace_match = function(match, func, str, last, acc)
if last < match.last then
acc[#acc + 1] = str:sub(last, match.first - 1)
end
local repl = func(match.str, match.first, match.last)
if type(repl) == 'string' then
acc[#acc + 1] = repl
else
acc[#acc + 1] = match.str
end
return match.first, match.last + 1
end
local do_single_replace_fun
do_single_replace_fun = function(re, func, str, acc, pos)
local matches = re:match(str, pos)
if not (matches) then
return pos
end
local start
if #matches == 1 then
start = 1
else
start = 2
end
local last = pos
local first
for i = start, #matches do
first, last = replace_match(matches[i], func, str, last, acc)
end
if first == last then
acc[#acc + 1] = str:sub(last, last)
last = last + 1
end
return last, matches[1].first <= str:len()
end
local do_replace_fun
do_replace_fun = function(re, func, str, max)
local acc = { }
local pos = 1
local i
for i = 1, max do
local more
pos, more = do_single_replace_fun(re, func, str, acc, pos)
if not (more) then
max = i
break
end
end
return table.concat(acc, '') .. str:sub(pos)
end
local RegEx
do
local start
local _parent_0 = nil
local _base_0 = {
_check_self = function(self)
if not (self.__class == RegEx) then
return error('re method called with invalid self. You probably used . when : is needed.', 3)
end
end,
gsplit = function(self, str, skip_empty, max_split)
self:_check_self()
check_arg(str, 'string', 2, 'gsplit', self._level)
if not max_split or max_split <= 0 then
max_split = str:len()
end
start = 1
local prev = 1
local do_split
do_split = function()
if not str or str:len() == 0 then
return nil
return
end
if max_split == 0 or not regex.matches(self._regex, str) then
local ret = str
local first, last
if max_split > 0 then
first, last = regex.search(self._regex, str, start)
end
if not first or first > str:len() then
local ret = str:sub(prev, str:len())
str = nil
return ret
end
local first, last = regex.get_match(self._regex, str, 0)
local ret = str:sub(1, first - 1)
str = str:sub(last + 1)
local ret = str:sub(prev, first - 1)
prev = last + 1
start = 1 + (function()
if start >= last then
return start
else
return last
end
end)()
if skip_empty and ret:len() == 0 then
return do_split()
else
@ -105,197 +130,185 @@ function re_proto.gsplit(self, str, skip_empty, max_split)
return ret
end
end
return do_split
end,
split = function(self, str, skip_empty, max_split)
self:_check_self()
check_arg(str, 'string', 2, 'split', self._level)
return (function()
local _accum_0 = { }
local _len_0 = 1
for v in self:gsplit(str, skip_empty, max_split) do
_accum_0[_len_0] = v
_len_0 = _len_0 + 1
end
function re_proto.split(self, str, skip_empty, max_split)
check_self(self)
check_arg(str, "string", 2, "split", self._level)
return to_table(self:gsplit(str, skip_empty, max_split))
end
function re_proto.gfind(self, str)
check_self(self)
check_arg(str, "string", 2, "gfind", self._level)
local offset = 0
return _accum_0
end)()
end,
gfind = function(self, str)
self:_check_self()
check_arg(str, 'string', 2, 'gfind', self._level)
start = 1
return function()
local has_matches = regex.matches(self._regex, str)
if not has_matches then return end
local first, last = regex.get_match(self._regex, str, 0)
local ret = str:sub(first, last)
str = str:sub(last + 1)
last = last + offset
offset = offset + first
return ret, offset, last
local first, last = regex.search(self._regex, str, start)
if not (first) then
return
end
if last >= start then
start = last + 1
else
start = start + 1
end
function re_proto.find(self, str)
check_self(self)
check_arg(str, "string", 2, "find", self._level)
local i = 1
local ret = {}
return str:sub(first, last), first, last
end
end,
find = function(self, str)
self:_check_self()
check_arg(str, 'string', 2, 'find', self._level)
local ret = (function()
local _accum_0 = { }
local _len_0 = 1
for s, f, l in self:gfind(str) do
ret[i] = {
_accum_0[_len_0] = {
str = s,
first = f,
last = l
}
i = i + 1
_len_0 = _len_0 + 1
end
return ret
return _accum_0
end)()
return next(ret) and ret
end,
sub = function(self, str, repl, max_count)
self:_check_self()
check_arg(str, 'string', 2, 'sub', self._level)
if max_count ~= nil then
check_arg(max_count, 'number', 4, 'sub', self._level)
end
-- Replace a match with the value returned from func when passed the match
local function replace_match(match, func, str, last, acc)
if last < match.last then
acc[#acc + 1] = str:sub(last, match.first - 1)
if not max_count or max_count == 0 then
max_count = str:len() + 1
end
local ret = func(match.str, match.first, match.last)
if type(ret) == "string" then
acc[#acc + 1] = ret
if type(repl) == 'function' then
return do_replace_fun(self, repl, str, max_count)
elseif type(repl) == 'string' then
return regex.replace(self._regex, repl, str, max_count)
else
-- If it didn't return a string just leave the old value
acc[#acc + 1] = match.str
return error("Argument 2 to sub should be a string or function, is '" .. tostring(type(repl)) .. "' (" .. tostring(repl) .. ")", self._level)
end
return match.last + 1
end
-- Replace all matches from a single iteration of the regexp
local function do_single_replace_fun(re, func, str, acc)
local matches = re:match(str)
-- No more matches so just return what we have so far
if not matches then
return str
end
-- One match means no capturing groups, so pass the entire thing to
-- the replace function
if #matches == 1 then
local rest = replace_match(matches[1], func, str, 1, acc)
return str:sub(rest), true
end
-- Multiple matches means there were capture groups, so skip the first one
-- and pass the rest to the replace function
local last = 1
for i = 2, #matches do
last = replace_match(matches[i], func, str, last, acc)
end
return str:sub(last), true
end
local function do_replace_fun(re, func, str, max)
local acc = {}
local i
for i = 1, max do
str, continue = do_single_replace_fun(re, func, str, acc)
if not continue then max = i end
end
return table.concat(acc, "") .. str, max
end
function re_proto.sub(self, str, repl, count)
check_self(self)
check_arg(str, "string", 2, "sub", self._level)
if count ~= nil then
check_arg(count, "number", 4, "sub", self._level)
end
if not count or count == 0 then count = str:len() end
if type(repl) == "function" then
return do_replace_fun(self, repl, str, count)
elseif type(repl) == "string" then
return regex.replace(self._regex, repl, str, count)
end,
gmatch = function(self, str, start)
self:_check_self()
check_arg(str, 'string', 2, 'gmatch', self._level)
if start then
start = start - 1
else
error(
string.format("Argument 2 to sub should be a string or function, is '%s' (%s)",
type(repl), tostring(repl)),
self._level)
start = 0
end
end
function re_proto.gmatch(self, str)
check_self(self)
check_arg(str, "string", 2, "gmatch", self._level)
local match_count = regex.match_count(self._regex, str)
local i = 0
local match = regex.match(self._regex, str, start)
local i = 1
return function()
if i == match_count then return end
if not (match) then
return
end
local first, last = regex.get_match(match, i)
if not (first) then
return
end
i = i + 1
local first, last = regex.get_match(self._regex, str, i - 1)
return {
str = str:sub(first, last),
first = first,
last = last
str = str:sub(first + start, last + start),
first = first + start,
last = last + start
}
end
end,
match = function(self, str, start)
self:_check_self()
check_arg(str, 'string', 2, 'match', self._level)
local ret = (function()
local _accum_0 = { }
local _len_0 = 1
for v in self:gmatch(str, start) do
_accum_0[_len_0] = v
_len_0 = _len_0 + 1
end
return _accum_0
end)()
if next(ret) == nil then
return nil
end
function re_proto.match(self, str)
check_self(self)
check_arg(str, "string", 2, "match", self._level)
local ret = to_table(self:gmatch(str))
-- Return nil rather than a empty table so that if re.match(...) works
if next(ret) == nil then return end
return ret
end
-- Create a wxRegExp object from a pattern, flags, and error depth
local function real_compile(pattern, level, flags, stored_level)
local regex = regex.compile(pattern, flags)
if not regex then
error("Bad syntax in regular expression", level + 1)
}
_base_0.__index = _base_0
if _parent_0 then
setmetatable(_base_0, _parent_0.__base)
end
return setmetatable({
_regex = regex,
_level = stored_level or level + 1
},
re_proto_mt)
local _class_0 = setmetatable({
__init = function(self, _regex, _level)
self._regex, self._level = _regex, _level
end,
__base = _base_0,
__name = "RegEx",
__parent = _parent_0
}, {
__index = function(cls, name)
local val = rawget(_base_0, name)
if val == nil and _parent_0 then
return _parent_0[name]
else
return val
end
-- Compile a pattern then invoke a method on it
local function invoke(str, pattern, fn, flags, ...)
local comp = real_compile(pattern, 3, flags)
return comp[fn](comp, str, ...)
end,
__call = function(cls, ...)
local _self_0 = setmetatable({}, _base_0)
cls.__init(_self_0, ...)
return _self_0
end
-- Generate a static version of a method with arg type checking
local function gen_wrapper(impl_name)
})
_base_0.__class = _class_0
local self = _class_0
start = 1
if _parent_0 and _parent_0.__inherited then
_parent_0.__inherited(_parent_0, _class_0)
end
RegEx = _class_0
end
local real_compile
real_compile = function(pattern, level, flags, stored_level)
if pattern == '' then
error('Regular expression must not be empty', level + 1)
end
local re = regex.compile(pattern, flags)
if type(re) == 'string' then
error(regex, level + 1)
end
return RegEx(re, stored_level or level + 1)
end
local invoke
invoke = function(str, pattern, fn, flags, ...)
local compiled_regex = real_compile(pattern, 3, flags)
return compiled_regex[fn](compiled_regex, str, ...)
end
local gen_wrapper
gen_wrapper = function(impl_name)
return function(str, pattern, ...)
check_arg(str, "string", 1, impl_name, 2)
check_arg(pattern, "string", 2, impl_name, 2)
check_arg(str, 'string', 1, impl_name, 2)
check_arg(pattern, 'string', 2, impl_name, 2)
return invoke(str, pattern, impl_name, unpack_args(...))
end
end
-- And now at last the actual public API
local re = regex.init_flags(re)
function re.compile(pattern, ...)
check_arg(pattern, "string", 1, "compile", 2)
re.compile = function(pattern, ...)
check_arg(pattern, 'string', 1, 'compile', 2)
return real_compile(pattern, 2, regex.process_flags(...), 2)
end
re.split = gen_wrapper("split")
re.gsplit = gen_wrapper("gsplit")
re.find = gen_wrapper("find")
re.gfind = gen_wrapper("gfind")
re.match = gen_wrapper("match")
re.gmatch = gen_wrapper("gmatch")
re.sub = gen_wrapper("sub")
_G.re = re
return _G.re
re.split = gen_wrapper('split')
re.gsplit = gen_wrapper('gsplit')
re.find = gen_wrapper('find')
re.gfind = gen_wrapper('gfind')
re.match = gen_wrapper('match')
re.gmatch = gen_wrapper('gmatch')
re.sub = gen_wrapper('sub')
return re

View file

@ -0,0 +1,244 @@
-- Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
--
-- Permission to use, copy, modify, and distribute this software for any
-- purpose with or without fee is hereby granted, provided that the above
-- copyright notice and this permission notice appear in all copies.
--
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-- Get the boost::regex binding
regex = aegisub.__init_regex()
-- Return the first n elements from ...
select_first = (n, a, ...) ->
if n == 0 then return
a, select_first n - 1, ...
-- Extract the flags from ..., bitwise OR them together, and move them to the
-- front of ...
unpack_args = (...) ->
userdata_start = nil
for i = 1, select '#', ...
v = select i, ...
if type(v) == 'userdata'
userdata_start = i
break
return 0, ... unless userdata_start
flags = regex.process_flags select userdata_start, ...
if type(flags) == 'string'
error(flags, 3)
flags, select_first userdata_start - 1, ...
-- Typecheck a variable and throw an error if it fails
check_arg = (arg, expected_type, argn, func_name, level) ->
if type(arg) != expected_type
error "Argument #{argn} to #{func_name} should be a '#{expected_type}', is '#{type(arg)}' (#{arg})",
level + 1
-- Replace a match with the value returned from func when passed the match
replace_match = (match, func, str, last, acc) ->
-- Copy everything between the last match and this match
if last < match.last
acc[#acc + 1] = str\sub last, match.first - 1
repl = func match.str, match.first, match.last
-- If it didn't return a string just leave the old value
acc[#acc + 1] = if type(repl) == 'string' then repl else match.str
match.first, match.last + 1
-- Replace all matches from a single iteration of the regexp
do_single_replace_fun = (re, func, str, acc, pos) ->
matches = re\match str, pos
-- No more matches so just return what's left of the input
return pos unless matches
-- If there's only one match then there's no capturing groups and we need
-- to pass the entire match to the replace function, but if there's
-- multiple then we want to skip the full match and only pass the capturing
-- groups.
start = if #matches == 1 then 1 else 2
last = pos
local first
for i = start, #matches
first, last = replace_match matches[i], func, str, last, acc
-- Always eat at least one character from the input or we'll just make the
-- same match max_count times
if first == last
acc[#acc + 1] = str\sub last, last
last += 1
return last, matches[1].first <= str\len()
do_replace_fun = (re, func, str, max) ->
acc = {}
pos = 1
local i
for i = 1, max do
pos, more = do_single_replace_fun re, func, str, acc, pos
unless more
max = i
break
table.concat(acc, '') .. str\sub pos
-- Compiled regular expression type protoype
class RegEx
-- Verify that a valid value was passed for self
_check_self: =>
unless @__class == RegEx
error 're method called with invalid self. You probably used . when : is needed.', 3
new: (@_regex, @_level) =>
start = 1
gsplit: (str, skip_empty, max_split) =>
@_check_self!
check_arg str, 'string', 2, 'gsplit', @_level
if not max_split or max_split <= 0 then max_split = str\len()
start = 1
prev = 1
do_split = () ->
if not str or str\len() == 0 then return
local first, last
if max_split > 0
first, last = regex.search @_regex, str, start
if not first or first > str\len()
ret = str\sub prev, str\len()
str = nil
return ret
ret = str\sub prev, first - 1
prev = last + 1
start = 1 + if start >= last then start else last
if skip_empty and ret\len() == 0
do_split()
else
max_split -= 1
ret
do_split
split: (str, skip_empty, max_split) =>
@_check_self!
check_arg str, 'string', 2, 'split', @_level
[v for v in @gsplit str, skip_empty, max_split]
gfind: (str) =>
@_check_self!
check_arg str, 'string', 2, 'gfind', @_level
start = 1
->
first, last = regex.search(@_regex, str, start)
return unless first
start = if last >= start then last + 1 else start + 1
str\sub(first, last), first, last
find: (str) =>
@_check_self!
check_arg str, 'string', 2, 'find', @_level
ret = [str: s, first: f, last: l for s, f, l in @gfind(str)]
next(ret) and ret
sub: (str, repl, max_count) =>
@_check_self!
check_arg str, 'string', 2, 'sub', @_level
if max_count != nil
check_arg max_count, 'number', 4, 'sub', @_level
max_count = str\len() + 1 if not max_count or max_count == 0
if type(repl) == 'function'
do_replace_fun @, repl, str, max_count
elseif type(repl) == 'string'
regex.replace @_regex, repl, str, max_count
else
error "Argument 2 to sub should be a string or function, is '#{type(repl)}' (#{repl})", @_level
gmatch: (str, start) =>
@_check_self!
check_arg str, 'string', 2, 'gmatch', @_level
start = if start then start - 1 else 0
match = regex.match @_regex, str, start
i = 1
->
return unless match
first, last = regex.get_match match, i
return unless first
i += 1
{
str: str\sub first + start, last + start
first: first + start
last: last + start
}
match: (str, start) =>
@_check_self!
check_arg(str, 'string', 2, 'match', @_level)
ret = [v for v in @gmatch str, start]
-- Return nil rather than a empty table so that if re.match(...) works
return nil if next(ret) == nil
ret
-- Create a regex object from a pattern, flags, and error depth
real_compile = (pattern, level, flags, stored_level) ->
if pattern == ''
error 'Regular expression must not be empty', level + 1
re = regex.compile pattern, flags
if type(re) == 'string'
error regex, level + 1
RegEx re, stored_level or level + 1
-- Compile a pattern then invoke a method on it
invoke = (str, pattern, fn, flags, ...) ->
compiled_regex = real_compile(pattern, 3, flags)
compiled_regex[fn](compiled_regex, str, ...)
-- Generate a static version of a method with arg type checking
gen_wrapper = (impl_name) ->
(str, pattern, ...) ->
check_arg str, 'string', 1, impl_name, 2
check_arg pattern, 'string', 2, impl_name, 2
invoke str, pattern, impl_name, unpack_args ...
-- And now at last the actual public API
re = regex.init_flags(re)
re.compile = (pattern, ...) ->
check_arg pattern, 'string', 1, 'compile', 2
real_compile pattern, 2, regex.process_flags(...), 2
re.split = gen_wrapper 'split'
re.gsplit = gen_wrapper 'gsplit'
re.find = gen_wrapper 'find'
re.gfind = gen_wrapper 'gfind'
re.match = gen_wrapper 'match'
re.gmatch = gen_wrapper 'gmatch'
re.sub = gen_wrapper 'sub'
re

View file

@ -327,6 +327,7 @@
<ClCompile Include="$(SrcDir)auto4_lua_dialog.cpp" />
<ClCompile Include="$(SrcDir)auto4_lua_progresssink.cpp" />
<ClCompile Include="$(SrcDir)auto4_lua_scriptreader.cpp" />
<ClCompile Include="$(SrcDir)auto4_regex.cpp" />
<ClCompile Include="$(SrcDir)avisynth_wrap.cpp" />
<ClCompile Include="$(SrcDir)base_grid.cpp" />
<ClCompile Include="$(SrcDir)charset_detect.cpp" />

View file

@ -1238,6 +1238,9 @@
<ClCompile Include="$(SrcDir)subs_controller.cpp">
<Filter>ASS</Filter>
</ClCompile>
<ClCompile Include="$(SrcDir)auto4_regex.cpp">
<Filter>Automation\Lua</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="$(SrcDir)res.rc">

View file

@ -92,11 +92,11 @@ endif
# AUTOMATION
############
ifeq (yes, $(HAVE_AUTO4_LUA))
auto4_lua.o auto4_lua_assfile.o auto4_lua_dialog.o auto4_lua_progresssink.o auto4_lua_scriptreader.o: CXXFLAGS += $(CFLAGS_LUA)
auto4_lua.o auto4_lua_assfile.o auto4_lua_dialog.o auto4_lua_progresssink.o auto4_lua_regex.o auto4_lua_scriptreader.o: CXXFLAGS += $(CFLAGS_LUA)
LIBS += $(LIBS_LUA)
SRC += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_lua_scriptreader.cpp
SRC += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_regex.cpp auto4_lua_scriptreader.cpp
else
EXTRA_DIST += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_lua_scriptreader.cpp
EXTRA_DIST += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_regex.cpp auto4_lua_scriptreader.cpp
endif
##############

View file

@ -71,7 +71,6 @@
#include <wx/filename.h>
#include <wx/log.h>
#include <wx/msgdlg.h>
#include <wx/regex.h>
#include <wx/window.h>
namespace {
@ -111,125 +110,6 @@ namespace {
return 1;
}
inline wxRegEx *get_regex(lua_State *L)
{
return static_cast<wxRegEx*>(luaL_checkudata(L, 1, "aegisub.regex"));
}
int regex_matches(lua_State *L)
{
lua_pushboolean(L, get_regex(L)->Matches(check_wxstring(L, 2)));
return 1;
}
int regex_match_count(lua_State *L)
{
wxRegEx *re = get_regex(L);
if (re->Matches(check_wxstring(L, 2)))
push_value(L, re->GetMatchCount());
else
push_value(L, 0);
return 1;
}
size_t utf8_len(wxString const& w)
{
#if wxUSE_UNICODE_UTF8
return w.utf8_length();
#else
return w.utf8_str().length();
#endif
}
int regex_get_match(lua_State *L)
{
wxString str(check_wxstring(L, 2));
size_t start, len;
get_regex(L)->GetMatch(&start, &len, luaL_checkinteger(L, 3));
push_value(L, utf8_len(str.Left(start)) + 1);
push_value(L, utf8_len(str.Left(start + len)));
return 2;
}
int regex_replace(lua_State *L)
{
wxString str(check_wxstring(L, 3));
int reps = get_regex(L)->Replace(&str, check_wxstring(L, 2), luaL_checkinteger(L, 4));
push_value(L, str);
push_value(L, reps);
return 2;
}
int regex_compile(lua_State *L)
{
wxString pattern(check_wxstring(L, 1));
int flags = luaL_checkinteger(L, 2);
wxRegEx *re = static_cast<wxRegEx*>(lua_newuserdata(L, sizeof(wxRegEx)));
new(re) wxRegEx(pattern, wxRE_ADVANCED | flags);
luaL_getmetatable(L, "aegisub.regex");
lua_setmetatable(L, -2);
// return nil and handle the error in lua as it's a bit easier to
// report the actual call site from there
if (!re->IsValid()) {
lua_pop(L, 1);
lua_pushnil(L);
}
return 1;
}
int regex_gc(lua_State *L) {
get_regex(L)->~wxRegEx();
return 0;
}
int regex_process_flags(lua_State *L)
{
int ret = 0;
int nargs = lua_gettop(L);
for (int i = 1; i <= nargs; ++i) {
if (!lua_islightuserdata(L, i)) {
push_value(L, "Flags must follow all non-flag arguments");
return 1;
}
ret |= (int)(intptr_t)lua_touserdata(L, i);
}
push_value(L, ret);
return 1;
}
int regex_init_flags(lua_State *L)
{
lua_newtable(L);
set_field(L, "ICASE", (void*)wxRE_ICASE);
set_field(L, "NOSUB", (void*)wxRE_NOSUB);
set_field(L, "NEWLINE", (void*)wxRE_NEWLINE);
return 1;
}
int regex_init(lua_State *L)
{
if (luaL_newmetatable(L, "aegisub.regex")) {
set_field(L, "__gc", regex_gc);
lua_pop(L, 1);
}
lua_newtable(L);
set_field(L, "matches", regex_matches);
set_field(L, "match_count", regex_match_count);
set_field(L, "get_match", regex_get_match);
set_field(L, "replace", regex_replace);
set_field(L, "compile", regex_compile);
set_field(L, "process_flags", regex_process_flags);
set_field(L, "init_flags", regex_init_flags);
return 1;
}
int clipboard_get(lua_State *L)
{
std::string data = GetClipboard();
@ -274,6 +154,8 @@ namespace {
}
namespace Automation4 {
int regex_init(lua_State *L);
// LuaScript
LuaScript::LuaScript(agi::fs::path const& filename)
: Script(filename)

View file

@ -33,6 +33,7 @@ inline void push_value(lua_State *L, const char *value) { lua_pushstring(L, valu
inline void push_value(lua_State *L, double value) { lua_pushnumber(L, value); }
inline void push_value(lua_State *L, int value) { lua_pushinteger(L, value); }
inline void push_value(lua_State *L, size_t value) { lua_pushinteger(L, value); }
inline void push_value(lua_State *L, long value) { lua_pushinteger(L, value); }
inline void push_value(lua_State *L, void *p) { lua_pushlightuserdata(L, p); }
inline void push_value(lua_State *L, wxString const& value) {

208
aegisub/src/auto4_regex.cpp Normal file
View file

@ -0,0 +1,208 @@
// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Aegisub Project http://www.aegisub.org/
#include "config.h"
#ifdef WITH_AUTO4_LUA
#include "auto4_lua_utils.h"
#include <boost/regex/icu.hpp>
namespace {
boost::u32regex& get_regex(lua_State *L) {
return *static_cast<boost::u32regex*>(luaL_checkudata(L, 1, "aegisub.regex"));
}
boost::smatch& get_smatch(lua_State *L) {
return *static_cast<boost::smatch*>(luaL_checkudata(L, 1, "aegisub.smatch"));
}
int regex_matches(lua_State *L) {
lua_pushboolean(L, u32regex_match(luaL_checkstring(L, 2), get_regex(L)));
return 1;
}
int regex_match(lua_State *L) {
auto re = get_regex(L);
std::string str = luaL_checkstring(L, 2);
int start = lua_tointeger(L, 3);
auto result = static_cast<boost::smatch*>(lua_newuserdata(L, sizeof(boost::smatch)));
new(result) boost::smatch;
luaL_getmetatable(L, "aegisub.smatch");
lua_setmetatable(L, -2);
if (!u32regex_search(str.cbegin() + start, str.cend(), *result, re,
start > 0 ? boost::match_prev_avail | boost::match_not_bob : boost::match_default))
{
lua_pop(L, 1);
lua_pushnil(L);
}
return 1;
}
int regex_get_match(lua_State *L) {
auto match = get_smatch(L);
int idx = luaL_checkinteger(L, 2) - 1;
if (static_cast<size_t>(idx) > match.size() || !match[idx].matched) {
lua_pushnil(L);
return 1;
}
push_value(L, distance(match.prefix().first, match[idx].first + 1));
push_value(L, distance(match.prefix().first, match[idx].second));
return 2;
}
int regex_search(lua_State *L) {
auto re = get_regex(L);
std::string str = luaL_checkstring(L, 2);
int start = luaL_checkinteger(L, 3) - 1;
boost::smatch result;
if (!u32regex_search(str.cbegin() + start, str.cend(), result, re,
start > 0 ? boost::match_prev_avail | boost::match_not_bob : boost::match_default))
{
lua_pushnil(L);
return 1;
}
push_value(L, start + result.position() + 1);
push_value(L, start + result.position() + result.length());
return 2;
}
int regex_replace(lua_State *L) {
auto re = get_regex(L);
const auto replacement = luaL_checkstring(L, 2);
const std::string str = luaL_checkstring(L, 3);
int max_count = luaL_checkinteger(L, 4);
// Can't just use regex_replace here since it can only do one or infinite replacements
auto match = boost::u32regex_iterator<std::string::const_iterator>(begin(str), end(str), re);
auto end_it = boost::u32regex_iterator<std::string::const_iterator>();
auto suffix = begin(str);
std::string ret;
auto out = back_inserter(ret);
while (match != end_it && max_count > 0) {
copy(suffix, match->prefix().second, out);
match->format(out, replacement);
suffix = match->suffix().first;
++match;
--max_count;
}
copy(suffix, end(str), out);
push_value(L, ret);
return 1;
}
int regex_compile(lua_State *L) {
std::string pattern(luaL_checkstring(L, 1));
int flags = luaL_checkinteger(L, 2);
boost::u32regex *re = static_cast<boost::u32regex*>(lua_newuserdata(L, sizeof(boost::u32regex)));
try {
new(re) boost::u32regex;
*re = boost::make_u32regex(pattern, boost::u32regex::perl | flags);
}
catch (std::exception const& e) {
lua_pop(L, 1);
push_value(L, e.what());
return 1;
// Do the actual triggering of the error in the Lua code as that code
// can report the original call site
}
luaL_getmetatable(L, "aegisub.regex");
lua_setmetatable(L, -2);
return 1;
}
int regex_gc(lua_State *L) {
using boost::u32regex;
get_regex(L).~u32regex();
return 0;
}
int smatch_gc(lua_State *L) {
using boost::smatch;
get_smatch(L).~smatch();
return 0;
}
int regex_process_flags(lua_State *L) {
int ret = 0;
int nargs = lua_gettop(L);
for (int i = 1; i <= nargs; ++i) {
if (!lua_islightuserdata(L, i)) {
push_value(L, "Flags must follow all non-flag arguments");
return 1;
}
ret |= (int)(intptr_t)lua_touserdata(L, i);
}
push_value(L, ret);
return 1;
}
int regex_init_flags(lua_State *L) {
lua_newtable(L);
set_field(L, "ICASE", (void*)boost::u32regex::icase);
set_field(L, "NOSUB", (void*)boost::u32regex::nosubs);
set_field(L, "COLLATE", (void*)boost::u32regex::collate);
set_field(L, "NEWLINE_ALT", (void*)boost::u32regex::newline_alt);
set_field(L, "NO_MOD_M", (void*)boost::u32regex::no_mod_m);
set_field(L, "NO_MOD_S", (void*)boost::u32regex::no_mod_s);
set_field(L, "MOD_S", (void*)boost::u32regex::mod_s);
set_field(L, "MOD_X", (void*)boost::u32regex::mod_x);
set_field(L, "NO_EMPTY_SUBEXPRESSIONS", (void*)boost::u32regex::no_empty_expressions);
return 1;
}
}
namespace Automation4 {
int regex_init(lua_State *L) {
if (luaL_newmetatable(L, "aegisub.regex")) {
set_field(L, "__gc", regex_gc);
lua_pop(L, 1);
}
if (luaL_newmetatable(L, "aegisub.smatch")) {
set_field(L, "__gc", smatch_gc);
lua_pop(L, 1);
}
lua_newtable(L);
set_field(L, "matches", regex_matches);
set_field(L, "search", regex_search);
set_field(L, "match", regex_match);
set_field(L, "get_match", regex_get_match);
set_field(L, "replace", regex_replace);
set_field(L, "compile", regex_compile);
set_field(L, "process_flags", regex_process_flags);
set_field(L, "init_flags", regex_init_flags);
return 1;
}
}
#endif