Replace wxRegex bindings for lua with boost::regex bindings

API is mostly unchanged other than the addition of a lot more flags.
Should be less buggy since it has an actual test suite, and generally
has a more powerful regex syntax with better support for Unicode.

The bindings are written in MoonScript. For now the compiled form is
stored in the repo for convenience.
This commit is contained in:
Thomas Goyne 2013-04-09 20:46:59 -07:00
parent 44188267d6
commit c556a47509
8 changed files with 753 additions and 401 deletions

View file

@ -1,103 +1,128 @@
-- Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
--
-- Permission to use, copy, modify, and distribute this software for any
-- purpose with or without fee is hereby granted, provided that the above
-- copyright notice and this permission notice appear in all copies.
--
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-- Get the wxRegex binding
local regex = aegisub.__init_regex() local regex = aegisub.__init_regex()
local select_first
-- Compiled regular expression type protoype select_first = function(n, a, ...)
local re_proto = {} if n == 0 then
local re_proto_mt = { __index = re_proto } return
-- Convert an iterator to an array
local function to_table(...)
local arr = {}
local i = 1
for v in ... do
arr[i] = v
i = i + 1
end end
return arr
end
-- Return the first n elements from ...
local function select_first(n, a, ...)
if n == 0 then return end
return a, select_first(n - 1, ...) return a, select_first(n - 1, ...)
end end
local unpack_args
-- Extract the flags from ..., bitwise OR them together, and move them to the unpack_args = function(...)
-- front of ...
local function unpack_args(...)
local n = select('#', ...)
local userdata_start = nil local userdata_start = nil
for i = 1, n do for i = 1, select('#', ...) do
local v = select(i, ...) local v = select(i, ...)
if type(v) == "userdata" then if type(v) == 'userdata' then
userdata_start = i userdata_start = i
break break
end end
end end
if not (userdata_start) then
if not userdata_start then
return 0, ... return 0, ...
end end
local flags = regex.process_flags(select(userdata_start, ...))
flags = regex.process_flags(select(userdata_start, ...)) if type(flags) == 'string' then
if type(flags) == "string" then
error(flags, 3) error(flags, 3)
end end
return flags, select_first(userdata_start - 1, ...) return flags, select_first(userdata_start - 1, ...)
end end
local check_arg
-- Verify that a valid value was passed for self check_arg = function(arg, expected_type, argn, func_name, level)
local function check_self(self)
if getmetatable(self) ~= re_proto_mt then
error("re method called with invalid self. You probably used . when : is needed.", 3)
end
end
-- Typecheck a variable and throw an error if it fails
local function check_arg(arg, expected_type, argn, func_name, level)
if type(arg) ~= expected_type then if type(arg) ~= expected_type then
error( return error("Argument " .. tostring(argn) .. " to " .. tostring(func_name) .. " should be a '" .. tostring(expected_type) .. "', is '" .. tostring(type(arg)) .. "' (" .. tostring(arg) .. ")", level + 1)
string.format("Argument %d to %s should be a '%s', is '%s' (%s)",
argn, func_name, expected_type, type(arg), tostring(arg)),
level + 1)
end end
end end
local replace_match
function re_proto.gsplit(self, str, skip_empty, max_split) replace_match = function(match, func, str, last, acc)
check_self(self) if last < match.last then
check_arg(str, "string", 2, "gsplit", self._level) acc[#acc + 1] = str:sub(last, match.first - 1)
if not max_split or max_split <= 0 then max_split = str:len() end end
local repl = func(match.str, match.first, match.last)
local function do_split() if type(repl) == 'string' then
acc[#acc + 1] = repl
else
acc[#acc + 1] = match.str
end
return match.first, match.last + 1
end
local do_single_replace_fun
do_single_replace_fun = function(re, func, str, acc, pos)
local matches = re:match(str, pos)
if not (matches) then
return pos
end
local start
if #matches == 1 then
start = 1
else
start = 2
end
local last = pos
local first
for i = start, #matches do
first, last = replace_match(matches[i], func, str, last, acc)
end
if first == last then
acc[#acc + 1] = str:sub(last, last)
last = last + 1
end
return last, matches[1].first <= str:len()
end
local do_replace_fun
do_replace_fun = function(re, func, str, max)
local acc = { }
local pos = 1
local i
for i = 1, max do
local more
pos, more = do_single_replace_fun(re, func, str, acc, pos)
if not (more) then
max = i
break
end
end
return table.concat(acc, '') .. str:sub(pos)
end
local RegEx
do
local start
local _parent_0 = nil
local _base_0 = {
_check_self = function(self)
if not (self.__class == RegEx) then
return error('re method called with invalid self. You probably used . when : is needed.', 3)
end
end,
gsplit = function(self, str, skip_empty, max_split)
self:_check_self()
check_arg(str, 'string', 2, 'gsplit', self._level)
if not max_split or max_split <= 0 then
max_split = str:len()
end
start = 1
local prev = 1
local do_split
do_split = function()
if not str or str:len() == 0 then if not str or str:len() == 0 then
return nil return
end end
local first, last
if max_split == 0 or not regex.matches(self._regex, str) then if max_split > 0 then
local ret = str first, last = regex.search(self._regex, str, start)
end
if not first or first > str:len() then
local ret = str:sub(prev, str:len())
str = nil str = nil
return ret return ret
end end
local ret = str:sub(prev, first - 1)
local first, last = regex.get_match(self._regex, str, 0) prev = last + 1
local ret = str:sub(1, first - 1) start = 1 + (function()
str = str:sub(last + 1) if start >= last then
return start
else
return last
end
end)()
if skip_empty and ret:len() == 0 then if skip_empty and ret:len() == 0 then
return do_split() return do_split()
else else
@ -105,197 +130,185 @@ function re_proto.gsplit(self, str, skip_empty, max_split)
return ret return ret
end end
end end
return do_split return do_split
end,
split = function(self, str, skip_empty, max_split)
self:_check_self()
check_arg(str, 'string', 2, 'split', self._level)
return (function()
local _accum_0 = { }
local _len_0 = 1
for v in self:gsplit(str, skip_empty, max_split) do
_accum_0[_len_0] = v
_len_0 = _len_0 + 1
end end
return _accum_0
function re_proto.split(self, str, skip_empty, max_split) end)()
check_self(self) end,
check_arg(str, "string", 2, "split", self._level) gfind = function(self, str)
return to_table(self:gsplit(str, skip_empty, max_split)) self:_check_self()
end check_arg(str, 'string', 2, 'gfind', self._level)
start = 1
function re_proto.gfind(self, str)
check_self(self)
check_arg(str, "string", 2, "gfind", self._level)
local offset = 0
return function() return function()
local has_matches = regex.matches(self._regex, str) local first, last = regex.search(self._regex, str, start)
if not has_matches then return end if not (first) then
return
local first, last = regex.get_match(self._regex, str, 0)
local ret = str:sub(first, last)
str = str:sub(last + 1)
last = last + offset
offset = offset + first
return ret, offset, last
end end
if last >= start then
start = last + 1
else
start = start + 1
end end
return str:sub(first, last), first, last
function re_proto.find(self, str) end
check_self(self) end,
check_arg(str, "string", 2, "find", self._level) find = function(self, str)
self:_check_self()
local i = 1 check_arg(str, 'string', 2, 'find', self._level)
local ret = {} local ret = (function()
local _accum_0 = { }
local _len_0 = 1
for s, f, l in self:gfind(str) do for s, f, l in self:gfind(str) do
ret[i] = { _accum_0[_len_0] = {
str = s, str = s,
first = f, first = f,
last = l last = l
} }
i = i + 1 _len_0 = _len_0 + 1
end end
return ret return _accum_0
end)()
return next(ret) and ret
end,
sub = function(self, str, repl, max_count)
self:_check_self()
check_arg(str, 'string', 2, 'sub', self._level)
if max_count ~= nil then
check_arg(max_count, 'number', 4, 'sub', self._level)
end end
if not max_count or max_count == 0 then
-- Replace a match with the value returned from func when passed the match max_count = str:len() + 1
local function replace_match(match, func, str, last, acc)
if last < match.last then
acc[#acc + 1] = str:sub(last, match.first - 1)
end end
if type(repl) == 'function' then
local ret = func(match.str, match.first, match.last) return do_replace_fun(self, repl, str, max_count)
if type(ret) == "string" then elseif type(repl) == 'string' then
acc[#acc + 1] = ret return regex.replace(self._regex, repl, str, max_count)
else else
-- If it didn't return a string just leave the old value return error("Argument 2 to sub should be a string or function, is '" .. tostring(type(repl)) .. "' (" .. tostring(repl) .. ")", self._level)
acc[#acc + 1] = match.str
end end
end,
return match.last + 1 gmatch = function(self, str, start)
end self:_check_self()
check_arg(str, 'string', 2, 'gmatch', self._level)
-- Replace all matches from a single iteration of the regexp if start then
local function do_single_replace_fun(re, func, str, acc) start = start - 1
local matches = re:match(str)
-- No more matches so just return what we have so far
if not matches then
return str
end
-- One match means no capturing groups, so pass the entire thing to
-- the replace function
if #matches == 1 then
local rest = replace_match(matches[1], func, str, 1, acc)
return str:sub(rest), true
end
-- Multiple matches means there were capture groups, so skip the first one
-- and pass the rest to the replace function
local last = 1
for i = 2, #matches do
last = replace_match(matches[i], func, str, last, acc)
end
return str:sub(last), true
end
local function do_replace_fun(re, func, str, max)
local acc = {}
local i
for i = 1, max do
str, continue = do_single_replace_fun(re, func, str, acc)
if not continue then max = i end
end
return table.concat(acc, "") .. str, max
end
function re_proto.sub(self, str, repl, count)
check_self(self)
check_arg(str, "string", 2, "sub", self._level)
if count ~= nil then
check_arg(count, "number", 4, "sub", self._level)
end
if not count or count == 0 then count = str:len() end
if type(repl) == "function" then
return do_replace_fun(self, repl, str, count)
elseif type(repl) == "string" then
return regex.replace(self._regex, repl, str, count)
else else
error( start = 0
string.format("Argument 2 to sub should be a string or function, is '%s' (%s)",
type(repl), tostring(repl)),
self._level)
end end
end local match = regex.match(self._regex, str, start)
local i = 1
function re_proto.gmatch(self, str)
check_self(self)
check_arg(str, "string", 2, "gmatch", self._level)
local match_count = regex.match_count(self._regex, str)
local i = 0
return function() return function()
if i == match_count then return end if not (match) then
return
end
local first, last = regex.get_match(match, i)
if not (first) then
return
end
i = i + 1 i = i + 1
local first, last = regex.get_match(self._regex, str, i - 1)
return { return {
str = str:sub(first, last), str = str:sub(first + start, last + start),
first = first, first = first + start,
last = last last = last + start
} }
end end
end,
match = function(self, str, start)
self:_check_self()
check_arg(str, 'string', 2, 'match', self._level)
local ret = (function()
local _accum_0 = { }
local _len_0 = 1
for v in self:gmatch(str, start) do
_accum_0[_len_0] = v
_len_0 = _len_0 + 1
end
return _accum_0
end)()
if next(ret) == nil then
return nil
end end
function re_proto.match(self, str)
check_self(self)
check_arg(str, "string", 2, "match", self._level)
local ret = to_table(self:gmatch(str))
-- Return nil rather than a empty table so that if re.match(...) works
if next(ret) == nil then return end
return ret return ret
end end
}
-- Create a wxRegExp object from a pattern, flags, and error depth _base_0.__index = _base_0
local function real_compile(pattern, level, flags, stored_level) if _parent_0 then
local regex = regex.compile(pattern, flags) setmetatable(_base_0, _parent_0.__base)
if not regex then
error("Bad syntax in regular expression", level + 1)
end end
return setmetatable({ local _class_0 = setmetatable({
_regex = regex, __init = function(self, _regex, _level)
_level = stored_level or level + 1 self._regex, self._level = _regex, _level
}, end,
re_proto_mt) __base = _base_0,
__name = "RegEx",
__parent = _parent_0
}, {
__index = function(cls, name)
local val = rawget(_base_0, name)
if val == nil and _parent_0 then
return _parent_0[name]
else
return val
end end
end,
-- Compile a pattern then invoke a method on it __call = function(cls, ...)
local function invoke(str, pattern, fn, flags, ...) local _self_0 = setmetatable({}, _base_0)
local comp = real_compile(pattern, 3, flags) cls.__init(_self_0, ...)
return comp[fn](comp, str, ...) return _self_0
end end
})
-- Generate a static version of a method with arg type checking _base_0.__class = _class_0
local function gen_wrapper(impl_name) local self = _class_0
start = 1
if _parent_0 and _parent_0.__inherited then
_parent_0.__inherited(_parent_0, _class_0)
end
RegEx = _class_0
end
local real_compile
real_compile = function(pattern, level, flags, stored_level)
if pattern == '' then
error('Regular expression must not be empty', level + 1)
end
local re = regex.compile(pattern, flags)
if type(re) == 'string' then
error(regex, level + 1)
end
return RegEx(re, stored_level or level + 1)
end
local invoke
invoke = function(str, pattern, fn, flags, ...)
local compiled_regex = real_compile(pattern, 3, flags)
return compiled_regex[fn](compiled_regex, str, ...)
end
local gen_wrapper
gen_wrapper = function(impl_name)
return function(str, pattern, ...) return function(str, pattern, ...)
check_arg(str, "string", 1, impl_name, 2) check_arg(str, 'string', 1, impl_name, 2)
check_arg(pattern, "string", 2, impl_name, 2) check_arg(pattern, 'string', 2, impl_name, 2)
return invoke(str, pattern, impl_name, unpack_args(...)) return invoke(str, pattern, impl_name, unpack_args(...))
end end
end end
-- And now at last the actual public API
local re = regex.init_flags(re) local re = regex.init_flags(re)
re.compile = function(pattern, ...)
function re.compile(pattern, ...) check_arg(pattern, 'string', 1, 'compile', 2)
check_arg(pattern, "string", 1, "compile", 2)
return real_compile(pattern, 2, regex.process_flags(...), 2) return real_compile(pattern, 2, regex.process_flags(...), 2)
end end
re.split = gen_wrapper('split')
re.split = gen_wrapper("split") re.gsplit = gen_wrapper('gsplit')
re.gsplit = gen_wrapper("gsplit") re.find = gen_wrapper('find')
re.find = gen_wrapper("find") re.gfind = gen_wrapper('gfind')
re.gfind = gen_wrapper("gfind") re.match = gen_wrapper('match')
re.match = gen_wrapper("match") re.gmatch = gen_wrapper('gmatch')
re.gmatch = gen_wrapper("gmatch") re.sub = gen_wrapper('sub')
re.sub = gen_wrapper("sub") return re
_G.re = re
return _G.re

View file

@ -0,0 +1,244 @@
-- Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
--
-- Permission to use, copy, modify, and distribute this software for any
-- purpose with or without fee is hereby granted, provided that the above
-- copyright notice and this permission notice appear in all copies.
--
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-- Get the boost::regex binding
regex = aegisub.__init_regex()
-- Return only the first n values of the argument list and drop the rest.
-- Nothing at all is returned when n is 0.
select_first = (n, a, ...) ->
  if n != 0
    return a, select_first n - 1, ...
-- Split the argument list into leading ordinary arguments and trailing flags.
-- Flags are userdata values; everything from the first userdata onwards is
-- handed to regex.process_flags, and its result is returned in front of the
-- ordinary arguments. Without any flags the result is 0 followed by all of
-- the arguments unchanged.
unpack_args = (...) ->
  argc = select '#', ...
  first_flag = nil
  for idx = 1, argc
    if type(select(idx, ...)) == 'userdata'
      first_flag = idx
      break
  if first_flag == nil
    return 0, ...
  combined = regex.process_flags select first_flag, ...
  -- process_flags signals a problem by returning the message as a string;
  -- raise it a few levels up so it is not attributed to this helper
  if type(combined) == 'string'
    error combined, 3
  combined, select_first first_flag - 1, ...
-- Verify that arg has the Lua type named by expected_type.
-- argn and func_name are only used to build the message; level is the error
-- level as seen by the caller, so one is added to skip this function itself.
check_arg = (arg, expected_type, argn, func_name, level) ->
  actual_type = type arg
  if actual_type != expected_type
    message = "Argument #{argn} to #{func_name} should be a '#{expected_type}', is '#{actual_type}' (#{arg})"
    error message, level + 1
-- Replace a match with the value returned from func when passed the match.
--   match: table with str, first and last fields describing the match
--   func:  called as func(match.str, match.first, match.last)
--   str:   the full input string
--   last:  position of the first input character not yet copied to acc
--   acc:   array of output pieces, appended to in place
-- Returns match.first and the position just after the match.
replace_match = (match, func, str, last, acc) ->
  -- Copy everything between the last match and this match. The comparison has
  -- to be against match.first: an empty match has last == first - 1, so
  -- comparing against match.last would skip the text in front of an empty
  -- match that sits exactly one character ahead.
  if last < match.first
    acc[#acc + 1] = str\sub last, match.first - 1
  repl = func match.str, match.first, match.last
  -- If it didn't return a string just leave the old value
  acc[#acc + 1] = if type(repl) == 'string' then repl else match.str
  match.first, match.last + 1
-- Run the regex once starting at pos and feed the result through func,
-- appending the output pieces to acc.
-- Returns the position to continue from and a boolean telling whether another
-- round should be attempted. When nothing matched only pos is returned.
do_single_replace_fun = (re, func, str, acc, pos) ->
  found = re\match str, pos
  -- No more matches so just hand back where the rest of the input starts
  if not found
    return pos
  -- A single entry means there are no capturing groups, so the whole match is
  -- passed to func. With several entries the first one is the whole match and
  -- only the capturing groups after it are passed on.
  count = #found
  first_group = count == 1 and 1 or 2
  local match_first
  next_pos = pos
  for idx = first_group, count
    match_first, next_pos = replace_match found[idx], func, str, next_pos, acc
  -- An empty match would otherwise be found again and again: copy one
  -- character through untouched and step past it
  if match_first == next_pos
    acc[#acc + 1] = str\sub next_pos, next_pos
    next_pos += 1
  next_pos, found[1].first <= #str
-- Apply func to successive matches of re in str, at most max times.
-- Replaced pieces are gathered in acc and joined once at the end; whatever
-- input follows the last processed position is appended unchanged.
do_replace_fun = (re, func, str, max) ->
  acc = {}
  pos = 1
  for _ = 1, max
    pos, more = do_single_replace_fun re, func, str, acc, pos
    -- a falsy second result means there is nothing further to match
    break unless more
  table.concat(acc, '') .. str\sub pos
-- Compiled regular expression type prototype
class RegEx
  -- Verify that a valid value was passed for self
  _check_self: =>
    unless @__class == RegEx
      error 're method called with invalid self. You probably used . when : is needed.', 3

  -- _regex is the compiled regex userdata, _level the error level used when
  -- reporting bad arguments from the methods below
  new: (@_regex, @_level) =>

  -- Return an iterator over the pieces of str between matches.
  -- skip_empty drops empty pieces; max_split limits the number of pieces
  -- produced by splitting (nil or <= 0 means no limit).
  gsplit: (str, skip_empty, max_split) =>
    @_check_self!
    check_arg str, 'string', 2, 'gsplit', @_level
    if not max_split or max_split <= 0 then max_split = str\len()
    -- The iterator state must be local to this call. Declaring it in the
    -- class body instead would make every iterator of every RegEx share one
    -- variable, so two iterators used at the same time would corrupt each
    -- other.
    local start, prev
    start = 1
    prev = 1
    do_split = () ->
      if not str or str\len() == 0 then return
      local first, last
      if max_split > 0
        first, last = regex.search @_regex, str, start
      -- No further match: return the remaining text and finish iteration
      if not first or first > str\len()
        ret = str\sub prev, str\len()
        str = nil
        return ret
      ret = str\sub prev, first - 1
      prev = last + 1
      -- Always advance the search position, even after an empty match
      start = 1 + if start >= last then start else last
      if skip_empty and ret\len() == 0
        do_split()
      else
        max_split -= 1
        ret
    do_split

  -- Same as gsplit, but collect all pieces into an array
  split: (str, skip_empty, max_split) =>
    @_check_self!
    check_arg str, 'string', 2, 'split', @_level
    [v for v in @gsplit str, skip_empty, max_split]

  -- Return an iterator producing (matched text, first, last) for each
  -- successive match in str
  gfind: (str) =>
    @_check_self!
    check_arg str, 'string', 2, 'gfind', @_level
    -- Per-iterator search position; see the note in gsplit
    local start
    start = 1
    ->
      first, last = regex.search(@_regex, str, start)
      return unless first
      -- Step past the match, or by one position if the match was empty
      start = if last >= start then last + 1 else start + 1
      str\sub(first, last), first, last

  -- Return an array of {str, first, last} tables for every match in str, or
  -- nil when there are none
  find: (str) =>
    @_check_self!
    check_arg str, 'string', 2, 'find', @_level
    ret = [str: s, first: f, last: l for s, f, l in @gfind(str)]
    next(ret) and ret

  -- Replace matches in str. repl is either a replacement string or a function
  -- called for each match; max_count limits the number of replacements
  -- (nil or 0 means no limit).
  sub: (str, repl, max_count) =>
    @_check_self!
    check_arg str, 'string', 2, 'sub', @_level
    if max_count != nil
      check_arg max_count, 'number', 4, 'sub', @_level
    max_count = str\len() + 1 if not max_count or max_count == 0
    if type(repl) == 'function'
      do_replace_fun @, repl, str, max_count
    elseif type(repl) == 'string'
      regex.replace @_regex, repl, str, max_count
    else
      error "Argument 2 to sub should be a string or function, is '#{type(repl)}' (#{repl})", @_level

  -- Return an iterator over the entries of the first match found at or after
  -- the 1-based position start. Each entry is a {str, first, last} table.
  gmatch: (str, start) =>
    @_check_self!
    check_arg str, 'string', 2, 'gmatch', @_level
    -- The binding works with a 0-based offset
    start = if start then start - 1 else 0
    match = regex.match @_regex, str, start
    i = 1
    ->
      return unless match
      first, last = regex.get_match match, i
      return unless first
      i += 1
      -- Positions come back relative to the search start, so shift them back
      -- to positions in the whole string
      {
        str: str\sub first + start, last + start
        first: first + start
        last: last + start
      }

  -- Same as gmatch, but collect the entries into an array
  match: (str, start) =>
    @_check_self!
    check_arg(str, 'string', 2, 'match', @_level)
    ret = [v for v in @gmatch str, start]
    -- Return nil rather than an empty table so that if re.match(...) works
    return nil if next(ret) == nil
    ret
-- Create a regex object from a pattern, flags, and error depth.
-- level is the error level used for compile errors raised here; stored_level
-- (defaulting to level + 1) is kept in the object for later argument errors.
real_compile = (pattern, level, flags, stored_level) ->
  if pattern == ''
    error 'Regular expression must not be empty', level + 1
  re = regex.compile pattern, flags
  -- A string result is the compile error message. Raise that message, not the
  -- `regex` binding table.
  if type(re) == 'string'
    error re, level + 1
  RegEx re, stored_level or level + 1
-- Compile pattern with the given flags, then call the method named fn on the
-- resulting object with str and any remaining arguments
invoke = (str, pattern, fn, flags, ...) ->
  re_obj = real_compile pattern, 3, flags
  method = re_obj[fn]
  method re_obj, str, ...
-- Build the static form of the method called impl_name: a function taking
-- the subject string and the pattern first, followed by the method's own
-- arguments and any flags. Both leading arguments are type checked.
gen_wrapper = (impl_name) ->
  wrapper = (str, pattern, ...) ->
    check_arg(str, 'string', 1, impl_name, 2)
    check_arg(pattern, 'string', 2, impl_name, 2)
    invoke str, pattern, impl_name, unpack_args ...
  wrapper
-- And now at last the actual public API
re = regex.init_flags(re)

-- Compile pattern with the given flags into a reusable RegEx object
re.compile = (pattern, ...) ->
  check_arg pattern, 'string', 1, 'compile', 2
  flags = regex.process_flags ...
  -- process_flags reports a non-flag argument by returning the message as a
  -- string; raise it here instead of passing it on as if it were the flags
  if type(flags) == 'string'
    error flags, 2
  real_compile pattern, 2, flags, 2

re.split = gen_wrapper 'split'
re.gsplit = gen_wrapper 'gsplit'
re.find = gen_wrapper 'find'
re.gfind = gen_wrapper 'gfind'
re.match = gen_wrapper 'match'
re.gmatch = gen_wrapper 'gmatch'
re.sub = gen_wrapper 'sub'
re

View file

@ -327,6 +327,7 @@
<ClCompile Include="$(SrcDir)auto4_lua_dialog.cpp" /> <ClCompile Include="$(SrcDir)auto4_lua_dialog.cpp" />
<ClCompile Include="$(SrcDir)auto4_lua_progresssink.cpp" /> <ClCompile Include="$(SrcDir)auto4_lua_progresssink.cpp" />
<ClCompile Include="$(SrcDir)auto4_lua_scriptreader.cpp" /> <ClCompile Include="$(SrcDir)auto4_lua_scriptreader.cpp" />
<ClCompile Include="$(SrcDir)auto4_regex.cpp" />
<ClCompile Include="$(SrcDir)avisynth_wrap.cpp" /> <ClCompile Include="$(SrcDir)avisynth_wrap.cpp" />
<ClCompile Include="$(SrcDir)base_grid.cpp" /> <ClCompile Include="$(SrcDir)base_grid.cpp" />
<ClCompile Include="$(SrcDir)charset_detect.cpp" /> <ClCompile Include="$(SrcDir)charset_detect.cpp" />

View file

@ -1238,6 +1238,9 @@
<ClCompile Include="$(SrcDir)subs_controller.cpp"> <ClCompile Include="$(SrcDir)subs_controller.cpp">
<Filter>ASS</Filter> <Filter>ASS</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="$(SrcDir)auto4_regex.cpp">
<Filter>Automation\Lua</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ResourceCompile Include="$(SrcDir)res.rc"> <ResourceCompile Include="$(SrcDir)res.rc">

View file

@ -92,11 +92,11 @@ endif
# AUTOMATION # AUTOMATION
############ ############
ifeq (yes, $(HAVE_AUTO4_LUA)) ifeq (yes, $(HAVE_AUTO4_LUA))
auto4_lua.o auto4_lua_assfile.o auto4_lua_dialog.o auto4_lua_progresssink.o auto4_lua_scriptreader.o: CXXFLAGS += $(CFLAGS_LUA) auto4_lua.o auto4_lua_assfile.o auto4_lua_dialog.o auto4_lua_progresssink.o auto4_lua_regex.o auto4_lua_scriptreader.o: CXXFLAGS += $(CFLAGS_LUA)
LIBS += $(LIBS_LUA) LIBS += $(LIBS_LUA)
SRC += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_lua_scriptreader.cpp SRC += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_regex.cpp auto4_lua_scriptreader.cpp
else else
EXTRA_DIST += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_lua_scriptreader.cpp EXTRA_DIST += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_regex.cpp auto4_lua_scriptreader.cpp
endif endif
############## ##############

View file

@ -71,7 +71,6 @@
#include <wx/filename.h> #include <wx/filename.h>
#include <wx/log.h> #include <wx/log.h>
#include <wx/msgdlg.h> #include <wx/msgdlg.h>
#include <wx/regex.h>
#include <wx/window.h> #include <wx/window.h>
namespace { namespace {
@ -111,125 +110,6 @@ namespace {
return 1; return 1;
} }
inline wxRegEx *get_regex(lua_State *L)
{
return static_cast<wxRegEx*>(luaL_checkudata(L, 1, "aegisub.regex"));
}
int regex_matches(lua_State *L)
{
lua_pushboolean(L, get_regex(L)->Matches(check_wxstring(L, 2)));
return 1;
}
int regex_match_count(lua_State *L)
{
wxRegEx *re = get_regex(L);
if (re->Matches(check_wxstring(L, 2)))
push_value(L, re->GetMatchCount());
else
push_value(L, 0);
return 1;
}
size_t utf8_len(wxString const& w)
{
#if wxUSE_UNICODE_UTF8
return w.utf8_length();
#else
return w.utf8_str().length();
#endif
}
int regex_get_match(lua_State *L)
{
wxString str(check_wxstring(L, 2));
size_t start, len;
get_regex(L)->GetMatch(&start, &len, luaL_checkinteger(L, 3));
push_value(L, utf8_len(str.Left(start)) + 1);
push_value(L, utf8_len(str.Left(start + len)));
return 2;
}
int regex_replace(lua_State *L)
{
wxString str(check_wxstring(L, 3));
int reps = get_regex(L)->Replace(&str, check_wxstring(L, 2), luaL_checkinteger(L, 4));
push_value(L, str);
push_value(L, reps);
return 2;
}
int regex_compile(lua_State *L)
{
wxString pattern(check_wxstring(L, 1));
int flags = luaL_checkinteger(L, 2);
wxRegEx *re = static_cast<wxRegEx*>(lua_newuserdata(L, sizeof(wxRegEx)));
new(re) wxRegEx(pattern, wxRE_ADVANCED | flags);
luaL_getmetatable(L, "aegisub.regex");
lua_setmetatable(L, -2);
// return nil and handle the error in lua as it's a bit easier to
// report the actual call site from there
if (!re->IsValid()) {
lua_pop(L, 1);
lua_pushnil(L);
}
return 1;
}
int regex_gc(lua_State *L) {
get_regex(L)->~wxRegEx();
return 0;
}
int regex_process_flags(lua_State *L)
{
int ret = 0;
int nargs = lua_gettop(L);
for (int i = 1; i <= nargs; ++i) {
if (!lua_islightuserdata(L, i)) {
push_value(L, "Flags must follow all non-flag arguments");
return 1;
}
ret |= (int)(intptr_t)lua_touserdata(L, i);
}
push_value(L, ret);
return 1;
}
int regex_init_flags(lua_State *L)
{
lua_newtable(L);
set_field(L, "ICASE", (void*)wxRE_ICASE);
set_field(L, "NOSUB", (void*)wxRE_NOSUB);
set_field(L, "NEWLINE", (void*)wxRE_NEWLINE);
return 1;
}
int regex_init(lua_State *L)
{
if (luaL_newmetatable(L, "aegisub.regex")) {
set_field(L, "__gc", regex_gc);
lua_pop(L, 1);
}
lua_newtable(L);
set_field(L, "matches", regex_matches);
set_field(L, "match_count", regex_match_count);
set_field(L, "get_match", regex_get_match);
set_field(L, "replace", regex_replace);
set_field(L, "compile", regex_compile);
set_field(L, "process_flags", regex_process_flags);
set_field(L, "init_flags", regex_init_flags);
return 1;
}
int clipboard_get(lua_State *L) int clipboard_get(lua_State *L)
{ {
std::string data = GetClipboard(); std::string data = GetClipboard();
@ -274,6 +154,8 @@ namespace {
} }
namespace Automation4 { namespace Automation4 {
int regex_init(lua_State *L);
// LuaScript // LuaScript
LuaScript::LuaScript(agi::fs::path const& filename) LuaScript::LuaScript(agi::fs::path const& filename)
: Script(filename) : Script(filename)

View file

@ -33,6 +33,7 @@ inline void push_value(lua_State *L, const char *value) { lua_pushstring(L, valu
inline void push_value(lua_State *L, double value) { lua_pushnumber(L, value); } inline void push_value(lua_State *L, double value) { lua_pushnumber(L, value); }
inline void push_value(lua_State *L, int value) { lua_pushinteger(L, value); } inline void push_value(lua_State *L, int value) { lua_pushinteger(L, value); }
inline void push_value(lua_State *L, size_t value) { lua_pushinteger(L, value); } inline void push_value(lua_State *L, size_t value) { lua_pushinteger(L, value); }
inline void push_value(lua_State *L, long value) { lua_pushinteger(L, value); }
inline void push_value(lua_State *L, void *p) { lua_pushlightuserdata(L, p); } inline void push_value(lua_State *L, void *p) { lua_pushlightuserdata(L, p); }
inline void push_value(lua_State *L, wxString const& value) { inline void push_value(lua_State *L, wxString const& value) {

208
aegisub/src/auto4_regex.cpp Normal file
View file

@ -0,0 +1,208 @@
// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Aegisub Project http://www.aegisub.org/
#include "config.h"
#ifdef WITH_AUTO4_LUA
#include "auto4_lua_utils.h"
#include <boost/regex/icu.hpp>
namespace {
// Fetch the compiled regex held in the userdata at stack index 1.
// luaL_checkudata verifies that the value carries the "aegisub.regex" metatable.
boost::u32regex& get_regex(lua_State *L) {
	void *udata = luaL_checkudata(L, 1, "aegisub.regex");
	return *static_cast<boost::u32regex*>(udata);
}
// Fetch the match result held in the userdata at stack index 1.
// luaL_checkudata verifies that the value carries the "aegisub.smatch" metatable.
boost::smatch& get_smatch(lua_State *L) {
	void *udata = luaL_checkudata(L, 1, "aegisub.smatch");
	return *static_cast<boost::smatch*>(udata);
}
// Lua: matches(regex, str) -> boolean
// Pushes the result of u32regex_match for the string at index 2.
int regex_matches(lua_State *L) {
	const char *subject = luaL_checkstring(L, 2);
	bool matched = u32regex_match(subject, get_regex(L));
	lua_pushboolean(L, matched);
	return 1;
}
// Lua: match(regex, str, start) -> smatch userdata, or nil
// Searches str beginning at the 0-based byte offset start and returns the
// match object for use with get_match. nil is returned when nothing matches
// or when start lies outside the string.
int regex_match(lua_State *L) {
	// Bind by reference; there is no need to copy the regex for a search
	auto& re = get_regex(L);
	std::string str = luaL_checkstring(L, 2);
	int start = lua_tointeger(L, 3);

	// Offsetting the begin iterator outside [0, size] is undefined behaviour,
	// so treat such offsets as "no match"
	if (start < 0 || static_cast<size_t>(start) > str.size()) {
		lua_pushnil(L);
		return 1;
	}

	auto result = static_cast<boost::smatch*>(lua_newuserdata(L, sizeof(boost::smatch)));
	new(result) boost::smatch;
	// Attach the metatable right away so the object is finalized by smatch_gc
	luaL_getmetatable(L, "aegisub.smatch");
	lua_setmetatable(L, -2);

	// NOTE(review): *result keeps iterators into the local `str`, which is
	// destroyed when this function returns. regex_get_match only measures
	// distances between them, but this looks fragile (checked iterators may
	// reject it) - consider storing the string alongside the match.
	if (!u32regex_search(str.cbegin() + start, str.cend(), *result, re,
		start > 0 ? boost::match_prev_avail | boost::match_not_bob : boost::match_default))
	{
		lua_pop(L, 1);
		lua_pushnil(L);
	}

	return 1;
}
// Lua: get_match(smatch, idx) -> first, last, or nil
// idx is 1-based: 1 is the whole match, 2 and up are the capturing groups.
// The returned positions are 1-based, inclusive, and relative to the position
// the search started from. nil is returned for an index that is out of range
// or for a group that did not take part in the match.
int regex_get_match(lua_State *L) {
	// Bind by reference: copying the match results on every call would
	// duplicate all sub-matches just to read two positions
	auto& match = get_smatch(L);
	int idx = luaL_checkinteger(L, 2) - 1;
	// Valid indexes are 0 .. size() - 1
	if (idx < 0 || static_cast<size_t>(idx) >= match.size() || !match[idx].matched) {
		lua_pushnil(L);
		return 1;
	}

	// prefix().first is where the search began
	push_value(L, distance(match.prefix().first, match[idx].first + 1));
	push_value(L, distance(match.prefix().first, match[idx].second));
	return 2;
}
// Lua: search(regex, str, start) -> first, last, or nil
// Searches str from the 1-based position start and returns the 1-based,
// inclusive byte range of the first match. nil is returned when nothing
// matches or when start lies outside the string.
int regex_search(lua_State *L) {
	// Bind by reference; there is no need to copy the regex for a search
	auto& re = get_regex(L);
	std::string str = luaL_checkstring(L, 2);
	int start = luaL_checkinteger(L, 3) - 1;

	// Callers that step past an empty match at the very end of the input ask
	// for a position beyond the string. Offsetting the begin iterator that far
	// is undefined behaviour, so report "no match" instead.
	if (start < 0 || static_cast<size_t>(start) > str.size()) {
		lua_pushnil(L);
		return 1;
	}

	boost::smatch result;
	if (!u32regex_search(str.cbegin() + start, str.cend(), result, re,
		start > 0 ? boost::match_prev_avail | boost::match_not_bob : boost::match_default))
	{
		lua_pushnil(L);
		return 1;
	}

	// position() is relative to the search start; convert to 1-based
	// positions in the whole string
	push_value(L, start + result.position() + 1);
	push_value(L, start + result.position() + result.length());
	return 2;
}
// Lua: replace(regex, replacement, str, max_count) -> string
// Replaces up to max_count matches of regex in str with the formatted
// replacement and returns the resulting string.
int regex_replace(lua_State *L) {
	typedef boost::u32regex_iterator<std::string::const_iterator> match_iterator;

	auto& re = get_regex(L);
	const char *replacement = luaL_checkstring(L, 2);
	const std::string str = luaL_checkstring(L, 3);
	int remaining = luaL_checkinteger(L, 4);

	std::string ret;
	auto out = back_inserter(ret);
	// First input position that has not been written to the output yet
	auto copied_to = str.begin();

	// Can't just use regex_replace here since it can only do one or infinite
	// replacements, so walk the matches by hand
	const match_iterator no_more;
	for (match_iterator it(str.begin(), str.end(), re); it != no_more && remaining > 0; ++it, --remaining) {
		copy(copied_to, it->prefix().second, out);
		it->format(out, replacement);
		copied_to = it->suffix().first;
	}

	// Whatever follows the last replaced match is passed through untouched
	copy(copied_to, str.end(), out);

	push_value(L, ret);
	return 1;
}
// Lua: compile(pattern, flags) -> regex userdata, or an error message string
// Compiles pattern with Perl syntax plus the given flag bits.
int regex_compile(lua_State *L) {
	std::string pattern(luaL_checkstring(L, 1));
	int flags = luaL_checkinteger(L, 2);
	void *storage = lua_newuserdata(L, sizeof(boost::u32regex));

	try {
		// Construct the object directly from the compiled pattern. If the
		// pattern is invalid the exception is thrown before anything has been
		// constructed in the userdata, so nothing is left without a finalizer.
		new(storage) boost::u32regex(boost::make_u32regex(pattern, boost::u32regex::perl | flags));
	}
	catch (std::exception const& e) {
		// Do the actual triggering of the error in the Lua code as that code
		// can report the original call site
		lua_pop(L, 1);
		push_value(L, e.what());
		return 1;
	}

	// Only a fully constructed regex gets the metatable with the __gc handler
	luaL_getmetatable(L, "aegisub.regex");
	lua_setmetatable(L, -2);

	return 1;
}
// __gc handler for "aegisub.regex" userdata: the object was created with
// placement new inside Lua-owned memory, so only its destructor is run here
int regex_gc(lua_State *L) {
	// A plain type name is needed to spell the destructor call
	typedef boost::u32regex regex_type;
	boost::u32regex& re = get_regex(L);
	re.~regex_type();
	return 0;
}
// __gc handler for "aegisub.smatch" userdata: the object was created with
// placement new inside Lua-owned memory, so only its destructor is run here
int smatch_gc(lua_State *L) {
	// A plain type name is needed to spell the destructor call
	typedef boost::smatch match_type;
	boost::smatch& match = get_smatch(L);
	match.~match_type();
	return 0;
}
// Lua: process_flags(...) -> integer, or an error message string
// Every argument must be a light userdata carrying flag bits; the bits of all
// arguments are OR-ed together. If any argument is something else, the error
// message is returned instead so that the Lua side can raise it.
int regex_process_flags(lua_State *L) {
	int combined = 0;
	for (int idx = 1, count = lua_gettop(L); idx <= count; ++idx) {
		if (lua_islightuserdata(L, idx)) {
			combined |= static_cast<int>(reinterpret_cast<intptr_t>(lua_touserdata(L, idx)));
			continue;
		}
		push_value(L, "Flags must follow all non-flag arguments");
		return 1;
	}

	push_value(L, combined);
	return 1;
}
// Lua: init_flags() -> table
// Builds a table mapping flag names to light userdata values whose pointer
// bits are the corresponding boost::u32regex syntax option.
int regex_init_flags(lua_State *L) {
	static const struct {
		const char *name;
		boost::u32regex::flag_type value;
	} flags[] = {
		{ "ICASE", boost::u32regex::icase },
		{ "NOSUB", boost::u32regex::nosubs },
		{ "COLLATE", boost::u32regex::collate },
		{ "NEWLINE_ALT", boost::u32regex::newline_alt },
		{ "NO_MOD_M", boost::u32regex::no_mod_m },
		{ "NO_MOD_S", boost::u32regex::no_mod_s },
		{ "MOD_S", boost::u32regex::mod_s },
		{ "MOD_X", boost::u32regex::mod_x },
		{ "NO_EMPTY_SUBEXPRESSIONS", boost::u32regex::no_empty_expressions }
	};

	lua_newtable(L);
	for (auto const& flag : flags)
		set_field(L, flag.name, (void*)(intptr_t)flag.value);

	return 1;
}
}
namespace Automation4 {
// Register the metatables for the regex and match userdata types and return
// (on the Lua stack) a table holding the binding functions.
int regex_init(lua_State *L) {
	// luaL_newmetatable leaves the metatable on the stack whether it was just
	// created or already existed, so it is popped in both cases; the __gc
	// handler only needs to be installed the first time.
	if (luaL_newmetatable(L, "aegisub.regex"))
		set_field(L, "__gc", regex_gc);
	lua_pop(L, 1);

	if (luaL_newmetatable(L, "aegisub.smatch"))
		set_field(L, "__gc", smatch_gc);
	lua_pop(L, 1);

	lua_newtable(L);
	set_field(L, "matches", regex_matches);
	set_field(L, "search", regex_search);
	set_field(L, "match", regex_match);
	set_field(L, "get_match", regex_get_match);
	set_field(L, "replace", regex_replace);
	set_field(L, "compile", regex_compile);
	set_field(L, "process_flags", regex_process_flags);
	set_field(L, "init_flags", regex_init_flags);
	return 1;
}
}
#endif