Replace wxRegex bindings for lua with boost::regex bindings

API is mostly unchanged other than the addition of a lot more flags.
Should be less buggy since it has an actual test suite, and generally
has a more powerful regex syntax with better support for Unicode.

The bindings are written in MoonScript. For now the compiled form is
stored in the repo for convenience.
This commit is contained in:
Thomas Goyne 2013-04-09 20:46:59 -07:00
parent 44188267d6
commit c556a47509
8 changed files with 753 additions and 401 deletions

View file

@ -1,301 +1,314 @@
-- Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
--
-- Permission to use, copy, modify, and distribute this software for any
-- purpose with or without fee is hereby granted, provided that the above
-- copyright notice and this permission notice appear in all copies.
--
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-- Get the wxRegex binding
local regex = aegisub.__init_regex() local regex = aegisub.__init_regex()
local select_first
-- Compiled regular expression type protoype select_first = function(n, a, ...)
local re_proto = {} if n == 0 then
local re_proto_mt = { __index = re_proto } return
end
-- Convert an iterator to an array return a, select_first(n - 1, ...)
local function to_table(...)
local arr = {}
local i = 1
for v in ... do
arr[i] = v
i = i + 1
end
return arr
end end
local unpack_args
-- Return the first n elements from ... unpack_args = function(...)
local function select_first(n, a, ...) local userdata_start = nil
if n == 0 then return end for i = 1, select('#', ...) do
return a, select_first(n - 1, ...) local v = select(i, ...)
if type(v) == 'userdata' then
userdata_start = i
break
end
end
if not (userdata_start) then
return 0, ...
end
local flags = regex.process_flags(select(userdata_start, ...))
if type(flags) == 'string' then
error(flags, 3)
end
return flags, select_first(userdata_start - 1, ...)
end end
local check_arg
-- Extract the flags from ..., bitwise OR them together, and move them to the check_arg = function(arg, expected_type, argn, func_name, level)
-- front of ... if type(arg) ~= expected_type then
local function unpack_args(...) return error("Argument " .. tostring(argn) .. " to " .. tostring(func_name) .. " should be a '" .. tostring(expected_type) .. "', is '" .. tostring(type(arg)) .. "' (" .. tostring(arg) .. ")", level + 1)
local n = select('#', ...) end
local userdata_start = nil
for i = 1, n do
local v = select(i, ...)
if type(v) == "userdata" then
userdata_start = i
break
end
end
if not userdata_start then
return 0, ...
end
flags = regex.process_flags(select(userdata_start, ...))
if type(flags) == "string" then
error(flags, 3)
end
return flags, select_first(userdata_start - 1, ...)
end end
local replace_match
-- Verify that a valid value was passed for self replace_match = function(match, func, str, last, acc)
local function check_self(self) if last < match.last then
if getmetatable(self) ~= re_proto_mt then acc[#acc + 1] = str:sub(last, match.first - 1)
error("re method called with invalid self. You probably used . when : is needed.", 3) end
end local repl = func(match.str, match.first, match.last)
if type(repl) == 'string' then
acc[#acc + 1] = repl
else
acc[#acc + 1] = match.str
end
return match.first, match.last + 1
end end
local do_single_replace_fun
-- Typecheck a variable and throw an error if it fails do_single_replace_fun = function(re, func, str, acc, pos)
local function check_arg(arg, expected_type, argn, func_name, level) local matches = re:match(str, pos)
if type(arg) ~= expected_type then if not (matches) then
error( return pos
string.format("Argument %d to %s should be a '%s', is '%s' (%s)", end
argn, func_name, expected_type, type(arg), tostring(arg)), local start
level + 1) if #matches == 1 then
end start = 1
else
start = 2
end
local last = pos
local first
for i = start, #matches do
first, last = replace_match(matches[i], func, str, last, acc)
end
if first == last then
acc[#acc + 1] = str:sub(last, last)
last = last + 1
end
return last, matches[1].first <= str:len()
end end
local do_replace_fun
function re_proto.gsplit(self, str, skip_empty, max_split) do_replace_fun = function(re, func, str, max)
check_self(self) local acc = { }
check_arg(str, "string", 2, "gsplit", self._level) local pos = 1
if not max_split or max_split <= 0 then max_split = str:len() end local i
for i = 1, max do
local function do_split() local more
pos, more = do_single_replace_fun(re, func, str, acc, pos)
if not (more) then
max = i
break
end
end
return table.concat(acc, '') .. str:sub(pos)
end
local RegEx
do
local start
local _parent_0 = nil
local _base_0 = {
_check_self = function(self)
if not (self.__class == RegEx) then
return error('re method called with invalid self. You probably used . when : is needed.', 3)
end
end,
gsplit = function(self, str, skip_empty, max_split)
self:_check_self()
check_arg(str, 'string', 2, 'gsplit', self._level)
if not max_split or max_split <= 0 then
max_split = str:len()
end
start = 1
local prev = 1
local do_split
do_split = function()
if not str or str:len() == 0 then if not str or str:len() == 0 then
return nil return
end end
local first, last
if max_split == 0 or not regex.matches(self._regex, str) then if max_split > 0 then
local ret = str first, last = regex.search(self._regex, str, start)
str = nil
return ret
end end
if not first or first > str:len() then
local first, last = regex.get_match(self._regex, str, 0) local ret = str:sub(prev, str:len())
local ret = str:sub(1, first - 1) str = nil
str = str:sub(last + 1) return ret
end
local ret = str:sub(prev, first - 1)
prev = last + 1
start = 1 + (function()
if start >= last then
return start
else
return last
end
end)()
if skip_empty and ret:len() == 0 then if skip_empty and ret:len() == 0 then
return do_split() return do_split()
else else
max_split = max_split - 1 max_split = max_split - 1
return ret return ret
end end
end end
return do_split
return do_split end,
end split = function(self, str, skip_empty, max_split)
self:_check_self()
function re_proto.split(self, str, skip_empty, max_split) check_arg(str, 'string', 2, 'split', self._level)
check_self(self) return (function()
check_arg(str, "string", 2, "split", self._level) local _accum_0 = { }
return to_table(self:gsplit(str, skip_empty, max_split)) local _len_0 = 1
end for v in self:gsplit(str, skip_empty, max_split) do
_accum_0[_len_0] = v
function re_proto.gfind(self, str) _len_0 = _len_0 + 1
check_self(self) end
check_arg(str, "string", 2, "gfind", self._level) return _accum_0
end)()
local offset = 0 end,
return function() gfind = function(self, str)
local has_matches = regex.matches(self._regex, str) self:_check_self()
if not has_matches then return end check_arg(str, 'string', 2, 'gfind', self._level)
start = 1
local first, last = regex.get_match(self._regex, str, 0) return function()
local ret = str:sub(first, last) local first, last = regex.search(self._regex, str, start)
str = str:sub(last + 1) if not (first) then
return
last = last + offset end
offset = offset + first if last >= start then
return ret, offset, last start = last + 1
end else
end start = start + 1
end
function re_proto.find(self, str) return str:sub(first, last), first, last
check_self(self) end
check_arg(str, "string", 2, "find", self._level) end,
find = function(self, str)
local i = 1 self:_check_self()
local ret = {} check_arg(str, 'string', 2, 'find', self._level)
for s, f, l in self:gfind(str) do local ret = (function()
ret[i] = { local _accum_0 = { }
str = s, local _len_0 = 1
for s, f, l in self:gfind(str) do
_accum_0[_len_0] = {
str = s,
first = f, first = f,
last = l last = l
} }
_len_0 = _len_0 + 1
end
return _accum_0
end)()
return next(ret) and ret
end,
sub = function(self, str, repl, max_count)
self:_check_self()
check_arg(str, 'string', 2, 'sub', self._level)
if max_count ~= nil then
check_arg(max_count, 'number', 4, 'sub', self._level)
end
if not max_count or max_count == 0 then
max_count = str:len() + 1
end
if type(repl) == 'function' then
return do_replace_fun(self, repl, str, max_count)
elseif type(repl) == 'string' then
return regex.replace(self._regex, repl, str, max_count)
else
return error("Argument 2 to sub should be a string or function, is '" .. tostring(type(repl)) .. "' (" .. tostring(repl) .. ")", self._level)
end
end,
gmatch = function(self, str, start)
self:_check_self()
check_arg(str, 'string', 2, 'gmatch', self._level)
if start then
start = start - 1
else
start = 0
end
local match = regex.match(self._regex, str, start)
local i = 1
return function()
if not (match) then
return
end
local first, last = regex.get_match(match, i)
if not (first) then
return
end
i = i + 1 i = i + 1
end
return ret
end
-- Replace a match with the value returned from func when passed the match
local function replace_match(match, func, str, last, acc)
if last < match.last then
acc[#acc + 1] = str:sub(last, match.first - 1)
end
local ret = func(match.str, match.first, match.last)
if type(ret) == "string" then
acc[#acc + 1] = ret
else
-- If it didn't return a string just leave the old value
acc[#acc + 1] = match.str
end
return match.last + 1
end
-- Replace all matches from a single iteration of the regexp
local function do_single_replace_fun(re, func, str, acc)
local matches = re:match(str)
-- No more matches so just return what we have so far
if not matches then
return str
end
-- One match means no capturing groups, so pass the entire thing to
-- the replace function
if #matches == 1 then
local rest = replace_match(matches[1], func, str, 1, acc)
return str:sub(rest), true
end
-- Multiple matches means there were capture groups, so skip the first one
-- and pass the rest to the replace function
local last = 1
for i = 2, #matches do
last = replace_match(matches[i], func, str, last, acc)
end
return str:sub(last), true
end
local function do_replace_fun(re, func, str, max)
local acc = {}
local i
for i = 1, max do
str, continue = do_single_replace_fun(re, func, str, acc)
if not continue then max = i end
end
return table.concat(acc, "") .. str, max
end
function re_proto.sub(self, str, repl, count)
check_self(self)
check_arg(str, "string", 2, "sub", self._level)
if count ~= nil then
check_arg(count, "number", 4, "sub", self._level)
end
if not count or count == 0 then count = str:len() end
if type(repl) == "function" then
return do_replace_fun(self, repl, str, count)
elseif type(repl) == "string" then
return regex.replace(self._regex, repl, str, count)
else
error(
string.format("Argument 2 to sub should be a string or function, is '%s' (%s)",
type(repl), tostring(repl)),
self._level)
end
end
function re_proto.gmatch(self, str)
check_self(self)
check_arg(str, "string", 2, "gmatch", self._level)
local match_count = regex.match_count(self._regex, str)
local i = 0
return function()
if i == match_count then return end
i = i + 1
local first, last = regex.get_match(self._regex, str, i - 1)
return { return {
str = str:sub(first, last), str = str:sub(first + start, last + start),
first = first, first = first + start,
last = last last = last + start
} }
end
end,
match = function(self, str, start)
self:_check_self()
check_arg(str, 'string', 2, 'match', self._level)
local ret = (function()
local _accum_0 = { }
local _len_0 = 1
for v in self:gmatch(str, start) do
_accum_0[_len_0] = v
_len_0 = _len_0 + 1
end
return _accum_0
end)()
if next(ret) == nil then
return nil
end
return ret
end end
end }
_base_0.__index = _base_0
function re_proto.match(self, str) if _parent_0 then
check_self(self) setmetatable(_base_0, _parent_0.__base)
check_arg(str, "string", 2, "match", self._level) end
local _class_0 = setmetatable({
local ret = to_table(self:gmatch(str)) __init = function(self, _regex, _level)
-- Return nil rather than a empty table so that if re.match(...) works self._regex, self._level = _regex, _level
if next(ret) == nil then return end end,
return ret __base = _base_0,
end __name = "RegEx",
__parent = _parent_0
-- Create a wxRegExp object from a pattern, flags, and error depth }, {
local function real_compile(pattern, level, flags, stored_level) __index = function(cls, name)
local regex = regex.compile(pattern, flags) local val = rawget(_base_0, name)
if not regex then if val == nil and _parent_0 then
error("Bad syntax in regular expression", level + 1) return _parent_0[name]
else
return val
end
end,
__call = function(cls, ...)
local _self_0 = setmetatable({}, _base_0)
cls.__init(_self_0, ...)
return _self_0
end end
return setmetatable({ })
_regex = regex, _base_0.__class = _class_0
_level = stored_level or level + 1 local self = _class_0
}, start = 1
re_proto_mt) if _parent_0 and _parent_0.__inherited then
_parent_0.__inherited(_parent_0, _class_0)
end
RegEx = _class_0
end end
local real_compile
-- Compile a pattern then invoke a method on it real_compile = function(pattern, level, flags, stored_level)
local function invoke(str, pattern, fn, flags, ...) if pattern == '' then
local comp = real_compile(pattern, 3, flags) error('Regular expression must not be empty', level + 1)
return comp[fn](comp, str, ...) end
local re = regex.compile(pattern, flags)
if type(re) == 'string' then
error(regex, level + 1)
end
return RegEx(re, stored_level or level + 1)
end end
local invoke
-- Generate a static version of a method with arg type checking invoke = function(str, pattern, fn, flags, ...)
local function gen_wrapper(impl_name) local compiled_regex = real_compile(pattern, 3, flags)
return function(str, pattern, ...) return compiled_regex[fn](compiled_regex, str, ...)
check_arg(str, "string", 1, impl_name, 2) end
check_arg(pattern, "string", 2, impl_name, 2) local gen_wrapper
return invoke(str, pattern, impl_name, unpack_args(...)) gen_wrapper = function(impl_name)
end return function(str, pattern, ...)
check_arg(str, 'string', 1, impl_name, 2)
check_arg(pattern, 'string', 2, impl_name, 2)
return invoke(str, pattern, impl_name, unpack_args(...))
end
end end
-- And now at last the actual public API
local re = regex.init_flags(re) local re = regex.init_flags(re)
re.compile = function(pattern, ...)
function re.compile(pattern, ...) check_arg(pattern, 'string', 1, 'compile', 2)
check_arg(pattern, "string", 1, "compile", 2) return real_compile(pattern, 2, regex.process_flags(...), 2)
return real_compile(pattern, 2, regex.process_flags(...), 2)
end end
re.split = gen_wrapper('split')
re.split = gen_wrapper("split") re.gsplit = gen_wrapper('gsplit')
re.gsplit = gen_wrapper("gsplit") re.find = gen_wrapper('find')
re.find = gen_wrapper("find") re.gfind = gen_wrapper('gfind')
re.gfind = gen_wrapper("gfind") re.match = gen_wrapper('match')
re.match = gen_wrapper("match") re.gmatch = gen_wrapper('gmatch')
re.gmatch = gen_wrapper("gmatch") re.sub = gen_wrapper('sub')
re.sub = gen_wrapper("sub") return re
_G.re = re
return _G.re

View file

@ -0,0 +1,244 @@
-- Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
--
-- Permission to use, copy, modify, and distribute this software for any
-- purpose with or without fee is hereby granted, provided that the above
-- copyright notice and this permission notice appear in all copies.
--
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-- Get the boost::regex binding
regex = aegisub.__init_regex()
-- Return only the first n values of the argument list and drop the rest.
-- One value is peeled off per recursive call until n reaches zero.
select_first = (n, a, ...) ->
  unless n == 0
    return a, select_first n - 1, ...
-- Separate the plain arguments in ... from the flags that follow them.
-- Flags are recognised by being userdata: everything from the first userdata
-- value onwards is handed to regex.process_flags to be combined. Returns the
-- combined flags first, followed by the arguments that preceded the flags.
unpack_args = (...) ->
  first_flag = nil
  for idx = 1, select '#', ...
    candidate = select idx, ...
    if type(candidate) == 'userdata'
      first_flag = idx
      break
  -- No flags were passed, so report 0 and leave the arguments untouched
  unless first_flag
    return 0, ...
  combined = regex.process_flags select first_flag, ...
  -- process_flags signals bad input by returning a message string
  if type(combined) == 'string'
    error(combined, 3)
  combined, select_first first_flag - 1, ...
-- Raise an error unless arg has the expected Lua type.
--   arg:           value to check
--   expected_type: type name as returned by type()
--   argn:          position of the argument, used in the message
--   func_name:     name of the function being called, used in the message
--   level:         error level relative to the caller of check_arg
check_arg = (arg, expected_type, argn, func_name, level) ->
  actual_type = type arg
  unless actual_type == expected_type
    error "Argument #{argn} to #{func_name} should be a '#{expected_type}', is '#{actual_type}' (#{arg})",
      level + 1
-- Append the replacement for a single match to acc.
--   match: table with str, first and last fields describing the match
--   func:  called with (match.str, match.first, match.last)
--   str:   the string being processed
--   last:  index in str of the first character not yet copied to acc
--   acc:   array of output pieces
-- Returns match.first and the index just past the match.
replace_match = (match, func, str, last, acc) ->
  -- Text between the previous match and this one is carried over unchanged
  acc[#acc + 1] = str\sub(last, match.first - 1) if last < match.last
  replacement = func match.str, match.first, match.last
  -- Anything other than a string from func means "keep the matched text"
  replacement = match.str unless type(replacement) == 'string'
  acc[#acc + 1] = replacement
  match.first, match.last + 1
-- Run the regexp once starting at pos and replace what it matched.
-- Returns the position to continue from, plus a flag telling the caller
-- whether another iteration is worthwhile. When nothing matched only pos is
-- returned, so the flag is nil.
do_single_replace_fun = (re, func, str, acc, pos) ->
  matches = re\match str, pos
  return pos unless matches
  -- A lone entry is the full match of a pattern without capturing groups and
  -- is passed to func as-is. With capturing groups the full match (entry 1)
  -- is skipped and only the groups are passed.
  first_group = if #matches == 1 then 1 else 2
  cursor = pos
  local match_first
  for idx = first_group, #matches
    match_first, cursor = replace_match matches[idx], func, str, cursor, acc
  -- An empty match consumes nothing, so copy one character and step over it;
  -- otherwise the same match would be found again on every iteration
  if match_first == cursor
    acc[#acc + 1] = str\sub cursor, cursor
    cursor += 1
  cursor, matches[1].first <= str\len()
-- Replace up to max matches of re in str using the callback func and return
-- the resulting string. Whatever follows the last processed match is
-- appended unchanged.
do_replace_fun = (re, func, str, max) ->
  pieces = {}
  cursor = 1
  for _ = 1, max
    cursor, keep_going = do_single_replace_fun re, func, str, pieces, cursor
    break unless keep_going
  table.concat(pieces, '') .. str\sub cursor
-- Compiled regular expression type prototype
class RegEx
  -- Verify that a valid value was passed for self
  _check_self: =>
    unless @__class == RegEx
      error 're method called with invalid self. You probably used . when : is needed.', 3

  -- _regex is the compiled expression returned by regex.compile and _level is
  -- the error level used when the methods below report a bad argument
  new: (@_regex, @_level) =>

  -- Return an iterator over the pieces of str that lie between matches.
  --   skip_empty: when true, zero-length pieces are not returned
  --   max_split:  maximum number of pieces cut off before the remainder is
  --               returned whole; nil or <= 0 means the length of str
  gsplit: (str, skip_empty, max_split) =>
    @_check_self!
    check_arg str, 'string', 2, 'gsplit', @_level
    if not max_split or max_split <= 0 then max_split = str\len()
    -- The search position and the start of the next piece belong to this
    -- call only. Keeping them in a variable shared by the whole class would
    -- let two iterators that are advanced alternately move each other's
    -- position.
    local search_from, prev
    search_from = 1
    prev = 1
    do_split = () ->
      if not str or str\len() == 0 then return
      local first, last
      if max_split > 0
        first, last = regex.search @_regex, str, search_from
      -- No further match (or no splits left): hand back the remainder and
      -- clear str so that the next call ends the iteration
      if not first or first > str\len()
        ret = str\sub prev, str\len()
        str = nil
        return ret
      ret = str\sub prev, first - 1
      prev = last + 1
      -- Always move forward by at least one so empty matches cannot loop
      search_from = 1 + if search_from >= last then search_from else last
      if skip_empty and ret\len() == 0
        do_split()
      else
        max_split -= 1
        ret
    do_split

  -- Same as gsplit, but collects all pieces into an array
  split: (str, skip_empty, max_split) =>
    @_check_self!
    check_arg str, 'string', 2, 'split', @_level
    [v for v in @gsplit str, skip_empty, max_split]

  -- Return an iterator which yields the matched text and its first and last
  -- index in str for each successive match
  gfind: (str) =>
    @_check_self!
    check_arg str, 'string', 2, 'gfind', @_level
    -- Per-iterator search position; see the note in gsplit
    local search_from
    search_from = 1
    ->
      first, last = regex.search @_regex, str, search_from
      return unless first
      -- An empty match has last < first, so step one past the old position
      search_from = if last >= search_from then last + 1 else search_from + 1
      str\sub(first, last), first, last

  -- Return an array of {str, first, last} tables, one per match, or nil when
  -- there is no match at all
  find: (str) =>
    @_check_self!
    check_arg str, 'string', 2, 'find', @_level
    ret = [{str: s, first: f, last: l} for s, f, l in @gfind str]
    next(ret) and ret

  -- Replace matches in str. repl is either a replacement string handled by
  -- regex.replace or a function handled by do_replace_fun. max_count limits
  -- the number of replacements; nil or 0 means str\len() + 1.
  sub: (str, repl, max_count) =>
    @_check_self!
    check_arg str, 'string', 2, 'sub', @_level
    if max_count != nil
      check_arg max_count, 'number', 4, 'sub', @_level
    max_count = str\len() + 1 if not max_count or max_count == 0
    if type(repl) == 'function'
      do_replace_fun @, repl, str, max_count
    elseif type(repl) == 'string'
      regex.replace @_regex, repl, str, max_count
    else
      error "Argument 2 to sub should be a string or function, is '#{type(repl)}' (#{repl})", @_level

  -- Return an iterator over the entries of a single match found at or after
  -- the 1-based position start (default 1). Each entry is a table with str,
  -- first and last fields; indices are relative to the whole of str.
  gmatch: (str, start) =>
    @_check_self!
    check_arg str, 'string', 2, 'gmatch', @_level
    -- regex.match takes a 0-based offset and reports positions relative to it
    start = if start then start - 1 else 0
    match = regex.match @_regex, str, start
    i = 1
    ->
      return unless match
      first, last = regex.get_match match, i
      return unless first
      i += 1
      {
        str: str\sub first + start, last + start
        first: first + start
        last: last + start
      }

  -- Same as gmatch, but collects the entries into an array
  match: (str, start) =>
    @_check_self!
    check_arg(str, 'string', 2, 'match', @_level)
    ret = [v for v in @gmatch str, start]
    -- Return nil rather than an empty table so that if re.match(...) works
    return nil if next(ret) == nil
    ret
-- Create a regex object from a pattern, flags, and error depth.
--   pattern:      the regular expression source; must not be empty
--   level:        error level, relative to the caller, for compile errors
--   flags:        combined flags value passed on to regex.compile
--   stored_level: error level stored in the RegEx object for its methods;
--                 defaults to level + 1
real_compile = (pattern, level, flags, stored_level) ->
  if pattern == ''
    error 'Regular expression must not be empty', level + 1
  compiled = regex.compile pattern, flags
  -- regex.compile reports a syntax error by returning the message as a
  -- string; raise that message, not the binding table
  if type(compiled) == 'string'
    error compiled, level + 1
  RegEx compiled, stored_level or level + 1
-- Compile pattern with the given flags, then call the method named fn on the
-- resulting object with str and any remaining arguments
invoke = (str, pattern, fn, flags, ...) ->
  instance = real_compile pattern, 3, flags
  method = instance[fn]
  method instance, str, ...
-- Build the static form of the method called impl_name: a function taking
-- the subject string and the pattern, followed by the method's own arguments
-- and any flags. Both leading arguments are type checked before use.
gen_wrapper = (impl_name) ->
  (str, pattern, ...) ->
    check_arg(str, 'string', 1, impl_name, 2)
    check_arg(pattern, 'string', 2, impl_name, 2)
    invoke(str, pattern, impl_name, unpack_args(...))
-- And now at last the actual public API
re = regex.init_flags(re)

-- Compile pattern into a reusable RegEx object. Every argument after the
-- pattern must be a flag.
re.compile = (pattern, ...) ->
  check_arg pattern, 'string', 1, 'compile', 2
  flags = regex.process_flags ...
  -- process_flags reports a non-flag argument by returning a message string.
  -- Raise it at the call site rather than passing it on as the flags value.
  if type(flags) == 'string'
    error flags, 2
  real_compile pattern, 2, flags, 2

-- Static forms of the RegEx methods: re.name(str, pattern, ...)
re.split = gen_wrapper 'split'
re.gsplit = gen_wrapper 'gsplit'
re.find = gen_wrapper 'find'
re.gfind = gen_wrapper 'gfind'
re.match = gen_wrapper 'match'
re.gmatch = gen_wrapper 'gmatch'
re.sub = gen_wrapper 'sub'
re

View file

@ -327,6 +327,7 @@
<ClCompile Include="$(SrcDir)auto4_lua_dialog.cpp" /> <ClCompile Include="$(SrcDir)auto4_lua_dialog.cpp" />
<ClCompile Include="$(SrcDir)auto4_lua_progresssink.cpp" /> <ClCompile Include="$(SrcDir)auto4_lua_progresssink.cpp" />
<ClCompile Include="$(SrcDir)auto4_lua_scriptreader.cpp" /> <ClCompile Include="$(SrcDir)auto4_lua_scriptreader.cpp" />
<ClCompile Include="$(SrcDir)auto4_regex.cpp" />
<ClCompile Include="$(SrcDir)avisynth_wrap.cpp" /> <ClCompile Include="$(SrcDir)avisynth_wrap.cpp" />
<ClCompile Include="$(SrcDir)base_grid.cpp" /> <ClCompile Include="$(SrcDir)base_grid.cpp" />
<ClCompile Include="$(SrcDir)charset_detect.cpp" /> <ClCompile Include="$(SrcDir)charset_detect.cpp" />

View file

@ -1238,6 +1238,9 @@
<ClCompile Include="$(SrcDir)subs_controller.cpp"> <ClCompile Include="$(SrcDir)subs_controller.cpp">
<Filter>ASS</Filter> <Filter>ASS</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="$(SrcDir)auto4_regex.cpp">
<Filter>Automation\Lua</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ResourceCompile Include="$(SrcDir)res.rc"> <ResourceCompile Include="$(SrcDir)res.rc">

View file

@ -92,11 +92,11 @@ endif
# AUTOMATION # AUTOMATION
############ ############
ifeq (yes, $(HAVE_AUTO4_LUA)) ifeq (yes, $(HAVE_AUTO4_LUA))
auto4_lua.o auto4_lua_assfile.o auto4_lua_dialog.o auto4_lua_progresssink.o auto4_lua_scriptreader.o: CXXFLAGS += $(CFLAGS_LUA) auto4_lua.o auto4_lua_assfile.o auto4_lua_dialog.o auto4_lua_progresssink.o auto4_lua_regex.o auto4_lua_scriptreader.o: CXXFLAGS += $(CFLAGS_LUA)
LIBS += $(LIBS_LUA) LIBS += $(LIBS_LUA)
SRC += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_lua_scriptreader.cpp SRC += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_regex.cpp auto4_lua_scriptreader.cpp
else else
EXTRA_DIST += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_lua_scriptreader.cpp EXTRA_DIST += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_regex.cpp auto4_lua_scriptreader.cpp
endif endif
############## ##############

View file

@ -71,7 +71,6 @@
#include <wx/filename.h> #include <wx/filename.h>
#include <wx/log.h> #include <wx/log.h>
#include <wx/msgdlg.h> #include <wx/msgdlg.h>
#include <wx/regex.h>
#include <wx/window.h> #include <wx/window.h>
namespace { namespace {
@ -111,125 +110,6 @@ namespace {
return 1; return 1;
} }
inline wxRegEx *get_regex(lua_State *L)
{
return static_cast<wxRegEx*>(luaL_checkudata(L, 1, "aegisub.regex"));
}
int regex_matches(lua_State *L)
{
lua_pushboolean(L, get_regex(L)->Matches(check_wxstring(L, 2)));
return 1;
}
int regex_match_count(lua_State *L)
{
wxRegEx *re = get_regex(L);
if (re->Matches(check_wxstring(L, 2)))
push_value(L, re->GetMatchCount());
else
push_value(L, 0);
return 1;
}
size_t utf8_len(wxString const& w)
{
#if wxUSE_UNICODE_UTF8
return w.utf8_length();
#else
return w.utf8_str().length();
#endif
}
int regex_get_match(lua_State *L)
{
wxString str(check_wxstring(L, 2));
size_t start, len;
get_regex(L)->GetMatch(&start, &len, luaL_checkinteger(L, 3));
push_value(L, utf8_len(str.Left(start)) + 1);
push_value(L, utf8_len(str.Left(start + len)));
return 2;
}
int regex_replace(lua_State *L)
{
wxString str(check_wxstring(L, 3));
int reps = get_regex(L)->Replace(&str, check_wxstring(L, 2), luaL_checkinteger(L, 4));
push_value(L, str);
push_value(L, reps);
return 2;
}
int regex_compile(lua_State *L)
{
wxString pattern(check_wxstring(L, 1));
int flags = luaL_checkinteger(L, 2);
wxRegEx *re = static_cast<wxRegEx*>(lua_newuserdata(L, sizeof(wxRegEx)));
new(re) wxRegEx(pattern, wxRE_ADVANCED | flags);
luaL_getmetatable(L, "aegisub.regex");
lua_setmetatable(L, -2);
// return nil and handle the error in lua as it's a bit easier to
// report the actual call site from there
if (!re->IsValid()) {
lua_pop(L, 1);
lua_pushnil(L);
}
return 1;
}
int regex_gc(lua_State *L) {
get_regex(L)->~wxRegEx();
return 0;
}
int regex_process_flags(lua_State *L)
{
int ret = 0;
int nargs = lua_gettop(L);
for (int i = 1; i <= nargs; ++i) {
if (!lua_islightuserdata(L, i)) {
push_value(L, "Flags must follow all non-flag arguments");
return 1;
}
ret |= (int)(intptr_t)lua_touserdata(L, i);
}
push_value(L, ret);
return 1;
}
int regex_init_flags(lua_State *L)
{
lua_newtable(L);
set_field(L, "ICASE", (void*)wxRE_ICASE);
set_field(L, "NOSUB", (void*)wxRE_NOSUB);
set_field(L, "NEWLINE", (void*)wxRE_NEWLINE);
return 1;
}
int regex_init(lua_State *L)
{
if (luaL_newmetatable(L, "aegisub.regex")) {
set_field(L, "__gc", regex_gc);
lua_pop(L, 1);
}
lua_newtable(L);
set_field(L, "matches", regex_matches);
set_field(L, "match_count", regex_match_count);
set_field(L, "get_match", regex_get_match);
set_field(L, "replace", regex_replace);
set_field(L, "compile", regex_compile);
set_field(L, "process_flags", regex_process_flags);
set_field(L, "init_flags", regex_init_flags);
return 1;
}
int clipboard_get(lua_State *L) int clipboard_get(lua_State *L)
{ {
std::string data = GetClipboard(); std::string data = GetClipboard();
@ -274,6 +154,8 @@ namespace {
} }
namespace Automation4 { namespace Automation4 {
int regex_init(lua_State *L);
// LuaScript // LuaScript
LuaScript::LuaScript(agi::fs::path const& filename) LuaScript::LuaScript(agi::fs::path const& filename)
: Script(filename) : Script(filename)

View file

@ -33,6 +33,7 @@ inline void push_value(lua_State *L, const char *value) { lua_pushstring(L, valu
inline void push_value(lua_State *L, double value) { lua_pushnumber(L, value); } inline void push_value(lua_State *L, double value) { lua_pushnumber(L, value); }
inline void push_value(lua_State *L, int value) { lua_pushinteger(L, value); } inline void push_value(lua_State *L, int value) { lua_pushinteger(L, value); }
inline void push_value(lua_State *L, size_t value) { lua_pushinteger(L, value); } inline void push_value(lua_State *L, size_t value) { lua_pushinteger(L, value); }
inline void push_value(lua_State *L, long value) { lua_pushinteger(L, value); }
inline void push_value(lua_State *L, void *p) { lua_pushlightuserdata(L, p); } inline void push_value(lua_State *L, void *p) { lua_pushlightuserdata(L, p); }
inline void push_value(lua_State *L, wxString const& value) { inline void push_value(lua_State *L, wxString const& value) {

208
aegisub/src/auto4_regex.cpp Normal file
View file

@ -0,0 +1,208 @@
// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Aegisub Project http://www.aegisub.org/
#include "config.h"
#ifdef WITH_AUTO4_LUA
#include "auto4_lua_utils.h"
#include <boost/regex/icu.hpp>
namespace {
/// Fetch argument 1 as a compiled regex. luaL_checkudata only accepts a
/// userdata whose metatable is the one registered as "aegisub.regex".
boost::u32regex& get_regex(lua_State *L) {
	void *ud = luaL_checkudata(L, 1, "aegisub.regex");
	return *static_cast<boost::u32regex*>(ud);
}
/// Fetch argument 1 as a match result. luaL_checkudata only accepts a
/// userdata whose metatable is the one registered as "aegisub.smatch".
boost::smatch& get_smatch(lua_State *L) {
	void *ud = luaL_checkudata(L, 1, "aegisub.smatch");
	return *static_cast<boost::smatch*>(ud);
}
/// Lua: matches(regex, str) -> boolean
/// Pushes the result of u32regex_match for the string in argument 2 against
/// the regex in argument 1.
int regex_matches(lua_State *L) {
	const bool matched = u32regex_match(luaL_checkstring(L, 2), get_regex(L));
	lua_pushboolean(L, matched);
	return 1;
}
/// Lua: match(regex, str, start) -> smatch userdata or nil
/// Searches str beginning at the 0-based offset start (a missing start reads
/// as 0). On success the match results are returned as an "aegisub.smatch"
/// userdata for use with get_match; otherwise nil is returned.
int regex_match(lua_State *L) {
	// Bind by reference; there is no need to copy the regex on every call
	auto& re = get_regex(L);
	std::string str = luaL_checkstring(L, 2);
	int start = lua_tointeger(L, 3);

	// An offset outside of the string cannot match anything, and forming an
	// iterator from it would be undefined behaviour
	if (start < 0 || static_cast<size_t>(start) > str.size()) {
		lua_pushnil(L);
		return 1;
	}

	auto result = static_cast<boost::smatch*>(lua_newuserdata(L, sizeof(boost::smatch)));
	new(result) boost::smatch;
	luaL_getmetatable(L, "aegisub.smatch");
	lua_setmetatable(L, -2);

	// NOTE(review): the stored smatch keeps iterators into the local `str`,
	// which is destroyed when this function returns. get_match only takes
	// distances between those iterators and never dereferences them, but
	// this is worth confirming and ideally fixing by keeping the string
	// alive alongside the match.
	if (!u32regex_search(str.cbegin() + start, str.cend(), *result, re,
		start > 0 ? boost::match_prev_avail | boost::match_not_bob : boost::match_default))
	{
		lua_pop(L, 1);
		lua_pushnil(L);
	}

	return 1;
}
/// Lua: get_match(smatch, index) -> first, last  or  nil
/// index is 1-based: 1 is the full match, 2 the first capturing group, etc.
/// The returned positions are 1-based and inclusive, counted from the point
/// where the search began. nil is returned when index does not name a
/// sub-expression or that sub-expression did not take part in the match.
int regex_get_match(lua_State *L) {
	// Bind by reference to avoid copying the whole match_results
	auto& match = get_smatch(L);
	int idx = luaL_checkinteger(L, 2) - 1;

	// Valid indices are 0 .. size() - 1
	if (idx < 0 || static_cast<size_t>(idx) >= match.size() || !match[idx].matched) {
		lua_pushnil(L);
		return 1;
	}

	// Add one to the distance instead of advancing the iterator, which could
	// step past the end of the string for an empty match at the very end
	push_value(L, distance(match.prefix().first, match[idx].first) + 1);
	push_value(L, distance(match.prefix().first, match[idx].second));
	return 2;
}
/// Lua: search(regex, str, start) -> first, last  or  nil
/// Searches str beginning at the 1-based position start and returns the
/// 1-based inclusive bounds of the first match, or nil if there is none.
int regex_search(lua_State *L) {
	// Bind by reference; there is no need to copy the regex on every call
	auto& re = get_regex(L);
	std::string str = luaL_checkstring(L, 2);
	int start = luaL_checkinteger(L, 3) - 1;

	// A position outside of the string cannot match anything, and forming an
	// iterator from it would be undefined behaviour
	if (start < 0 || static_cast<size_t>(start) > str.size()) {
		lua_pushnil(L);
		return 1;
	}

	boost::smatch result;
	if (!u32regex_search(str.cbegin() + start, str.cend(), result, re,
		start > 0 ? boost::match_prev_avail | boost::match_not_bob : boost::match_default))
	{
		lua_pushnil(L);
		return 1;
	}

	// position() is relative to the start of the search, so add start back
	push_value(L, start + result.position() + 1);
	push_value(L, start + result.position() + result.length());
	return 2;
}
/// Lua: replace(regex, replacement, str, max_count) -> string
/// Returns str with at most max_count matches replaced by the formatted
/// replacement string.
int regex_replace(lua_State *L) {
	typedef boost::u32regex_iterator<std::string::const_iterator> match_iterator;

	auto re = get_regex(L);
	const auto replacement = luaL_checkstring(L, 2);
	const std::string str = luaL_checkstring(L, 3);
	int remaining = luaL_checkinteger(L, 4);

	std::string ret;
	auto out = back_inserter(ret);
	// Start of the input which has not been copied to the output yet
	auto unwritten = begin(str);

	// Done by hand because regex_replace itself can only do one or infinite
	// replacements
	const match_iterator no_more_matches;
	for (match_iterator it(begin(str), end(str), re); it != no_more_matches && remaining > 0; ++it, --remaining) {
		copy(unwritten, it->prefix().second, out);
		it->format(out, replacement);
		unwritten = it->suffix().first;
	}

	copy(unwritten, end(str), out);
	push_value(L, ret);
	return 1;
}
/// Lua: compile(pattern, flags) -> regex userdata  or  error message string
/// Compiles pattern using Perl syntax combined with the given flags. On a
/// syntax error the exception message is returned as a string instead of the
/// userdata.
int regex_compile(lua_State *L) {
	std::string pattern(luaL_checkstring(L, 1));
	int flags = luaL_checkinteger(L, 2);
	void *storage = lua_newuserdata(L, sizeof(boost::u32regex));

	try {
		// Construct directly from the compiled expression. If compilation
		// throws, no object has been created in the userdata, so nothing is
		// left behind that would need its destructor run.
		new(storage) boost::u32regex(boost::make_u32regex(pattern, boost::u32regex::perl | flags));
	}
	catch (std::exception const& e) {
		// Do the actual triggering of the error in the Lua code as that code
		// can report the original call site
		lua_pop(L, 1);
		push_value(L, e.what());
		return 1;
	}

	// Only a fully constructed regex gets the metatable, and with it __gc
	luaL_getmetatable(L, "aegisub.regex");
	lua_setmetatable(L, -2);
	return 1;
}
/// __gc handler of "aegisub.regex": runs the destructor of the regex which
/// was placement-constructed inside the userdata.
int regex_gc(lua_State *L) {
	typedef boost::u32regex regex_type;
	regex_type& doomed = get_regex(L);
	doomed.~regex_type();
	return 0;
}
/// __gc handler of "aegisub.smatch": runs the destructor of the match
/// results which were placement-constructed inside the userdata.
int smatch_gc(lua_State *L) {
	typedef boost::smatch match_type;
	match_type& doomed = get_smatch(L);
	doomed.~match_type();
	return 0;
}
/// Lua: process_flags(...) -> integer  or  error message string
/// ORs together the values of all arguments, each of which has to be a light
/// userdata. If any argument is something else, a message is returned in
/// place of the number.
int regex_process_flags(lua_State *L) {
	const int count = lua_gettop(L);
	int combined = 0;

	int arg = 1;
	while (arg <= count && lua_islightuserdata(L, arg)) {
		combined |= (int)(intptr_t)lua_touserdata(L, arg);
		++arg;
	}

	// Stopping before the end means a non-flag argument was encountered
	if (arg <= count)
		push_value(L, "Flags must follow all non-flag arguments");
	else
		push_value(L, combined);
	return 1;
}
/// Lua: init_flags() -> table
/// Creates a table mapping flag names to light userdata holding the value of
/// the corresponding boost::u32regex syntax option.
int regex_init_flags(lua_State *L) {
	static const struct {
		const char *name;
		boost::u32regex::flag_type value;
	} flags[] = {
		{ "ICASE", boost::u32regex::icase },
		{ "NOSUB", boost::u32regex::nosubs },
		{ "COLLATE", boost::u32regex::collate },
		{ "NEWLINE_ALT", boost::u32regex::newline_alt },
		{ "NO_MOD_M", boost::u32regex::no_mod_m },
		{ "NO_MOD_S", boost::u32regex::no_mod_s },
		{ "MOD_S", boost::u32regex::mod_s },
		{ "MOD_X", boost::u32regex::mod_x },
		{ "NO_EMPTY_SUBEXPRESSIONS", boost::u32regex::no_empty_expressions }
	};

	lua_newtable(L);
	for (size_t i = 0; i < sizeof(flags) / sizeof(flags[0]); ++i)
		set_field(L, flags[i].name, (void*)flags[i].value);
	return 1;
}
}
namespace Automation4 {
/// Lua: __init_regex() -> table
/// Registers the metatables for the regex and match userdata types and
/// returns a table holding the functions of the binding.
int regex_init(lua_State *L) {
	// luaL_newmetatable leaves the metatable on the stack whether or not it
	// was newly created, so it has to be popped in both cases. Only a new
	// metatable needs its __gc handler set.
	if (luaL_newmetatable(L, "aegisub.regex"))
		set_field(L, "__gc", regex_gc);
	lua_pop(L, 1);

	if (luaL_newmetatable(L, "aegisub.smatch"))
		set_field(L, "__gc", smatch_gc);
	lua_pop(L, 1);

	lua_newtable(L);
	set_field(L, "matches", regex_matches);
	set_field(L, "search", regex_search);
	set_field(L, "match", regex_match);
	set_field(L, "get_match", regex_get_match);
	set_field(L, "replace", regex_replace);
	set_field(L, "compile", regex_compile);
	set_field(L, "process_flags", regex_process_flags);
	set_field(L, "init_flags", regex_init_flags);
	return 1;
}
}
#endif