Replace wxRegex bindings for lua with boost::regex bindings
API is mostly unchanged other than the addition of a lot more flags. Should be less buggy since it has an actual test suite, and generally has a more powerful regex syntax with better support for Unicode. The bindings are written in MoonScript. For now the compiled form is stored in the repo for convenience.
This commit is contained in:
parent
44188267d6
commit
c556a47509
8 changed files with 753 additions and 401 deletions
|
@ -1,103 +1,128 @@
|
||||||
-- Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
|
|
||||||
--
|
|
||||||
-- Permission to use, copy, modify, and distribute this software for any
|
|
||||||
-- purpose with or without fee is hereby granted, provided that the above
|
|
||||||
-- copyright notice and this permission notice appear in all copies.
|
|
||||||
--
|
|
||||||
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
||||||
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
||||||
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
||||||
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
||||||
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
||||||
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
||||||
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
|
|
||||||
-- Get the wxRegex binding
|
|
||||||
local regex = aegisub.__init_regex()
|
local regex = aegisub.__init_regex()
|
||||||
|
local select_first
|
||||||
-- Compiled regular expression type protoype
|
select_first = function(n, a, ...)
|
||||||
local re_proto = {}
|
if n == 0 then
|
||||||
local re_proto_mt = { __index = re_proto }
|
return
|
||||||
|
|
||||||
-- Convert an iterator to an array
|
|
||||||
local function to_table(...)
|
|
||||||
local arr = {}
|
|
||||||
local i = 1
|
|
||||||
for v in ... do
|
|
||||||
arr[i] = v
|
|
||||||
i = i + 1
|
|
||||||
end
|
end
|
||||||
return arr
|
|
||||||
end
|
|
||||||
|
|
||||||
-- Return the first n elements from ...
|
|
||||||
local function select_first(n, a, ...)
|
|
||||||
if n == 0 then return end
|
|
||||||
return a, select_first(n - 1, ...)
|
return a, select_first(n - 1, ...)
|
||||||
end
|
end
|
||||||
|
local unpack_args
|
||||||
-- Extract the flags from ..., bitwise OR them together, and move them to the
|
unpack_args = function(...)
|
||||||
-- front of ...
|
|
||||||
local function unpack_args(...)
|
|
||||||
local n = select('#', ...)
|
|
||||||
local userdata_start = nil
|
local userdata_start = nil
|
||||||
for i = 1, n do
|
for i = 1, select('#', ...) do
|
||||||
local v = select(i, ...)
|
local v = select(i, ...)
|
||||||
if type(v) == "userdata" then
|
if type(v) == 'userdata' then
|
||||||
userdata_start = i
|
userdata_start = i
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
if not (userdata_start) then
|
||||||
if not userdata_start then
|
|
||||||
return 0, ...
|
return 0, ...
|
||||||
end
|
end
|
||||||
|
local flags = regex.process_flags(select(userdata_start, ...))
|
||||||
flags = regex.process_flags(select(userdata_start, ...))
|
if type(flags) == 'string' then
|
||||||
if type(flags) == "string" then
|
|
||||||
error(flags, 3)
|
error(flags, 3)
|
||||||
end
|
end
|
||||||
|
|
||||||
return flags, select_first(userdata_start - 1, ...)
|
return flags, select_first(userdata_start - 1, ...)
|
||||||
end
|
end
|
||||||
|
local check_arg
|
||||||
-- Verify that a valid value was passed for self
|
check_arg = function(arg, expected_type, argn, func_name, level)
|
||||||
local function check_self(self)
|
|
||||||
if getmetatable(self) ~= re_proto_mt then
|
|
||||||
error("re method called with invalid self. You probably used . when : is needed.", 3)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
-- Typecheck a variable and throw an error if it fails
|
|
||||||
local function check_arg(arg, expected_type, argn, func_name, level)
|
|
||||||
if type(arg) ~= expected_type then
|
if type(arg) ~= expected_type then
|
||||||
error(
|
return error("Argument " .. tostring(argn) .. " to " .. tostring(func_name) .. " should be a '" .. tostring(expected_type) .. "', is '" .. tostring(type(arg)) .. "' (" .. tostring(arg) .. ")", level + 1)
|
||||||
string.format("Argument %d to %s should be a '%s', is '%s' (%s)",
|
|
||||||
argn, func_name, expected_type, type(arg), tostring(arg)),
|
|
||||||
level + 1)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
local replace_match
|
||||||
function re_proto.gsplit(self, str, skip_empty, max_split)
|
replace_match = function(match, func, str, last, acc)
|
||||||
check_self(self)
|
if last < match.last then
|
||||||
check_arg(str, "string", 2, "gsplit", self._level)
|
acc[#acc + 1] = str:sub(last, match.first - 1)
|
||||||
if not max_split or max_split <= 0 then max_split = str:len() end
|
|
||||||
|
|
||||||
local function do_split()
|
|
||||||
if not str or str:len() == 0 then
|
|
||||||
return nil
|
|
||||||
end
|
end
|
||||||
|
local repl = func(match.str, match.first, match.last)
|
||||||
if max_split == 0 or not regex.matches(self._regex, str) then
|
if type(repl) == 'string' then
|
||||||
local ret = str
|
acc[#acc + 1] = repl
|
||||||
|
else
|
||||||
|
acc[#acc + 1] = match.str
|
||||||
|
end
|
||||||
|
return match.first, match.last + 1
|
||||||
|
end
|
||||||
|
--- Run the regex once, starting at pos, and append the replaced text to acc.
-- @param re   compiled regex object providing a match(str, pos) method
-- @param func replacement callback forwarded to replace_match
-- @param str  the string being processed
-- @param acc  array of output pieces, appended to in place
-- @param pos  position in str to start matching from
-- @return the position to continue from, and (only when something matched)
--         whether the whole match started inside str
local function do_single_replace_fun(re, func, str, acc, pos)
  local groups = re:match(str, pos)
  if not groups then
    -- Nothing matched: hand the position back unchanged (second result is nil)
    return pos
  end

  -- A single entry is replaced as a whole; with several entries the first one
  -- is skipped and only the remaining ones are passed to replace_match.
  local first_group = (#groups == 1) and 1 or 2

  local cursor = pos
  local match_first
  for idx = first_group, #groups do
    match_first, cursor = replace_match(groups[idx], func, str, cursor, acc)
  end

  -- replace_match returned equal positions: copy one character through and
  -- step past it so the next attempt starts further along.
  if match_first == cursor then
    acc[#acc + 1] = str:sub(cursor, cursor)
    cursor = cursor + 1
  end

  return cursor, groups[1].first <= #str
end
|
||||||
|
--- Replace up to max matches of re in str using a callback.
-- @param re   compiled regex object (passed through to do_single_replace_fun)
-- @param func replacement callback
-- @param str  the string to process
-- @param max  maximum number of match attempts
-- @return the resulting string
local function do_replace_fun(re, func, str, max)
  local acc = {}
  local pos = 1
  for _ = 1, max do
    local more
    pos, more = do_single_replace_fun(re, func, str, acc, pos)
    if not more then
      break
    end
  end
  -- acc holds the processed prefix; everything from pos onwards is copied as is
  return table.concat(acc, '') .. str:sub(pos)
end
|
||||||
|
local RegEx
|
||||||
|
do
|
||||||
|
local start
|
||||||
|
local _parent_0 = nil
|
||||||
|
local _base_0 = {
|
||||||
|
_check_self = function(self)
|
||||||
|
if not (self.__class == RegEx) then
|
||||||
|
return error('re method called with invalid self. You probably used . when : is needed.', 3)
|
||||||
|
end
|
||||||
|
end,
|
||||||
|
gsplit = function(self, str, skip_empty, max_split)
|
||||||
|
self:_check_self()
|
||||||
|
check_arg(str, 'string', 2, 'gsplit', self._level)
|
||||||
|
if not max_split or max_split <= 0 then
|
||||||
|
max_split = str:len()
|
||||||
|
end
|
||||||
|
start = 1
|
||||||
|
local prev = 1
|
||||||
|
local do_split
|
||||||
|
do_split = function()
|
||||||
|
if not str or str:len() == 0 then
|
||||||
|
return
|
||||||
|
end
|
||||||
|
local first, last
|
||||||
|
if max_split > 0 then
|
||||||
|
first, last = regex.search(self._regex, str, start)
|
||||||
|
end
|
||||||
|
if not first or first > str:len() then
|
||||||
|
local ret = str:sub(prev, str:len())
|
||||||
str = nil
|
str = nil
|
||||||
return ret
|
return ret
|
||||||
end
|
end
|
||||||
|
local ret = str:sub(prev, first - 1)
|
||||||
local first, last = regex.get_match(self._regex, str, 0)
|
prev = last + 1
|
||||||
local ret = str:sub(1, first - 1)
|
start = 1 + (function()
|
||||||
str = str:sub(last + 1)
|
if start >= last then
|
||||||
|
return start
|
||||||
|
else
|
||||||
|
return last
|
||||||
|
end
|
||||||
|
end)()
|
||||||
if skip_empty and ret:len() == 0 then
|
if skip_empty and ret:len() == 0 then
|
||||||
return do_split()
|
return do_split()
|
||||||
else
|
else
|
||||||
|
@ -105,197 +130,185 @@ function re_proto.gsplit(self, str, skip_empty, max_split)
|
||||||
return ret
|
return ret
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return do_split
|
return do_split
|
||||||
end
|
end,
|
||||||
|
split = function(self, str, skip_empty, max_split)
|
||||||
function re_proto.split(self, str, skip_empty, max_split)
|
self:_check_self()
|
||||||
check_self(self)
|
check_arg(str, 'string', 2, 'split', self._level)
|
||||||
check_arg(str, "string", 2, "split", self._level)
|
return (function()
|
||||||
return to_table(self:gsplit(str, skip_empty, max_split))
|
local _accum_0 = { }
|
||||||
end
|
local _len_0 = 1
|
||||||
|
for v in self:gsplit(str, skip_empty, max_split) do
|
||||||
function re_proto.gfind(self, str)
|
_accum_0[_len_0] = v
|
||||||
check_self(self)
|
_len_0 = _len_0 + 1
|
||||||
check_arg(str, "string", 2, "gfind", self._level)
|
|
||||||
|
|
||||||
local offset = 0
|
|
||||||
return function()
|
|
||||||
local has_matches = regex.matches(self._regex, str)
|
|
||||||
if not has_matches then return end
|
|
||||||
|
|
||||||
local first, last = regex.get_match(self._regex, str, 0)
|
|
||||||
local ret = str:sub(first, last)
|
|
||||||
str = str:sub(last + 1)
|
|
||||||
|
|
||||||
last = last + offset
|
|
||||||
offset = offset + first
|
|
||||||
return ret, offset, last
|
|
||||||
end
|
end
|
||||||
end
|
return _accum_0
|
||||||
|
end)()
|
||||||
function re_proto.find(self, str)
|
end,
|
||||||
check_self(self)
|
gfind = function(self, str)
|
||||||
check_arg(str, "string", 2, "find", self._level)
|
self:_check_self()
|
||||||
|
check_arg(str, 'string', 2, 'gfind', self._level)
|
||||||
local i = 1
|
start = 1
|
||||||
local ret = {}
|
return function()
|
||||||
|
local first, last = regex.search(self._regex, str, start)
|
||||||
|
if not (first) then
|
||||||
|
return
|
||||||
|
end
|
||||||
|
if last >= start then
|
||||||
|
start = last + 1
|
||||||
|
else
|
||||||
|
start = start + 1
|
||||||
|
end
|
||||||
|
return str:sub(first, last), first, last
|
||||||
|
end
|
||||||
|
end,
|
||||||
|
find = function(self, str)
|
||||||
|
self:_check_self()
|
||||||
|
check_arg(str, 'string', 2, 'find', self._level)
|
||||||
|
local ret = (function()
|
||||||
|
local _accum_0 = { }
|
||||||
|
local _len_0 = 1
|
||||||
for s, f, l in self:gfind(str) do
|
for s, f, l in self:gfind(str) do
|
||||||
ret[i] = {
|
_accum_0[_len_0] = {
|
||||||
str = s,
|
str = s,
|
||||||
first = f,
|
first = f,
|
||||||
last = l
|
last = l
|
||||||
}
|
}
|
||||||
i = i + 1
|
_len_0 = _len_0 + 1
|
||||||
end
|
end
|
||||||
return ret
|
return _accum_0
|
||||||
end
|
end)()
|
||||||
|
return next(ret) and ret
|
||||||
-- Replace a match with the value returned from func when passed the match
|
end,
|
||||||
local function replace_match(match, func, str, last, acc)
|
sub = function(self, str, repl, max_count)
|
||||||
if last < match.last then
|
self:_check_self()
|
||||||
acc[#acc + 1] = str:sub(last, match.first - 1)
|
check_arg(str, 'string', 2, 'sub', self._level)
|
||||||
|
if max_count ~= nil then
|
||||||
|
check_arg(max_count, 'number', 4, 'sub', self._level)
|
||||||
end
|
end
|
||||||
|
if not max_count or max_count == 0 then
|
||||||
local ret = func(match.str, match.first, match.last)
|
max_count = str:len() + 1
|
||||||
if type(ret) == "string" then
|
end
|
||||||
acc[#acc + 1] = ret
|
if type(repl) == 'function' then
|
||||||
|
return do_replace_fun(self, repl, str, max_count)
|
||||||
|
elseif type(repl) == 'string' then
|
||||||
|
return regex.replace(self._regex, repl, str, max_count)
|
||||||
else
|
else
|
||||||
-- If it didn't return a string just leave the old value
|
return error("Argument 2 to sub should be a string or function, is '" .. tostring(type(repl)) .. "' (" .. tostring(repl) .. ")", self._level)
|
||||||
acc[#acc + 1] = match.str
|
|
||||||
end
|
end
|
||||||
|
end,
|
||||||
return match.last + 1
|
gmatch = function(self, str, start)
|
||||||
end
|
self:_check_self()
|
||||||
|
check_arg(str, 'string', 2, 'gmatch', self._level)
|
||||||
-- Replace all matches from a single iteration of the regexp
|
if start then
|
||||||
local function do_single_replace_fun(re, func, str, acc)
|
start = start - 1
|
||||||
local matches = re:match(str)
|
|
||||||
|
|
||||||
-- No more matches so just return what we have so far
|
|
||||||
if not matches then
|
|
||||||
return str
|
|
||||||
end
|
|
||||||
|
|
||||||
-- One match means no capturing groups, so pass the entire thing to
|
|
||||||
-- the replace function
|
|
||||||
if #matches == 1 then
|
|
||||||
local rest = replace_match(matches[1], func, str, 1, acc)
|
|
||||||
return str:sub(rest), true
|
|
||||||
end
|
|
||||||
|
|
||||||
-- Multiple matches means there were capture groups, so skip the first one
|
|
||||||
-- and pass the rest to the replace function
|
|
||||||
local last = 1
|
|
||||||
for i = 2, #matches do
|
|
||||||
last = replace_match(matches[i], func, str, last, acc)
|
|
||||||
end
|
|
||||||
|
|
||||||
return str:sub(last), true
|
|
||||||
end
|
|
||||||
|
|
||||||
local function do_replace_fun(re, func, str, max)
|
|
||||||
local acc = {}
|
|
||||||
local i
|
|
||||||
for i = 1, max do
|
|
||||||
str, continue = do_single_replace_fun(re, func, str, acc)
|
|
||||||
if not continue then max = i end
|
|
||||||
end
|
|
||||||
return table.concat(acc, "") .. str, max
|
|
||||||
end
|
|
||||||
|
|
||||||
function re_proto.sub(self, str, repl, count)
|
|
||||||
check_self(self)
|
|
||||||
check_arg(str, "string", 2, "sub", self._level)
|
|
||||||
if count ~= nil then
|
|
||||||
check_arg(count, "number", 4, "sub", self._level)
|
|
||||||
end
|
|
||||||
|
|
||||||
if not count or count == 0 then count = str:len() end
|
|
||||||
|
|
||||||
if type(repl) == "function" then
|
|
||||||
return do_replace_fun(self, repl, str, count)
|
|
||||||
elseif type(repl) == "string" then
|
|
||||||
return regex.replace(self._regex, repl, str, count)
|
|
||||||
else
|
else
|
||||||
error(
|
start = 0
|
||||||
string.format("Argument 2 to sub should be a string or function, is '%s' (%s)",
|
|
||||||
type(repl), tostring(repl)),
|
|
||||||
self._level)
|
|
||||||
end
|
end
|
||||||
end
|
local match = regex.match(self._regex, str, start)
|
||||||
|
local i = 1
|
||||||
function re_proto.gmatch(self, str)
|
|
||||||
check_self(self)
|
|
||||||
check_arg(str, "string", 2, "gmatch", self._level)
|
|
||||||
|
|
||||||
local match_count = regex.match_count(self._regex, str)
|
|
||||||
local i = 0
|
|
||||||
return function()
|
return function()
|
||||||
if i == match_count then return end
|
if not (match) then
|
||||||
|
return
|
||||||
|
end
|
||||||
|
local first, last = regex.get_match(match, i)
|
||||||
|
if not (first) then
|
||||||
|
return
|
||||||
|
end
|
||||||
i = i + 1
|
i = i + 1
|
||||||
local first, last = regex.get_match(self._regex, str, i - 1)
|
|
||||||
return {
|
return {
|
||||||
str = str:sub(first, last),
|
str = str:sub(first + start, last + start),
|
||||||
first = first,
|
first = first + start,
|
||||||
last = last
|
last = last + start
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
end
|
end,
|
||||||
|
match = function(self, str, start)
|
||||||
function re_proto.match(self, str)
|
self:_check_self()
|
||||||
check_self(self)
|
check_arg(str, 'string', 2, 'match', self._level)
|
||||||
check_arg(str, "string", 2, "match", self._level)
|
local ret = (function()
|
||||||
|
local _accum_0 = { }
|
||||||
local ret = to_table(self:gmatch(str))
|
local _len_0 = 1
|
||||||
-- Return nil rather than a empty table so that if re.match(...) works
|
for v in self:gmatch(str, start) do
|
||||||
if next(ret) == nil then return end
|
_accum_0[_len_0] = v
|
||||||
return ret
|
_len_0 = _len_0 + 1
|
||||||
end
|
|
||||||
|
|
||||||
-- Create a wxRegExp object from a pattern, flags, and error depth
|
|
||||||
local function real_compile(pattern, level, flags, stored_level)
|
|
||||||
local regex = regex.compile(pattern, flags)
|
|
||||||
if not regex then
|
|
||||||
error("Bad syntax in regular expression", level + 1)
|
|
||||||
end
|
end
|
||||||
return setmetatable({
|
return _accum_0
|
||||||
_regex = regex,
|
end)()
|
||||||
_level = stored_level or level + 1
|
if next(ret) == nil then
|
||||||
},
|
return nil
|
||||||
re_proto_mt)
|
end
|
||||||
|
return ret
|
||||||
|
end
|
||||||
|
}
|
||||||
|
_base_0.__index = _base_0
|
||||||
|
if _parent_0 then
|
||||||
|
setmetatable(_base_0, _parent_0.__base)
|
||||||
|
end
|
||||||
|
local _class_0 = setmetatable({
|
||||||
|
__init = function(self, _regex, _level)
|
||||||
|
self._regex, self._level = _regex, _level
|
||||||
|
end,
|
||||||
|
__base = _base_0,
|
||||||
|
__name = "RegEx",
|
||||||
|
__parent = _parent_0
|
||||||
|
}, {
|
||||||
|
__index = function(cls, name)
|
||||||
|
local val = rawget(_base_0, name)
|
||||||
|
if val == nil and _parent_0 then
|
||||||
|
return _parent_0[name]
|
||||||
|
else
|
||||||
|
return val
|
||||||
|
end
|
||||||
|
end,
|
||||||
|
__call = function(cls, ...)
|
||||||
|
local _self_0 = setmetatable({}, _base_0)
|
||||||
|
cls.__init(_self_0, ...)
|
||||||
|
return _self_0
|
||||||
|
end
|
||||||
|
})
|
||||||
|
_base_0.__class = _class_0
|
||||||
|
local self = _class_0
|
||||||
|
start = 1
|
||||||
|
if _parent_0 and _parent_0.__inherited then
|
||||||
|
_parent_0.__inherited(_parent_0, _class_0)
|
||||||
|
end
|
||||||
|
RegEx = _class_0
|
||||||
end
|
end
|
||||||
|
local real_compile
|
||||||
-- Compile a pattern then invoke a method on it
|
real_compile = function(pattern, level, flags, stored_level)
|
||||||
local function invoke(str, pattern, fn, flags, ...)
|
if pattern == '' then
|
||||||
local comp = real_compile(pattern, 3, flags)
|
error('Regular expression must not be empty', level + 1)
|
||||||
return comp[fn](comp, str, ...)
|
end
|
||||||
|
local re = regex.compile(pattern, flags)
|
||||||
|
if type(re) == 'string' then
|
||||||
|
error(regex, level + 1)
|
||||||
|
end
|
||||||
|
return RegEx(re, stored_level or level + 1)
|
||||||
end
|
end
|
||||||
|
local invoke
|
||||||
-- Generate a static version of a method with arg type checking
|
invoke = function(str, pattern, fn, flags, ...)
|
||||||
local function gen_wrapper(impl_name)
|
local compiled_regex = real_compile(pattern, 3, flags)
|
||||||
|
return compiled_regex[fn](compiled_regex, str, ...)
|
||||||
|
end
|
||||||
|
local gen_wrapper
|
||||||
|
gen_wrapper = function(impl_name)
|
||||||
return function(str, pattern, ...)
|
return function(str, pattern, ...)
|
||||||
check_arg(str, "string", 1, impl_name, 2)
|
check_arg(str, 'string', 1, impl_name, 2)
|
||||||
check_arg(pattern, "string", 2, impl_name, 2)
|
check_arg(pattern, 'string', 2, impl_name, 2)
|
||||||
return invoke(str, pattern, impl_name, unpack_args(...))
|
return invoke(str, pattern, impl_name, unpack_args(...))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
-- And now at last the actual public API
|
|
||||||
local re = regex.init_flags(re)
|
local re = regex.init_flags(re)
|
||||||
|
re.compile = function(pattern, ...)
|
||||||
function re.compile(pattern, ...)
|
check_arg(pattern, 'string', 1, 'compile', 2)
|
||||||
check_arg(pattern, "string", 1, "compile", 2)
|
|
||||||
return real_compile(pattern, 2, regex.process_flags(...), 2)
|
return real_compile(pattern, 2, regex.process_flags(...), 2)
|
||||||
end
|
end
|
||||||
|
re.split = gen_wrapper('split')
|
||||||
re.split = gen_wrapper("split")
|
re.gsplit = gen_wrapper('gsplit')
|
||||||
re.gsplit = gen_wrapper("gsplit")
|
re.find = gen_wrapper('find')
|
||||||
re.find = gen_wrapper("find")
|
re.gfind = gen_wrapper('gfind')
|
||||||
re.gfind = gen_wrapper("gfind")
|
re.match = gen_wrapper('match')
|
||||||
re.match = gen_wrapper("match")
|
re.gmatch = gen_wrapper('gmatch')
|
||||||
re.gmatch = gen_wrapper("gmatch")
|
re.sub = gen_wrapper('sub')
|
||||||
re.sub = gen_wrapper("sub")
|
return re
|
||||||
|
|
||||||
_G.re = re
|
|
||||||
return _G.re
|
|
||||||
|
|
244
aegisub/automation/include/re.moon
Normal file
244
aegisub/automation/include/re.moon
Normal file
|
@ -0,0 +1,244 @@
|
||||||
|
-- Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
|
||||||
|
--
|
||||||
|
-- Permission to use, copy, modify, and distribute this software for any
|
||||||
|
-- purpose with or without fee is hereby granted, provided that the above
|
||||||
|
-- copyright notice and this permission notice appear in all copies.
|
||||||
|
--
|
||||||
|
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
|
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
|
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||||
|
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||||
|
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||||
|
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
|
||||||
|
-- Get the boost::regex binding
|
||||||
|
regex = aegisub.__init_regex()
|
||||||
|
|
||||||
|
-- Return the first n values of the argument list, dropping everything after
-- them (returns nothing at all when n is 0)
select_first = (n, a, ...) ->
  return if n == 0
  a, select_first n - 1, ...
|
||||||
|
|
||||||
|
-- Split the trailing flag arguments off of ...: everything from the first
-- userdata argument onwards is handed to regex.process_flags, and its result
-- is returned in front of the arguments that preceded the flags
unpack_args = (...) ->
  first_flag = nil
  for idx = 1, select '#', ...
    candidate = select idx, ...
    if type(candidate) == 'userdata'
      first_flag = idx
      break

  -- No flags at all: report 0 and pass every argument through untouched
  unless first_flag
    return 0, ...

  flags = regex.process_flags select first_flag, ...
  -- A string result is treated as an error message and raised
  if type(flags) == 'string'
    error(flags, 3)

  flags, select_first first_flag - 1, ...
|
||||||
|
|
||||||
|
|
||||||
|
-- Raise an error unless type(arg) is expected_type
--
-- argn and func_name only appear in the message; level is the error level of
-- the caller, so the error is raised one level further up than that
check_arg = (arg, expected_type, argn, func_name, level) ->
  actual_type = type arg
  return if actual_type == expected_type
  error "Argument #{argn} to #{func_name} should be a '#{expected_type}', is '#{actual_type}' (#{arg})", level + 1
|
||||||
|
|
||||||
|
-- Replace a match with the value returned from func when passed the match
--
-- match: table with str, first and last fields describing one match
-- func:  called as func(match.str, match.first, match.last)
-- str:   the string being processed
-- last:  index of the first character of str not yet copied into acc
-- acc:   array of output pieces, appended to in place
--
-- Returns match.first and the index just past the match (match.last + 1)
replace_match = (match, func, str, last, acc) ->
  -- Copy everything between the last match and this match. The comparison is
  -- against match.first: an empty match has last == first - 1, so comparing
  -- against match.last dropped a single pending character that sat directly
  -- in front of an empty match.
  if last < match.first
    acc[#acc + 1] = str\sub last, match.first - 1

  repl = func match.str, match.first, match.last

  -- If it didn't return a string just leave the old value
  acc[#acc + 1] = if type(repl) == 'string' then repl else match.str

  match.first, match.last + 1
|
||||||
|
|
||||||
|
-- Run the regex once starting at pos and push the replaced text onto acc
--
-- Returns the position to continue from and, when something matched, whether
-- the whole match started inside str
do_single_replace_fun = (re, func, str, acc, pos) ->
  groups = re\match str, pos

  -- No more matches, so hand back the position we were given
  unless groups
    return pos

  -- With a single entry there are no capturing groups and the entire match
  -- goes to the replace function; with several, the full match is skipped and
  -- only the capturing groups are passed on.
  first_group = if #groups == 1
    1
  else
    2

  local match_first
  cursor = pos
  for idx = first_group, #groups
    match_first, cursor = replace_match groups[idx], func, str, cursor, acc

  -- Always consume at least one character of the input, otherwise the same
  -- match would simply be made max_count times
  if match_first == cursor
    acc[#acc + 1] = str\sub cursor, cursor
    cursor += 1

  cursor, groups[1].first <= str\len()
|
||||||
|
|
||||||
|
-- Replace up to max matches of re in str with the values returned by func
-- and return the resulting string
do_replace_fun = (re, func, str, max) ->
  acc = {}
  pos = 1
  for i = 1, max
    pos, more = do_single_replace_fun re, func, str, acc, pos
    unless more
      break
  -- acc holds the processed prefix; everything from pos onwards is copied as is
  table.concat(acc, '') .. str\sub pos
|
||||||
|
|
||||||
|
-- Compiled regular expression type prototype
class RegEx
  -- Verify that a valid value was passed for self
  --
  -- Methods call this as RegEx._check_self @ rather than @_check_self!: the
  -- latter looks the checker up on the very value being validated, so a call
  -- made with . instead of : failed before the check could run.
  _check_self: =>
    unless type(@) == 'table' and @__class == RegEx
      error 're method called with invalid self. You probably used . when : is needed.', 3

  -- _regex is the compiled regex from regex.compile; _level is the error level
  -- used when reporting bad arguments to the methods below
  new: (@_regex, @_level) =>

  -- Return an iterator over the pieces of str between matches
  --
  -- skip_empty: when truthy, empty pieces are not returned
  -- max_split:  number of pieces to split off before the rest of str is
  --             returned as one piece; nil or <= 0 means str\len()
  gsplit: (str, skip_empty, max_split) =>
    RegEx._check_self @
    check_arg str, 'string', 2, 'gsplit', @_level
    if not max_split or max_split <= 0 then max_split = str\len()

    -- Iterator state is local to this call so that several iterators can be
    -- in flight at once without overwriting each other's position
    local search_pos, piece_start
    search_pos = 1
    piece_start = 1
    do_split = () ->
      if not str or str\len() == 0 then return

      local first, last
      if max_split > 0
        first, last = regex.search @_regex, str, search_pos

      -- No further match: return the remainder and mark the iterator as done
      if not first or first > str\len()
        ret = str\sub piece_start, str\len()
        str = nil
        return ret

      ret = str\sub piece_start, first - 1
      piece_start = last + 1

      -- Always move the search position forwards, even for an empty match
      search_pos = 1 + if search_pos >= last then search_pos else last

      if skip_empty and ret\len() == 0
        do_split()
      else
        max_split -= 1
        ret

    do_split

  -- Same as gsplit, but collects the pieces into an array
  split: (str, skip_empty, max_split) =>
    RegEx._check_self @
    check_arg str, 'string', 2, 'split', @_level
    [v for v in @gsplit str, skip_empty, max_split]

  -- Return an iterator yielding (matched text, first, last) for each match
  gfind: (str) =>
    RegEx._check_self @
    check_arg str, 'string', 2, 'gfind', @_level

    -- Per-iterator search position (see gsplit)
    local search_pos
    search_pos = 1
    ->
      first, last = regex.search(@_regex, str, search_pos)
      return unless first

      -- Step past the match, or one position forwards if it did not advance
      search_pos = if last >= search_pos then last + 1 else search_pos + 1
      str\sub(first, last), first, last

  -- Collect the results of gfind into an array of {str, first, last} tables;
  -- returns nil when there were no matches
  find: (str) =>
    RegEx._check_self @
    check_arg str, 'string', 2, 'find', @_level

    ret = [{str: s, first: f, last: l} for s, f, l in @gfind str]
    next(ret) and ret

  -- Replace matches in str with repl, which is either a string (handled by
  -- regex.replace) or a function (handled by do_replace_fun)
  --
  -- max_count: optional number; nil or 0 means str\len() + 1
  sub: (str, repl, max_count) =>
    RegEx._check_self @
    check_arg str, 'string', 2, 'sub', @_level
    if max_count != nil
      check_arg max_count, 'number', 4, 'sub', @_level

    max_count = str\len() + 1 if not max_count or max_count == 0

    if type(repl) == 'function'
      do_replace_fun @, repl, str, max_count
    elseif type(repl) == 'string'
      regex.replace @_regex, repl, str, max_count
    else
      -- repl sits between str (argument 2) and max_count (argument 4)
      error "Argument 3 to sub should be a string or function, is '#{type(repl)}' (#{repl})", @_level

  -- Return an iterator over the entries of a single match of the regex
  --
  -- start: optional position to begin matching at; it is turned into an
  --        offset (start - 1) for regex.match and added back onto the
  --        positions reported by regex.get_match
  gmatch: (str, start) =>
    RegEx._check_self @
    check_arg str, 'string', 2, 'gmatch', @_level
    start = if start then start - 1 else 0

    match = regex.match @_regex, str, start
    i = 1
    ->
      return unless match
      first, last = regex.get_match match, i
      return unless first
      i += 1

      {
        str: str\sub first + start, last + start
        first: first + start
        last: last + start
      }

  -- Collect the results of gmatch into an array
  match: (str, start) =>
    RegEx._check_self @
    check_arg(str, 'string', 2, 'match', @_level)

    ret = [v for v in @gmatch str, start]
    -- Return nil rather than a empty table so that if re.match(...) works
    return nil if next(ret) == nil
    ret
|
||||||
|
|
||||||
|
-- Create a RegEx object from a pattern, flags, and error depth
--
-- level is the error level used for problems with the pattern itself;
-- stored_level (default level + 1) is kept on the object for its own methods
real_compile = (pattern, level, flags, stored_level) ->
  if pattern == ''
    error 'Regular expression must not be empty', level + 1

  re = regex.compile pattern, flags
  -- A string result is the error message. Raise that message, not the
  -- `regex` binding table.
  if type(re) == 'string'
    error re, level + 1

  RegEx re, stored_level or level + 1
|
||||||
|
|
||||||
|
-- Compile pattern with the given flags, then call the method named fn on the
-- resulting object with str and any extra arguments
invoke = (str, pattern, fn, flags, ...) ->
  expr = real_compile pattern, 3, flags
  method = expr[fn]
  method expr, str, ...
|
||||||
|
|
||||||
|
-- Build the static form of the method called impl_name: a function taking the
-- string and the pattern first, type-checking both, and then forwarding to
-- invoke with the flags pulled out of the remaining arguments
gen_wrapper = (impl_name) ->
  static_method = (str, pattern, ...) ->
    check_arg str, 'string', 1, impl_name, 2
    check_arg pattern, 'string', 2, impl_name, 2
    invoke str, pattern, impl_name, unpack_args ...
  static_method
|
||||||
|
|
||||||
|
-- And now at last the actual public API
re = regex.init_flags(re)

-- Compile pattern into a reusable RegEx object; any further arguments are
-- passed to regex.process_flags
re.compile = (pattern, ...) ->
  check_arg pattern, 'string', 1, 'compile', 2
  flags = regex.process_flags ...
  -- A string result is an error message (unpack_args checks for the same
  -- thing), so raise it instead of passing it on as if it were flags
  if type(flags) == 'string'
    error flags, 2
  real_compile pattern, 2, flags, 2

-- Static versions of the RegEx methods, taking (str, pattern, ...)
re.split = gen_wrapper 'split'
re.gsplit = gen_wrapper 'gsplit'
re.find = gen_wrapper 'find'
re.gfind = gen_wrapper 'gfind'
re.match = gen_wrapper 'match'
re.gmatch = gen_wrapper 'gmatch'
re.sub = gen_wrapper 'sub'

re
|
|
@ -327,6 +327,7 @@
|
||||||
<ClCompile Include="$(SrcDir)auto4_lua_dialog.cpp" />
|
<ClCompile Include="$(SrcDir)auto4_lua_dialog.cpp" />
|
||||||
<ClCompile Include="$(SrcDir)auto4_lua_progresssink.cpp" />
|
<ClCompile Include="$(SrcDir)auto4_lua_progresssink.cpp" />
|
||||||
<ClCompile Include="$(SrcDir)auto4_lua_scriptreader.cpp" />
|
<ClCompile Include="$(SrcDir)auto4_lua_scriptreader.cpp" />
|
||||||
|
<ClCompile Include="$(SrcDir)auto4_regex.cpp" />
|
||||||
<ClCompile Include="$(SrcDir)avisynth_wrap.cpp" />
|
<ClCompile Include="$(SrcDir)avisynth_wrap.cpp" />
|
||||||
<ClCompile Include="$(SrcDir)base_grid.cpp" />
|
<ClCompile Include="$(SrcDir)base_grid.cpp" />
|
||||||
<ClCompile Include="$(SrcDir)charset_detect.cpp" />
|
<ClCompile Include="$(SrcDir)charset_detect.cpp" />
|
||||||
|
|
|
@ -1238,6 +1238,9 @@
|
||||||
<ClCompile Include="$(SrcDir)subs_controller.cpp">
|
<ClCompile Include="$(SrcDir)subs_controller.cpp">
|
||||||
<Filter>ASS</Filter>
|
<Filter>ASS</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="$(SrcDir)auto4_regex.cpp">
|
||||||
|
<Filter>Automation\Lua</Filter>
|
||||||
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ResourceCompile Include="$(SrcDir)res.rc">
|
<ResourceCompile Include="$(SrcDir)res.rc">
|
||||||
|
|
|
@ -92,11 +92,11 @@ endif
|
||||||
# AUTOMATION
|
# AUTOMATION
|
||||||
############
|
############
|
||||||
ifeq (yes, $(HAVE_AUTO4_LUA))
|
ifeq (yes, $(HAVE_AUTO4_LUA))
|
||||||
auto4_lua.o auto4_lua_assfile.o auto4_lua_dialog.o auto4_lua_progresssink.o auto4_lua_scriptreader.o: CXXFLAGS += $(CFLAGS_LUA)
|
# Every object listed here gets the Lua compile flags. The regex binding is built from auto4_regex.cpp (see SRC), so its object is auto4_regex.o.
auto4_lua.o auto4_lua_assfile.o auto4_lua_dialog.o auto4_lua_progresssink.o auto4_regex.o auto4_lua_scriptreader.o: CXXFLAGS += $(CFLAGS_LUA)
|
||||||
LIBS += $(LIBS_LUA)
|
LIBS += $(LIBS_LUA)
|
||||||
SRC += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_lua_scriptreader.cpp
|
SRC += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_regex.cpp auto4_lua_scriptreader.cpp
|
||||||
else
|
else
|
||||||
EXTRA_DIST += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_lua_scriptreader.cpp
|
EXTRA_DIST += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_regex.cpp auto4_lua_scriptreader.cpp
|
||||||
endif
|
endif
|
||||||
|
|
||||||
##############
|
##############
|
||||||
|
|
|
@ -71,7 +71,6 @@
|
||||||
#include <wx/filename.h>
|
#include <wx/filename.h>
|
||||||
#include <wx/log.h>
|
#include <wx/log.h>
|
||||||
#include <wx/msgdlg.h>
|
#include <wx/msgdlg.h>
|
||||||
#include <wx/regex.h>
|
|
||||||
#include <wx/window.h>
|
#include <wx/window.h>
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -111,125 +110,6 @@ namespace {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline wxRegEx *get_regex(lua_State *L)
|
|
||||||
{
|
|
||||||
return static_cast<wxRegEx*>(luaL_checkudata(L, 1, "aegisub.regex"));
|
|
||||||
}
|
|
||||||
|
|
||||||
int regex_matches(lua_State *L)
|
|
||||||
{
|
|
||||||
lua_pushboolean(L, get_regex(L)->Matches(check_wxstring(L, 2)));
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int regex_match_count(lua_State *L)
|
|
||||||
{
|
|
||||||
wxRegEx *re = get_regex(L);
|
|
||||||
if (re->Matches(check_wxstring(L, 2)))
|
|
||||||
push_value(L, re->GetMatchCount());
|
|
||||||
else
|
|
||||||
push_value(L, 0);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t utf8_len(wxString const& w)
|
|
||||||
{
|
|
||||||
#if wxUSE_UNICODE_UTF8
|
|
||||||
return w.utf8_length();
|
|
||||||
#else
|
|
||||||
return w.utf8_str().length();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
int regex_get_match(lua_State *L)
|
|
||||||
{
|
|
||||||
wxString str(check_wxstring(L, 2));
|
|
||||||
size_t start, len;
|
|
||||||
get_regex(L)->GetMatch(&start, &len, luaL_checkinteger(L, 3));
|
|
||||||
push_value(L, utf8_len(str.Left(start)) + 1);
|
|
||||||
push_value(L, utf8_len(str.Left(start + len)));
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
int regex_replace(lua_State *L)
|
|
||||||
{
|
|
||||||
wxString str(check_wxstring(L, 3));
|
|
||||||
int reps = get_regex(L)->Replace(&str, check_wxstring(L, 2), luaL_checkinteger(L, 4));
|
|
||||||
push_value(L, str);
|
|
||||||
push_value(L, reps);
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
int regex_compile(lua_State *L)
|
|
||||||
{
|
|
||||||
wxString pattern(check_wxstring(L, 1));
|
|
||||||
int flags = luaL_checkinteger(L, 2);
|
|
||||||
wxRegEx *re = static_cast<wxRegEx*>(lua_newuserdata(L, sizeof(wxRegEx)));
|
|
||||||
new(re) wxRegEx(pattern, wxRE_ADVANCED | flags);
|
|
||||||
|
|
||||||
luaL_getmetatable(L, "aegisub.regex");
|
|
||||||
lua_setmetatable(L, -2);
|
|
||||||
|
|
||||||
// return nil and handle the error in lua as it's a bit easier to
|
|
||||||
// report the actual call site from there
|
|
||||||
if (!re->IsValid()) {
|
|
||||||
lua_pop(L, 1);
|
|
||||||
lua_pushnil(L);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int regex_gc(lua_State *L) {
|
|
||||||
get_regex(L)->~wxRegEx();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int regex_process_flags(lua_State *L)
|
|
||||||
{
|
|
||||||
int ret = 0;
|
|
||||||
int nargs = lua_gettop(L);
|
|
||||||
for (int i = 1; i <= nargs; ++i) {
|
|
||||||
if (!lua_islightuserdata(L, i)) {
|
|
||||||
push_value(L, "Flags must follow all non-flag arguments");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
ret |= (int)(intptr_t)lua_touserdata(L, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
push_value(L, ret);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int regex_init_flags(lua_State *L)
|
|
||||||
{
|
|
||||||
lua_newtable(L);
|
|
||||||
|
|
||||||
set_field(L, "ICASE", (void*)wxRE_ICASE);
|
|
||||||
set_field(L, "NOSUB", (void*)wxRE_NOSUB);
|
|
||||||
set_field(L, "NEWLINE", (void*)wxRE_NEWLINE);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int regex_init(lua_State *L)
|
|
||||||
{
|
|
||||||
if (luaL_newmetatable(L, "aegisub.regex")) {
|
|
||||||
set_field(L, "__gc", regex_gc);
|
|
||||||
lua_pop(L, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
lua_newtable(L);
|
|
||||||
set_field(L, "matches", regex_matches);
|
|
||||||
set_field(L, "match_count", regex_match_count);
|
|
||||||
set_field(L, "get_match", regex_get_match);
|
|
||||||
set_field(L, "replace", regex_replace);
|
|
||||||
set_field(L, "compile", regex_compile);
|
|
||||||
set_field(L, "process_flags", regex_process_flags);
|
|
||||||
set_field(L, "init_flags", regex_init_flags);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int clipboard_get(lua_State *L)
|
int clipboard_get(lua_State *L)
|
||||||
{
|
{
|
||||||
std::string data = GetClipboard();
|
std::string data = GetClipboard();
|
||||||
|
@ -274,6 +154,8 @@ namespace {
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace Automation4 {
|
namespace Automation4 {
|
||||||
|
int regex_init(lua_State *L);
|
||||||
|
|
||||||
// LuaScript
|
// LuaScript
|
||||||
LuaScript::LuaScript(agi::fs::path const& filename)
|
LuaScript::LuaScript(agi::fs::path const& filename)
|
||||||
: Script(filename)
|
: Script(filename)
|
||||||
|
|
|
@ -33,6 +33,7 @@ inline void push_value(lua_State *L, const char *value) { lua_pushstring(L, valu
|
||||||
inline void push_value(lua_State *L, double value) { lua_pushnumber(L, value); }
|
inline void push_value(lua_State *L, double value) { lua_pushnumber(L, value); }
|
||||||
inline void push_value(lua_State *L, int value) { lua_pushinteger(L, value); }
|
inline void push_value(lua_State *L, int value) { lua_pushinteger(L, value); }
|
||||||
inline void push_value(lua_State *L, size_t value) { lua_pushinteger(L, value); }
|
inline void push_value(lua_State *L, size_t value) { lua_pushinteger(L, value); }
|
||||||
|
inline void push_value(lua_State *L, long value) { lua_pushinteger(L, value); }
|
||||||
inline void push_value(lua_State *L, void *p) { lua_pushlightuserdata(L, p); }
|
inline void push_value(lua_State *L, void *p) { lua_pushlightuserdata(L, p); }
|
||||||
|
|
||||||
inline void push_value(lua_State *L, wxString const& value) {
|
inline void push_value(lua_State *L, wxString const& value) {
|
||||||
|
|
208
aegisub/src/auto4_regex.cpp
Normal file
208
aegisub/src/auto4_regex.cpp
Normal file
|
@ -0,0 +1,208 @@
|
||||||
|
// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
|
||||||
|
//
|
||||||
|
// Permission to use, copy, modify, and distribute this software for any
|
||||||
|
// purpose with or without fee is hereby granted, provided that the above
|
||||||
|
// copyright notice and this permission notice appear in all copies.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
|
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
|
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||||
|
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||||
|
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||||
|
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
//
|
||||||
|
// Aegisub Project http://www.aegisub.org/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#ifdef WITH_AUTO4_LUA
|
||||||
|
#include "auto4_lua_utils.h"
|
||||||
|
|
||||||
|
#include <boost/regex/icu.hpp>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
/// Fetch the compiled regex held by the userdata at stack index 1.
///
/// luaL_checkudata validates that the argument is a userdata tagged with the
/// "aegisub.regex" metatable before the pointer is reinterpreted.
boost::u32regex& get_regex(lua_State *L) {
	void *udata = luaL_checkudata(L, 1, "aegisub.regex");
	return *static_cast<boost::u32regex*>(udata);
}
|
||||||
|
|
||||||
|
/// Fetch the match result held by the userdata at stack index 1.
///
/// luaL_checkudata validates that the argument is a userdata tagged with the
/// "aegisub.smatch" metatable before the pointer is reinterpreted.
boost::smatch& get_smatch(lua_State *L) {
	void *udata = luaL_checkudata(L, 1, "aegisub.smatch");
	return *static_cast<boost::smatch*>(udata);
}
|
||||||
|
|
||||||
|
/// Lua: matches(regex, str) -> boolean
///
/// Pushes the result of u32regex_match for the string at index 2 against the
/// compiled regex at index 1.
int regex_matches(lua_State *L) {
	const char *subject = luaL_checkstring(L, 2);
	boost::u32regex& re = get_regex(L);
	lua_pushboolean(L, u32regex_match(subject, re));
	return 1;
}
|
||||||
|
|
||||||
|
/// Lua: match(regex, str, start) -> smatch userdata | nil
///
/// Searches `str` for the regex, beginning `start` bytes into the string
/// (the value is used as a raw offset; it is read with lua_tointeger, so a
/// missing or non-numeric argument is treated as 0). On success the result is
/// a userdata tagged "aegisub.smatch" wrapping a boost::smatch; when nothing
/// matches, or when `start` lies outside the string, nil is returned.
int regex_match(lua_State *L) {
	// The userdata owns the regex; a reference avoids copying it per call.
	auto const& re = get_regex(L);
	std::string str = luaL_checkstring(L, 2);
	int start = lua_tointeger(L, 3);

	// An offset outside [0, size] would move the begin iterator out of the
	// string, which is undefined behaviour. Treat it as "no match" instead.
	if (start < 0 || static_cast<size_t>(start) > str.size()) {
		lua_pushnil(L);
		return 1;
	}

	auto result = static_cast<boost::smatch*>(lua_newuserdata(L, sizeof(boost::smatch)));
	new(result) boost::smatch;
	// Attach the metatable immediately so __gc destroys the smatch even if
	// the search below fails and the userdata is discarded.
	luaL_getmetatable(L, "aegisub.smatch");
	lua_setmetatable(L, -2);

	// NOTE(review): the iterators stored in *result point into the local
	// `str`, which is destroyed when this function returns. regex_get_match
	// only measures distances between them, but this should be confirmed and
	// ideally the string should be kept alive alongside the match.

	// When starting mid-string, tell the matcher that the preceding character
	// is available and that this position is not the start of the buffer.
	if (!u32regex_search(str.cbegin() + start, str.cend(), *result, re,
		start > 0 ? boost::match_prev_avail | boost::match_not_bob : boost::match_default))
	{
		lua_pop(L, 1);
		lua_pushnil(L);
	}

	return 1;
}
|
||||||
|
|
||||||
|
/// Lua: get_match(smatch, index) -> first, last | nil
///
/// `index` is 1-based and selects a sub-match of the smatch userdata at
/// index 1. Returns the 1-based inclusive start and end of that sub-match,
/// measured from the beginning of the searched range (match.prefix().first),
/// or nil if the index is out of range or the group did not participate.
int regex_get_match(lua_State *L) {
	// Bind by reference: copying a match_results duplicates every sub-match.
	auto const& match = get_smatch(L);
	int idx = luaL_checkinteger(L, 2) - 1;

	// Negative indices wrap to a huge size_t and are rejected here too.
	// Valid indices are [0, size), so size itself must be excluded.
	if (static_cast<size_t>(idx) >= match.size() || !match[idx].matched) {
		lua_pushnil(L);
		return 1;
	}

	auto const origin = match.prefix().first;
	// Add 1 to the distance rather than to the iterator: an empty sub-match at
	// the very end of the input would otherwise advance past the end.
	push_value(L, std::distance(origin, match[idx].first) + 1);
	push_value(L, std::distance(origin, match[idx].second));
	return 2;
}
|
||||||
|
|
||||||
|
/// Lua: search(regex, str, start) -> first, last | nil
///
/// `start` is a 1-based byte position in `str` at which to begin searching.
/// Returns the 1-based inclusive start and end positions of the first match
/// relative to the whole string, or nil when nothing matches or `start` does
/// not fall within the string.
int regex_search(lua_State *L) {
	// The userdata owns the regex; a reference avoids copying it per call.
	auto const& re = get_regex(L);
	std::string str = luaL_checkstring(L, 2);
	int start = luaL_checkinteger(L, 3) - 1;

	// A position of 0 or one beyond size()+1 would produce an iterator outside
	// the string (undefined behaviour); report it as "no match" instead.
	if (start < 0 || static_cast<size_t>(start) > str.size()) {
		lua_pushnil(L);
		return 1;
	}

	boost::smatch result;
	// When starting mid-string, tell the matcher that the preceding character
	// is available and that this position is not the start of the buffer.
	if (!u32regex_search(str.cbegin() + start, str.cend(), result, re,
		start > 0 ? boost::match_prev_avail | boost::match_not_bob : boost::match_default))
	{
		lua_pushnil(L);
		return 1;
	}

	// position() is relative to the search start, so add the offset back and
	// convert to Lua's 1-based, inclusive convention.
	push_value(L, start + result.position() + 1);
	push_value(L, start + result.position() + result.length());
	return 2;
}
|
||||||
|
|
||||||
|
/// Lua: replace(regex, replacement, str, max_count) -> string
///
/// Builds a copy of `str` in which at most `max_count` matches of the regex
/// are replaced by the expansion of the format string `replacement`.
int regex_replace(lua_State *L) {
	typedef boost::u32regex_iterator<std::string::const_iterator> match_iterator;

	auto re = get_regex(L);
	const auto replacement = luaL_checkstring(L, 2);
	const std::string str = luaL_checkstring(L, 3);
	int remaining = luaL_checkinteger(L, 4);

	std::string ret;
	auto out = back_inserter(ret);

	// Start of the text not yet copied to the output.
	auto tail = begin(str);

	// boost's regex_replace can only replace one or all matches, so walk the
	// matches by hand and stop once the budget is used up.
	match_iterator done;
	for (match_iterator it(begin(str), end(str), re); it != done && remaining > 0; ++it, --remaining) {
		copy(tail, it->prefix().second, out);
		it->format(out, replacement);
		tail = it->suffix().first;
	}

	// Whatever follows the last replaced match is copied through verbatim.
	copy(tail, end(str), out);

	push_value(L, ret);
	return 1;
}
|
||||||
|
|
||||||
|
/// Lua: compile(pattern, flags) -> regex userdata | error message string
///
/// Compiles `pattern` as a Perl-syntax boost::u32regex with the additional
/// syntax `flags`. On success a userdata tagged "aegisub.regex" is returned.
/// If compilation throws, the exception message is returned as a string.
int regex_compile(lua_State *L) {
	std::string pattern(luaL_checkstring(L, 1));
	int flags = luaL_checkinteger(L, 2);

	// Compile into a local first so that a failed compile never leaves a
	// constructed object inside a userdata that has no __gc attached yet.
	boost::u32regex compiled;
	try {
		compiled = boost::make_u32regex(pattern, boost::u32regex::perl | flags);
	}
	catch (std::exception const& e) {
		// Do the actual triggering of the error in the Lua code as that code
		// can report the original call site
		push_value(L, e.what());
		return 1;
	}

	auto re = static_cast<boost::u32regex*>(lua_newuserdata(L, sizeof(boost::u32regex)));
	new(re) boost::u32regex(compiled);

	// With the object fully constructed, hand ownership to Lua's GC.
	luaL_getmetatable(L, "aegisub.regex");
	lua_setmetatable(L, -2);

	return 1;
}
|
||||||
|
|
||||||
|
/// __gc metamethod for "aegisub.regex" userdata.
///
/// The regex was placement-constructed inside Lua-owned memory, so only the
/// destructor is run here; the storage itself is not freed by this function.
int regex_gc(lua_State *L) {
	typedef boost::u32regex regex_type;
	regex_type& doomed = get_regex(L);
	doomed.~regex_type();
	return 0;
}
|
||||||
|
|
||||||
|
/// __gc metamethod for "aegisub.smatch" userdata.
///
/// The smatch was placement-constructed inside Lua-owned memory, so only the
/// destructor is run here; the storage itself is not freed by this function.
int smatch_gc(lua_State *L) {
	typedef boost::smatch match_type;
	match_type& doomed = get_smatch(L);
	doomed.~match_type();
	return 0;
}
|
||||||
|
|
||||||
|
/// Lua: process_flags(...) -> integer | error message string
///
/// ORs together the integer values carried by the light userdata arguments.
/// If any argument is not a light userdata, an error message string is
/// returned instead of the combined value.
int regex_process_flags(lua_State *L) {
	const int count = lua_gettop(L);
	int combined = 0;

	for (int arg = 1; arg <= count; ++arg) {
		if (lua_islightuserdata(L, arg)) {
			// Flags are stored as integers smuggled through the pointer value.
			combined |= (int)(intptr_t)lua_touserdata(L, arg);
			continue;
		}

		push_value(L, "Flags must follow all non-flag arguments");
		return 1;
	}

	push_value(L, combined);
	return 1;
}
|
||||||
|
|
||||||
|
/// Lua: init_flags() -> table
///
/// Returns a new table mapping flag names to light userdata whose pointer
/// value is the corresponding boost::u32regex syntax flag.
int regex_init_flags(lua_State *L) {
	const struct {
		const char *name;
		void *value;
	} flags[] = {
		{"ICASE", (void*)boost::u32regex::icase},
		{"NOSUB", (void*)boost::u32regex::nosubs},
		{"COLLATE", (void*)boost::u32regex::collate},
		{"NEWLINE_ALT", (void*)boost::u32regex::newline_alt},
		{"NO_MOD_M", (void*)boost::u32regex::no_mod_m},
		{"NO_MOD_S", (void*)boost::u32regex::no_mod_s},
		{"MOD_S", (void*)boost::u32regex::mod_s},
		{"MOD_X", (void*)boost::u32regex::mod_x},
		{"NO_EMPTY_SUBEXPRESSIONS", (void*)boost::u32regex::no_empty_expressions},
	};

	lua_newtable(L);
	for (auto const& flag : flags)
		set_field(L, flag.name, flag.value);

	return 1;
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace Automation4 {
|
||||||
|
/// Registers the metatables used by the regex bindings and returns a table
/// of the binding functions.
///
/// The "aegisub.regex" and "aegisub.smatch" metatables each get a __gc entry
/// the first time they are created. The returned table exposes matches,
/// search, match, get_match, replace, compile, process_flags and init_flags.
int regex_init(lua_State *L) {
	// luaL_newmetatable pushes the metatable whether it was just created or
	// already existed, so the pop must be unconditional to keep the stack
	// balanced when this runs more than once for the same Lua state.
	if (luaL_newmetatable(L, "aegisub.regex"))
		set_field(L, "__gc", regex_gc);
	lua_pop(L, 1);

	if (luaL_newmetatable(L, "aegisub.smatch"))
		set_field(L, "__gc", smatch_gc);
	lua_pop(L, 1);

	lua_newtable(L);
	set_field(L, "matches", regex_matches);
	set_field(L, "search", regex_search);
	set_field(L, "match", regex_match);
	set_field(L, "get_match", regex_get_match);
	set_field(L, "replace", regex_replace);
	set_field(L, "compile", regex_compile);
	set_field(L, "process_flags", regex_process_flags);
	set_field(L, "init_flags", regex_init_flags);
	return 1;
}
|
||||||
|
}
|
||||||
|
#endif
|
Loading…
Reference in a new issue