From cb08347bdc58b16b2a49ab4cf6c9b98e9ca020b8 Mon Sep 17 00:00:00 2001
From: Niels Martin Hansen
Date: Wed, 3 Jan 2007 03:39:15 +0000
Subject: [PATCH] Fixed loading utf8-bom Lua scripts

Originally committed to SVN as r688.
---
 aegisub/auto4_lua.cpp | 100 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 98 insertions(+), 2 deletions(-)

diff --git a/aegisub/auto4_lua.cpp b/aegisub/auto4_lua.cpp
index 8fe84ce8f..af1c2b4a3 100644
--- a/aegisub/auto4_lua.cpp
+++ b/aegisub/auto4_lua.cpp
@@ -38,6 +38,7 @@
 #include "ass_style.h"
 #include "ass_file.h"
 #include "ass_override.h"
+#include "text_file_reader.h"
 #include
 #include
 #include
@@ -93,13 +94,106 @@ namespace Automation4 {
 #endif
 
 
+	// LuaScriptReader
+	// Feeds a script file to lua_load() in chunks of up to bufsize bytes.
+	// A leading UTF-8 byte order mark is skipped; files that look like
+	// UTF-16, UTF-32 or UTF-7 are rejected by throwing a const wxChar*.
+	struct LuaScriptReader {
+		FILE *f; // script file, opened in binary mode
+		bool first; // true until the start of the file has been examined
+		char *databuf; // chunk buffer handed to Lua, bufsize bytes long
+		static const size_t bufsize = 512;
+
+		// Opens the script file; throws a const wxChar* message if that fails.
+		LuaScriptReader(const wxString &filename)
+		{
+			first = true;
+			databuf = new char[bufsize];
+#ifdef WIN32
+			f = _tfopen(filename.c_str(), _T("rb"));
+#else
+			f = fopen(filename.fn_str(), "rb");
+#endif
+			if (!f) {
+				// the destructor does not run when the constructor throws
+				delete[] databuf;
+				throw _T("The script file could not be opened.");
+			}
+		}
+
+		~LuaScriptReader()
+		{
+			delete[] databuf;
+			fclose(f);
+		}
+
+		// lua_Reader callback; data must point to a LuaScriptReader.
+		// Returns the next chunk and stores its length in *size. A length of
+		// zero or a null return value tells Lua that the input has ended.
+		static const char* reader_func(lua_State *L, void *data, size_t *size)
+		{
+			LuaScriptReader *self = (LuaScriptReader*)(data);
+			unsigned char *b = (unsigned char *)self->databuf;
+			FILE *f = self->f;
+
+			if (feof(f)) {
+				*size = 0;
+				return 0;
+			}
+
+			if (self->first) {
+				self->first = false;
+				// look at the first bytes to find out what kind of file this is;
+				// fewer than four bytes can only be checked for an utf8 bom
+				size_t got = fread(b, 1, 4, f);
+				if (got >= 3 && b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) {
+					// got an utf8 file with bom, continue reading right after it
+					fseek(f, 3, SEEK_SET);
+				} else {
+					// not utf8 with bom
+					// check if there is some other BOM in place and complain if there is...
+					if (got == 4 && (
+						(b[0] == 0xFF && b[1] == 0xFE && b[2] == 0x00 && b[3] == 0x00) || // utf32le
+						(b[0] == 0x00 && b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) || // utf32be
+						(b[0] == 0xFF && b[1] == 0xFE) || // utf16le
+						(b[0] == 0xFE && b[1] == 0xFF) || // utf16be
+						(b[0] == 0x2B && b[1] == 0x2F && b[2] == 0x76) || // utf7
+						(b[0] == 0x00 && b[2] == 0x00) || // looks like utf16be
+						(b[1] == 0x00 && b[3] == 0x00))) { // looks like utf16le
+						// NOTE(review): this exception propagates out through lua_load(),
+						// confirm that the Lua build in use copes with that
+						throw _T("The script file uses an unsupported character set. Only UTF-8 is supported.");
+					}
+					// assume utf8 without bom, and rewind file; seeking also clears the
+					// end-of-file flag that a short read may have set
+					fseek(f, 0, SEEK_SET);
+				}
+			}
+
+			*size = fread(b, 1, bufsize, f);
+
+			return self->databuf;
+		}
+
+	private:
+		// owns a FILE handle and a heap buffer, so copying is disabled
+		LuaScriptReader(const LuaScriptReader&);
+		LuaScriptReader& operator=(const LuaScriptReader&);
+	};
+
+
 	// LuaScript
 
 	LuaScript::LuaScript(const wxString &filename)
 		: Script(filename)
 		, L(0)
 	{
-		Create();
+		try {
+			Create();
+		}
+		catch (const wxChar *e) {
+			description = e;
+		}
 	}
 
 	LuaScript::~LuaScript()
@@ -160,7 +254,9 @@ namespace Automation4 {
 			_stackcheck.check(0);
 
 			// load user script
-			if (luaL_loadfile(L, GetFilename().mb_str(wxConvUTF8))) {
+			LuaScriptReader script_reader(GetFilename());
+			if (lua_load(L, script_reader.reader_func, &script_reader, GetFilename().mb_str(wxConvUTF8))) {
+				//if (luaL_loadfile(L, GetFilename().mb_str(wxConvUTF8))) {
 				wxString *err = new wxString(lua_tostring(L, -1), wxConvUTF8);
 				err->Prepend(_T("An error occurred loading the Lua script file \"") + GetFilename() + _T("\":\n\n"));
 				throw err->c_str();