forked from mia/Aegisub
Optimize Extradata handling a bit
This commit is contained in:
parent
8567d9a573
commit
9e6d5d7d9c
6 changed files with 81 additions and 53 deletions
|
@ -28,6 +28,8 @@
|
|||
#include <boost/algorithm/string/predicate.hpp>
|
||||
#include <boost/filesystem/path.hpp>
|
||||
#include <cassert>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
/// Construct an empty subtitle file; every member uses its own default initialisation.
AssFile::AssFile() = default;
|
||||
|
||||
|
@ -232,58 +234,79 @@ void AssFile::Sort(EntryList<AssDialogue> &lst, CompFunc comp, std::set<AssDialo
|
|||
uint32_t AssFile::AddExtradata(std::string const& key, std::string const& value) {
|
||||
for (auto const& data : Extradata) {
|
||||
// perform brute-force deduplication by simple key and value comparison
|
||||
if (key == data.second.first && value == data.second.second) {
|
||||
return data.first;
|
||||
if (key == data.key && value == data.value) {
|
||||
return data.id;
|
||||
}
|
||||
}
|
||||
// next_extradata_id must not exist
|
||||
assert(Extradata.find(next_extradata_id) == Extradata.end());
|
||||
Extradata[next_extradata_id] = {key, value};
|
||||
Extradata.push_back(ExtradataEntry{next_extradata_id, key, value});
|
||||
return next_extradata_id++; // return old value, then post-increment
|
||||
}
|
||||
|
||||
std::map<std::string, std::string> AssFile::GetExtradata(std::vector<uint32_t> const& id_list) const {
|
||||
// If multiple IDs have the same key name, the last ID wins
|
||||
std::map<std::string, std::string> result;
|
||||
for (auto id : id_list) {
|
||||
auto it = Extradata.find(id);
|
||||
if (it != Extradata.end())
|
||||
result[it->second.first] = it->second.second;
|
||||
namespace {
|
||||
struct extradata_id_cmp {
|
||||
bool operator()(ExtradataEntry const& e, uint32_t id) {
|
||||
return e.id < id;
|
||||
}
|
||||
bool operator()(uint32_t id, ExtradataEntry const& e) {
|
||||
return id < e.id;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename ExtradataType, typename Func>
|
||||
void enumerate_extradata(ExtradataType&& extradata, std::vector<uint32_t> const& id_list, Func&& f) {
|
||||
auto begin = extradata.begin(), end = extradata.end();
|
||||
for (auto id : id_list) {
|
||||
auto it = lower_bound(begin, end, id, extradata_id_cmp{});
|
||||
if (it != end) {
|
||||
f(*it);
|
||||
begin = it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Hash map whose keys are references to K objects stored elsewhere, so adding
/// an element does not copy the key. Hashing and equality use std::hash<K> and
/// std::equal_to<K>, reached through std::reference_wrapper's implicit
/// conversion to const K&. The referenced keys must outlive the map.
template<typename K, typename V>
|
||||
using reference_map = std::unordered_map<std::reference_wrapper<const K>, V, std::hash<K>, std::equal_to<K>>;
|
||||
}
|
||||
|
||||
std::vector<ExtradataEntry> AssFile::GetExtradata(std::vector<uint32_t> const& id_list) const {
|
||||
std::vector<ExtradataEntry> result;
|
||||
enumerate_extradata(Extradata, id_list, [&](ExtradataEntry const& e) {
|
||||
result.push_back(e);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
void AssFile::CleanExtradata() {
|
||||
// Collect all IDs existing in the database
|
||||
// Then remove all IDs found to be in use from this list
|
||||
// Remaining is then all garbage IDs
|
||||
std::vector<uint32_t> ids;
|
||||
for (auto& it : Extradata)
|
||||
ids.push_back(it.first);
|
||||
if (ids.empty()) return;
|
||||
if (Extradata.empty()) return;
|
||||
|
||||
// For each line, find which IDs it actually uses and remove them from the unused-list
|
||||
std::unordered_set<uint32_t> ids_used;
|
||||
for (auto& line : Events) {
|
||||
if (line.ExtradataIds.get().empty()) continue;
|
||||
|
||||
// Find the ID for each unique key in the line
|
||||
std::map<std::string, uint32_t> key_ids;
|
||||
for (auto id : line.ExtradataIds.get()) {
|
||||
auto ed_it = Extradata.find(id);
|
||||
if (ed_it == Extradata.end())
|
||||
continue;
|
||||
key_ids[ed_it->second.first] = id;
|
||||
reference_map<std::string, uint32_t> keys_used;
|
||||
enumerate_extradata(Extradata, line.ExtradataIds.get(), [&](ExtradataEntry const& e) {
|
||||
keys_used[e.key] = e.id;
|
||||
});
|
||||
|
||||
for (auto const& used : keys_used)
|
||||
ids_used.insert(used.second);
|
||||
|
||||
// If any keys were duplicated or missing, update the id list
|
||||
if (keys_used.size() != line.ExtradataIds.get().size()) {
|
||||
std::vector<uint32_t> ids;
|
||||
ids.reserve(keys_used.size());
|
||||
for (auto const& used : keys_used)
|
||||
ids.push_back(used.second);
|
||||
std::sort(begin(ids), end(ids));
|
||||
line.ExtradataIds = std::move(ids);
|
||||
}
|
||||
// Update the line's ID list to only contain the actual ID for any duplicate keys
|
||||
// Also mark found IDs as used in the cleaning list
|
||||
std::vector<uint32_t> new_ids;
|
||||
for (auto& keyid : key_ids) {
|
||||
new_ids.push_back(keyid.second);
|
||||
ids.erase(remove(begin(ids), end(ids), keyid.second), end(ids));
|
||||
}
|
||||
line.ExtradataIds = new_ids;
|
||||
}
|
||||
|
||||
// The ids list should contain only unused IDs now
|
||||
for (auto id : ids) {
|
||||
Extradata.erase(id);
|
||||
if (ids_used.size() != Extradata.size()) {
|
||||
// Erase all no-longer-used extradata entries
|
||||
Extradata.erase(std::remove_if(begin(Extradata), end(Extradata), [&](ExtradataEntry const& e) {
|
||||
return !ids_used.count(e.id);
|
||||
}), end(Extradata));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include <libaegisub/signal.h>
|
||||
|
||||
#include <boost/intrusive/list.hpp>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
|
@ -45,7 +46,11 @@ class wxString;
|
|||
/// Boost.Intrusive list of T, linked through the AssEntryListHook base hook.
/// Built with constant_time_size<false>, so the list does not maintain an
/// element count.
template<typename T>
|
||||
using EntryList = typename boost::intrusive::make_list<T, boost::intrusive::constant_time_size<false>, boost::intrusive::base_hook<AssEntryListHook>>::type;
|
||||
|
||||
using AegisubExtradataMap = std::map<uint32_t, std::pair<std::string, std::string>>;
|
||||
/// One extradata record: a numeric ID together with the key/value pair stored
/// under it. Brace-initialised as {id, key, value}, so the member order matters.
struct ExtradataEntry {
|
||||
/// ID of this entry, as listed in a line's extradata ID list
uint32_t id;
|
||||
/// Name of the extradata field
std::string key;
|
||||
/// Data stored under the key
std::string value;
|
||||
};
|
||||
|
||||
struct AssFileCommit {
|
||||
wxString const& message;
|
||||
|
@ -83,7 +88,7 @@ public:
|
|||
EntryList<AssStyle> Styles;
|
||||
EntryList<AssDialogue> Events;
|
||||
std::vector<AssAttachment> Attachments;
|
||||
AegisubExtradataMap Extradata;
|
||||
std::vector<ExtradataEntry> Extradata;
|
||||
ProjectProperties Properties;
|
||||
|
||||
uint32_t next_extradata_id = 0;
|
||||
|
@ -127,7 +132,7 @@ public:
|
|||
/// @return ID of the created entry
|
||||
uint32_t AddExtradata(std::string const& key, std::string const& value);
|
||||
/// Fetch all extradata entries from a list of IDs
|
||||
std::map<std::string, std::string> GetExtradata(std::vector<uint32_t> const& id_list) const;
|
||||
std::vector<ExtradataEntry> GetExtradata(std::vector<uint32_t> const& id_list) const;
|
||||
/// Remove unreferenced extradata entries
|
||||
void CleanExtradata();
|
||||
|
||||
|
@ -199,4 +204,3 @@ public:
|
|||
/// @param limit If non-empty, only lines in this set are sorted
|
||||
static void Sort(EntryList<AssDialogue>& lst, CompFunc comp = CompStart, std::set<AssDialogue*> const& limit = std::set<AssDialogue*>());
|
||||
};
|
||||
|
||||
|
|
|
@ -219,7 +219,7 @@ void AssParser::ParseExtradataLine(std::string const &data) {
|
|||
|
||||
// ensure next_extradata_id is always at least 1 more than the largest existing id
|
||||
target->next_extradata_id = std::max(id+1, target->next_extradata_id);
|
||||
target->Extradata[id] = {key, value};
|
||||
target->Extradata.push_back(ExtradataEntry{id, std::move(key), std::move(value)});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -179,8 +179,8 @@ namespace Automation4 {
|
|||
// create extradata table
|
||||
lua_newtable(L);
|
||||
for (auto const& ed : ass->GetExtradata(dia->ExtradataIds)) {
|
||||
push_value(L, ed.first);
|
||||
push_value(L, ed.second);
|
||||
push_value(L, ed.key);
|
||||
push_value(L, ed.value);
|
||||
lua_settable(L, -3);
|
||||
}
|
||||
lua_setfield(L, -2, "extra");
|
||||
|
@ -309,7 +309,8 @@ namespace Automation4 {
|
|||
get_string_or_default(L, -2),
|
||||
get_string_or_default(L, -1)));
|
||||
});
|
||||
dia->ExtradataIds = new_ids;
|
||||
std::sort(begin(new_ids), end(new_ids));
|
||||
dia->ExtradataIds = std::move(new_ids);
|
||||
}
|
||||
else {
|
||||
error(L, "Found line with unknown class: %s", lclass.c_str());
|
||||
|
|
|
@ -57,7 +57,7 @@ struct SubsController::UndoInfo {
|
|||
std::vector<AssStyle> styles;
|
||||
std::vector<AssDialogueBase> events;
|
||||
std::vector<AssAttachment> attachments;
|
||||
AegisubExtradataMap extradata;
|
||||
std::vector<ExtradataEntry> extradata;
|
||||
|
||||
mutable std::vector<int> selection;
|
||||
int active_line_id = 0;
|
||||
|
|
|
@ -122,8 +122,8 @@ struct Writer {
|
|||
file.WriteLineToFile(key + std::to_string(n));
|
||||
}
|
||||
|
||||
void WriteExtradata(AegisubExtradataMap const& extradata) {
|
||||
if (extradata.size() == 0)
|
||||
void WriteExtradata(std::vector<ExtradataEntry> const& extradata) {
|
||||
if (extradata.empty())
|
||||
return;
|
||||
|
||||
group = AssEntryGroup::EXTRADATA;
|
||||
|
@ -131,16 +131,16 @@ struct Writer {
|
|||
file.WriteLineToFile("[Aegisub Extradata]");
|
||||
for (auto const& edi : extradata) {
|
||||
std::string line = "Data: ";
|
||||
line += std::to_string(edi.first);
|
||||
line += std::to_string(edi.id);
|
||||
line += ",";
|
||||
line += inline_string_encode(edi.second.first);
|
||||
line += inline_string_encode(edi.key);
|
||||
line += ",";
|
||||
std::string encoded_data = inline_string_encode(edi.second.second);
|
||||
if (4*edi.second.second.size() < 3*encoded_data.size()) {
|
||||
std::string encoded_data = inline_string_encode(edi.value);
|
||||
if (4*edi.value.size() < 3*encoded_data.size()) {
|
||||
// the inline_string encoding grew the data by more than uuencoding would
|
||||
// so base64 encode it instead
|
||||
line += "u"; // marker for uuencoding
|
||||
line += agi::ass::UUEncode(edi.second.second, false);
|
||||
line += agi::ass::UUEncode(edi.value, false);
|
||||
} else {
|
||||
line += "e"; // marker for inline_string encoding (escaping)
|
||||
line += encoded_data;
|
||||
|
|
Loading…
Reference in a new issue