// Copyright (c) 2006, Rodrigo Braz Monteiro // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // * Neither the name of the TrayDict Group nor the names of its contributors // may be used to endorse or promote products derived from this software // without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // // ----------------------------------------------------------------------------- // // TRAYDICT // // Website: http://aegisub.cellosoft.com // Contact: mailto:zeratul@cellosoft.com // /////////// // Headers #include <wx/wxprec.h> #include <wx/tokenzr.h> #include <wx/filename.h> #include <wx/zstream.h> #include <wx/wfstream.h> #include <stdio.h> #include "dictionary.h" #include "../aegisub/text_file_reader.h" #include "main.h" /////////////// // Constructor Dictionary::Dictionary(wxString _name,wxCheckBox *_check) { name = _name; check = _check; // Set file wxString filename = TrayDict::folderName + _name + _T(".dic"); Load(filename); // Set checkbox wxFileName file(filename); check->Enable(file.FileExists()); } ////////////// // Destructor Dictionary::~Dictionary() { } ////////// // Static KanaTable Dictionary::kanatable; //////// // Load void Dictionary::Convert(wxString source,wxString dest) { // Variables const wchar_t *str = NULL; short len; DictEntry curEntry; try { // Open source file TextFileReader file(source,_T("EUC-JP")); // Open destination file wxFileOutputStream out(dest); wxBufferedOutputStream buf(out); wxZlibOutputStream fp(buf,9,wxZLIB_GZIP); // Skip first line if (file.HasMoreLines()) file.ReadLineFromFile(); // Read lines while (file.HasMoreLines()) { // Get string wxString string = file.ReadLineFromFile(); // Process string to account for lack of kana if (!string.Contains(_T("["))) { if (!string.Contains(_T("]"))) { int pos = string.Find(_T(' ')); if (pos == -1) continue; wxString temp = string; string = temp.Left(pos) + _T(" []") + temp.Right(temp.Length() - pos); } else continue; } // Tokenize wxStringTokenizer token(string,_T("[]"),wxTOKEN_RET_EMPTY); // Kanji if (token.HasMoreTokens()) { curEntry.kanji = token.GetNextToken().Trim(false).Trim(true); } else continue; // Kana & romaji if (token.HasMoreTokens()) { curEntry.kana = token.GetNextToken().Trim(false).Trim(true); if (curEntry.kana.IsEmpty()) curEntry.kana = curEntry.kanji; curEntry.romaji = kanatable.KanaToRomaji(curEntry.kana,0); } else continue; // English if (token.HasMoreTokens()) { curEntry.english = token.GetNextToken().Trim(false).Trim(true); curEntry.english = curEntry.english.Mid(1,curEntry.english.Length()-2); } else continue; // Write kanji str = curEntry.kanji.c_str(); len = wcslen(str); fp.Write(&len,2); fp.Write(str,len*2); // Write kana str = curEntry.kana.c_str(); len = wcslen(str); fp.Write(&len,2); fp.Write(str,len*2); // Write romaji str = curEntry.romaji.c_str(); len = wcslen(str); fp.Write(&len,2); fp.Write(str,len*2); // Write english str = curEntry.english.c_str(); len = wcslen(str); fp.Write(&len,2); fp.Write(str,len*2); } } catch (...) { wxMessageBox(_T("Could not find dictionary file: ") + source,_T("File not found!"),wxICON_ERROR); } } //////// // Load void Dictionary::Load(wxString filename) { dictFile = filename; } ////////// // Search void Dictionary::Search(ResultSet &results,wxString query) { // Prepare results int resCount = 0; results.ownData = true; results.dicName = name; results.query = query; query.Trim(true); query.Trim(false); // Determine query type bool isJapanese = false; for (size_t i=0;i<query.Length();i++) { if (query[i] > 255) { isJapanese = true; break; } } // Stopwatch wxStopWatch stopwatch; // Open file wxInputStream *file; wxFileInputStream filestream(dictFile); wxBufferedInputStream buf(filestream); wxZlibInputStream zstream(buf); wxBufferedInputStream buf2(zstream); file = &zstream; // Buffer wchar_t buffer[16384]; short len; DictEntry *cur = NULL; // Search for matches while (!file->Eof()) { // Prepare bool addThis = false; int rel = 0; // Create data if (!cur) cur = new DictEntry; // Read kanji file->Read(&len,2); if (len < 0) return; file->Read(buffer,2*len); buffer[len] = 0; cur->kanji = buffer; // Read kana file->Read(&len,2); if (len < 0) return; file->Read(buffer,2*len); buffer[len] = 0; cur->kana = buffer; // Read romaji file->Read(&len,2); if (len < 0) return; file->Read(buffer,2*len); buffer[len] = 0; cur->romaji = buffer; // Read english file->Read(&len,2); if (len < 0) return; file->Read(buffer,2*len); buffer[len] = 0; cur->english = buffer; // Japanese query if (isJapanese) { // Matches kanji? if (cur->kanji.Contains(query)) { addThis = true; rel = GetRelevance(query,cur->kanji,cur->english.Contains(_T("(P)"))); } // Matches kana? else if (cur->kana.Contains(query)) { addThis = true; rel = GetRelevance(query,cur->kana,cur->english.Contains(_T("(P)"))); } } // English/romaji query else { // Lowercase query wxString lowQuery = query.Lower(); // Matches english? if (cur->english.Lower().Contains(lowQuery)) { addThis = true; rel = GetRelevance(lowQuery,cur->english,cur->english.Contains(_T("(P)")),true); } // Matches wapuro romaji? if (cur->romaji.Contains(lowQuery)) { addThis = true; rel = GetRelevance(lowQuery,cur->romaji,cur->english.Contains(_T("(P)"))); } } // Add entry if (addThis) { SearchResult res; res.relevance = rel; res.entry = cur; results.results.push_back(res); cur = NULL; } } // Delete cur if (cur) delete cur; // Time stopwatch.Pause(); results.time = stopwatch.Time(); // Close file } ///////////////// // Get relevancy int Dictionary::GetRelevance(wxString substr,wxString _str,bool isPop,bool english) { // Best score int bestScore = 0; // Generate list of strings wxArrayString strings; if (!english) strings.Add(_str.Lower()); else { wxStringTokenizer tkn(_str.Lower(),_T("/")); while (tkn.HasMoreTokens()) { // Get token wxString token = tkn.GetNextToken(); // Remove parenthesis wxString temp; bool inside = false; bool gotOne = false; for (size_t i=0;i<token.Length();i++) { if (token[i] == _T('(')) { inside = true; gotOne = true; } if (!inside) temp += token[i]; if (token[i] == _T(')')) inside = false; } // Add a copy with parenthesis if (gotOne) strings.Add(token); // Trim & add temp.Trim(true); temp.Trim(false); strings.Add(temp); } } // Search in each match for (size_t i=0;i<strings.Count();i++) { // Get string wxString str = strings[i]; if (!str.Contains(substr)) continue; // Score int score = 0; if (isPop) score += 5000; // Exact match, can't get better if (substr == str) { score += 10000; } else { // Semi-exact match (to e.g. match "car shed" higher than "card" when looking for "car") if (english) { wxString temp1 = _T(" ") + str + _T(" "); wxString temp2 = _T(" ") + substr + _T(" "); if (temp1.Contains(temp2)) score += 5000; } // Calculate how much of a partial match it was score += 1000 - (str.Length() - substr.Length())*1000/str.Length(); // Find match position int start = str.Find(substr); if (start == -1) throw 0; int temp1 = (str.length() - start)*500/str.Length() + 1; int temp2 = (start + substr.Length())*500/str.Length(); if (temp1 > temp2) score += temp1; else score += temp2; } // Best score? if (score > bestScore) bestScore = score; } return bestScore; } ////////////////////////// // Comparison for sorting bool operator < (const SearchResult &a,const SearchResult &b) { return (a.relevance > b.relevance); } ///////////////// // Compact entry void DictEntry::Compact() { kanji.Trim(true).Trim(false).Shrink(); kana.Trim(true).Trim(false).Shrink(); english.Trim(true).Trim(false).Shrink(); } /////////////// // Constructor ResultSet::ResultSet() { ownData = false; } ////////////// // Destructor ResultSet::~ResultSet() { if (ownData) { std::list<SearchResult>::iterator cur; for (cur = results.begin(); cur != results.end();cur++) { delete (*cur).entry; } } } /////////////////// // Print resultset /* void ResultSet::Print(wxTextCtrl *target,int bitmask) { // Get options bool drawKanji = (bitmask & 1) != 0; bool drawKana = (bitmask & 2) != 0; bool drawRomaji = (bitmask & 4) != 0; bool drawEnglish = (bitmask & 8) != 0; // Fonts wxFont font; font.SetFaceName(_T("MS Mincho")); font.SetPointSize(9); wxFont font2; font2.SetFaceName(_T("Tahoma")); // Text attributes wchar_t space = 0x3000; wxString spaceStr = space; wxTextAttr fontAttr; fontAttr.SetFont(font); wxTextAttr kanjiCol; kanjiCol.SetFont(font); kanjiCol.SetTextColour(wxColour(192,0,0)); wxTextAttr kanaCol; kanaCol.SetTextColour(wxColour(0,0,192)); wxTextAttr romajiCol; romajiCol.SetTextColour(wxColour(0,128,0)); wxTextAttr engCol; engCol.SetFont(font2); engCol.SetTextColour(wxColour(0,0,0)); wxTextAttr commonCol; commonCol.SetTextColour(wxColour(0,128,128)); commonCol.SetFont(font2); wxTextAttr sepCol; sepCol.SetTextColour(wxColour(128,90,0)); wxTextAttr boldCol; font2.SetWeight(wxBOLD); boldCol.SetFont(font2); wxTextAttr notBoldCol; font2.SetWeight(wxNORMAL); notBoldCol.SetFont(font); // Find column widths int kanjiWidth = 0; int kanaWidth = 0; int romajiWidth = 0; std::list<SearchResult>::iterator cur; DictEntry *entry; int curLen; int resPrinted = 0; for (cur=results.begin();cur!=results.end();cur++) { entry = cur->entry; curLen = entry->kanji.Length(); if (curLen > kanjiWidth) kanjiWidth = curLen; curLen = entry->kana.Length(); if (curLen > kanaWidth) kanaWidth = curLen; wxString temp = Dictionary::kanatable.KanaToRomaji(entry->kana); curLen = temp.Length() - temp.Freq(0x304); if (curLen > romajiWidth) romajiWidth = curLen; // Limit to 1000 resPrinted++; if (resPrinted >= 1000) break; } // List number of results target->SetDefaultStyle(boldCol); int maxDisp = results.size(); if (maxDisp > 1000) maxDisp = 1000; target->AppendText(wxString::Format(_T("Searched %s for \"%s\". Displaying %i matches. Search took %i ms.\n"),dicName.Upper().c_str(),query.c_str(),maxDisp,time)); target->SetDefaultStyle(notBoldCol); target->SetDefaultStyle(fontAttr); // Append to results wxString curText; resPrinted = 0; for (cur=results.begin();cur!=results.end();cur++) { entry = cur->entry; // Write kanji if (drawKanji) { target->SetDefaultStyle(kanjiCol); curText = entry->kanji; curLen = kanjiWidth - curText.Length() + 1; for (int i=0;i<curLen;i++) curText = curText + spaceStr; target->AppendText(curText); } // Write kana if (drawKana) { target->SetDefaultStyle(kanaCol); //curText = _T("[") + entry->kana + _T("]"); curText = entry->kana; curLen = kanaWidth - curText.Length() + 1; for (int i=0;i<curLen;i++) curText = curText + spaceStr; target->AppendText(curText); } // Write romaji if (drawRomaji) { target->SetDefaultStyle(romajiCol); curText = Dictionary::kanatable.KanaToRomaji(entry->kana); curLen = romajiWidth - curText.Length() + curText.Freq(0x304) + 1; for (int i=0;i<curLen;i++) curText = curText + _T(" "); target->AppendText(curText); } // Write english if (drawEnglish) { // Search for grammatical class wxString mainText; int pos = entry->english.Find(_T(')')); if (entry->english[0] == _T('(') && pos != -1) { mainText = entry->english.Mid(pos+1); mainText.Trim(false); // Draw grammatical class target->SetDefaultStyle(sepCol); target->AppendText(entry->english.Left(pos+1) + _T(" ")); } else mainText = entry->english; target->SetDefaultStyle(engCol); // Draw rest target->SetDefaultStyle(engCol); wxStringTokenizer tkn(mainText,_T("/")); bool hadPrev = false; while (tkn.HasMoreTokens()) { // Popular entry wxString token = tkn.GetNextToken(); if (token == _T("(P)")) { target->SetDefaultStyle(commonCol); target->AppendText(_T(" [Common]")); target->SetDefaultStyle(engCol); } // Normal entry else { // Separator if (hadPrev) { target->SetDefaultStyle(sepCol); target->AppendText(_T(" / ")); target->SetDefaultStyle(engCol); } // Append text target->AppendText(token); hadPrev = true; } } } // Line break target->SetDefaultStyle(fontAttr); target->AppendText(_T("\n")); // Limit to 1000 resPrinted++; if (resPrinted >= 1000) { target->SetDefaultStyle(boldCol); target->AppendText(wxString::Format(_T("Too many (%i) matches, stopping.\n"),results.size())); break; } } // Print two carriage returns target->AppendText(_T("\n\n")); } */