Don't bother storing a single charset in a std::string; instead, insert it into the std::map and fix Single() to return the first element. This keeps things simple and also ensures that DetectAll() will always return at least one element, which wasn't being done before.
Originally committed to SVN as r4369.
This commit is contained in:
parent
6736f5e292
commit
9d854b69f3
2 changed files with 18 additions and 12 deletions
|
@ -30,7 +30,6 @@
|
|||
namespace agi {
|
||||
namespace charset {
|
||||
|
||||
|
||||
UCDetect::UCDetect(const std::string file): nsUniversalDetector(NS_FILTER_ALL) {
|
||||
{
|
||||
std::ifstream *fp;
|
||||
|
@ -47,7 +46,7 @@ UCDetect::UCDetect(const std::string file): nsUniversalDetector(NS_FILTER_ALL) {
|
|||
DataEnd();
|
||||
|
||||
if (mDetectedCharset) {
|
||||
charset.assign(mDetectedCharset);
|
||||
list.insert(CLDPair(1, mDetectedCharset));
|
||||
} else {
|
||||
|
||||
switch (mInputState) {
|
||||
|
@ -56,32 +55,39 @@ UCDetect::UCDetect(const std::string file): nsUniversalDetector(NS_FILTER_ALL) {
|
|||
if (mCharSetProbers[i]) {
|
||||
float conf = mCharSetProbers[i]->GetConfidence();
|
||||
if (conf > 0.01f) {
|
||||
list.insert(std::pair<float, std::string>(conf, mCharSetProbers[i]->GetCharSetName()));
|
||||
list.insert(CLDPair(conf, mCharSetProbers[i]->GetCharSetName()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!list.empty()) {
|
||||
CharsetListDetected::const_iterator i_lst = list.begin();
|
||||
charset.assign(i_lst->second);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ePureAscii:
|
||||
charset.assign("US-ASCII");
|
||||
list.insert(CLDPair(1, "US-ASCII"));
|
||||
break;
|
||||
|
||||
default:
|
||||
throw UnknownCharset("Unknown chararacter set.");
|
||||
}
|
||||
|
||||
if ((list.empty() && (mInputState == eHighbyte)) || charset.empty())
|
||||
if (list.empty() && (mInputState == eHighbyte))
|
||||
throw UnknownCharset("Unknown chararacter set.");
|
||||
|
||||
|
||||
} // if mDetectedCharset else
|
||||
}
|
||||
|
||||
/// @brief Return a single detected character set.
///
/// Returns the name stored in the first entry of the detected-charset list.
/// NOTE(review): the header documents this as the "highest confidence" entry;
/// that only holds if CharsetListDetected orders its entries by descending
/// confidence -- its definition is not visible here, please confirm.
///
/// @return Character set name taken from the first list entry.
/// @throws UnknownCharset if the list of detected character sets is empty.
std::string UCDetect::Single() {
|
||||
/// @todo Add a debug log here since this shouldn't happen.
|
||||
if (list.empty()) {
|
||||
throw UnknownCharset("Unknown chararacter set.");
|
||||
}
|
||||

||||
// The first element of the list is the one handed back to the caller.
CharsetListDetected::const_iterator i_lst = list.begin();
|
||||
return i_lst->second;
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace charset
|
||||
} // namespace agi
|
||||
|
|
|
@ -29,8 +29,8 @@ namespace agi {
|
|||
|
||||
class UCDetect : public nsUniversalDetector {
|
||||
|
||||
/// Character set
|
||||
std::string charset;
|
||||
/// For insertion into CharsetListDetected
|
||||
typedef std::pair<float, std::string> CLDPair;
|
||||
|
||||
/// List of detected character sets.
|
||||
CharsetListDetected list;
|
||||
|
@ -50,7 +50,7 @@ public:
|
|||
|
||||
/// @brief Return a single character set (highest confidence)
|
||||
/// @return Character set
|
||||
std::string Single() { return charset; }
|
||||
std::string Single();
|
||||
};
|
||||
|
||||
} // namespace util
|
||||
|
|
Loading…
Reference in a new issue