2010-05-28 09:40:21 +02:00
|
|
|
// Copyright (c) 2010, Amar Takhar <verm@aegisub.org>
|
|
|
|
//
|
|
|
|
// Permission to use, copy, modify, and distribute this software for any
|
|
|
|
// purpose with or without fee is hereby granted, provided that the above
|
|
|
|
// copyright notice and this permission notice appear in all copies.
|
|
|
|
//
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
|
|
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
|
|
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
|
|
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
|
|
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
|
|
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
|
|
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
//
|
|
|
|
// $Id$
|
|
|
|
|
|
|
|
/// @file charset_ucd.h
|
|
|
|
/// @brief Character set detection using Universalchardet
|
|
|
|
/// @ingroup libaegisub
|
|
|
|
|
2010-06-17 02:23:44 +02:00
|
|
|
#include "libaegisub/charset.h"
|
|
|
|
|
2010-05-28 09:40:21 +02:00
|
|
|
#ifndef LAGI_PRE
|
2010-06-17 02:23:44 +02:00
|
|
|
#include <string>
|
2010-12-08 15:32:30 +01:00
|
|
|
#endif
|
2010-06-17 02:23:44 +02:00
|
|
|
|
2010-06-12 09:04:46 +02:00
|
|
|
#ifndef _WIN32
|
|
|
|
#define _X86_ 1
|
|
|
|
#endif
|
2010-06-17 02:23:44 +02:00
|
|
|
|
2010-05-28 09:40:21 +02:00
|
|
|
#include "../../universalchardet/nscore.h"
|
|
|
|
#include "../../universalchardet/nsUniversalDetector.h"
|
|
|
|
#include "../../universalchardet/nsMBCSGroupProber.h"
|
2010-06-17 02:23:44 +02:00
|
|
|
|
2010-05-28 09:40:21 +02:00
|
|
|
namespace agi {
|
|
|
|
namespace charset {
|
|
|
|
|
|
|
|
class UCDetect : public nsUniversalDetector {
|
|
|
|
|
2010-05-28 15:08:00 +02:00
|
|
|
/// For insertion into CharsetListDetected
|
|
|
|
typedef std::pair<float, std::string> CLDPair;
|
2010-05-28 09:40:21 +02:00
|
|
|
|
|
|
|
/// List of detected character sets.
|
|
|
|
CharsetListDetected list;
|
|
|
|
|
|
|
|
/// Stub.
|
|
|
|
void Report(const char* aCharset) {};
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
/// @brief Detect character set of a file using UniversalCharDetect
|
|
|
|
/// @param file File to check
|
2010-06-17 02:23:44 +02:00
|
|
|
UCDetect(const std::string &file);
|
2010-05-28 09:40:21 +02:00
|
|
|
|
|
|
|
/// @brief Detect character set of a file using UniversalCharDet
|
|
|
|
/// @param out[out] Map to load list into ordered by confidence
|
|
|
|
void List(CharsetListDetected &out) { out = list; }
|
|
|
|
|
|
|
|
/// @brief Return a single character set (highest confidence)
|
|
|
|
/// @return Character set
|
2010-05-28 15:08:00 +02:00
|
|
|
std::string Single();
|
2010-05-28 09:40:21 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace charset
|
|
|
|
} // namespace agi
|