Report the endianness of detected UTF-16 and UTF-32

The bare name 'UTF-16' is treated as big-endian UTF-16, so reporting it for
little-endian input is incorrect and causes line_iterator to fail on UTF-16LE.

Originally committed to SVN as r6351.
This commit is contained in:
Thomas Goyne 2012-01-25 00:21:27 +00:00
parent 1034d16e4b
commit 3140d902da

View file

@ -125,12 +125,12 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
mDetectedCharset = "X-ISO-10646-UCS-4-3412"; mDetectedCharset = "X-ISO-10646-UCS-4-3412";
else if ('\xFF' == aBuf[1]) else if ('\xFF' == aBuf[1])
// FE FF UTF-16, big endian BOM // FE FF UTF-16, big endian BOM
mDetectedCharset = "UTF-16"; mDetectedCharset = "UTF-16BE";
break; break;
case '\x00': case '\x00':
if (('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3])) if (('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3]))
// 00 00 FE FF UTF-32, big-endian BOM // 00 00 FE FF UTF-32, big-endian BOM
mDetectedCharset = "UTF-32"; mDetectedCharset = "UTF-32BE";
else if (('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3])) else if (('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3]))
// 00 00 FF FE UCS-4, unusual octet order BOM (2143) // 00 00 FF FE UCS-4, unusual octet order BOM (2143)
mDetectedCharset = "X-ISO-10646-UCS-4-2143"; mDetectedCharset = "X-ISO-10646-UCS-4-2143";
@ -138,10 +138,10 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
case '\xFF': case '\xFF':
if (('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3])) if (('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
// FF FE 00 00 UTF-32, little-endian BOM // FF FE 00 00 UTF-32, little-endian BOM
mDetectedCharset = "UTF-32"; mDetectedCharset = "UTF-32LE";
else if ('\xFE' == aBuf[1]) else if ('\xFE' == aBuf[1])
// FF FE UTF-16, little endian BOM // FF FE UTF-16, little endian BOM
mDetectedCharset = "UTF-16"; mDetectedCharset = "UTF-16LE";
break; break;
} // switch } // switch