Report the endianness of detected UTF-16 and UTF-32
The bare charset name 'UTF-16' is interpreted as big-endian UTF-16, so reporting it for little-endian (UTF-16LE) input is incorrect and causes line_iterator to fail on UTF-16LE files. Originally committed to SVN as r6351.
This commit is contained in:
parent
1034d16e4b
commit
3140d902da
1 changed file with 4 additions and 4 deletions
|
@@ -125,12 +125,12 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
||||||
mDetectedCharset = "X-ISO-10646-UCS-4-3412";
|
mDetectedCharset = "X-ISO-10646-UCS-4-3412";
|
||||||
else if ('\xFF' == aBuf[1])
|
else if ('\xFF' == aBuf[1])
|
||||||
// FE FF UTF-16, big endian BOM
|
// FE FF UTF-16, big endian BOM
|
||||||
mDetectedCharset = "UTF-16";
|
mDetectedCharset = "UTF-16BE";
|
||||||
break;
|
break;
|
||||||
case '\x00':
|
case '\x00':
|
||||||
if (('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3]))
|
if (('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3]))
|
||||||
// 00 00 FE FF UTF-32, big-endian BOM
|
// 00 00 FE FF UTF-32, big-endian BOM
|
||||||
mDetectedCharset = "UTF-32";
|
mDetectedCharset = "UTF-32BE";
|
||||||
else if (('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3]))
|
else if (('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3]))
|
||||||
// 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
// 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
||||||
mDetectedCharset = "X-ISO-10646-UCS-4-2143";
|
mDetectedCharset = "X-ISO-10646-UCS-4-2143";
|
||||||
|
@@ -138,10 +138,10 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
||||||
case '\xFF':
|
case '\xFF':
|
||||||
if (('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
|
if (('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
|
||||||
// FF FE 00 00 UTF-32, little-endian BOM
|
// FF FE 00 00 UTF-32, little-endian BOM
|
||||||
mDetectedCharset = "UTF-32";
|
mDetectedCharset = "UTF-32LE";
|
||||||
else if ('\xFE' == aBuf[1])
|
else if ('\xFE' == aBuf[1])
|
||||||
// FF FE UTF-16, little endian BOM
|
// FF FE UTF-16, little endian BOM
|
||||||
mDetectedCharset = "UTF-16";
|
mDetectedCharset = "UTF-16LE";
|
||||||
break;
|
break;
|
||||||
} // switch
|
} // switch
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue