Open
Description
For the provided file, the encoding is not detected as UTF-16 LE,
even though it is.
detector.Encoding = ASCIIEncoding.ASCIIEncodingSealed
BodyName = {string} "us-ascii"
CodePage = {int} 20127
DecoderFallback = DecoderReplacementFallback
EncoderFallback = EncoderReplacementFallback
EncodingName = {string} "US-ASCII"
HeaderName = {string} "us-ascii"
IsBrowserDisplay = {bool} false
IsBrowserSave = {bool} false
IsMailNewsDisplay = {bool} true
IsMailNewsSave = {bool} true
IsReadOnly = {bool} true
IsSingleByte = {bool} true
IsUTF8CodePage = {bool} false
Preamble = {ReadOnlySpan<byte>} System.ReadOnlySpan<Byte>[0]
WebName = {string} "us-ascii"
WindowsCodePage = {int} 1252
_codePage = {int} 20127
_dataItem = CodePageDataItem
_isReadOnly = {bool} true
decoderFallback = DecoderReplacementFallback
encoderFallback = EncoderReplacementFallback
As a workaround, I am now using this code:
// Workaround: pick the text encoding for the file at filePath, checking for
// BOM-less UTF-16 LE with a zero-byte heuristic before asking the charset detector.
byte[] byteArray = File.ReadAllBytes(filePath);

// Count the zero bytes found at odd indices (1, 3, 5, ...). In UTF-16 LE the high
// byte of every ASCII-range character is zero and sits at an odd index.
int zeroBytesCount = 0;
for (int i = 1; i < byteArray.Length; i += 2)
{
    if (byteArray[i] == 0)
    {
        zeroBytesCount++;
    }
}

// UTF-8 is the fallback whenever neither the heuristic nor the detector gives an answer.
Encoding encoding = Encoding.UTF8;

// An empty file carries no evidence at all; without this guard the threshold
// check below would compare 0 >= 0 and wrongly report UTF-16.
if (byteArray.Length > 0)
{
    // At most half of all bytes sit at odd indices, so a count of at least 40% of
    // the total length means at least 80% of the odd positions are zero:
    // the content is most likely UTF-16 LE.
    if (zeroBytesCount >= byteArray.Length * 0.4)
    {
        encoding = Encoding.Unicode;
    }
    else
    {
        // NOTE(review): per the UTF.Unknown documentation, Detected is null when no
        // charset matched and Encoding is null when the charset is unsupported on
        // this platform - confirm against the library version in use.
        // In both cases keep the UTF-8 fallback instead of failing or storing null.
        DetectionDetail detected = CharsetDetector.DetectFromBytes(byteArray).Detected;
        encoding = detected?.Encoding ?? encoding;
    }
}
Metadata
Metadata
Assignees
Labels
No labels