Use more data when trying to determine encoding.

Reading only 32 bytes was resulting in the wrong encoding determination being made in more situations than acceptable. Bumped it up to 4K since most filesystem reads are at least that size.
ix5 · Jan 18, 2020 · 8e41997 · 8e41997
1 parent b6679ad
commit 8e41997
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/markdown2ctags.py b/markdown2ctags.py
@@ -34,7 +34,7 @@ class ScriptError(Exception):
 
 def detect_encoding(filename):
     with open(filename, 'rb') as f:
-        raw = f.read(32)
+        raw = f.read(4096)
 
     potential_bom = raw[:4]
     bom_encodings = [('utf-8-sig', codecs.BOM_UTF8),