Skip to content

Commit c0da9b6

Browse files
author
Marcel Radischat
committed
Merge branch 'feature/refactor_encoding' into develop
2 parents f1911e4 + 169165d commit c0da9b6

File tree

9 files changed

+52
-16
lines changed

9 files changed

+52
-16
lines changed

pandasqt/_lib/magic/db/magic.mgc

2.9 MB
Binary file not shown.
110 KB
Binary file not shown.

pandasqt/_lib/magic/libmagic-1.dll

478 KB
Binary file not shown.

pandasqt/_lib/magic/libregex-1.dll

279 KB
Binary file not shown.

pandasqt/_lib/magic/magic1.dll

478 KB
Binary file not shown.

pandasqt/_lib/magic/zlib1.dll

129 KB
Binary file not shown.

pandasqt/encoding.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import sys
2+
import os
3+
4+
# Absolute directory of this module; the bundled libmagic files live below it.
BASEDIR = os.path.dirname(os.path.abspath(__file__))

if sys.platform == 'win32':
    # Prepend the local folder holding the bundled libmagic DLLs to PATH.
    lib = os.path.join(BASEDIR, '_lib', 'magic')
    # PATH may be unset or empty; avoid a KeyError and a dangling separator.
    envpath = os.environ.get('PATH', '')
    os.environ['PATH'] = os.pathsep.join([lib, envpath]) if envpath else lib

# Encoding auto-detection is optional: AUTODETECT records whether the
# ``magic`` module could be imported.
try:
    import magic
    AUTODETECT = True
except ImportError:
    # On macOS a failed import is treated as fatal and the user is asked to
    # install libmagic; on every other platform detection is switched off.
    if sys.platform == 'darwin':
        raise ImportError('Please install libmagic')
    AUTODETECT = False
19+
20+
21+
class Detector(object):
    """Look up a file's text encoding through the ``magic`` module.

    When ``AUTODETECT`` is false the instance is inert: ``self.magic`` is
    ``False`` and ``detect`` always returns ``None``.
    """

    def __init__(self):
        """Create the ``magic.Magic`` handle, or mark detection as disabled."""
        if not AUTODETECT:
            self.magic = False
            return

        # Use the magic database shipped inside the package.
        database = os.path.join(BASEDIR, '_lib', 'magic', 'db', 'magic.mgc')
        self.magic = magic.Magic(magic_file=database, mime_encoding=True)

    def detect(self, filepath):
        """Return whatever ``self.magic.from_file`` reports for *filepath*.

        Returns ``None`` when no magic handle is available.
        """
        if not self.magic:
            return None
        return self.magic.from_file(filepath)
35+
36+

pandasqt/views/CSVDialogs.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
from encodings.aliases import aliases as _encodings
55

66
import pandas
7-
from chardet.universaldetector import UniversalDetector
87

98
from pandasqt.compat import Qt, QtCore, QtGui, Slot, Signal
9+
from pandasqt.encoding import Detector
1010
from pandasqt.models.DataFrameModel import DataFrameModel
1111
from pandasqt.views.CustomDelegates import DtypeComboDelegate
1212
from pandasqt.views._ui import icons_rc
@@ -200,6 +200,7 @@ def __init__(self, parent=None):
200200
self._filename = None
201201
self._delimiter = None
202202
self._header = None
203+
self._detector = Detector()
203204
self._initUI()
204205

205206
def _initUI(self):
@@ -352,19 +353,18 @@ def _guessEncoding(self, path):
352353
353354
"""
354355
if os.path.exists(path) and path.lower().endswith('csv'):
355-
encodingDetector = UniversalDetector()
356-
with open(path, 'r') as fp:
357-
for line in fp:
358-
encodingDetector.feed(line)
359-
if encodingDetector.done:
360-
break
361-
encodingDetector.close()
362-
result = encodingDetector.result['encoding']
363-
result = result.replace('-','_')
364-
365-
self._encodingKey = _calculateEncodingKey(result)
366-
if self._encodingKey:
367-
index = self._encodingComboBox.findText(result.upper())
356+
encoding = self._detector.detect(path)
357+
358+
if encoding is not None:
359+
if encoding.startswith('utf'):
360+
encoding = encoding.replace('-', '')
361+
encoding = encoding.replace('-','_')
362+
363+
viewValue = _encodings.get(encoding)
364+
365+
self._encodingKey = encoding
366+
367+
index = self._encodingComboBox.findText(viewValue.upper())
368368
self._encodingComboBox.setCurrentIndex(index)
369369

370370
@Slot('int')

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def run_tests(self):
5353
errcode = pytest.main(self.test_args)
5454
sys.exit(errcode)
5555

56-
tests_require = ['pandas >= 0.15.2', 'pyside', 'pytest', 'pytest-cov', 'pytest-qt']
56+
tests_require = ['pandas >= 0.15.2', 'pyside', 'pytest', 'pytest-cov', 'pytest-qt', 'python-magic==0.4.6']
5757
setup(
5858
name='pandas-qt',
5959
version=__version__,
@@ -62,7 +62,7 @@ def run_tests(self):
6262
namespace_packages = ['pandasqt'],
6363
author='Matthias Ludwig',
6464
tests_require=tests_require,
65-
install_requires=['pandas>=0.15.1', 'chardet', 'pytest', 'pytest-qt==1.2.2', 'pytest-cov'],
65+
install_requires=['pandas>=0.15.1', 'pytest', 'pytest-qt==1.2.2', 'pytest-cov', 'python-magic==0.4.6'],
6666
cmdclass={'test': PyTest},
6767
author_email='m.Ludwig@datalyze-solutions.com',
6868
description='catches exceptions inside qt applications and writes them to a message box and into a log file',

0 commit comments

Comments
 (0)