Skip to content
This repository was archived by the owner on Dec 3, 2020. It is now read-only.

Python 3 support #1

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
build/
dist/
grin.egg-info/

__pycache__/
*.py[cod]
.tox/
54 changes: 46 additions & 8 deletions grin.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@

import argparse

# Calling fileno() on a stream that has no underlying file descriptor fails in
# one of two ways depending on the object:
#
#   * io-based streams raise io.UnsupportedOperation;
#   * objects that simply do not have the method raise AttributeError (the
#     Python 2 case this shim was originally written for).
#
# Binding both as a tuple lets ``except UnsupportedOperation:`` handle any
# file-like object on either major version of Python.
import io

try:
    # Only importable on Python 2. The import is kept so the name remains
    # available to any other code in this module that relies on it.
    import cStringIO
except ImportError:
    pass

UnsupportedOperation = (AttributeError, io.UnsupportedOperation)


#### Constants ####
__version__ = '1.2.1'
Expand All @@ -24,7 +34,7 @@
POST = 1

# Use file(1)'s choices for what's text and what's not.
TEXTCHARS = ''.join(map(chr, [7,8,9,10,12,13,27] + range(0x20, 0x100)))
TEXTCHARS = ''.join(map(chr, [7,8,9,10,12,13,27] + list(range(0x20, 0x100))))
ALLBYTES = ''.join(map(chr, range(256)))

COLOR_TABLE = ['black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan',
Expand All @@ -41,6 +51,12 @@
READ_BLOCKSIZE = 16 * 1024 * 1024


# On Python 3 the lookup tables and the gzip magic number are converted from
# text to bytes. latin-1 maps every code point in 0-255 onto the byte with
# the same value, so the conversion is lossless for these tables.
if sys.version_info[0] >= 3:
    ALLBYTES = ALLBYTES.encode('latin-1')
    TEXTCHARS = TEXTCHARS.encode('latin-1')
    GZIP_MAGIC = GZIP_MAGIC.encode('latin-1')


def is_binary_string(bytes):
""" Determine if a string is classified as binary rather than text.

Expand Down Expand Up @@ -232,7 +248,13 @@ def read_block_with_context(self, prev, fp, fp_size):
else:
remaining = max(fp_size - fp.tell(), 0)
target_io_size = min(READ_BLOCKSIZE, remaining)
block_main = fp.read(target_io_size)
try:
block_main = fp.read(target_io_size)
except UnicodeDecodeError:
# We hit a block that could not be decoded. Most likely this
# is because of some binary content that was embedded deep in
# the file (FITS files do this).
return EMPTY_DATABLOCK
is_last_block = target_io_size == remaining

if prev is None:
Expand Down Expand Up @@ -313,7 +335,7 @@ def do_grep(self, fp):
fp_size = status.st_size
else:
fp_size = None
except AttributeError: # doesn't support fileno()
except UnsupportedOperation: # doesn't support fileno()
fp_size = None

block = self.read_block_with_context(None, fp, fp_size)
Expand Down Expand Up @@ -587,7 +609,7 @@ def _is_binary_file(self, f):
"""
try:
bytes = f.read(self.binary_bytes)
except Exception, e:
except Exception as e:
# When trying to read from something that looks like a gzipped file,
# it may be corrupt. If we do get an error, assume that the file is binary.
return True
Expand Down Expand Up @@ -1009,6 +1031,22 @@ def get_regex(args):
return re.compile(args.regex, flags)


def gzip_opener_factory():
    """ Build the callable used to open gzip-compressed files.

    The returned callable is invoked as ``opener(filename, mode)`` and hands
    back an open file object. On Python 2 it is ``gzip.open`` itself. On
    Python 3 the requested mode is ignored: the file is always opened in text
    mode (``'rt'``) and decoded as utf-8.

    """
    if sys.version_info < (3, 0):
        return gzip.open

    def _python3_gzip_opener(filename, _):
        # The second argument exists only so the signature matches
        # ``gzip.open(filename, mode)``; its value is deliberately discarded.
        return gzip.open(filename, mode='rt', encoding='utf-8')

    return _python3_gzip_opener


def grin_main(argv=None):
try:
if argv is None:
Expand All @@ -1026,21 +1064,21 @@ def grin_main(argv=None):

regex = get_regex(args)
g = GrepText(regex, args)
openers = dict(text=open, gzip=gzip.open)
openers = dict(text=open, gzip=gzip_opener_factory())
for filename, kind in get_filenames(args):
report = g.grep_a_file(filename, opener=openers[kind])
sys.stdout.write(report)
except KeyboardInterrupt:
raise SystemExit(0)
except IOError, e:
except IOError as e:
if 'Broken pipe' in str(e):
# The user is probably piping to a pager like less(1) and has exited
# it. Just exit.
raise SystemExit(0)
raise

def print_line(filename):
print filename
print(filename)

def print_null(filename):
# Note that the final filename will have a trailing NUL, just like
Expand Down Expand Up @@ -1073,7 +1111,7 @@ def grind_main(argv=None):
output(filename)
except KeyboardInterrupt:
raise SystemExit(0)
except IOError, e:
except IOError as e:
if 'Broken pipe' in str(e):
# The user is probably piping to a pager like less(1) and has exited
# it. Just exit.
Expand Down
43 changes: 26 additions & 17 deletions tests/test_file_recognizer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Test the file recognizer capabilities.
"""
from __future__ import print_function

import gzip
import os
Expand All @@ -9,33 +10,40 @@

import nose

from grin import FileRecognizer
from grin import FileRecognizer, GZIP_MAGIC


def _b(s):
    """ Convert ``s`` to a byte string.

    On Python 3 the text is encoded as latin-1; on Python 2 the value is
    passed through ``bytes()``.
    """
    if sys.version_info < (3, 0):
        return bytes(s)
    return bytes(s, encoding='latin-1')


def empty_file(filename, open=open):
    """ Create ``filename`` with no content.

    ``open`` is the opener to use; it is called as ``open(filename, 'wb')``
    and the handle it returns is closed straight away.
    """
    open(filename, 'wb').close()

def binary_file(filename, open=open):
f = open(filename, 'wb')
f.write(''.join(map(chr, range(256))))
f.write(_b(''.join(map(chr, list(range(256))))))
f.close()

def text_file(filename, open=open):
lines = ['foo\n', 'bar\n'] * 100
lines.append('baz\n')
lines.extend(['foo\n', 'bar\n'] * 100)
f = open(filename, 'wb')
f.writelines(lines)
f.writelines(line.encode('utf-8') for line in lines)
f.close()

def fake_gzip_file(filename, open=open):
""" Write out a binary file that has the gzip magic header bytes, but is not
a gzip file.
"""
GZIP_MAGIC = '\037\213'
f = open(filename, 'wb')
f.write(GZIP_MAGIC)
f.write(''.join(map(chr, range(256))))
f.write(_b(''.join(map(chr, list(range(256))))))
f.close()

def binary_middle(filename, open=open):
Expand All @@ -45,7 +53,7 @@ def binary_middle(filename, open=open):
"""
text = 'a'*100 + '\0'*100 + 'b'*100
f = open(filename, 'wb')
f.write(text)
f.write(text.encode('latin-1'))
f.close()

def socket_file(filename):
Expand All @@ -56,25 +64,25 @@ def unreadable_file(filename):
""" Write a file that does not have read permissions.
"""
text_file(filename)
os.chmod(filename, 0200)
os.chmod(filename, 0o200)

def unreadable_dir(filename):
""" Make a directory that does not have read permissions.
"""
os.mkdir(filename)
os.chmod(filename, 0300)
os.chmod(filename, 0o300)

def unexecutable_dir(filename):
""" Make a directory that does not have execute permissions.
"""
os.mkdir(filename)
os.chmod(filename, 0600)
os.chmod(filename, 0o600)

def totally_unusable_dir(filename):
""" Make a directory that has neither read nor execute permissions.
"""
os.mkdir(filename)
os.chmod(filename, 0100)
os.chmod(filename, 0o100)

def setup():
# Make files to test individual recognizers.
Expand Down Expand Up @@ -135,13 +143,13 @@ def setup():
text_file('tree/.skip_hidden_file')
os.mkdir('tree/unreadable_dir')
text_file('tree/unreadable_dir/text')
os.chmod('tree/unreadable_dir', 0300)
os.chmod('tree/unreadable_dir', 0o300)
os.mkdir('tree/unexecutable_dir')
text_file('tree/unexecutable_dir/text')
os.chmod('tree/unexecutable_dir', 0600)
os.chmod('tree/unexecutable_dir', 0o600)
os.mkdir('tree/totally_unusable_dir')
text_file('tree/totally_unusable_dir/text')
os.chmod('tree/totally_unusable_dir', 0100)
os.chmod('tree/totally_unusable_dir', 0o100)

def ensure_deletability(arg, dirname, fnames):
""" os.path.walk() callback function which will make sure every directory is
Expand All @@ -150,7 +158,7 @@ def ensure_deletability(arg, dirname, fnames):
for fn in fnames:
fn = os.path.join(dirname, fn)
if os.path.isdir(fn):
os.chmod(fn, 0700)
os.chmod(fn, 0o700)

def teardown():
files_to_delete = ['empty', 'binary', 'binary_middle', 'text', 'text~',
Expand All @@ -168,10 +176,11 @@ def teardown():
os.unlink(filename)
else:
os.rmdir(filename)
except Exception, e:
print >>sys.stderr, 'Could not delete %s: %s' % (filename, e)
except Exception as e:
print('Could not delete %s: %s' % (filename, e), file=sys.stderr)
os.unlink('socket_test')
os.path.walk('tree', ensure_deletability, None)
for root, dirs, files in os.walk('tree', topdown=True):
ensure_deletability(None, root, dirs)
shutil.rmtree('tree')


Expand Down
6 changes: 5 additions & 1 deletion tests/test_grep.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
Set up

>>> import grin
>>> from cStringIO import StringIO
>>> try:
... from cStringIO import StringIO
... except ImportError:
... from io import StringIO
...
>>> import re
>>>
>>> all_foo = """\
Expand Down
7 changes: 7 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[tox]
envlist=py26, py27, py34, py35
[testenv]
usedevelop=True
deps=nose
commands=nosetests