From 2ec3b46c9e298bdee9c9ee5cb5ce21736fb07c1a Mon Sep 17 00:00:00 2001 From: Peter Iannucci Date: Sat, 1 Aug 2015 04:35:32 -0400 Subject: [PATCH] More Python 3 changes. Tests no longer error (but some don't pass). Iterators for map, filter, and range passed to list() as needed in many places. Module membership search uses object identity rather than equality now. Reconciled almost all differences between the built-in dumb dbm module and the patched version in weave. Rewrote dumb shelve module to rely more heavily on the built-in shelve module, with the zlib compression functionality factored out. --- setup.py | 5 +- weave/_dumb_shelve.py | 64 ++++---- weave/_dumbdbm_patched.py | 291 ++++++++++++++++++++++++--------- weave/catalog.py | 4 +- weave/size_check.py | 2 +- weave/tests/test_c_spec.py | 54 +----- weave/tests/test_catalog.py | 21 ++- weave/tests/test_ext_tools.py | 6 +- weave/tests/test_size_check.py | 6 +- 9 files changed, 267 insertions(+), 186 deletions(-) mode change 100644 => 100755 weave/_dumb_shelve.py mode change 100644 => 100755 weave/tests/test_size_check.py diff --git a/setup.py b/setup.py index a02f981..6d3276f 100755 --- a/setup.py +++ b/setup.py @@ -112,14 +112,11 @@ def write_version_py(filename='weave/version.py'): """ FULLVERSION, GIT_REVISION = get_version_info() - a = open(filename, 'w') - try: + with open(filename, 'w') as a: a.write(cnt % {'version': VERSION, 'full_version' : FULLVERSION, 'git_revision' : GIT_REVISION, 'isrelease': str(ISRELEASED)}) - finally: - a.close() try: diff --git a/weave/_dumb_shelve.py b/weave/_dumb_shelve.py old mode 100644 new mode 100755 index 3f1ba16..2eee029 --- a/weave/_dumb_shelve.py +++ b/weave/_dumb_shelve.py @@ -1,51 +1,53 @@ -from __future__ import division, print_function, absolute_import - from shelve import Shelf +import collections try: import zlib except ImportError: - # Some python installations don't have zlib. - pass + zlib = None -import pickle +class ZlibMapping(collections.MutableMapping): + """Mapping adapter that applies zlib compression. + """ + def __init__(self, dict): + self.dict = dict + if hasattr(dict, 'close'): + self.close = dict.close + if hasattr(dict, 'sync'): + self.sync = dict.sync + if hasattr(dict, 'keys'): + self.keys = dict.keys -class DbfilenameShelf(Shelf): - """Shelf implementation using the "anydbm" generic dbm interface. + def __iter__(self): + return iter(self.dict) - This is initialized with the filename for the dbm database. - See the module's __doc__ string for an overview of the interface. - """ + def __len__(self): + return len(self.dict) - def __init__(self, filename, flag='c'): - from . import _dumbdbm_patched - Shelf.__init__(self, _dumbdbm_patched.open(filename, flag)) + def __contains__(self, key): + return key in self.dict def __getitem__(self, key): - compressed = self.dict[key] - try: - r = zlib.decompress(compressed) - except zlib.error: - r = compressed - except NameError: - r = compressed - - return pickle.loads(r) + return zlib.decompress(self.dict[key]) def __setitem__(self, key, value): - s = pickle.dumps(value,1) - try: - self.dict[key] = zlib.compress(s) - except NameError: - #zlib doesn't exist, leave it uncompressed. - self.dict[key] = s + self.dict[key] = zlib.compress(value) + + def __delitem__(self, key): + del self.dict[key] def open(filename, flag='c'): """Open a persistent dictionary for reading and writing. - Argument is the filename for the dbm database. - See the module's __doc__ string for an overview of the interface. + The filename parameter is the base filename for the underlying + database. As a side-effect, an extension may be added to the + filename and more than one file may be created. The optional flag + parameter has the same interpretation as the flag parameter of + dbm.open(). """ + writeback = False - return DbfilenameShelf(filename, flag) + from . import _dumbdbm_patched + d = _dumbdbm_patched.open(filename, flag) + return Shelf(ZlibMapping(d) if zlib is not None else d) diff --git a/weave/_dumbdbm_patched.py b/weave/_dumbdbm_patched.py index cb62025..0d08ace 100755 --- a/weave/_dumbdbm_patched.py +++ b/weave/_dumbdbm_patched.py @@ -20,10 +20,10 @@ - support opening for read-only (flag = 'm') """ -from __future__ import division, print_function, absolute_import import sys -_os = __import__('os') +import os as _os +import collections PY3 = sys.version_info[0] == 3 @@ -34,130 +34,261 @@ _open = open -_BLOCKSIZE = 512 - -error = IOError # For anydbm +__all__ = ["error", "open"] +_BLOCKSIZE = 512 -class _Database(object): +error = OSError + +class _Database(collections.MutableMapping): + + # The on-disk directory and data files can remain in mutually + # inconsistent states for an arbitrarily long time (see comments + # at the end of __setitem__). This is only repaired when _commit() + # gets called. One place _commit() gets called is from __del__(), + # and if that occurs at program shutdown time, module globals may + # already have gotten rebound to None. Since it's crucial that + # _commit() finish successfully, we can't ignore shutdown races + # here, and _commit() must not reference any globals. + _os = _os # for _commit() + _open = _open # for _commit() + + def __init__(self, filebasename, mode): + self._mode = mode + + # The directory file is a text file. Each line looks like + # "%r, (%d, %d)\n" % (key, pos, siz) + # where key is the string key, pos is the offset into the dat + # file of the associated value's first byte, and siz is the number + # of bytes in the associated value. + self._dirfile = filebasename + '.dir' + + # The data file is a binary file pointed into by the directory + # file, and holds the values associated with keys. Each value + # begins at a _BLOCKSIZE-aligned byte offset, and is a raw + # binary 8-bit string value. + self._datfile = filebasename + '.dat' + self._bakfile = filebasename + '.bak' + + # The index is an in-memory dict, mirroring the directory file. + self._index = None # maps keys to (pos, siz) pairs - def __init__(self, file): - self._dirfile = file + '.dir' - self._datfile = file + '.dat' - self._bakfile = file + '.bak' # Mod by Jack: create data file if needed try: - f = _open(self._datfile, 'r') - except IOError: - f = _open(self._datfile, 'w') - f.close() + f = _open(self._datfile, 'r', encoding="Latin-1") + except OSError: + with _open(self._datfile, 'w', encoding="Latin-1") as f: + self._chmod(self._datfile) + else: + f.close() self._update() + # Read directory file into the in-memory index dict. def _update(self): self._index = {} try: - f = _open(self._dirfile) - except IOError: + f = _open(self._dirfile, 'r', encoding="Latin-1") + except OSError: pass else: - while 1: - line = f.readline().rstrip() - if not line: - break - key, (pos, siz) = eval(line) - self._index[key] = (pos, siz) - f.close() - + with f: + for line in f: + line = line.rstrip() + key, pos_and_siz_pair = eval(line) + key = key.encode('Latin-1') + self._index[key] = pos_and_siz_pair + + # Write the index dict to the directory file. The original directory + # file (if any) is renamed with a .bak extension first. If a .bak + # file currently exists, it's deleted. def _commit(self): + # CAUTION: It's vital that _commit() succeed, and _commit() can + # be called from __del__(). Therefore we must never reference a + # global in this routine. + if self._index is None: + return # nothing to do + try: - _os.unlink(self._bakfile) - except _os.error: + self._os.unlink(self._bakfile) + except OSError: pass + try: - _os.rename(self._dirfile, self._bakfile) - except _os.error: + self._os.rename(self._dirfile, self._bakfile) + except OSError: pass - f = _open(self._dirfile, 'w') - for key, (pos, siz) in self._index.items(): - f.write("%s, (%s, %s)\n" % (repr(key), repr(pos), repr(siz))) - f.close() + + with self._open(self._dirfile, 'w', encoding="Latin-1") as f: + self._chmod(self._dirfile) + for key, pos_and_siz_pair in self._index.items(): + # Use Latin-1 since it has no qualms with any value in any + # position; UTF-8, though, does care sometimes. + entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair) + f.write(entry) + + sync = _commit + + def _verify_open(self): + if self._index is None: + raise error('DBM object has already been closed') def __getitem__(self, key): - pos, siz = self._index[key] # may raise KeyError - f = _open(self._datfile, 'rb') - f.seek(pos) - dat = f.read(siz) - f.close() + if isinstance(key, str): + key = key.encode('utf-8') + self._verify_open() + pos, siz = self._index[key] # may raise KeyError + with _open(self._datfile, 'rb') as f: + f.seek(pos) + dat = f.read(siz) return dat - def __contains__(self, key): - return key in self._index - + # Append val to the data file, starting at a _BLOCKSIZE-aligned + # offset. The data file is first padded with NUL bytes (if needed) + # to get to an aligned offset. Return pair + # (starting offset of val, len(val)) def _addval(self, val): - f = _open(self._datfile, 'rb+') - f.seek(0, 2) - pos = f.tell() -## Does not work under MW compiler -## pos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE -## f.seek(pos) - npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE - f.write(b'\0'*(npos-pos)) - pos = npos - - f.write(val) - f.close() + with _open(self._datfile, 'rb+') as f: + f.seek(0, 2) + pos = int(f.tell()) + npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE + f.write(b'\0'*(npos-pos)) + pos = npos + f.write(val) return (pos, len(val)) + # Write val to the data file, starting at offset pos. The caller + # is responsible for ensuring that there's enough room starting at + # pos to hold val, without overwriting some other value. Return + # pair (pos, len(val)). def _setval(self, pos, val): - f = _open(self._datfile, 'rb+') - f.seek(pos) - f.write(val) - f.close() + with _open(self._datfile, 'rb+') as f: + f.seek(pos) + f.write(val) return (pos, len(val)) - def _addkey(self, key, pos_and_siz): - (pos, siz) = pos_and_siz - self._index[key] = (pos, siz) - f = _open(self._dirfile, 'a') - f.write("%s, (%s, %s)\n" % (repr(key), repr(pos), repr(siz))) - f.close() + # key is a new key whose associated value starts in the data file + # at offset pos and with length siz. Add an index record to + # the in-memory index dict, and append one to the directory file. + def _addkey(self, key, pos_and_siz_pair): + self._index[key] = pos_and_siz_pair + with _open(self._dirfile, 'a', encoding="Latin-1") as f: + self._chmod(self._dirfile) + f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair)) def __setitem__(self, key, val): - if not isinstance(key, string_types) or not isinstance(val, string_types): - raise TypeError("keys and values must be strings") + if isinstance(key, str): + key = key.encode('utf-8') + elif not isinstance(key, (bytes, bytearray)): + raise TypeError("keys must be bytes or strings") + if isinstance(val, str): + val = val.encode('utf-8') + elif not isinstance(val, (bytes, bytearray)): + raise TypeError("values must be bytes or strings") + self._verify_open() if key not in self._index: - (pos, siz) = self._addval(val) - self._addkey(key, (pos, siz)) + self._addkey(key, self._addval(val)) else: + # See whether the new value is small enough to fit in the + # (padded) space currently occupied by the old value. pos, siz = self._index[key] oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE if newblocks <= oldblocks: - pos, siz = self._setval(pos, val) - self._index[key] = pos, siz + self._index[key] = self._setval(pos, val) else: - pos, siz = self._addval(val) - self._index[key] = pos, siz - self._addkey(key, (pos, siz)) + # The new value doesn't fit in the (padded) space used + # by the old value. The blocks used by the old value are + # forever lost. + self._index[key] = self._addval(val) + + self._addkey(key, self._index[key]) def __delitem__(self, key): + if isinstance(key, str): + key = key.encode('utf-8') + self._verify_open() + # The blocks used by the associated value are lost. del self._index[key] + # XXX It's unclear why we do a _commit() here (the code always + # XXX has, so I'm not changing it). __setitem__ doesn't try to + # XXX keep the directory file in synch. Why should we? Or + # XXX why shouldn't __setitem__? self._commit() def keys(self): - return list(self._index.keys()) + try: + return list(self._index) + except TypeError: + raise error('DBM object has already been closed') from None - def has_key(self, key): - return key in self._index + def items(self): + self._verify_open() + return [(key, self[key]) for key in self._index.keys()] + + def __contains__(self, key): + if isinstance(key, str): + key = key.encode('utf-8') + try: + return key in self._index + except TypeError: + if self._index is None: + raise error('DBM object has already been closed') from None + else: + raise + + def iterkeys(self): + try: + return iter(self._index) + except TypeError: + raise error('DBM object has already been closed') from None + __iter__ = iterkeys def __len__(self): - return len(self._index) + try: + return len(self._index) + except TypeError: + raise error('DBM object has already been closed') from None def close(self): - self._index = None - self._datfile = self._dirfile = self._bakfile = None + self._commit() + self._index = self._datfile = self._dirfile = self._bakfile = None + + __del__ = close + + def _chmod(self, file): + if hasattr(self._os, 'chmod'): + self._os.chmod(file, self._mode) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + +def open(file, flag=None, mode=0o666): + """Open the database file, filename, and return corresponding object. + + The flag argument, used to control how the database is opened in the + other DBM implementations, is ignored in the dbm.dumb module; the + database is always opened for update, and will be created if it does + not exist. + + The optional mode argument is the UNIX mode of the file, used only when + the database has to be created. It defaults to octal code 0o666 (and + will be modified by the prevailing umask). + + """ + # flag argument is currently ignored + # Modify mode depending on the umask + try: + um = _os.umask(0) + _os.umask(um) + except AttributeError: + pass + else: + # Turn off any bits that are set in the umask + mode = mode & (~um) -def open(file, flag=None, mode=None): - # flag, mode arguments are currently ignored - return _Database(file) + return _Database(file, mode) diff --git a/weave/catalog.py b/weave/catalog.py index 61fa9fb..b721e3e 100644 --- a/weave/catalog.py +++ b/weave/catalog.py @@ -67,7 +67,7 @@ def getmodule(object): # try except used because of some comparison failures # in wxPoint code. Need to review this try: - if mod and object in mod.__dict__.values(): + if mod and any(object is x for x in mod.__dict__.values()): value = mod # if it is a built-in module, keep looking to see # if a non-builtin also has it. Otherwise quit and @@ -596,7 +596,7 @@ def get_catalog_files(self): """ files = map(catalog_path,self.build_search_order()) files = filter(lambda x: x is not None,files) - return files + return list(files) def get_existing_files(self): """ Returns all existing catalog file list in correct search order. diff --git a/weave/size_check.py b/weave/size_check.py index 21c7621..1844853 100644 --- a/weave/size_check.py +++ b/weave/size_check.py @@ -243,7 +243,7 @@ def __getitem__(self,indices): # step = step.value if beg is None: beg = 0 - if end == sys.maxint or end is None: + if end == sys.maxsize or end is None: end = dim_len if step is None: step = 1 diff --git a/weave/tests/test_c_spec.py b/weave/tests/test_c_spec.py index 85b39dd..024dd2b 100644 --- a/weave/tests/test_c_spec.py +++ b/weave/tests/test_c_spec.py @@ -26,22 +26,22 @@ class IntConverter(TestCase): @dec.slow def test_type_match_string(self): - s = c_spec.int_converter() + s = c_spec.long_converter() assert_(not s.type_match('string')) @dec.slow def test_type_match_int(self): - s = c_spec.int_converter() + s = c_spec.long_converter() assert_(s.type_match(5)) @dec.slow def test_type_match_float(self): - s = c_spec.int_converter() + s = c_spec.long_converter() assert_(not s.type_match(5.)) @dec.slow def test_type_match_complex(self): - s = c_spec.int_converter() + s = c_spec.long_converter() assert_(not s.type_match(5.+1j)) @dec.slow @@ -221,52 +221,6 @@ def test_complex_return(self): assert_(c == 3.+3j) -#---------------------------------------------------------------------------- -# File conversion tests -#---------------------------------------------------------------------------- - -class FileConverter(TestCase): - - compiler = '' - - @dec.slow - def test_py_to_file(self): - file_name = os.path.join(test_dir, "testfile") - file = open(file_name,'w') - code = """ - fprintf(file,"hello bob"); - """ - inline_tools.inline(code,['file'],compiler=self.compiler,force=1) - file.close() - file = open(file_name,'r') - assert_(file.read() == "hello bob") - - @dec.slow - def test_file_to_py(self): - file_name = os.path.join(test_dir, "testfile") - # not sure I like Py::String as default -- might move to std::sting - # or just plain char* - code = """ - const char* _file_name = file_name.c_str(); - FILE* file = fopen(_file_name, "w"); - return_val = file_to_py(file, _file_name, "w"); - """ - file = inline_tools.inline(code,['file_name'], compiler=self.compiler, - force=1) - file.write("hello fred") - file.close() - file = open(file_name,'r') - assert_(file.read() == "hello fred") - - -#---------------------------------------------------------------------------- -# Instance conversion tests -#---------------------------------------------------------------------------- - -class InstanceConverter(TestCase): - pass - - #---------------------------------------------------------------------------- # Callable object conversion tests #---------------------------------------------------------------------------- diff --git a/weave/tests/test_catalog.py b/weave/tests/test_catalog.py index 5254d0c..722f5e9 100644 --- a/weave/tests/test_catalog.py +++ b/weave/tests/test_catalog.py @@ -4,7 +4,6 @@ import os import re import glob -import string import stat import tempfile @@ -461,7 +460,7 @@ def test_add_function_persistent1(self): backup_dir = clear_temp_catalog() q = catalog.catalog() # just use some already available functions - funcs = [string.upper, string.lower, string.find,string.replace] + funcs = [str.upper, str.lower, str.find,str.replace] for i in funcs: q.add_function_persistent('code',i) pfuncs = q.get_cataloged_functions('code') @@ -474,12 +473,12 @@ def test_add_function_ordered(self): backup_dir = clear_temp_catalog() q = catalog.catalog() - q.add_function('f',string.upper) - q.add_function('f',string.lower) - q.add_function('ff',string.find) - q.add_function('ff',string.replace) - q.add_function('fff',string.atof) - q.add_function('fff',string.atoi) + q.add_function('f',str.upper) + q.add_function('f',str.lower) + q.add_function('ff',str.find) + q.add_function('ff',str.replace) + q.add_function('fff',float) + q.add_function('fff',int) del q # now we're gonna make a new catalog with same code @@ -516,10 +515,10 @@ def test_add_function_ordered(self): #assert_(funcs2 == [os.chdir,os.abort,string.replace,string.find]) #assert_(funcs3 == [re.purge,re.match,os.open, # os.access,string.atoi,string.atof]) - assert_(funcs1[:2] == [string.lower,string.upper]),repr(funcs1) - assert_(funcs2[:4] == [os.chdir,os.abort,string.replace,string.find]) + assert_(funcs1[:2] == [str.lower,str.upper]),repr(funcs1) + assert_(funcs2[:4] == [os.chdir,os.abort,str.replace,str.find]) assert_(funcs3[:6] == [re.purge,re.match,os.open, - os.access,string.atoi,string.atof]) + os.access,int,float]) cleanup_temp_dir(user_dir) cleanup_temp_dir(env_dir) diff --git a/weave/tests/test_ext_tools.py b/weave/tests/test_ext_tools.py index 21a02fe..3f3d3f6 100644 --- a/weave/tests/test_ext_tools.py +++ b/weave/tests/test_ext_tools.py @@ -1,7 +1,5 @@ from __future__ import absolute_import, print_function -import types - from numpy import arange, float32, float64 from numpy.testing import (TestCase, dec, assert_equal, assert_, run_module_suite) @@ -134,8 +132,8 @@ def test_assign_variable_types(self): bd = array_converter() bd.name, bd.var_type, bd.dims = 'b', float64, 1 - cd = c_spec.int_converter() - cd.name, cd.var_type = 'c', types.IntType + cd = c_spec.long_converter() + cd.name, cd.var_type = 'c', int desired = [ad,bd,cd] assert_equal(actual,desired) diff --git a/weave/tests/test_size_check.py b/weave/tests/test_size_check.py old mode 100644 new mode 100755 index e01cbc9..f894d3c --- a/weave/tests/test_size_check.py +++ b/weave/tests/test_size_check.py @@ -264,7 +264,7 @@ def test_1d_stride_12(self): def test_1d_random(self): # throw a bunch of different indexes at it for good measure. - choices = map(lambda x: repr(x),range(50)) + range(50) + ['']*50 + choices = [repr(x) for x in range(50)] + list(range(50)) + ['']*50 for i in range(100): try: beg = random.choice(choices) @@ -287,7 +287,7 @@ def test_2d_2(self): def test_2d_random(self): # throw a bunch of different indexes at it for good measure. - choices = map(lambda x: repr(x),range(50)) + range(50) + ['']*50 + choices = [repr(x) for x in range(50)] + list(range(50)) + ['']*50 for i in range(100): try: beg = random.choice(choices) @@ -307,7 +307,7 @@ def test_2d_random(self): def test_3d_random(self): # throw a bunch of different indexes at it for good measure. - choices = map(lambda x: repr(x),range(50)) + range(50) + ['']*50 + choices = [repr(x) for x in range(50)] + list(range(50)) + ['']*50 for i in range(100): try: idx = []