Merge branch 'py3'

AndreasHeger · May 8, 2017 · 4cbc8c3 · 4cbc8c3
2 parents bb185d7 + 73ce7d1
commit 4cbc8c3
Show file tree

Hide file tree

Showing 17 changed files with 352 additions and 275 deletions.
diff --git a/gat/Engine.pyx b/gat/Engine.pyx
diff --git a/gat/Experiment.py b/gat/Experiment.py
@@ -117,7 +117,7 @@ def getParams(options=None):
             result.append("# %-40s: %s" % (k, str(v).encode("string_escape")))
     else:
         vars = inspect.currentframe().f_back.f_locals
-        for var in filter(lambda x: re.match("param_", x), vars.keys()):
+        for var in [x for x in list(vars.keys()) if re.match("param_", x)]:
             result.append("# %-40s: %s" %
                           (var, str(vars[var]).encode("string_escape")))
 
@@ -133,7 +133,7 @@ def getFooter():
     return "# job finished in %i seconds at %s -- %s -- %s" %\
            (time.time() - global_starting_time,
             time.asctime(time.localtime(time.time())),
-            " ".join(map(lambda x: "%5.2f" % x, os.times()[:4])),
+            " ".join(["%5.2f" % x for x in os.times()[:4]]),
             global_id)
 
 
@@ -417,7 +417,7 @@ def Stop():
         global_options.stdlog.write(
             "######### Time spent in benchmarked functions ###################\n")
         global_options.stdlog.write("# function\tseconds\tpercent\n")
-        for key, value in global_benchmark.items():
+        for key, value in list(global_benchmark.items()):
             global_options.stdlog.write(
                 "# %s\t%6i\t%5.2f%%\n" % (key, value, (100.0 * float(value) / t)))
         global_options.stdlog.write(
@@ -452,8 +452,8 @@ def Stop():
                                      "host", "system", "release", "machine",
                                      "start", "end", "path", "cmd")) + "\n")
 
-        csystem, host, release, version, machine = map(str, os.uname())
-        uusr, usys, c_usr, c_sys = map(lambda x: "%5.2f" % x, os.times()[:4])
+        csystem, host, release, version, machine = list(map(str, os.uname()))
+        uusr, usys, c_usr, c_sys = ["%5.2f" % x for x in os.times()[:4]]
         t_end = time.time()
         c_wall = "%5.2f" % (t_end - global_starting_time)
 
@@ -483,7 +483,7 @@ def wrapper(*arg):
         t1 = time.time()
         res = func(*arg)
         t2 = time.time()
-        key = "%s:%i" % (func.func_name, func.func_code.co_firstlineno)
+        key = "%s:%i" % (func.__name__, func.__code__.co_firstlineno)
         global_benchmark[key] += t2 - t1
         global_options.stdlog.write(
             '## benchmark: %s completed in %6.4f s\n' % (key, (t2 - t1)))
@@ -620,18 +620,18 @@ def __setattr__(self, name, value):
         self._counts[name] = value
 
     def __str__(self):
-        return ", ".join("%s=%i" % x for x in self._counts.iteritems())
+        return ", ".join("%s=%i" % x for x in self._counts.items())
 
     def __iadd__(self, other):
         try:
-            for key, val in other.iteritems():
+            for key, val in other.items():
                 self._counts[key] += val
         except:
             raise TypeError("unknown type")
         return self
 
     def iteritems(self):
-        return self._counts.iteritems()
+        return iter(self._counts.items())
 
 
 def run(cmd):

diff --git a/gat/IO.py b/gat/IO.py
@@ -431,7 +431,7 @@ def outputMetrics(outfile, segments, workspace, track, section):
     .'''
 
     stats_per_isochore = []
-    for isochore, ss in segments.iteritems():
+    for isochore, ss in segments.items():
         stats = SegmentsSummary()
         stats.update(ss, workspace[isochore])
         stats_per_isochore.append(stats)

diff --git a/gat/IOTools.py b/gat/IOTools.py
@@ -200,7 +200,7 @@ def readMultiMap(infile,
         try:
             key = map_functions[0](d[columns[0]])
             val = map_functions[1](d[columns[1]])
-        except (ValueError, IndexError), msg:
+        except (ValueError, IndexError) as msg:
             raise ValueError("parsing error in line %s: %s" % (l[:-1], msg))
 
         if key not in m:
@@ -235,20 +235,20 @@ def readTable(file,
 
     """
 
-    lines = filter(lambda x: x[0] != "#", file.readlines())
+    lines = [x for x in file.readlines() if x[0] != "#"]
 
     if len(lines) == 0:
         return None, []
 
     if take == "all":
         num_cols = len(string.split(lines[0][:-1], "\t"))
-        take = range(0, num_cols)
+        take = list(range(0, num_cols))
     else:
         num_cols = len(take)
 
     if headers:
         headers = lines[0][:-1].split("\t")
-        headers = map(lambda x: headers[x], take)
+        headers = [headers[x] for x in take]
         del lines[0]
 
     num_rows = len(lines)
@@ -263,7 +263,7 @@ def readTable(file,
     max_data = None
     for l in lines:
         data = l[:-1].split("\t")
-        data = map(lambda x: data[x], take)
+        data = [data[x] for x in take]
 
         # try conversion. Unparseable fields set to missing_value
         for x in range(len(data)):
@@ -328,10 +328,10 @@ def getInvertedDictionary(dict, make_unique=False):
     """
     inv = {}
     if make_unique:
-        for k, v in dict.iteritems():
+        for k, v in dict.items():
             inv[v] = k
     else:
-        for k, v in dict.iteritems():
+        for k, v in dict.items():
             inv.setdefault(v, []).append(k)
     return inv
 
@@ -440,28 +440,28 @@ def __init__(self,
 
     def __del__(self):
         """close all open files."""
-        for file in self.mFiles.values():
+        for file in list(self.mFiles.values()):
             file.close()
 
     def __len__(self):
         return len(self.mCounts)
 
     def close(self):
         """close all open files."""
-        for file in self.mFiles.values():
+        for file in list(self.mFiles.values()):
             file.close()
 
     def values(self):
-        return self.mCounts.values()
+        return list(self.mCounts.values())
 
     def keys(self):
-        return self.mCounts.keys()
+        return list(self.mCounts.keys())
 
     def iteritems(self):
-        return self.mCounts.iteritems()
+        return iter(self.mCounts.items())
 
     def items(self):
-        return self.mCounts.items()
+        return list(self.mCounts.items())
 
     def __iter__(self):
         return self.mCounts.__iter__()
@@ -499,7 +499,7 @@ def write(self, identifier, line):
         if filename not in self.mFiles:
 
             if self.maxopen and len(self.mFiles) > self.maxopen:
-                for f in self.mFiles.values():
+                for f in list(self.mFiles.values()):
                     f.close()
                 self.mFiles = {}
 
@@ -509,7 +509,7 @@ def write(self, identifier, line):
 
         try:
             self.mFiles[filename].write(line)
-        except ValueError, msg:
+        except ValueError as msg:
             raise ValueError(
                 "error while writing to %s: msg=%s" % (filename, msg))
         self.mCounts[filename] += 1
@@ -518,7 +518,7 @@ def deleteFiles(self, min_size=0):
         """delete all files below a minimum size."""
 
         ndeleted = 0
-        for filename, counts in self.mCounts.items():
+        for filename, counts in list(self.mCounts.items()):
             if counts < min_size:
                 os.remove(filename)
                 ndeleted += 1
@@ -554,7 +554,7 @@ def close(self):
         if self.isClosed:
             raise IOError("write on closed FilePool in close()")
 
-        for filename, data in self.data.iteritems():
+        for filename, data in self.data.items():
             f = self.openFile(filename, "a")
             if self.mHeader:
                 f.write(self.mHeader)
@@ -613,7 +613,7 @@ def iterflattened(self):
         iterate through values with nested keys flattened into a tuple
         """
 
-        for key, value in self.iteritems():
+        for key, value in self.items():
             if isinstance(value, nested_dict):
                 for keykey, value in value.iterflattened():
                     yield (key,) + keykey, value

diff --git a/gat/PositionList.pyx b/gat/PositionList.pyx
@@ -3,7 +3,8 @@
 
 cimport cython
 
-from cpython cimport PyString_AsString, PyString_FromStringAndSize
+from cpython.bytes cimport PyBytes_AsString, PyBytes_FromStringAndSize
+from cpython cimport PyBytes_Check, PyUnicode_Check
 from libc.stdlib cimport qsort, calloc, malloc, realloc, free
 from libc.string cimport memcpy
 from libc.errno cimport errno
@@ -15,6 +16,20 @@ from posix.fcntl cimport O_CREAT, O_RDWR, O_RDONLY
 
 from SegmentList cimport Position, Segment, SegmentList
 
+cdef bytes force_bytes(object s, encoding="ascii"):
+    """Return `s` as a bytes object.
+
+    None is passed through unchanged, bytes objects are returned
+    as they are and unicode objects are encoded with `encoding`
+    (ascii by default). Any other type raises a TypeError.
+    """
+    if s is None:
+        return None
+    elif PyBytes_Check(s):
+        return s
+    elif PyUnicode_Check(s):
+        return s.encode(encoding)
+    else:
+        raise TypeError("Argument must be string, bytes or unicode.")
+
+
 # trick to permit const void * in function definitions
 cdef extern from *:
     ctypedef void * const_void_ptr "const void*"
@@ -142,7 +157,7 @@ cdef class PositionList:
                 self.is_shared = True
                 self.is_slave = True
             else:
-                p = PyString_AsString(unreduce[6])
+                p = PyBytes_AsString(unreduce[6])
                 self.positions = <Position*>malloc(self.npositions * sizeof(Position))
                 memcpy(self.positions, p, cython.sizeof(Position) * self.npositions)
 
@@ -182,7 +197,7 @@ cdef class PositionList:
     def __reduce__(self):
         '''pickling function - returns class contents as a tuple.'''
 
-        cdef str data
+        cdef bytes data
 
         if self.shared_fd >= 0:
             return (buildPositionList, (self.npositions, 
@@ -194,7 +209,7 @@ cdef class PositionList:
                                         self.shared_fd))
 
         else:
-            data = PyString_FromStringAndSize(
+            data = PyBytes_FromStringAndSize(
                 <char*>self.positions, \
                 self.npositions * cython.sizeof(Position) * 2)
 

diff --git a/gat/SegmentList.pxd b/gat/SegmentList.pxd
@@ -20,6 +20,9 @@ cdef extern from "gat_utils.h":
     int toCompressedFile(unsigned char *, size_t, FILE *)
     int fromCompressedFile(unsigned char *, size_t, FILE *)
 
+cdef bytes force_bytes(object s, encoding=*)
+cdef force_str(object s, encoding=*)
+
 #####################################################
 #####################################################
 ## type definitions

diff --git a/gat/SegmentList.pyx b/gat/SegmentList.pyx
@@ -6,7 +6,9 @@ import random
 
 cimport cython
 
-from cpython cimport PyString_AsString, PyString_FromStringAndSize
+from cpython.version cimport PY_MAJOR_VERSION, PY_MINOR_VERSION
+from cpython.bytes cimport PyBytes_AsString, PyBytes_FromStringAndSize
+from cpython cimport PyBytes_Check, PyUnicode_Check
 from libc.stdlib cimport qsort, realloc, malloc, calloc, free
 from libc.stdint cimport UINT32_MAX
 from libc.string cimport memcpy, memmove
@@ -19,6 +21,33 @@ from posix.stat cimport S_IRUSR, S_IWUSR
 from posix.fcntl cimport O_CREAT, O_RDWR, O_RDONLY
 from posix.unistd cimport ftruncate
 
+
+cdef bytes force_bytes(object s, encoding="ascii"):
+    """Return `s` as a bytes object.
+
+    None is passed through unchanged, bytes objects are returned
+    as they are and unicode objects are encoded with `encoding`
+    (ascii by default). Any other type raises a TypeError.
+    """
+    if s is None:
+        return None
+    elif PyBytes_Check(s):
+        return s
+    elif PyUnicode_Check(s):
+        return s.encode(encoding)
+    else:
+        raise TypeError("Argument must be string, bytes or unicode.")
+
+cdef force_str(object s, encoding="ascii"):
+    """Return `s` as the native str type of the running Python.
+
+    None is passed through unchanged. Under Python 2 any other
+    argument is returned as it is, without conversion. Under
+    Python 3 bytes objects are decoded with `encoding` (ascii by
+    default) and everything else is returned unchanged.
+    """
+    if s is None:
+        return None
+    if PY_MAJOR_VERSION < 3:
+        return s
+    elif PyBytes_Check(s):
+        return s.decode(encoding)
+    else:
+        # assume unicode
+        return s
+
 #####################################################
 ## numpy import
 ## both import and cimport are necessary
@@ -249,7 +278,7 @@ cdef class SegmentList:
                 self.is_shared = True
                 self.is_slave = True
             else:
-                p = PyString_AsString(unreduce[5])
+                p = PyBytes_AsString(unreduce[5])
                 self.segments = <Segment*>malloc(self.nsegments * sizeof(Segment))
                 memcpy(self.segments, p, cython.sizeof(Position) * 2 * self.nsegments)
 
@@ -285,7 +314,7 @@ cdef class SegmentList:
     def __reduce__(self):
         '''pickling function - returns class contents as a tuple.'''
 
-        cdef str data
+        cdef bytes data
 
         if self.shared_fd >= 0:
             return (buildSegmentList, (self.nsegments, 
@@ -296,7 +325,7 @@ cdef class SegmentList:
                                        self.shared_fd))
 
         else:
-            data = PyString_FromStringAndSize(
+            data = PyBytes_FromStringAndSize(
                 <char*>self.segments, \
                 self.nsegments * cython.sizeof(Position) * 2)
 
@@ -324,7 +353,8 @@ cdef class SegmentList:
             return
 
         cdef int fd
-        fd = shm_open( key, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR)
+        key = force_bytes(key)
+        fd = shm_open(key, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR)
         if fd == -1:
             error = errno
             raise OSError( "could not create shared memory at %s; ERRNO=%i" % (key, error ))
@@ -1684,18 +1714,29 @@ cdef class SegmentList:
         if key >= self.nsegments:
             raise IndexError("index out of range")
         return self.segments[key].start, self.segments[key].end
-
-    def __cmp__(self, SegmentList other):
+    
+    def compare(self, SegmentList other):
+        '''three-way comparison of this segment list with `other`.
+
+        Returns 0 if both lists contain the same number of segments
+        and no segment pair differs according to
+        cmpSegmentsStartAndEnd. Lists of different length are ordered
+        by length (negative if this list is the shorter one). For
+        lists of equal length the result for the first differing
+        segment pair is returned. If `other` is None, -1 is returned.
+        '''
         cdef int idx
-        x = self.__len__().__cmp__(len(other))
-        if x != 0:
-            return x
+        cdef int l1 = self.__len__()
+        if other is None:
+            return -1
+        cdef int l2 = len(other)
+        # keep the sign convention of the former __cmp__: the result
+        # is negative if this list is shorter than `other`.
+        if l1 != l2:
+            return l1 - l2
         for idx from 0 <= idx < self.nsegments:
             x = cmpSegmentsStartAndEnd(&self.segments[idx], &other.segments[idx])
             if x != 0:
                 return x
         return 0
 
+    def __richcmp__(self, other, int op):
+        # Rich comparison: only == (op 2) and != (op 3) are supported,
+        # both are derived from compare(). All other operators return
+        # NotImplemented.
+        #
+        # `other` is left untyped on purpose: with a typed argument a
+        # comparison against a foreign object raises a TypeError during
+        # argument checking instead of letting the interpreter fall
+        # back to its default behaviour.
+        if not isinstance(other, SegmentList):
+            return NotImplemented
+        if op == 2:  # == operator
+            return self.compare(other) == 0
+        elif op == 3:  # != operator
+            return self.compare(other) != 0
+        else:
+            return NotImplemented
+
 
 def buildSegmentList(*args):
     '''pickling helper function.