Skip to content

Commit 75d28ae

Browse files
committed
Precompile record format, and batch load all records
Since every row/record has the same format, precompile the format with struct.Struct instead of calling the regular unpack each time, so that unpacking is faster. This leads to a roughly 1.33x speedup. In addition, when users aren't worried about memory (i.e. the "records" method), we can exploit this by reading all records into memory at once. This leads to a 15-20x speedup.
1 parent c1b54f3 commit 75d28ae

File tree

2 files changed

+8
-9
lines changed

2 files changed

+8
-9
lines changed

shapefile.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
__version__ = "1.2.3"
1212

13-
from struct import pack, unpack, calcsize, error
13+
from struct import pack, unpack, calcsize, error, Struct
1414
import os
1515
import sys
1616
import time
@@ -472,6 +472,8 @@ def __dbfHeader(self):
472472
if terminator != b("\r"):
473473
raise ShapefileException("Shapefile dbf header lacks expected terminator. (likely corrupt?)")
474474
self.fields.insert(0, ('DeletionFlag', 'C', 1, 0))
475+
fmt,fmtSize = self.__recordFmt()
476+
self.__recStruct = Struct(fmt)
475477

476478
def __recordFmt(self):
477479
"""Calculates the size of a .shp geometry record."""
@@ -484,8 +486,7 @@ def __recordFmt(self):
484486
def __record(self):
485487
"""Reads and returns a dbf record row as a list of values."""
486488
f = self.__getFileObj(self.dbf)
487-
recFmt = self.__recordFmt()
488-
recordContents = unpack(recFmt[0], f.read(recFmt[1]))
489+
recordContents = self.__recStruct.unpack(f.read(self.__recStruct.size))
489490
if recordContents[0] != b(' '):
490491
# deleted record
491492
return None
@@ -538,7 +539,7 @@ def record(self, i=0):
538539
if not self.numRecords:
539540
self.__dbfHeader()
540541
i = self.__restrictIndex(i)
541-
recSize = self.__recordFmt()[1]
542+
recSize = self.__recStruct.size
542543
f.seek(0)
543544
f.seek(self.__dbfHeaderLength() + (i * recSize))
544545
return self.__record()
@@ -547,13 +548,11 @@ def records(self):
547548
"""Returns all records in a dbf file."""
548549
if not self.numRecords:
549550
self.__dbfHeader()
550-
records = []
551551
f = self.__getFileObj(self.dbf)
552552
f.seek(self.__dbfHeaderLength())
553-
for i in range(self.numRecords):
554-
r = self.__record()
555-
if r:
556-
records.append(r)
553+
flat = unpack(self.__recStruct.format * self.numRecords, f.read(self.__recStruct.size * self.numRecords))
554+
rowlen = len(self.fields) - 1
555+
records = list(izip(*(iter(flat),) * rowlen))
557556
return records
558557

559558
def iterRecords(self):

shapefile.pyc

354 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)