Skip to content

Commit 5544b89

Browse files
committed
Merge pull request #4417 from jreback/hdf_close
API: GH4409 HDFStore adds an is_open property / CLOSED message
2 parents 527db38 + 71efb43 commit 5544b89

File tree

5 files changed

+334
-112
lines changed

5 files changed

+334
-112
lines changed

doc/source/io.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1717,13 +1717,14 @@ Closing a Store, Context Manager
17171717
17181718
.. ipython:: python
17191719
1720-
# closing a store
17211720
store.close()
1721+
store
1722+
store.is_open
17221723
17231724
# Working with, and automatically closing the store with the context
17241725
# manager
17251726
with get_store('store.h5') as store:
1726-
store.keys()
1727+
store.keys()
17271728
17281729
.. ipython:: python
17291730
:suppress:

doc/source/release.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,21 @@ pandas 0.13
6565
an alias of iteritems used to get around ``2to3``'s changes).
6666
(:issue:`4384`, :issue:`4375`, :issue:`4372`)
6767
- ``Series.get`` with negative indexers now returns the same as ``[]`` (:issue:`4390`)
68+
- ``HDFStore``
69+
70+
- added an ``is_open`` property to indicate if the underlying file handle is open;
71+
a closed store will now report 'CLOSED' when viewing the store (rather than raising an error)
72+
(:issue:`4409`)
73+
- a close of a ``HDFStore`` now will close that instance of the ``HDFStore``
74+
but will only close the actual file if the ref count (by ``PyTables``) w.r.t. all of the open handles
75+
is 0. Essentially you have a local instance of ``HDFStore`` referenced by a variable. Once you
76+
close it, it will report closed. Other references (to the same file) will continue to operate
77+
until they themselves are closed. Performing an action on a closed file will raise
78+
``ClosedFileError``
79+
- removed the ``_quiet`` attribute, replaced by a ``DuplicateWarning`` if retrieving
80+
duplicate rows from a table (:issue:`4367`)
81+
- removed the ``warn`` argument from ``open``. Instead a ``PossibleDataLossError`` exception will
82+
be raised if you try to use ``mode='w'`` with an OPEN file handle (:issue:`4367`)
6883

6984
**Experimental Features**
7085

doc/source/v0.13.0.txt

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,44 @@ API changes
3030
an alias of iteritems used to get around ``2to3``'s changes).
3131
(:issue:`4384`, :issue:`4375`, :issue:`4372`)
3232
- ``Series.get`` with negative indexers now returns the same as ``[]`` (:issue:`4390`)
33+
- ``HDFStore``
34+
35+
- added an ``is_open`` property to indicate if the underlying file handle is open;
36+
a closed store will now report 'CLOSED' when viewing the store (rather than raising an error)
37+
(:issue:`4409`)
38+
- a close of a ``HDFStore`` now will close that instance of the ``HDFStore``
39+
but will only close the actual file if the ref count (by ``PyTables``) w.r.t. all of the open handles
40+
is 0. Essentially you have a local instance of ``HDFStore`` referenced by a variable. Once you
41+
close it, it will report closed. Other references (to the same file) will continue to operate
42+
until they themselves are closed. Performing an action on a closed file will raise
43+
``ClosedFileError``
44+
45+
.. ipython:: python
46+
47+
path = 'test.h5'
48+
df = DataFrame(randn(10,2))
49+
store1 = HDFStore(path)
50+
store2 = HDFStore(path)
51+
store1.append('df',df)
52+
store2.append('df2',df)
53+
54+
store1
55+
store2
56+
store1.close()
57+
store2
58+
store2.close()
59+
store2
60+
61+
.. ipython:: python
62+
:suppress:
63+
64+
import os
65+
os.remove(path)
66+
67+
- removed the ``_quiet`` attribute, replaced by a ``DuplicateWarning`` if retrieving
68+
duplicate rows from a table (:issue:`4367`)
69+
- removed the ``warn`` argument from ``open``. Instead a ``PossibleDataLossError`` exception will
70+
be raised if you try to use ``mode='w'`` with an OPEN file handle (:issue:`4367`)
3371

3472
Enhancements
3573
~~~~~~~~~~~~

pandas/io/pytables.py

Lines changed: 71 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -61,26 +61,35 @@ def _ensure_encoding(encoding):
6161
return encoding
6262

6363

64-
class IncompatibilityWarning(Warning):
64+
class PossibleDataLossError(Exception):
65+
pass
66+
67+
class ClosedFileError(Exception):
6568
pass
6669

70+
class IncompatibilityWarning(Warning):
71+
pass
6772

6873
incompatibility_doc = """
6974
where criteria is being ignored as this version [%s] is too old (or
7075
not-defined), read the file in and write it out to a new file to upgrade (with
7176
the copy_to method)
7277
"""
7378

74-
7579
class AttributeConflictWarning(Warning):
7680
pass
7781

78-
7982
attribute_conflict_doc = """
8083
the [%s] attribute of the existing index is [%s] which conflicts with the new
8184
[%s], resetting the attribute to None
8285
"""
8386

87+
class DuplicateWarning(Warning):
88+
pass
89+
90+
duplicate_doc = """
91+
duplicate entries in table, taking most recently appended
92+
"""
8493

8594
performance_doc = """
8695
your performance may suffer as PyTables will pickle object types that it cannot
@@ -263,7 +272,6 @@ class HDFStore(StringMixin):
263272
>>> bar = store['foo'] # retrieve
264273
>>> store.close()
265274
"""
266-
_quiet = False
267275

268276
def __init__(self, path, mode=None, complevel=None, complib=None,
269277
fletcher32=False):
@@ -281,11 +289,12 @@ def __init__(self, path, mode=None, complevel=None, complib=None,
281289
self._complib = complib
282290
self._fletcher32 = fletcher32
283291
self._filters = None
284-
self.open(mode=mode, warn=False)
292+
self.open(mode=mode)
285293

286294
@property
287295
def root(self):
288296
""" return the root node """
297+
self._check_if_open()
289298
return self._handle.root
290299

291300
def __getitem__(self, key):
@@ -299,6 +308,7 @@ def __delitem__(self, key):
299308

300309
def __getattr__(self, name):
301310
""" allow attribute access to get stores """
311+
self._check_if_open()
302312
try:
303313
return self.get(name)
304314
except:
@@ -321,24 +331,26 @@ def __len__(self):
321331

322332
def __unicode__(self):
323333
output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path))
324-
325-
if len(list(self.keys())):
326-
keys = []
327-
values = []
328-
329-
for k in self.keys():
330-
try:
331-
s = self.get_storer(k)
332-
if s is not None:
333-
keys.append(pprint_thing(s.pathname or k))
334-
values.append(pprint_thing(s or 'invalid_HDFStore node'))
335-
except Exception as detail:
336-
keys.append(k)
337-
values.append("[invalid_HDFStore node: %s]" % pprint_thing(detail))
338-
339-
output += adjoin(12, keys, values)
334+
if self.is_open:
335+
if len(list(self.keys())):
336+
keys = []
337+
values = []
338+
339+
for k in self.keys():
340+
try:
341+
s = self.get_storer(k)
342+
if s is not None:
343+
keys.append(pprint_thing(s.pathname or k))
344+
values.append(pprint_thing(s or 'invalid_HDFStore node'))
345+
except Exception as detail:
346+
keys.append(k)
347+
values.append("[invalid_HDFStore node: %s]" % pprint_thing(detail))
348+
349+
output += adjoin(12, keys, values)
350+
else:
351+
output += 'Empty'
340352
else:
341-
output += 'Empty'
353+
output += "File is CLOSED"
342354

343355
return output
344356

@@ -358,7 +370,7 @@ def items(self):
358370

359371
iteritems = items
360372

361-
def open(self, mode='a', warn=True):
373+
def open(self, mode='a'):
362374
"""
363375
Open the file in the specified mode
364376
@@ -367,19 +379,23 @@ def open(self, mode='a', warn=True):
367379
mode : {'a', 'w', 'r', 'r+'}, default 'a'
368380
See HDFStore docstring or tables.openFile for info about modes
369381
"""
370-
self._mode = mode
371-
if warn and mode == 'w': # pragma: no cover
372-
while True:
373-
if compat.PY3:
374-
raw_input = input
375-
response = raw_input("Re-opening as mode='w' will delete the "
376-
"current file. Continue (y/n)?")
377-
if response == 'y':
378-
break
379-
elif response == 'n':
380-
return
381-
if self._handle is not None and self._handle.isopen:
382-
self._handle.close()
382+
if self._mode != mode:
383+
384+
# if we are changing a write mode to read, ok
385+
if self._mode in ['a','w'] and mode in ['r','r+']:
386+
pass
387+
elif mode in ['w']:
388+
389+
# this would truncate, raise here
390+
if self.is_open:
391+
raise PossibleDataLossError("Re-opening the file [{0}] with mode [{1}] "
392+
"will delete the current file!".format(self._path,self._mode))
393+
394+
self._mode = mode
395+
396+
# close and reopen the handle
397+
if self.is_open:
398+
self.close()
383399

384400
if self._complib is not None:
385401
if self._complevel is None:
@@ -401,13 +417,24 @@ def close(self):
401417
"""
402418
Close the PyTables file handle
403419
"""
404-
self._handle.close()
420+
if self._handle is not None:
421+
self._handle.close()
422+
self._handle = None
423+
424+
@property
425+
def is_open(self):
426+
"""
427+
return a boolean indicating whether the file is open
428+
"""
429+
if self._handle is None: return False
430+
return bool(self._handle.isopen)
405431

406432
def flush(self):
407433
"""
408434
Force all buffered modifications to be written to disk
409435
"""
410-
self._handle.flush()
436+
if self._handle is not None:
437+
self._handle.flush()
411438

412439
def get(self, key):
413440
"""
@@ -748,11 +775,13 @@ def create_table_index(self, key, **kwargs):
748775
def groups(self):
749776
""" return a list of all the top-level nodes (that are not themselves a pandas storage object) """
750777
_tables()
778+
self._check_if_open()
751779
return [ g for g in self._handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr(
752780
g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != u('table')) ]
753781

754782
def get_node(self, key):
755783
""" return the node with the key or None if it does not exist """
784+
self._check_if_open()
756785
try:
757786
if not key.startswith('/'):
758787
key = '/' + key
@@ -811,6 +840,9 @@ def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None
811840
return new_store
812841

813842
###### private methods ######
843+
def _check_if_open(self):
844+
if not self.is_open:
845+
raise ClosedFileError("{0} file is not open!".format(self._path))
814846

815847
def _create_storer(self, group, value = None, table = False, append = False, **kwargs):
816848
""" return a suitable Storer class to operate """
@@ -1647,10 +1679,6 @@ def pathname(self):
16471679
def _handle(self):
16481680
return self.parent._handle
16491681

1650-
@property
1651-
def _quiet(self):
1652-
return self.parent._quiet
1653-
16541682
@property
16551683
def _filters(self):
16561684
return self.parent._filters
@@ -2918,9 +2946,7 @@ def read(self, where=None, columns=None, **kwargs):
29182946
objs.append(obj)
29192947

29202948
else:
2921-
if not self._quiet: # pragma: no cover
2922-
print ('Duplicate entries in table, taking most recently '
2923-
'appended')
2949+
warnings.warn(duplicate_doc, DuplicateWarning)
29242950

29252951
# reconstruct
29262952
long_index = MultiIndex.from_arrays(

0 commit comments

Comments
 (0)