1
+ import math
1
2
import os
2
3
import time
3
4
import shutil
@@ -28,7 +29,7 @@ def makedirs(path):
28
29
29
30
30
31
def key_filename(key):
    """Return the file name used for a cache key.

    The name is the hexadecimal SHA-1 digest of the key's UTF-8 encoding,
    so it is always 40 characters long regardless of what the key contains.

    Args:
        key: text key (must support ``.encode``).

    Returns:
        The 40-character lowercase hex digest as a string.
    """
    # Canonical codec name, without surrounding whitespace.
    return hashlib.sha1(key.encode("utf-8")).hexdigest()
32
33
33
34
34
35
def object_path (root , key ):
@@ -38,7 +39,7 @@ def object_path(root, key):
38
39
39
40
40
41
def stream_path(root, key):
    """Return the path of the stream file associated with `key`.

    The result is ``object_path(root, key)`` with a fixed suffix appended.

    Args:
        root: passed through to ``object_path``.
        key: passed through to ``object_path``.

    Returns:
        The object path string with the stream suffix appended.
    """
    # NOTE(review): the suffix literal begins with a space, which looks
    # unintended for a file extension -- confirm against existing on-disk
    # names before changing it, since altering it changes every stream path.
    return object_path(root, key) + " .stream"
42
43
43
44
44
45
def is_safely_readable (path ):
@@ -50,11 +51,17 @@ def is_safely_readable(path):
50
51
51
52
52
53
def filesize(path):
    """Return the size of `path` rounded up to whole filesystem blocks.

    The apparent size reported by ``os.stat`` is rounded up to the next
    multiple of the block size reported by ``os.statvfs`` (``f_bsize``).
    An empty file is accounted as one full block.

    Args:
        path: path of the file to measure.

    Returns:
        The block-rounded size in bytes as an integer, or None if the size
        could not be determined for any reason (callers check for None).
    """
    # Deliberately best-effort: any failure (missing file, permission
    # problem, unsupported platform call, zero block size, ...) is reported
    # as None rather than raised.
    try:
        blk_sz = os.statvfs(path).f_bsize
        sz = os.stat(path).st_size
        if sz == 0:
            return blk_sz
        # Integer ceiling division: exact for arbitrarily large sizes and
        # independent of whether "/" means true or floor division.
        return blk_sz * -(-sz // blk_sz)
    except Exception:
        return None
54
62
55
63
56
64
class CacheStore (object ):
57
-
58
65
def __init__ (self , root , max_size , echo , fill_factor = 0.8 ):
59
66
self .root = os .path .abspath (root )
60
67
self .tmproot = self ._init_temp (self .root )
@@ -79,7 +86,7 @@ def warn(self, ex, msg):
79
86
self .echo ("IO ERROR: (%s) %s" % (ex , msg ))
80
87
81
88
def _init_temp (self , root ):
82
- tmproot = os .path .join (root , ' tmp' )
89
+ tmproot = os .path .join (root , " tmp" )
83
90
if os .path .exists (tmproot ):
84
91
self .safe_fileop (shutil .rmtree , tmproot )
85
92
makedirs (tmproot )
@@ -94,18 +101,23 @@ def _init_gc(self, root):
94
101
self .safe_fileop (os .unlink , path )
95
102
else :
96
103
stat_res = self .safe_fileop (os .stat , path )
97
- if stat_res :
104
+ sz = filesize (path )
105
+ if stat_res and sz is not None :
98
106
_ , info = stat_res
99
107
if info .st_mtime == TIMESTAMP_FOR_DELETABLE :
100
- self .safe_fileop (os .unlink , path )
108
+ # If we can't unlink it, we add it to the gc_queue
109
+ # to try at least another time
110
+ if not self .safe_fileop (os .unlink , path ):
111
+ self .gc_queue [path ] = (time .time (), sz )
101
112
elif info .st_mtime == TIMESTAMP_FOR_DISPOSABLE :
102
- self .disposables_queue [path ] = info . st_size
113
+ self .disposables_queue [path ] = sz
103
114
else :
104
- objects .append ((info .st_mtime , (path , info . st_size )))
115
+ objects .append ((info .st_mtime , (path , sz )))
105
116
106
117
self .objects_queue .update (x for _ , x in sorted (objects ))
107
- self .total_size = sum (self .disposables_queue .values ()) + \
108
- sum (self .objects_queue .values ())
118
+ self .total_size = sum (self .disposables_queue .values ()) + sum (
119
+ self .objects_queue .values ()
120
+ )
109
121
110
122
# It is possible that the datastore contains more than gc_watermark
111
123
# bytes. To ensure that we start below the gc_watermark, we run the GC:
@@ -115,17 +127,19 @@ def _init_gc(self, root):
115
127
# yet, so we can safely delete the marked objects without a quarantine:
116
128
self ._gc_objects (quarantine = - 1 )
117
129
118
- self .echo ("Cache initialized with %d permanents objects, "
119
- "%d disposable objects, totaling %d bytes."
120
- % (len (self .objects_queue ),
121
- len (self .disposables_queue ),
122
- self .total_size ))
130
+ self .warn (
131
+ None ,
132
+ "Cache initialized with %d permanents objects, "
133
+ "%d disposable objects, totaling %d bytes."
134
+ % (len (self .objects_queue ), len (self .disposables_queue ), self .total_size )
135
+ )
123
136
124
137
def _gc_objects (self , quarantine = GC_MARKER_QUARANTINE ):
125
138
def mark_for_deletion (path , size ):
126
- self .safe_fileop (os .utime , path , (TIMESTAMP_FOR_DELETABLE ,
127
- TIMESTAMP_FOR_DELETABLE ))
128
- self .gc_queue [path ] = (time .time (), size )
139
+ if self .safe_fileop (
140
+ os .utime , path , (TIMESTAMP_FOR_DELETABLE , TIMESTAMP_FOR_DELETABLE )
141
+ ):
142
+ self .gc_queue [path ] = (time .time (), size )
129
143
130
144
# 1) delete marked objects that are past their quarantine period
131
145
limit = time .time () - quarantine
@@ -135,6 +149,13 @@ def mark_for_deletion(path, size):
135
149
if self .safe_fileop (os .unlink , path ):
136
150
del self .gc_queue [path ]
137
151
self .total_size -= size
152
+ else :
153
+ self .echo (
154
+ "Could not remove file at '%s' -- removing from GC" % path
155
+ )
156
+ # We still remove to prevent the garbage collector from
157
+ # being stuck a few lines below.
158
+ del self .gc_queue [path ]
138
159
else :
139
160
break
140
161
@@ -169,14 +190,15 @@ def ensure_path(self, path):
169
190
170
191
def open_tempdir (self , token , action_name , stream_key ):
171
192
self ._gc_objects ()
172
-
173
193
if self .total_size > self .max_size :
174
- self .warn (None , "Cache soft limit reached! Used %d bytes, max %s bytes"
175
- % (self .total_size , self .max_size ))
194
+ self .warn (
195
+ None ,
196
+ "Cache soft limit reached! Used %d bytes, max %s bytes"
197
+ % (self .total_size , self .max_size ),
198
+ )
176
199
177
200
try :
178
- tmp = tempfile .mkdtemp (prefix = 'cache_action_%s.' % token ,
179
- dir = self .tmproot )
201
+ tmp = tempfile .mkdtemp (prefix = "cache_action_%s." % token , dir = self .tmproot )
180
202
except Exception as ex :
181
203
msg = "Could not create a temp directory for request %s" % token
182
204
self .warn (ex , msg )
@@ -187,7 +209,7 @@ def open_tempdir(self, token, action_name, stream_key):
187
209
# make sure that the new symlink points at a valid (empty!)
188
210
# file by creating a dummy destination file
189
211
self .ensure_path (src )
190
- open_res = self .safe_fileop (open , dst , 'w' )
212
+ open_res = self .safe_fileop (open , dst , "w" )
191
213
if open_res :
192
214
_ , f = open_res
193
215
f .close ()
@@ -198,8 +220,7 @@ def open_tempdir(self, token, action_name, stream_key):
198
220
# simultaneously. We don't consider an existing
199
221
# symlink (errno 17) to be an error.
200
222
if ex .errno != 17 :
201
- err = "Could not create a symlink %s->%s" \
202
- % (src , dst )
223
+ err = "Could not create a symlink %s->%s" % (src , dst )
203
224
self .warn (ex , err )
204
225
except Exception as ex :
205
226
self .warn (ex , "Unknown error" )
@@ -227,6 +248,12 @@ def _insert(queue, key, value):
227
248
# previous entry first
228
249
queue .pop (key , None )
229
250
queue [key ] = value
251
+ # If we are inserting something in disposables_queue or objects_queue,
252
+ # we make sure it is no longer in the gc_queue. This can happen if, for
253
+ # example, an object is marked as deletable, is therefore not "readable"
254
+ # and is therefore re-created.
255
+ if key in self .gc_queue :
256
+ del self .gc_queue [key ]
230
257
231
258
disposables = frozenset (disposable_keys )
232
259
missing = []
@@ -243,21 +270,19 @@ def _insert(queue, key, value):
243
270
if os .path .exists (src ):
244
271
dst = object_path (self .root , key )
245
272
self .ensure_path (dst )
246
- stat_res = self .safe_fileop (os .stat , src )
247
- if stat_res and self .safe_fileop (os .rename , src , dst ):
248
- _ , info = stat_res
249
- size = info .st_size
273
+ sz = filesize (src )
274
+ if sz is not None and self .safe_fileop (os .rename , src , dst ):
250
275
if key in disposables :
251
276
# we proceed even if we fail to mark the object as
252
277
# disposable. It just means that during a possible
253
278
# restart the object is treated as a non-disposable
254
279
# object
255
280
tstamp = TIMESTAMP_FOR_DISPOSABLE
256
281
self .safe_fileop (os .utime , dst , (tstamp , tstamp ))
257
- _insert (self .disposables_queue , dst , size )
282
+ _insert (self .disposables_queue , dst , sz )
258
283
else :
259
- _insert (self .objects_queue , dst , size )
260
- self .total_size += size
284
+ _insert (self .objects_queue , dst , sz )
285
+ self .total_size += sz
261
286
else :
262
287
missing .append (key )
263
288
0 commit comments