11import abc
22import json
3+ import gzip
4+ import pickle
5+ import sqlite3
36
47import sqlitedict
58from autoextract .request import Request
@@ -39,8 +42,30 @@ def __str__(self):
3942
4043
4144class AutoExtractCache (_Cache ):
def __init__(self, path, *, compressed=True):
    """Open the SQLite-backed cache stored at ``path``.

    Args:
        path: Database location, passed unchanged to
            ``sqlitedict.SqliteDict``.
        compressed: Keyword-only. When true (the default) values are
            stored as gzip-compressed pickles in the ``responses_gzip``
            table; otherwise as plain pickles in the ``responses`` table.
    """
    self.compressed = compressed
    # Each storage format has its own table, which keeps compressed and
    # uncompressed records apart inside the same database file.
    tablename = 'responses_gzip' if compressed else 'responses'
    self.db = sqlitedict.SqliteDict(
        path,
        tablename=tablename,
        autocommit=True,
        encode=self.encode,
        decode=self.decode,
    )
53+
def encode(self, obj):
    """Serialize ``obj`` into a binary value suitable for SQLite storage.

    The object is pickled with the highest available protocol and, when
    ``self.compressed`` is true, gzip-compressed afterwards.

    Args:
        obj: Any picklable object.

    Returns:
        The resulting bytes wrapped in ``sqlite3.Binary``.
    """
    # based on sqlitedict.encode
    data = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
    if self.compressed:
        # NOTE(review): level 3 presumably trades ratio for speed - confirm.
        data = gzip.compress(data, compresslevel=3)
    return sqlite3.Binary(data)
60+
def decode(self, obj):
    """Restore a Python object from a value produced by ``encode``.

    Args:
        obj: A bytes-like value as read back from the database.

    Returns:
        The unpickled object.

    NOTE(security): the payload is passed to ``pickle.loads``, which can
    execute arbitrary code. Only open cache files from a trusted source.
    """
    # based on sqlitedict.decode
    data = bytes(obj)
    if self.compressed:
        # gzip is slightly less efficient than raw zlib, but it does
        # e.g. crc checks out of box
        data = gzip.decompress(data)
    return pickle.loads(data)
4469
4570 @classmethod
4671 def fingerprint (cls , request : Request ) -> str :
@@ -51,7 +76,9 @@ def fingerprint(cls, request: Request) -> str:
5176 )
5277
def __str__(self):
    """Return a one-line summary of the cache.

    The summary shows the database filename, whether compression is
    enabled, and the number of stored records (``len(self.db)``).
    """
    return (
        f"AutoExtractCache <{self.db.filename} | "
        f"compressed: {self.compressed} | "
        f"{len(self.db)} records>"
    )
5582
def __getitem__(self, fingerprint: str):
    """Return the cached value stored under ``fingerprint``.

    The lookup is delegated to ``self.db``; whatever that mapping raises
    for an unknown key propagates to the caller.
    """
    return self.db[fingerprint]
0 commit comments