|
13 | 13 | # limitations under the License.
|
14 | 14 |
|
15 | 15 | import copy
|
| 16 | +import gzip |
| 17 | +import pickle |
16 | 18 | import uuid
|
17 | 19 | from abc import abstractmethod
|
| 20 | +from pathlib import Path |
18 | 21 | from typing import (
|
19 | 22 | List,
|
20 | 23 | Optional,
|
@@ -162,19 +165,44 @@ def pack_name(self, pack_name: str):
|
162 | 165 | self._meta.pack_name = pack_name
|
163 | 166 |
|
@classmethod
def _deserialize(
    cls,
    data_source: Union[Path, str],
    serialize_method: str = "jsonpickle",
    zip_pack: bool = False,
) -> "PackType":
    """
    Deserialize a pack from a file, mirroring what `serialize` writes.
    The implementation should decide the specific pack type.

    Args:
        data_source: The data path containing pack data. The content
            of the data could be string or bytes depending on the method
            of serialization.
        serialize_method: The method used to serialize the data, this
            should be the same as how serialization is done. The current
            options are "jsonpickle" and "pickle". The default method
            is "jsonpickle".
        zip_pack: Boolean value indicating whether the input source is
            zipped (`gzip`).

    Returns:
        A pack object deserialized from the data.

    Raises:
        NotImplementedError: If `serialize_method` is neither
            "jsonpickle" nor "pickle". The check happens before the file
            is opened, so an unknown method is never silently treated as
            "pickle".
    """
    if serialize_method not in ("jsonpickle", "pickle"):
        raise NotImplementedError(
            f"Unsupported serialization method {serialize_method}"
        )

    _open = gzip.open if zip_pack else open

    if serialize_method == "jsonpickle":
        # `serialize` writes JSON text as UTF-8; read it back with the same
        # encoding rather than the platform's locale default.
        with _open(  # type: ignore
            data_source, mode="rt", encoding="utf-8"
        ) as f:
            pack = cls.from_string(f.read())
    else:
        # NOTE(security): unpickling can execute arbitrary code. Only load
        # pickled packs that come from a trusted source.
        with _open(data_source, mode="rb") as f:  # type: ignore
            pack = pickle.load(f)

    return pack  # type: ignore
| 202 | + |
@classmethod
def from_string(cls, data_content: str) -> "BasePack":
    """
    Rebuild a pack from its `jsonpickle`-encoded text.

    Args:
        data_content: The `jsonpickle` string to decode.

    Returns:
        The object produced by `jsonpickle.decode` for the given text.
    """
    decoded_pack = jsonpickle.decode(data_content)
    return decoded_pack
178 | 206 |
|
179 | 207 | @abstractmethod
|
180 | 208 | def delete_entry(self, entry: EntryType):
|
@@ -238,13 +266,76 @@ def add_all_remaining_entries(self, component: Optional[str] = None):
|
238 | 266 | self.add_entry(entry, c_)
|
239 | 267 | self._pending_entries.clear()
|
240 | 268 |
|
241 |
| - def serialize(self, drop_record: Optional[bool] = False) -> str: |
242 |
| - r"""Serializes a pack to a string.""" |
def to_string(
    self,
    drop_record: Optional[bool] = False,
    json_method: str = "jsonpickle",
    indent: Optional[int] = None,
) -> str:
    """
    Return the string representation (JSON encoded) of this pack.

    Args:
        drop_record: Whether to drop the creation records, default is
            False. When true, the creation and field records of this
            pack are cleared in place before encoding.
        json_method: What method is used to convert data pack to json.
            Only supports "jsonpickle" for now. Default value is
            "jsonpickle".
        indent: The indent used for json string, forwarded to
            `jsonpickle.encode`.

    Returns: String representation of the data pack.

    Raises:
        ValueError: If `json_method` is not supported. This is checked
            first, so a rejected call never drops any records.
    """
    # Validate before mutating: a bad argument must not destroy records.
    if json_method != "jsonpickle":
        raise ValueError(f"Unsupported JSON method {json_method}.")

    if drop_record:
        self._creation_records.clear()
        self._field_records.clear()

    return jsonpickle.encode(self, unpicklable=True, indent=indent)
| 294 | + |
def serialize(
    self,
    output_path: Union[str, Path],
    zip_pack: bool = False,
    drop_record: bool = False,
    serialize_method: str = "jsonpickle",
    indent: Optional[int] = None,
):
    r"""
    Serializes the data pack to the provided path. The output of this
    function depends on the serialization method chosen.

    Args:
        output_path: The path to write data to.
        zip_pack: Whether to compress the result with `gzip`.
        drop_record: Whether to drop the creation records, default is
            False. When true, the creation and field records of this
            pack are cleared in place before writing.
        serialize_method: The method used to serialize the data. Currently
            supports "jsonpickle" (outputs str) and Python's built-in
            "pickle" (outputs bytes).
        indent: The indent to use if the file is written as JSON; ignored
            for "pickle".

    Returns: None. The serialized pack is written to `output_path`.

    Raises:
        NotImplementedError: If `serialize_method` is not supported. This
            is checked first, so a rejected call neither drops records
            nor touches `output_path`.
    """
    # Validate before mutating anything or opening the output file.
    if serialize_method not in ("pickle", "jsonpickle"):
        raise NotImplementedError(
            f"Unsupported serialization method {serialize_method}"
        )

    _open = gzip.open if zip_pack else open

    if drop_record:
        self._creation_records.clear()
        self._field_records.clear()

    if serialize_method == "pickle":
        with _open(output_path, mode="wb") as pickle_out:  # type: ignore
            pickle.dump(self, pickle_out)  # type:ignore
    else:
        # Encode first so that a failure while encoding does not leave a
        # truncated, empty file behind at `output_path`.
        json_text = self.to_string(drop_record, "jsonpickle", indent=indent)
        with _open(  # type: ignore
            output_path, mode="wt", encoding="utf-8"
        ) as json_out:
            json_out.write(json_text)
248 | 339 |
|
def view(self):
    """Return a deep copy of this pack, produced with `copy.deepcopy`."""
    duplicate = copy.deepcopy(self)
    return duplicate
|
@@ -457,22 +548,6 @@ def get_ids_from(self, components: List[str]) -> Set[int]:
|
457 | 548 | valid_component_id |= self.get_ids_by_creator(component)
|
458 | 549 | return valid_component_id
|
459 | 550 |
|
460 |
| - def get_ids_by_type_subtype(self, entry_type: Type[EntryType]) -> Set[int]: |
461 |
| - r"""Look up the type_index with key ``entry_type``. |
462 |
| -
|
463 |
| - Args: |
464 |
| - entry_type: The type of the entry you are looking for. |
465 |
| -
|
466 |
| - Returns: |
467 |
| - A set of entry ids. The entries are instances of `entry_type` ( |
468 |
| - and also includes instances of the subclasses of `entry_type`). |
469 |
| - """ |
470 |
| - subclass_index: Set[int] = set() |
471 |
| - for index_key, index_val in self._index.iter_type_index(): |
472 |
| - if issubclass(index_key, entry_type): |
473 |
| - subclass_index.update(index_val) |
474 |
| - return subclass_index |
475 |
| - |
476 | 551 | def _expand_to_sub_types(self, entry_type: Type[EntryType]) -> Set[Type]:
|
477 | 552 | """
|
478 | 553 | Return all the types and the sub types that inherit from the provided
|
@@ -511,7 +586,7 @@ def get_entries_of(
|
511 | 586 | for tid in self._index.query_by_type(entry_type):
|
512 | 587 | yield self.get_entry(tid)
|
513 | 588 | else:
|
514 |
| - for tid in self.get_ids_by_type_subtype(entry_type): |
| 589 | + for tid in self._index.query_by_type_subtype(entry_type): |
515 | 590 | yield self.get_entry(tid)
|
516 | 591 |
|
517 | 592 | @classmethod
|
|
0 commit comments