|
"""
You can store Pandas DataFrame objects in a Hazelcast cluster and retrieve them with low latency.
There are multiple serialization methods for DataFrame objects.
Hazelcast uses pickle serialization by default for various Python objects, including DataFrame.
So, you can simply put your data directly using the default pickle serialization without any
conversions.

    patients.put(1, df)

Alternatively, you can convert your DataFrame to JSON, CSV or a dict using the to_json(), to_csv()
and to_dict() methods. Note that the JSON and CSV serializations return a string representation of
the DataFrame in the requested format.

    Convert to CSV:  df = df.to_csv()
    Convert to JSON: df = df.to_json()
    Convert to dict: df = df.to_dict()

If you prefer to use these converted representations, you need to re-create the DataFrame object,
since they are not stored as DataFrame objects in the Hazelcast cluster. For CSV, JSON and dict
conversions, use the following methods while retrieving:

    Create from CSV:         df = pd.read_csv(StringIO(patients.get(1)), index_col=0)
    Create from JSON:        df = pd.read_json(StringIO(patients.get(1)))
    Create from dict object: df = pd.DataFrame(patients.get(1))

In addition to the methods above, you can write your own custom serializer for DataFrame objects.
For more information about Python client serialization methods, see
https://hazelcast.readthedocs.io/en/stable/serialization.html
"""

from io import StringIO

import hazelcast
from hazelcast.core import HazelcastJsonValue
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

# Number of patients whose measurements are generated and stored in the map
PATIENT_COUNT = 50
# Number of hourly samples generated for each patient
SAMPLE_COUNT = 75

# Create Hazelcast client
client = hazelcast.HazelcastClient()

try:
    # Get an IMap for storing the patients' DataFrame objects
    patients = client.get_map("patients").blocking()

    # Store the blood pressure and heart rate data of each patient in a DataFrame and load it to
    # the Hazelcast cluster as JSON
    for pid in range(PATIENT_COUNT):
        # Create DataFrame with random values
        df = pd.DataFrame(
            data={
                "blood_pressure": np.random.randint(80, 120, size=(SAMPLE_COUNT,)),
                "heart_rate": np.random.randint(60, 100, size=(SAMPLE_COUNT,)),
            },
            # An explicit offset object is used instead of the "H" alias, which newer pandas
            # releases deprecate
            index=pd.date_range("2023-01-15", periods=SAMPLE_COUNT, freq=pd.offsets.Hour()),
        )
        # Load DataFrame to Hazelcast cluster as HazelcastJsonValue
        patients.put(pid, HazelcastJsonValue(df.to_json()))

    # Retrieve the data of a random patient
    pid = np.random.randint(0, PATIENT_COUNT)
    # Wrap the JSON text in StringIO: passing a literal JSON string to read_json is deprecated
    df = pd.read_json(StringIO(patients.get(pid).to_string()))
finally:
    # Release the client's connections and threads even if a map operation fails, and before
    # blocking on the plot window
    client.shutdown()

# Plot the data
df.plot(use_index=True, y=["blood_pressure", "heart_rate"], figsize=(15, 5), kind="line")
plt.title(f"Blood Pressure and Heart Rate Plot of Patient-{pid}")
plt.show()
0 commit comments