Skip to content

Commit 01e6274

Browse files
committed
changes
1 parent ac5258d commit 01e6274

File tree

10 files changed

+358
-109
lines changed

10 files changed

+358
-109
lines changed

checkpoint/persist.py

+75-11
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import os
2+
import shutil
3+
import signal
24
import subprocess
35
import sys
46
import tempfile
57
import typing
6-
from typing import Any
7-
88
import pickle
99
import pathlib
1010
import hashlib
@@ -46,9 +46,8 @@ def run_instrumented_file(
4646

4747
class Loader:
4848
fuel: int
49-
step: int
5049

51-
restored_state: tuple[Any, ...]
50+
restored_state: tuple
5251
iterator: typing.Optional[typing.Iterable]
5352
i: int
5453
filename: pathlib.Path
@@ -72,7 +71,6 @@ def __init__(self, module_filename: str | pathlib.Path, env) -> None:
7271
self.fuel = read_fuel()
7372
self.i = -1
7473
self.printing_index = 1
75-
self.step = read_step()
7674
self.iterator = None
7775
self.restored_state = ()
7876

@@ -101,7 +99,7 @@ def iterate(self, iterable) -> typing.Iterable:
10199

102100
def commit(self, *args) -> None:
103101
self.i += 1
104-
if self.i % self.step in [0, 1]:
102+
if self.i % STEP_VALUE in [0, 1]:
105103
self.fuel -= 1
106104
if self.fuel <= 0:
107105
raise KeyboardInterrupt("Out of fuel")
@@ -161,13 +159,11 @@ def __init__(self, tag: str) -> None:
161159
input()
162160
else:
163161
break
164-
self.step = read_step()
165162
self.i = -1
166-
self.value = None
167163

168164
def commit(self) -> None:
169165
self.i += 1
170-
if self.i % self.step in [0, 1]:
166+
if self.i % STEP_VALUE in [0, 1]:
171167
# send commend to save_snapshot server to take snapshot
172168
self.socket.send(struct.pack("Q", self.i))
173169
# wait for snapshot to start, and continue after it's done
@@ -180,5 +176,73 @@ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
180176
self.socket.close()
181177

182178
def iterate(self, iterable):
183-
for self.value in iterable:
184-
yield self.value
179+
for value in iterable:
180+
yield value
181+
182+
183+
def diff_vm_snapshot(folder: pathlib.Path, i: int) -> int:
184+
folder = folder.as_posix()
185+
result = subprocess.run(
186+
[
187+
"./diff_vm_snapshot",
188+
f"{folder}/{i}.a.dump",
189+
f"{folder}/{i}.b.dump",
190+
"64",
191+
"remove",
192+
],
193+
capture_output=True,
194+
)
195+
if result.returncode != 0:
196+
raise RuntimeError("Failed to run diff_vm_snapshot", result)
197+
return int(result.stdout.strip())
198+
199+
200+
def diff_coredump(folder: pathlib.Path, i: int) -> int:
201+
folder = folder.as_posix()
202+
result = subprocess.run(
203+
[
204+
"./diff_coredump",
205+
f"{folder}/{i}.a.dump",
206+
f"{folder}/{i}.b.dump",
207+
],
208+
capture_output=True,
209+
)
210+
if result.returncode != 0:
211+
raise RuntimeError("Failed to run diff_coredump", result)
212+
return int(result.stdout.strip())
213+
214+
215+
def coredump(tag: str, index: int) -> None:
216+
pid = os.getpid()
217+
result = subprocess.run(["gcore" "-o" f"{tag}" f"{pid}"], shell=True)
218+
if result.returncode != 0:
219+
raise RuntimeError("Failed to run diff_coredump", result)
220+
os.rename(f"{tag}.{pid}", f"{tag}/{index}.core")
221+
222+
223+
def make_dumps_folder(tag: str) -> pathlib.Path:
224+
cwd = pathlib.Path.cwd()
225+
dumps_folder = cwd / "dumps" / tag
226+
shutil.rmtree(dumps_folder, ignore_errors=True)
227+
dumps_folder.mkdir(exist_ok=False, parents=True)
228+
return dumps_folder
229+
230+
231+
def make_results_folder(tag: str) -> pathlib.Path:
232+
cwd = pathlib.Path.cwd()
233+
results_folder = cwd / "results" / tag
234+
results_folder.mkdir(exist_ok=True, parents=True)
235+
return results_folder
236+
237+
238+
PID = os.getpid()
239+
STEP_VALUE = read_step()
240+
241+
242+
def sigint() -> None:
243+
os.kill(PID, signal.SIGINT)
244+
245+
246+
def self_coredump(i) -> None:
247+
if i % STEP_VALUE in [0, 1]:
248+
sigint()

experiment/k_means/proc.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
from checkpoint.persist import self_coredump
2+
import numpy as np
3+
import sklearn.datasets
4+
5+
np.random.seed(42)
6+
7+
8+
def run(X: np.ndarray, k: int, max_iterations: int) -> np.ndarray:
9+
"""A simple clustering method that forms k clusters by iteratively reassigning
10+
samples to the closest centroids and after that moves the centroids to the center
11+
of the new formed clusters. Do K-Means clustering and return cluster indices
12+
@param X: np.ndarray
13+
The dataset to cluster, where each row is a sample and each column is a feature.
14+
@param k: int
15+
The number of clusters the algorithm will form.
16+
@param max_iterations: int
17+
The number of iterations the algorithm will run for if it does
18+
not converge before that.
19+
"""
20+
nsamples, features = X.shape
21+
centroids = X[np.random.choice(nsamples, k)]
22+
clusters = list[list[int]]()
23+
for i in range(max_iterations): # type: int
24+
self_coredump(i)
25+
clusters = [list[int]() for _ in range(k)]
26+
for sample_i in range(len(X)):
27+
r = np.argmin(np.linalg.norm(X[sample_i] - centroids, axis=1))
28+
clusters[r].append(sample_i)
29+
prev_centroids = centroids
30+
centroids = np.array([np.mean(X[cluster], axis=0) for cluster in clusters])
31+
diff = centroids - prev_centroids
32+
if not diff.any():
33+
break
34+
y_pred = np.zeros(nsamples)
35+
for cluster_i in range(len(clusters)):
36+
for sample_i in clusters[cluster_i]:
37+
y_pred[sample_i] = cluster_i
38+
return y_pred
39+
40+
41+
def compute_random(n_samples: int, k: int, plot: bool) -> None:
42+
X, y = sklearn.datasets.make_blobs(
43+
n_samples=n_samples, n_features=2, centers=k, cluster_std=1.8, shuffle=True
44+
)
45+
y_pred = run(X, k=k, max_iterations=1000)
46+
if plot:
47+
import matplotlib.pyplot as plt
48+
49+
plt.scatter(X[:, 0], X[:, 1], c=y_pred)
50+
plt.show()
51+
else:
52+
print("Done.")
53+
54+
55+
if __name__ == "__main__":
56+
import argparse
57+
58+
parser = argparse.ArgumentParser()
59+
parser.add_argument("samples", type=int)
60+
parser.add_argument("k", type=int)
61+
parser.add_argument("--plot", action="store_true")
62+
args = parser.parse_args()
63+
compute_random(args.samples, args.k, args.plot)

pythia/type_system.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1804,7 +1804,10 @@ def visit_FunctionDef(self, fdef: ast.FunctionDef) -> FunctionType:
18041804
if update is not None:
18051805
assert isinstance(update, ast.Call)
18061806
assert len(update.args) == 1
1807-
update_type = self.expr_to_type(update.args[0])
1807+
update_arg = update.args[0]
1808+
if isinstance(update_arg, ast.Constant) and isinstance(update_arg.value, str):
1809+
update_arg = ast.parse(update_arg.s).body[0].value
1810+
update_type = self.expr_to_type(update_arg)
18081811
else:
18091812
update_type = None
18101813
# side_effect = parse_side_effect(fdef.body)

save_snapshot.py

+6-27
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,14 @@
1414
import asyncio
1515
import logging
1616
import os
17-
import pathlib
18-
import shutil
1917
import socket
2018
import struct
21-
import subprocess
2219
import argparse
2320
from concurrent.futures import ThreadPoolExecutor, Future
2421
from typing import Iterator
2522

2623
from checkpoint.qmp_client import SimpleQmpClient
24+
from checkpoint import persist
2725

2826
logging.basicConfig(level=logging.INFO)
2927

@@ -96,30 +94,10 @@ def __next__(self) -> int:
9694
return self.i
9795

9896

99-
def count_diff(folder: pathlib.Path, i: int) -> int:
100-
folder = folder.as_posix()
101-
result = subprocess.run(
102-
[
103-
"./count_diff",
104-
f"{folder}/{i}.a.dump",
105-
f"{folder}/{i}.b.dump",
106-
"64",
107-
"remove",
108-
],
109-
capture_output=True,
110-
)
111-
if result.returncode != 0:
112-
raise RuntimeError("Failed to run count_diff", result)
113-
return int(result.stdout.strip())
114-
115-
11697
async def relay_qmp_results(qmp_port: int, server: Server) -> None:
11798
tag = server.tag
11899
assert tag.replace("_", "").isalnum()
119-
cwd = pathlib.Path.cwd()
120-
dumps_folder = cwd / "dumps" / tag
121-
shutil.rmtree(dumps_folder, ignore_errors=True)
122-
os.makedirs(dumps_folder, exist_ok=False)
100+
dumps_folder = persist.make_dumps_folder(tag)
123101
async with SimpleQmpClient(qmp_port) as vm:
124102
await vm.dump(f"{dumps_folder}/0.a.dump")
125103
with ThreadPoolExecutor() as executor:
@@ -130,15 +108,16 @@ async def relay_qmp_results(qmp_port: int, server: Server) -> None:
130108
next_prev_file = dumps_folder / f"{i + 1}.a.dump"
131109
await vm.dump(current_next_file)
132110
os.link(current_next_file, next_prev_file)
133-
p: Future[int] = executor.submit(count_diff, dumps_folder, i)
111+
p: Future[int] = executor.submit(
112+
persist.diff_vm_snapshot, dumps_folder, i
113+
)
134114
if p is not None:
135115
ps.append(p)
136116
else:
137117
server.finish()
138118
os.unlink(next_prev_file)
139119

140-
results_folder = cwd / "results" / tag
141-
results_folder.mkdir(exist_ok=True, parents=True)
120+
results_folder = persist.make_results_folder(tag)
142121
version_number = len(
143122
[x for x in results_folder.iterdir() if x.name.startswith("vm_")]
144123
)

scripts/Makefile

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
2+
3+
.PHONY: all
4+
5+
all: diff_coredump diff_vm_snapshot
6+
7+
diff_coredump:
8+
gcc -O2 -std=c23 diff_coredump.c -o diff_coredump
9+
10+
11+
diff_vm_snapshot:
12+
g++ -O2 -std=c++20 diff_vm_snapshot.cpp -o diff_vm_snapshot
13+
14+
clean:
15+
rm diff_coredump diff_vm_snapshot
16+

scripts/all_diff.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
CHUNK_SIZE = 64
77

88

9-
def run_count_diff(file1, file2):
9+
def run_count_diff(file1: Path, file2: Path) -> int:
1010
cmd = f"./count_diff {file1} {file2} {CHUNK_SIZE}"
1111
output = os.popen(cmd).read().split()[0]
1212
return int(output)
1313

1414

15-
def main(dumpdir, chunk_size, max_workers):
15+
def main(dumpdir: str, max_workers):
1616
dump_files = list(sorted(Path(dumpdir).iterdir(), key=os.path.getmtime))
1717
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
1818
results = executor.map(run_count_diff, dump_files[:-1], dump_files[1:])
@@ -25,9 +25,8 @@ def main(dumpdir, chunk_size, max_workers):
2525
# Check if folder path is provided as a command-line argument
2626
parser = argparse.ArgumentParser(description="Compute count_diff for pairs of dump files")
2727
parser.add_argument("dump_dir", type=str, help="Directory containing the dump files")
28-
parser.add_argument("-c", "--chunk_size", type=int, default=64, help="Chunk size for comparison")
2928
parser.add_argument("-w", "--max_workers", type=int, default=10, help="Maximum number of worker threads")
3029
args = parser.parse_args()
3130

3231
CHUNK_SIZE = args.chunk_size
33-
main(args.dump_dir, args.chunk_size, args.max_workers)
32+
main(args.dump_dir, args.max_workers)

0 commit comments

Comments
 (0)