Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

partial call chain and on-the-fly verification #29

Closed
wants to merge 39 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
2a94736
Include 900+ PnetCDF functions and 700+ HDF5 functions
wangvsa Aug 14, 2024
df8b872
Make the code compatible with all HDF5 version; tested with 1.8, 1.10…
wangvsa Aug 15, 2024
bf3f52b
Merge pull request #27 from uiuc-hpc/dev
wangvsa Aug 15, 2024
c323ee2
Fix hdf5 version issue
wangvsa Aug 15, 2024
dcb0a32
Bug fix
wangvsa Aug 15, 2024
a55a5ba
Add NetCDF support (300+ functions)
wangvsa Aug 15, 2024
9f6901c
Trace netcdf by default
wangvsa Aug 15, 2024
e2df24a
Check and report cycles
wangvsa Aug 17, 2024
59ed717
Add two MPI_File_iwrite functions
wangvsa Aug 18, 2024
11b5031
add flag for summary
lalilalalalu Aug 19, 2024
b3efdc7
generic match
lalilalalalu Aug 20, 2024
dbff5be
merge with current
lalilalalalu Aug 20, 2024
4d802a9
Provide simplifed Record for use by VerifyIO
wangvsa Aug 20, 2024
7c6a939
add generic mapping
lalilalalalu Aug 20, 2024
b07153e
check the mpi call class
Aug 21, 2024
a36f36e
changes@
Aug 22, 2024
72851cd
Merge pull request #28 from lalilalalalu/mydev
wangvsa Aug 22, 2024
64ae334
Bug fix
wangvsa Aug 22, 2024
e0053a4
Filtering MPI call arguments and optmizing MPI matching code
wangvsa Aug 22, 2024
7170418
Parse srouce and tag for recv calls with ANY_SOURCE and ANY_TAG
wangvsa Aug 23, 2024
7bab963
Bug fix
wangvsa Aug 23, 2024
8b6b284
Remove debug output
wangvsa Aug 23, 2024
4484f17
Update output
wangvsa Aug 23, 2024
842a7f1
Fix typo
wangvsa Aug 26, 2024
687481c
Solve the bug when array size is 0
wangvsa Aug 27, 2024
c691e87
Add MPI_Issend support
wangvsa Aug 27, 2024
2c754de
Bug fix
wangvsa Aug 27, 2024
7460ef5
Bug fix
wangvsa Aug 28, 2024
52ac5d2
Fix issues of matching MPI_Issend
wangvsa Aug 30, 2024
f3ca76e
Clean the code and add document
wangvsa Aug 30, 2024
2a66aa4
Bug fix
wangvsa Sep 2, 2024
1f16acb
Clean up code
wangvsa Sep 2, 2024
bc877c3
Consider fcntl/flock for synchronizing conflicting operations
wangvsa Sep 2, 2024
2971cd1
Include fcntl call
wangvsa Sep 2, 2024
3f5883b
Handle corner case where each rank has only a single mpi node in the …
wangvsa Sep 2, 2024
70aeb54
Include metadata operations for commit/session semantics check
wangvsa Sep 3, 2024
19ee405
Add fclose/fopen
wangvsa Sep 3, 2024
6621ef8
merged
Sep 3, 2024
3dd75ac
change arg name
Sep 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Check and report cycles
Signed-off-by: Chen Wang <wangvsa@gmail.com>
  • Loading branch information
wangvsa committed Aug 17, 2024
commit e2df24a5c3fc295e8c8e1eadc43a139fa9c02394
4 changes: 2 additions & 2 deletions tools/verifyio/match_mpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def read_one_mpi_call(self, rank, seq_id, record):
# str first
args = []
for i in range(record.arg_count):
arg = record.args[i].decode("utf-8")
arg = record.args[i].decode("utf-8", "ignore")
args.append(arg)

src, dst, stag, rtag = None, None, None, None
Expand Down Expand Up @@ -433,7 +433,7 @@ def add_nodes_to_edge(edge, call):
coll_call.matched = True

mpi_call.matched = True
#print("match collective:", mpi_call.func, "root:", mpi_call.src, mpi_call.comm)
#print("match collective:", mpi_call.func, "root:", mpi_call.src, mpi_call.comm, mpi_call.req, mpi_call.reqflag)
return edge

#@profile
Expand Down
9 changes: 7 additions & 2 deletions tools/verifyio/verifyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,11 +260,16 @@ def get_conflict_info(nodes: list, reader: RecorderReader, summary=None, show_de

t1 = time.time()
G = VerifyIOGraph(all_nodes, mpi_edges, include_vc=True)
G.run_vector_clock()
#G.run_transitive_closure()
t2 = time.time()
print("build happens-before graph: %.3f secs, nodes: %d" %((t2-t1), G.num_nodes()))

# Correct code (traces) should result in
# a DAG without any cycles
if G.check_cycles(): quit()

G.run_vector_clock()
#G.run_transitive_closure()

# G.plot_graph("vgraph.jpg")

t1 = time.time()
Expand Down
27 changes: 25 additions & 2 deletions tools/verifyio/verifyio_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def next_hb_node(self, current, funcs, target_rank):
nodes = self.nodes[target_rank]
for target in nodes:
if (target.func in funcs) and (self.has_path(current, target)):
targest.append(target)
target.append(target)
break
return target

Expand Down Expand Up @@ -173,7 +173,7 @@ def __build_graph(self, all_nodes, mpi_edges, include_vc):
# have added all nodes. We use this function
# to add edges of matching MPI calls
ghost_node_index = 0
for edge in mpi_edges:
for edge in mpi_edges[0:161]:
head, tail = edge.head, edge.tail
# all-to-all
# TODO is there a many-to-many MPI call that
Expand Down Expand Up @@ -214,6 +214,7 @@ def __build_graph(self, all_nodes, mpi_edges, include_vc):

# many-to-one, e.g., reduce
elif edge.call_type == MPICallType.MANY_TO_ONE:
print(head, tail)
for h in head:
self.add_edge(h, tail)
# one-to-many, e.g., bcast
Expand All @@ -229,3 +230,25 @@ def __build_graph(self, all_nodes, mpi_edges, include_vc):
#print("Generate transitive closure")
#tc = nx.transitive_closure(self.G)

# Detect cycles of the graph
# correct code should contain no cycles.
# incorrect code, e.g., with unmatched collective calls
# may have cycles
# e.g., pnetcdf/test/testcases/test-varm.c
# it uses ncmpi_wait() call, which may result in
# rank0 calling MPI_File_write_at_all(), and
# rank1 calling MPI_File_write_all()
#
# Our verification algorithm assumes the graph
# has no cycles, so we need to do this check first.
def check_cycles(self):
    """Report whether the graph held in ``self.G`` contains a cycle.

    Calls ``nx.find_cycle`` on ``self.G``. When a cycle is found, a
    warning line and the cycle itself are printed to stdout.

    Returns:
        True if ``nx.find_cycle`` returned a cycle, False if it raised
        ``nx.exception.NetworkXNoCycle``.
    """
    try:
        found = nx.find_cycle(self.G)
    except nx.exception.NetworkXNoCycle:
        # No cycle found: nothing to report.
        return False

    print("Generated graph contain cycles. Original code may have bugs.")
    print(found)
    return True