Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 93 additions & 119 deletions graph_net/tools/generate_subgraph_dataset.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
#!/bin/bash
# Trace every command as it is executed.
set -x

# Positional arguments; each falls back to the default after ":-" when the
# argument is unset or empty.
# NOTE(review): MIN_SEQ_OPS is assigned twice in a row, so only the second
# default (4) takes effect. This looks like a before/after pair from a rendered
# diff -- confirm which line belongs to the current revision.
MIN_SEQ_OPS=${1:-16}
MIN_SEQ_OPS=${1:-4}
MAX_SEQ_OPS=${2:-64}
GPU_ID=${3:-0}
# Interpolated into the "limits_handled_models" field of the dimension
# generalizer's JSON config.
LIMITS_HANDLED_MODELS=${4:-40}

# "<min>-<max>" label; main() embeds it in the suffix of the log file names.
OP_RANGE=$MIN_SEQ_OPS-$MAX_SEQ_OPS

Expand All @@ -14,51 +13,39 @@ GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname(
# Interpolated verbatim into the "resume" field of the JSON configs that are
# handed to the Python tools.
RESUME="true"

# Root directory under which all intermediate outputs and logs are written.
DECOMPOSE_WORKSPACE=/tmp/subgraph_dataset_workspace
# Per-stage output directories, all located under DECOMPOSE_WORKSPACE.
# NOTE(review): two naming schemes appear back to back -- an unnumbered set
# followed by an NN_-prefixed set. Names present in both (for example
# DEVICE_REWRITED_OUTPUT_DIR, OP_NAMES_OUTPUT_DIR, UNITTESTS_OUTPUT_DIR) end up
# with the later, numbered value. This looks like old/new lines of a rendered
# diff -- confirm which set belongs to the current revision.
DEVICE_REWRITED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/device_rewrited
DIMENSION_GENERALIZER_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/dimension_generalized_samples
OP_NAMES_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/sample_op_names
SPLIT_POINTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/split_points
RANGE_DECOMPOSE_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/range_decompose
GRAPH_VAR_RENAME_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/graph_var_renamed
DEDUPLICATED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/deduplicated
CUMSUM_NUM_KERNELS_DIR=$DECOMPOSE_WORKSPACE/cumsum_num_kernels
FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/fusible_subgraph_ranges
GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/grouped_fusible_subgraph_ranges
FUSIBLE_SUBGRAPH_SAMPLES_DIR=$DECOMPOSE_WORKSPACE/fusible_subgraph_samples
RENAMED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/renamed_fusible_subgraphs
DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/deduplicated_fusible_subgraphs
DIMENSION_SUBGRAPH_SAMPLES_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/workspace_dimension_subgraph_samples
UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/unittests
# Numbered variant of the stage directories; the numeric prefix matches the
# order in which the variables are listed here.
DEVICE_REWRITED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/01_device_rewrited_samples
DIMENSION_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/02_dimension_generalized_samples
OP_NAMES_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/03_sample_op_names
SUBGRAPH_RANGES_JSON_ROOT=$DECOMPOSE_WORKSPACE/04_subgraph_ranges
RANGE_DECOMPOSE_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/05_range_decompose_subgraphs
GRAPH_VAR_RENAME_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/06_renamed_subgraphs
DEDUPLICATED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/07_deduplicated_subgraphs
CUMSUM_NUM_KERNELS_DIR=$DECOMPOSE_WORKSPACE/08_cumsum_num_kernels
FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/09_fusible_subgraph_ranges
GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/10_grouped_fusible_subgraph_ranges
SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/11_dimension_generalized_fusible_subgraphs
RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/12_renamed_dimension_generalized_fusible_subgraphs
DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/13_deduplicated_dimension_generalized_fusible_subgraphs
UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/14_kernelbench_unittests

# Create the workspace root up front; -p makes this a no-op when it already exists.
mkdir -p "$DECOMPOSE_WORKSPACE"

# Sample-list files. model_list ships inside the graph_net checkout; the others
# are paths under the workspace. Several of them are passed to the Python tools
# as --model-path-list further down.
model_list="$GRAPH_NET_ROOT/graph_net/config/small100_torch_samples_list.txt"
device_rewrited_sample_list=${DECOMPOSE_WORKSPACE}/device_rewrited_sample_list.txt
range_decomposed_subgraph_list=${DECOMPOSE_WORKSPACE}/range_decomposed_subgraph_sample_list.txt
deduplicated_subgraph_list=${DECOMPOSE_WORKSPACE}/deduplicated_subgraph_sample_list.txt
device_rewrited_subgraph_list=${DECOMPOSE_WORKSPACE}/device_rewrited_subgraph_sample_list.txt
fusible_subgraph_list=${DECOMPOSE_WORKSPACE}/fusible_subgraph_sample_list.txt
deduplicated_fusible_subgraphs_list=${DECOMPOSE_WORKSPACE}/deduplicated_fusible_subgraph_sample_list.txt
group_subgraph_sources_list=${DECOMPOSE_WORKSPACE}/group_subgraph_sources_sample_list.txt
# Bare file name with no directory component, unlike the list paths above.
dimension_generalizer_samples_list=dimension_generalizer_sample_list.txt
# NOTE(review): no use of this variable is visible in this view; the JSON
# configs spell the class name out literally -- confirm it is still needed.
model_runnable_predicator=ModelRunnablePredicator

function generate_subgraph_list_by_index() {
dimension_generalized_subgraph_list=${DECOMPOSE_WORKSPACE}/dimension_generalized_subgraph_sample_list.txt
deduplicated_fusible_subgraphs_list=${DECOMPOSE_WORKSPACE}/deduplicated_dimension_generalized_subgraph_sample_list.txt

function generate_generalized_subgraph_list() {
local target_dir="$1"
local sample_list_name="$2"
local max_index="$3"
echo ">>> Generate subgraph_sample_list for samples under ${target_dir} with index 0-${max_index}."
local sample_list="$2"
echo ">>> Generate subgraph_sample_list for samples under ${target_dir}."
echo ">>>"
for index in $(seq 0 $max_index); do
local sample_list="${target_dir}/${index}/${sample_list_name}"
echo ">>> Generating list for index ${index}"

find ${target_dir}/${index} -name "model.py" \
| xargs dirname \
| xargs realpath --relative-to=${target_dir}/${index} \
| tee $sample_list

echo "Generated: $sample_list"
done
find ${target_dir} -name "model.py" \
| xargs dirname \
| xargs realpath --relative-to=${target_dir} \
| tee $sample_list
}

function generate_subgraph_list() {
Expand Down Expand Up @@ -95,37 +82,37 @@ EOF
}

# Step [2]: run the ApplyDimGenPasses sample pass (graph_net/dimension_generalizer.py)
# through `python3 -m graph_net.apply_sample_pass`.
#
# The pass configuration is a JSON document written as a here-document,
# base64-encoded onto a single line (-w 0 disables line wrapping) and passed as
# the value of --sample-pass-config.
#
# Globals read: GRAPH_NET_ROOT, DEVICE_REWRITED_OUTPUT_DIR,
#   DIMENSION_GENERALIZER_OUTPUT_DIR, DIMENSION_GENERALIZED_OUTPUT_DIR,
#   LIMITS_HANDLED_MODELS, RESUME, device_rewrited_subgraph_list,
#   device_rewrited_sample_list.
# Arguments: none.
# Outputs: two banner lines on stdout, followed by whatever the Python tool prints.
#
# NOTE(review): the banner echo, the --model-path-list option and the
# "output_dir" JSON key each appear twice with different values. This looks
# like old/new line pairs from a rendered diff -- confirm which of each pair
# belongs to the current revision before relying on this function.
function dimension_generalizer(){
echo ">>> [2] dimension generalize for samoles under ${device_rewrited_subgraph_list}."
echo ">>> [2] Apply dimension generalization for samples under ${device_rewrited_sample_list}."
echo ">>>"
# The here-document delimiter is unquoted, so the shell expands every $VAR
# inside the JSON before it is encoded.
python3 -m graph_net.apply_sample_pass \
--model-path-list $device_rewrited_subgraph_list \
--model-path-list $device_rewrited_sample_list \
--sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/dimension_generalizer.py" \
--sample-pass-class-name "ApplyDimGenPasses" \
--sample-pass-config $(base64 -w 0 <<EOF
{
"output_dir": "${DIMENSION_GENERALIZER_OUTPUT_DIR}",
"output_dir": "${DIMENSION_GENERALIZED_OUTPUT_DIR}",
"model_path_prefix": "$DEVICE_REWRITED_OUTPUT_DIR",
"dimension_generalizer_filepath": "$GRAPH_NET_ROOT/graph_net/torch/static_to_dynamic.py",
"dimension_generalizer_class_name": "StaticToDynamic",
"limits_handled_models": ${LIMITS_HANDLED_MODELS},
"resume": ${RESUME},
"last_model_log_file": "/tmp/a.py"
}
EOF
)
}

function generate_op_names() {
echo ">>> [3] Generate op_names.txt for samples in ${device_rewrited_subgraph_list}."
echo ">>>"
echo ">>> [3] Generate op_names.txt for samples in ${model_list}."
echo ">>>"
python3 -m graph_net.model_path_handler \
--model-path-list $device_rewrited_subgraph_list \
--model-path-list $model_list \
--handler-config=$(base64 -w 0 <<EOF
{
"handler_path": "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/op_names_extractor.py",
"handler_class_name": "OpNamesExtractor",
"handler_config": {
"resume": ${RESUME},
"model_path_prefix": "$DEVICE_REWRITED_OUTPUT_DIR",
"model_path_prefix": "$GRAPH_NET_ROOT",
"output_dir": "${OP_NAMES_OUTPUT_DIR}"
}
}
Expand All @@ -134,17 +121,17 @@ EOF
}

function generate_split_point() {
echo ">>> [4] Generate split points for samples in ${device_rewrited_subgraph_list}."
echo ">>> [4] Generate subgraph_ranges.json for samples in ${model_list}."
echo ">>> MIN_SEQ_OPS: ${MIN_SEQ_OPS}, MAX_SEQ_OPS: ${MAX_SEQ_OPS}"
echo ">>>"
python3 -m graph_net.apply_sample_pass \
--model-path-list $device_rewrited_subgraph_list \
--model-path-list $model_list \
--sample-pass-file-path $GRAPH_NET_ROOT/graph_net/torch/sample_pass/typical_sequence_split_points.py \
--sample-pass-class-name TypicalSequenceSplitPointsGenerator \
--sample-pass-config=$(base64 -w 0 <<EOF
{
"model_path_prefix": "$DEVICE_REWRITED_OUTPUT_DIR",
"output_dir": "$DECOMPOSE_WORKSPACE",
"model_path_prefix": "$GRAPH_NET_ROOT",
"output_dir": "$SUBGRAPH_RANGES_JSON_ROOT",
"op_names_path_prefix": "${OP_NAMES_OUTPUT_DIR}",
"device": "cuda",
"window_size": 64,
Expand All @@ -161,10 +148,10 @@ EOF
}

function range_decompose() {
echo ">>> [5] Decompose according to split_results.json for samples in ${device_rewrited_subgraph_list}."
echo ">>> [5] Decompose according to subgraph_ranges.json for samples in ${device_rewrited_sample_list}."
echo ">>>"
python3 -m graph_net.model_path_handler \
--model-path-list "$device_rewrited_subgraph_list" \
--model-path-list "$device_rewrited_sample_list" \
--handler-config=$(base64 -w 0 <<EOF
{
"handler_path": "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/subgraph_generator.py",
Expand All @@ -173,10 +160,11 @@ function range_decompose() {
"resume": ${RESUME},
"model_path_prefix": "$DEVICE_REWRITED_OUTPUT_DIR",
"output_dir": "${RANGE_DECOMPOSE_OUTPUT_DIR}",
"subgraph_ranges_json_root": "$SPLIT_POINTS_OUTPUT_DIR",
"subgraph_ranges_json_root": "${SUBGRAPH_RANGES_JSON_ROOT}",
"subgraph_ranges_json_file_name": "typical_subgraph_ranges.json",
"group_head_and_tail": false,
"chain_style": false
"chain_style": false,
"device": "cuda"
}
}
EOF
Expand Down Expand Up @@ -216,8 +204,8 @@ function remove_duplicate_renamed_graphs() {
--target-dir ${DEDUPLICATED_OUTPUT_DIR}
}

function gen_fusible_subgraphs() {
echo ">>> [8] Generate fusible subgraphs for subgraph samples under ${DEVICE_REWRITED_OUTPUT_DIR}."
function gen_fusible_subgraph_ranges() {
echo ">>> [8] Generate fusible subgraphs for subgraph samples under ${DEDUPLICATED_OUTPUT_DIR}."
echo ">>>"
python3 -m graph_net.model_path_handler \
--use-subprocess \
Expand Down Expand Up @@ -268,16 +256,23 @@ EOF
}
EOF
)
}

python3 -m graph_net.model_path_handler \
--model-path-list "$model_list" \
--handler-config $(base64 -w 0 <<EOF
function subgraph_dimension_generalizer(){
echo ">>> [9] Generate dimension generalized subgraph samples under ${DIMENSION_GENERALIZED_OUTPUT_DIR}."
for index in {0..8}; do
echo ">>> Generating dimension generalized subgraph variant index: ${index}"
dimension_generalized_sample_list="${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}/dimension_generalized_sample_list.txt"
generate_subgraph_list ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} ${dimension_generalized_samples_list}
python3 -m graph_net.model_path_handler \
--model-path-list "${dimension_generalized_sample_list}" \
--handler-config $(base64 -w 0 <<EOF
{
"handler_path": "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/subgraph_generator.py",
"handler_class_name": "SubgraphGenerator",
"handler_config": {
"model_path_prefix": "$GRAPH_NET_ROOT",
"output_dir": "$FUSIBLE_SUBGRAPH_SAMPLES_DIR",
"model_path_prefix": "${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}",
"output_dir": "${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}",
"subgraph_ranges_json_root": "$GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR",
"subgraph_ranges_json_file_name": "grouped_fusible_subgraph_ranges.json",
"device": "cuda",
Expand All @@ -286,14 +281,14 @@ EOF
}
EOF
)
done
}


function rename_fusible_subgraph() {
echo ">>> [9] Rename subgraph samples under ${FUSIBLE_SUBGRAPH_SAMPLES_DIR}."
function rename_dimension_generalized_fusible_subgraph() {
echo ">>> [10] Rename subgraph samples under ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR}."
echo ">>>"
python3 -m graph_net.model_path_handler \
--model-path-list ${fusible_subgraph_list} \
--model-path-list ${dimension_generalized_subgraph_list} \
--handler-config=$(base64 -w 0 <<EOF
{
"handler_path": "$GRAPH_NET_ROOT/graph_net/sample_pass/ast_graph_variable_renamer.py",
Expand All @@ -302,65 +297,41 @@ function rename_fusible_subgraph() {
"device": "cuda",
"try_run": false,
"resume": ${RESUME},
"model_path_prefix": "${FUSIBLE_SUBGRAPH_SAMPLES_DIR}",
"model_path_prefix": "${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR}",
"data_input_predicator_filepath": "$GRAPH_NET_ROOT/graph_net/torch/constraint_util.py",
"data_input_predicator_class_name": "RenamedDataInputPredicator",
"model_runnable_predicator_filepath": "$GRAPH_NET_ROOT/graph_net/torch/constraint_util.py",
"model_runnable_predicator_class_name": "ModelRunnablePredicator",
"output_dir": "$RENAMED_FUSIBLE_SUBGRAPH_DIR"
"output_dir": "${RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}"
}
}
EOF
)
}

function remove_duplicate_fusible_graphs() {
echo ">>> [10] Remove duplicated subgraph samples under ${RENAMED_FUSIBLE_SUBGRAPH_DIR}."
function remove_duplicate_dimension_generalized_fusible_graphs() {
echo ">>> [11] Remove duplicated subgraph samples under ${RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}."
echo ">>>"
python3 -m graph_net.tools.deduplicated \
--samples-dir ${RENAMED_FUSIBLE_SUBGRAPH_DIR} \
--target-dir ${DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR}
}

function get_dimension_subgraph(){
echo ">>> [11] Generating dimension_subgraph samples under ${DIMENSION_GENERALIZER_OUTPUT_DIR}."
for index in {0..8}; do
echo ">>> Generating dimension_subgraph variant index: ${index}"
python3 -m graph_net.model_path_handler \
--model-path-list "${DIMENSION_GENERALIZER_OUTPUT_DIR}/${index}/${dimension_generalizer_samples_list}" \
--handler-config $(base64 -w 0 <<EOF
{
"handler_path": "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/subgraph_generator.py",
"handler_class_name": "SubgraphGenerator",
"handler_config": {
"model_path_prefix": "${DIMENSION_GENERALIZER_OUTPUT_DIR}/${index}",
"output_dir": "${DIMENSION_SUBGRAPH_SAMPLES_OUTPUT_DIR}/${index}",
"subgraph_ranges_json_root": "$GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR",
"subgraph_ranges_json_file_name": "grouped_fusible_subgraph_ranges.json",
"device": "cuda",
"resume": ${RESUME}
}
}
EOF
)
python3 -m graph_net.tools.deduplicated \
--samples-dir ${RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} \
--target-dir ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index}
done
}

function generate_unittests() {
echo ">>> [12] Generate unittests for subgraph samples under ${DIMENSION_SUBGRAPH_SAMPLES_OUTPUT_DIR}. "
echo ">>> [12] Generate unittests for subgraph samples under ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}. "
echo ">>>"
for index in {0..8}; do
echo ">>> Generating unittests variant index: ${index}"
python3 -m graph_net.model_path_handler \
--model-path-list "${DIMENSION_SUBGRAPH_SAMPLES_OUTPUT_DIR}/${index}/dimension_subgraph_list.txt" \
--handler-config=$(base64 -w 0 <<EOF
python3 -m graph_net.model_path_handler \
--model-path-list ${deduplicated_fusible_subgraphs_list} \
--handler-config=$(base64 -w 0 <<EOF
{
"handler_path": "${GRAPH_NET_ROOT}/graph_net/sample_pass/agent_unittest_generator.py",
"handler_class_name": "AgentUnittestGeneratorPass",
"handler_config": {
"framework": "torch",
"model_path_prefix": "${DIMENSION_SUBGRAPH_SAMPLES_OUTPUT_DIR}/${index}",
"output_dir": "${UNITTESTS_OUTPUT_DIR}/${index}",
"model_path_prefix": "${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}",
"output_dir": "${UNITTESTS_OUTPUT_DIR}",
"device": "cuda",
"generate_main": false,
"try_run": true,
Expand All @@ -371,19 +342,20 @@ function generate_unittests() {
}
EOF
)
done
}

main() {
timestamp=`date +%Y%m%d_%H%M`
suffix="${OP_RANGE}ops_${timestamp}"

# rewrite the device in model to cuda
rewrite_device 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rewrite_device_${suffix}.txt
generate_subgraph_list ${DEVICE_REWRITED_OUTPUT_DIR} ${device_rewrited_subgraph_list}

dimension_generalizer 2>&1 | tee ${DIMENSION_GENERALIZER_OUTPUT_DIR}/log_dimension_generalizer_${suffix}.txt
generate_subgraph_list_by_index ${DIMENSION_GENERALIZER_OUTPUT_DIR} ${dimension_generalizer_samples_list} 8

generate_subgraph_list ${DEVICE_REWRITED_OUTPUT_DIR} ${device_rewrited_sample_list}

# whole-graph dimension generalization
dimension_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dimension_generalizer_${suffix}.txt

# typical subgraph decomposition
generate_op_names 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_op_names_${suffix}.txt
generate_split_point 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_split_point_${suffix}.txt
range_decompose 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_range_decompose_${suffix}.txt
Expand All @@ -393,16 +365,18 @@ main() {
remove_duplicate_renamed_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_renamed_graphs_${suffix}.txt
generate_subgraph_list ${DEDUPLICATED_OUTPUT_DIR} ${deduplicated_subgraph_list}

gen_fusible_subgraphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_fusible_subgraphs_${suffix}.txt
generate_subgraph_list ${FUSIBLE_SUBGRAPH_SAMPLES_DIR} ${fusible_subgraph_list}

rename_fusible_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_fusible_subgraph_${suffix}.txt
remove_duplicate_fusible_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_fusible_graphs_${suffix}.txt
generate_subgraph_list ${DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list}

get_dimension_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_get_dimension_subgraph_${suffix}.txt
generate_subgraph_list_by_index ${DIMENSION_SUBGRAPH_SAMPLES_OUTPUT_DIR} dimension_subgraph_list.txt 8

# generate fusible subgraph ranges
gen_fusible_subgraph_ranges 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_fusible_subgraphs_${suffix}.txt

# subgraph dimension generalization
subgraph_dimension_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_subgraph_dimension_generalizer_${suffix}.txt
generate_generalized_subgraph_list ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR} ${dimension_generalized_subgraph_list}

rename_dimension_generalized_fusible_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_dimension_generalized_subgraph_${suffix}.txt
remove_duplicate_dimension_generalized_fusible_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_dimension_generalized_subgraphs_${suffix}.txt
generate_generalized_subgraph_list ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list}

# generate kernelbench format unittest
generate_unittests 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_${suffix}.txt
}

Expand Down
Loading