From 1718168cfda8da9c3de3acf6daa0522ead4bf316 Mon Sep 17 00:00:00 2001 From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> Date: Mon, 3 Oct 2022 22:13:27 +0800 Subject: [PATCH] Add documentation (#79) * fix bugs of import functions * fix bug of context * add example of new input * new format of submit input. add dargs check. * command line option for old-style compatibility * fix s3 config of commands * add showkey, fix minor issues * add missing file * fix bug * add guidance for dpgen2 command and input scripts. Co-authored-by: Han Wang --- docs/alloy_configs.rst | 7 ++ docs/cli.rst | 1 + docs/index.rst | 7 +- docs/input.md | 178 +++++++++++++++++++++++++++++++ docs/op_configs.rst | 3 + docs/quickcli.md | 86 +++++++++++++++ docs/step_configs.rst | 9 ++ docs/submit_args.rst | 7 ++ dpgen2/entrypoint/main.py | 25 +++++ dpgen2/entrypoint/showkey.py | 25 +++++ dpgen2/entrypoint/submit.py | 7 +- dpgen2/entrypoint/submit_args.py | 2 +- examples/almg/input.json | 2 +- 13 files changed, 354 insertions(+), 5 deletions(-) create mode 100644 docs/alloy_configs.rst create mode 100644 docs/input.md create mode 100644 docs/quickcli.md create mode 100644 docs/step_configs.rst create mode 100644 docs/submit_args.rst create mode 100644 dpgen2/entrypoint/showkey.py diff --git a/docs/alloy_configs.rst b/docs/alloy_configs.rst new file mode 100644 index 00000000..5f3e50ac --- /dev/null +++ b/docs/alloy_configs.rst @@ -0,0 +1,7 @@ +Alloy configs +============= + +.. _alloy configs: +.. dargs:: + :module: dpgen2.utils.alloy_conf + :func: generate_alloy_conf_args diff --git a/docs/cli.rst b/docs/cli.rst index 743d5819..a3520e8a 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -1,6 +1,7 @@ Command line interface ====================== +.. _fullcli: .. 
argparse:: :module: dpgen2.entrypoint.main :func: main_parser diff --git a/docs/index.rst b/docs/index.rst index ed679515..8f1951f3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,8 +12,13 @@ DPGEN2 is the 2nd generation of the Deep Potential GENerator. :maxdepth: 3 :caption: User Guide + quickcli cli + input.md + submit_args op_configs + alloy_configs + step_configs .. _developer-guide: @@ -31,4 +36,4 @@ DPGEN2 is the 2nd generation of the Deep Potential GENerator. * :ref:`search` .. _feedback: -.. _affiliated packages: \ No newline at end of file +.. _affiliated packages: diff --git a/docs/input.md b/docs/input.md new file mode 100644 index 00000000..9e5e86cb --- /dev/null +++ b/docs/input.md @@ -0,0 +1,178 @@ +(inputscript)= +# Guide on writing input scripts for dpgen2 commands + +## Preliminaries + +The reader of this doc is assumed to be familiar with the concurrent learning algorithm that dpgen2 implements. If not, one may check [this paper](https://doi.org/10.1016/j.cpc.2020.107206). + +## The input script for all dpgen2 commands + +For all the dpgen2 commands, one needs to provide `dflow` global configurations. For example, +```json + "dflow_config" : { + "host" : "http://address.of.the.host:port" + }, + "dflow_s3_config" : { + "s3_endpoint" : "address.of.the.s3.server:port" + }, +``` +`dpgen2` simply passes all keys of `"dflow_config"` to `dflow.config` and all keys of `"dflow_s3_config"` to `dflow.s3_config`. + + +## The input script for `submit` and `resubmit` + +The full documentation of the `submit` and `resubmit` script can be [found here](submitargs). This documentation provides a fast guide on how to write the input script. + +In the input script of `dpgen2 submit` and `dpgen2 resubmit`, one needs to provide the definition of the workflow and how it is executed. One may find an example input script in the [dpgen2 Al-Mg alloy example](../examples/almg/input.json).
+ +The definition of the workflow can be provided by the following sections: + +### Inputs + +This section provides the inputs to start a dpgen2 workflow. An example for the Al-Mg alloy +```json +"inputs": { + "type_map": ["Al", "Mg"], + "mass_map": [27, 24], + "init_data_sys": [ + "path/to/init/data/system/0", + "path/to/init/data/system/1" + ] +} +``` +The key `"init_data_sys"` provides the initial training data to kick-off the training of deep potential (DP) models. + + +### Training + +This section defines how a model is trained. +```json +"train" : { + "type" : "dp", + "numb_models" : 4, + "config" : {}, + "template_script" : { + "_comment" : "omitted content of template script" + }, + "_comment" : "all" +} +``` +The `"type" : "dp"` tells that the training method is `"dp"`, i.e. calling [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit) to train DP models. +The `"config"` key defines the training configs, see [the full documentation](rundptrainargs). +The `"template_script"` provides the template training script in `json` format. + + +### Exploration + +This section defines how the configuration space is explored.
+```json +"explore" : { + "type" : "lmp", + "config" : { + "command": "lmp -var restart 0" + }, + "max_numb_iter" : 5, + "conv_accuracy" : 0.9, + "fatal_at_max" : false, + "f_trust_lo": 0.05, + "f_trust_hi": 0.50, + "configurations": [ + { + "lattice" : ["fcc", 4.57], + "replicate" : [2, 2, 2], + "numb_confs" : 30, + "concentration" : [[1.0, 0.0], [0.5, 0.5], [0.0, 1.0]] + }, + { + "lattice" : ["fcc", 4.57], + "replicate" : [3, 3, 3], + "numb_confs" : 30, + "concentration" : [[1.0, 0.0], [0.5, 0.5], [0.0, 1.0]] + } + ], + "stages": [ + { "_idx": 0, "ensemble": "nvt", "nsteps": 20, "press": null, "conf_idx": [0], "temps": [50,100], "trj_freq": 10, "n_sample" : 3 }, + { "_idx": 1, "ensemble": "nvt", "nsteps": 20, "press": null, "conf_idx": [1], "temps": [50,100], "trj_freq": 10, "n_sample" : 3 } + ] +} +``` +The `"type" : "lmp"` means that configurations are explored by LAMMPS DPMD runs. +The `"config"` key defines the lmp configs, see [the full documentation](runlmpargs). +The `"configurations"` provides the initial configurations (coordinates of atoms and the simulation cell) of the DPMD simulations. It is a list. The elements of the list can be + +- `list[str]`: The strings provide the paths to the configuration files. +- `dict`: Automatic alloy configuration generator. See [the detailed doc](alloy_configs) of the allowed keys. + +The `"stages"` defines the exploration stages. It is a list of `dict`s, with each `dict` defining a stage. The `"ensemble"`, `"nsteps"`, `"press"`, `"temps"`, `"trj_freq"` keys are self-explanatory. `"conf_idx"` picks initial configurations of DPMD simulations from the `"configurations"` list; it provides the indexes of the elements in the `"configurations"` list. `"n_sample"` tells the number of configurations randomly sampled from the set picked by `"conf_idx"` for each thermodynamic state. All configurations picked by `"conf_idx"` have the same probability to be sampled.
The default value of `"n_sample"` is `null`; in this case all picked configurations are sampled. In the example, each stage has 3 samples and 2 thermodynamic states (NVT, T=50 and 100K), so each iteration runs 3x2=6 NVT DPMD simulations. + + +### FP + +This section defines the first-principles (FP) calculation. + +```json +"fp" : { + "type" : "vasp", + "config" : { + "command": "source /opt/intel/oneapi/setvars.sh && mpirun -n 16 vasp_std" + }, + "task_max": 2, + "pp_files": {"Al" : "vasp/POTCAR.Al", "Mg" : "vasp/POTCAR.Mg"}, + "incar": "vasp/INCAR", + "_comment" : "all" +} +``` +The `"type" : "vasp"` means that the first-principles calculations are performed by VASP. +The `"config"` key defines the vasp configs, see [the full documentation](runvaspargs). +The `"task_max"` key defines the maximal number of vasp calculations in each dpgen2 iteration. +The `"pp_files"` and `"incar"` keys provide the pseudopotential files and the template incar file. + + +### Configuration of dflow step + +The execution units of the dpgen2 are the dflow `Step`s. How each step is executed is defined by the `"step_configs"`. +```json +"step_configs":{ + "prep_train_config" : { + "_comment" : "content omitted" + }, + "run_train_config" : { + "_comment" : "content omitted" + }, + "prep_explore_config" : { + "_comment" : "content omitted" + }, + "run_explore_config" : { + "_comment" : "content omitted" + }, + "prep_fp_config" : { + "_comment" : "content omitted" + }, + "run_fp_config" : { + "_comment" : "content omitted" + }, + "select_confs_config" : { + "_comment" : "content omitted" + }, + "collect_data_config" : { + "_comment" : "content omitted" + }, + "cl_step_config" : { + "_comment" : "content omitted" + }, + "_comment" : "all" +}, +``` +The configs for prepare training, run training, prepare exploration, run exploration, prepare fp, run fp, select configurations, collect data and concurrent learning steps are given correspondingly.
+ +The readers are referred to [this page](stepconfigargs) for a full documentation of the step configs. + +Any of the configs in the `step_configs` can be omitted. If so, the config of that step is set to the default step config, which is provided by the following section, for example, +```json +"default_step_config" : { + "template_config" : { + "image" : "dpgen2:x.x.x" + } +}, +``` +The way of writing the `default_step_config` is the same as any step config in the `step_configs`. One may refer to [this page](stepconfigargs) for full documentation. diff --git a/docs/op_configs.rst b/docs/op_configs.rst index da49b334..90e1502b 100644 --- a/docs/op_configs.rst +++ b/docs/op_configs.rst @@ -3,18 +3,21 @@ OP Configs RunDPTrain ---------- +.. _rundptrainargs: .. dargs:: :module: dpgen2.op.run_dp_train :func: config_args RunLmp ------ +.. _runlmpargs: .. dargs:: :module: dpgen2.op.run_lmp :func: config_args RunVasp ------- +.. _runvaspargs: .. dargs:: :module: dpgen2.op.run_vasp :func: config_args diff --git a/docs/quickcli.md b/docs/quickcli.md new file mode 100644 index 00000000..d30c985f --- /dev/null +++ b/docs/quickcli.md @@ -0,0 +1,86 @@ +# Guide on dpgen2 commands + +One may use dpgen2 through the command line interface. A full documentation of the cli is found [here](fullcli). + +## Submit a workflow +The dpgen2 workflow can be submitted via the `submit` command +```bash +dpgen2 submit input.json +``` +where `input.json` is the input script. A guide of writing the script is found [here](inputscript). +When a workflow is submitted, an ID (WFID) of the workflow will be printed for later reference. + +## Check the convergence of a workflow +The convergence of stages of the workflow can be checked by the `status` command. It prints the indexes of the finished stages, iterations, and the accurate, candidate and failed ratio of explored configurations of each iteration. +```bash +$ dpgen2 status input.json WFID +# stage id_stg. iter. accu. cand. fail.
+# Stage 0 -------------------- + 0 0 0 0.8333 0.1667 0.0000 + 0 1 1 0.7593 0.2407 0.0000 + 0 2 2 0.7778 0.2222 0.0000 + 0 3 3 1.0000 0.0000 0.0000 +# Stage 0 converged YES reached max numb iterations NO +# All stages converged +``` + +## Watch the progress of a workflow +The progress of a workflow can be watched on-the-fly +```bash +$ dpgen2 watch input.json WFID +INFO:root:steps iter-000000--prep-run-train----------------------- finished +INFO:root:steps iter-000000--prep-run-lmp------------------------- finished +INFO:root:steps iter-000000--prep-run-fp-------------------------- finished +INFO:root:steps iter-000000--collect-data------------------------- finished +INFO:root:steps iter-000001--prep-run-train----------------------- finished +INFO:root:steps iter-000001--prep-run-lmp------------------------- finished +... +``` +The artifacts can be downloaded on-the-fly with the `-d` flag. + + +## Show the keys of steps + +Each dpgen2 step is assigned a unique key. The keys of the finished steps can be checked with the `showkey` command +```bash $ dpgen2 showkey input.json WFID + 0 : init--scheduler + 1 : init--id + 2 : iter-000000--prep-train + 3 -> 6 : iter-000000--run-train-0000 -> iter-000000--run-train-0003 + 7 : iter-000000--prep-run-train + 8 : iter-000000--prep-lmp + 9 -> 17 : iter-000000--run-lmp-000000 -> iter-000000--run-lmp-000008 + 18 : iter-000000--prep-run-lmp + 19 : iter-000000--select-confs + 20 : iter-000000--prep-fp + 21 -> 24 : iter-000000--run-fp-000000 -> iter-000000--run-fp-000003 + 25 : iter-000000--prep-run-fp + 26 : iter-000000--collect-data + 27 : iter-000000--block + 28 : iter-000000--scheduler + 29 : iter-000000--id + 30 : iter-000001--prep-train + 31 -> 34 : iter-000001--run-train-0000 -> iter-000001--run-train-0003 + 35 : iter-000001--prep-run-train + 36 : iter-000001--prep-lmp + 37 -> 45 : iter-000001--run-lmp-000000 -> iter-000001--run-lmp-000008 + 46 : iter-000001--prep-run-lmp + 47 : iter-000001--select-confs + 48 : iter-000001--prep-fp +
49 -> 52 : iter-000001--run-fp-000000 -> iter-000001--run-fp-000003 + 53 : iter-000001--prep-run-fp + 54 : iter-000001--collect-data + 55 : iter-000001--block + 56 : iter-000001--scheduler + 57 : iter-000001--id +``` + + +## Resubmit a workflow + +If a workflow stopped abnormally, one may submit a new workflow with some steps of the old workflow reused. +```bash +dpgen2 resubmit input.json WFID --reuse 0-49 +``` +The steps of workflow WFID 0-49 will be reused in the new workflow. The indexes of the steps are printed by `dpgen2 showkey`. In the example, all the steps before the `iter-000001--run-fp-000000` will be used in the new workflow. + diff --git a/docs/step_configs.rst b/docs/step_configs.rst new file mode 100644 index 00000000..a4c55ec8 --- /dev/null +++ b/docs/step_configs.rst @@ -0,0 +1,9 @@ +Step Configs +============ + +.. _stepconfigargs: +Configurations for dflow steps +------------------------------ +.. dargs:: + :module: dpgen2.utils.step_config + :func: step_conf_args diff --git a/docs/submit_args.rst b/docs/submit_args.rst new file mode 100644 index 00000000..87e0be67 --- /dev/null +++ b/docs/submit_args.rst @@ -0,0 +1,7 @@ +Arguments of the submit script +============================== + +.. _submitargs: +.. dargs:: + :module: dpgen2.entrypoint.submit_args + :func: submit_args diff --git a/dpgen2/entrypoint/main.py b/dpgen2/entrypoint/main.py index dba8e00d..440403b8 100644 --- a/dpgen2/entrypoint/main.py +++ b/dpgen2/entrypoint/main.py @@ -21,6 +21,9 @@ from .status import ( status, ) +from .showkey import ( + showkey, +) from .download import ( download, ) @@ -93,6 +96,21 @@ def main_parser() -> argparse.ArgumentParser: "-o", "--old-compatible", action='store_true', help="compatible with old-style input script used in dpgen2 < 0.0.6."
) + ########################################## + # show key + parser_showkey = subparsers.add_parser( + "showkey", + help="Print the keys of the successful DPGEN2 steps", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser_showkey.add_argument( + "CONFIG", help="the config file in json format." + ) + parser_showkey.add_argument( + "ID", help="the ID of the existing workflow." + ) + + ########################################## # status parser_status = subparsers.add_parser( @@ -213,6 +231,13 @@ def main(): status( wfid, config, ) + elif args.command == "showkey": + with open(args.CONFIG) as fp: + config = json.load(fp) + wfid = args.ID + showkey( + wfid, config, + ) elif args.command == "download": with open(args.CONFIG) as fp: config = json.load(fp) diff --git a/dpgen2/entrypoint/showkey.py b/dpgen2/entrypoint/showkey.py new file mode 100644 index 00000000..68fe064a --- /dev/null +++ b/dpgen2/entrypoint/showkey.py @@ -0,0 +1,25 @@ +import glob, dpdata, os, pickle +from pathlib import Path +from dflow import ( + Workflow, +) +from dpgen2.entrypoint.submit import successful_step_keys +from dpgen2.utils import ( + sort_slice_ops, + print_keys_in_nice_format, + workflow_config_from_dict, +) + +def showkey( + wf_id, + wf_config, +): + workflow_config_from_dict(wf_config) + wf = Workflow(id=wf_id) + all_step_keys = successful_step_keys(wf) + all_step_keys = sort_slice_ops( + all_step_keys, ['run-train', 'run-lmp', 'run-fp'],) + prt_str = print_keys_in_nice_format( + all_step_keys, ['run-train', 'run-lmp', 'run-fp'],) + print(prt_str) + diff --git a/dpgen2/entrypoint/submit.py b/dpgen2/entrypoint/submit.py index ad1fe67b..e02d7d50 100644 --- a/dpgen2/entrypoint/submit.py +++ b/dpgen2/entrypoint/submit.py @@ -221,7 +221,10 @@ def make_naive_exploration_scheduler( tgroup = NPTTaskGroup() ## ignore the expansion of sys_idx # get all file names of md initial configurations - sys_idx = job['sys_idx'] + try: + sys_idx = job['sys_idx'] + except KeyError: + 
sys_idx = job['conf_idx'] conf_list = [] for ii in sys_idx: conf_list += make_conf_list(sys_configs[ii], type_map) @@ -458,7 +461,7 @@ def successful_step_keys(wf): if wf_info.get_step(key=ii)[0]['phase'] == 'Succeeded': all_step_keys.append(ii) return all_step_keys - + def resubmit_concurrent_learning( wf_config, diff --git a/dpgen2/entrypoint/submit_args.py b/dpgen2/entrypoint/submit_args.py index 34295027..0eb9b506 100644 --- a/dpgen2/entrypoint/submit_args.py +++ b/dpgen2/entrypoint/submit_args.py @@ -144,7 +144,7 @@ def dpgen_step_config_args(default_config): ] -def submit_args(default_step_config): +def submit_args(default_step_config = normalize_step_dict({})): doc_step_configs = "Configurations for executing dflow steps" doc_upload_python_package = "Upload python package, for debug purpose" doc_inputs = "The input parameter and artifacts for dpgen2" diff --git a/examples/almg/input.json b/examples/almg/input.json index 7400c6e3..9e6df926 100644 --- a/examples/almg/input.json +++ b/examples/almg/input.json @@ -172,7 +172,7 @@ } ], "stages": [ - { "_idx": 0, "ensemble": "nvt", "nsteps": 20, "press": [1.0,2.0], "sys_idx": [0], "temps": [50,100], "trj_freq": 10, "n_sample" : 3 + { "_idx": 0, "ensemble": "nvt", "nsteps": 20, "press": [1.0,2.0], "conf_idx": [0], "temps": [50,100], "trj_freq": 10, "n_sample" : 3 } ], "_comment" : "all"