forked from stanfordnlp/pyvene
Commit 4fa209d (parent: dcbf83b)
Showing 10 changed files with 8,919 additions and 165 deletions.
@@ -1,3 +1,4 @@
 results*/
 results_notebook/
 logic_data/*.pkl
+wandb/
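The added wandb/ entry keeps local Weights & Biases run directories out of version control, matching the --is_wandb flag used in the notebook below.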
@@ -0,0 +1,251 @@
# In[ ]:

from utils.train_utils import *
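# Note: no other imports appear in this notebook, so this star import has to
# supply everything used below: argparse, sys, os, torch, AutoTokenizer,
# AutoConfig, logger, CACHE_DIR, prepare_dataloader,
# AlignableLlamaForCausalLM, get_linear_schedule_with_warmup, and AlpacaAligner.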
# In[ ]:

if __name__ == '__main__':
    is_notebook = False
    try:
        cmd = argparse.ArgumentParser(description='alignment training arguments')
        cmd.add_argument('--train_batch_size', default=128, type=int, help='training batch size')
        cmd.add_argument('--eval_batch_size', default=128, type=int, help='evaluation batch size')
        cmd.add_argument('--lr', default=0.01, type=float, help='learning rate')
        cmd.add_argument('--data_path', required=True, type=str, help='path to the data directory')
        cmd.add_argument('--train_data_path', required=True, type=str, help='path to the training corpus')
        cmd.add_argument('--test_data_path', required=True, type=str, help='path to the test corpus')
        cmd.add_argument('--encoder_config_path', type=str, help='path to the encoder config')
        cmd.add_argument('--decoder_config_path', type=str, help='path to the decoder config')
        cmd.add_argument('--max_seq_len', default=512, type=int)
        cmd.add_argument('--seed', default=42, type=int)
        cmd.add_argument('--gradient_accumulation_steps', default=1, type=int)
        cmd.add_argument('--output_dir', required=True, type=str, help='save dir')
        cmd.add_argument('--local_rank', default=-1, type=int, help='multi gpu training')
        cmd.add_argument('--epochs', default=10, type=int, help='training epochs')
        cmd.add_argument('--model_path', type=str, required=False, default=None)
        cmd.add_argument('--warm_up', type=float, default=0.1)
        cmd.add_argument('--is_wandb', default=False, action='store_true')
        cmd.add_argument('--log_step', default=10, type=int)
        cmd.add_argument('--valid_steps', default=500, type=int)
        cmd.add_argument('--early_stopping', default=5, type=int)
        cmd.add_argument('--device', default="cuda", type=str)
        cmd.add_argument('--do_align', default=False, action='store_true')
        cmd.add_argument('--do_eval', default=False, action='store_true')
        cmd.add_argument('--do_test', default=False, action='store_true')

        cmd.add_argument('--aligning_layer_n', default=0, type=int)
        cmd.add_argument('--number_of_aligning_tokens', default="", type=str, help='[START_TOKEN];[END_TOKEN]')
        cmd.add_argument('--n_training_examples', default=10000, type=int)
        cmd.add_argument('--n_eval_examples', default=1000, type=int)
        cmd.add_argument('--task_name', default="cost_no_type", type=str)
        cmd.add_argument('--task_config', default="", type=str)

        cmd.add_argument('--unit_test_mode', default=False, action='store_true')

        args = cmd.parse_args(sys.argv[1:])
    except SystemExit:
        # argparse exits when required flags are missing, which is what happens
        # inside a notebook kernel; fall back to hard-coded notebook defaults.
        is_notebook = True
        parser = argparse.ArgumentParser()
        args = parser.parse_args([])
        args.train_batch_size = 8
        args.eval_batch_size = 8
        args.gradient_accumulation_steps = 16
        args.lr = 1e-4
        args.seed = 42
        args.output_dir = "./results_notebook/"
        args.epochs = 1
        args.warm_up = 0.1
        args.is_wandb = False
        args.log_step = 10
        args.valid_steps = 100  # set to -1 to skip evaluation during training
        args.early_stopping = 999  # a large value effectively disables early stopping
        args.device = "cuda"
        args.do_align = True
        args.do_eval = True
        args.n_gpu = 1

        # alignment search setting
        args.aligning_layer_n = 16
        args.number_of_aligning_tokens = "79;80"

        args.aligning_basis_n_per_variable = 128
        args.aligning_var_n = 2
        args.task_config = "3.50;8.50;0.00;9.99"
        args.n_training_examples = 1000
        args.n_eval_examples = 200
        args.task_name = "cost_no_type"

        args.unit_test_mode = False

        print("Running in a notebook environment.")
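For reference, the script form of this cell expects its required flags on the command line; a hypothetical invocation (the script name and data paths are placeholders, not taken from this commit):

    python run_alignment.py \
        --data_path ./logic_data/ \
        --train_data_path ./logic_data/train.pkl \
        --test_data_path ./logic_data/test.pkl \
        --output_dir ./results/ \
        --do_align --do_eval \
        --aligning_layer_n 16 \
        --number_of_aligning_tokens "79;80" \
        --task_name cost_no_type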
# In[ ]:

###################
# data loaders
###################
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="../alpaca_7b/",
    cache_dir=CACHE_DIR
)
train_dataloader, eval_dataloader = prepare_dataloader(args, tokenizer)

###################
# model object loading
###################
# "79;80" -> intervene on token positions 79 through 80
start_token, end_token = args.number_of_aligning_tokens.split(";")
alignment_config = {
    "token_range": [int(start_token), int(end_token)]
}

# each aligned variable claims a contiguous slice of the rotated basis
if args.aligning_var_n == 1:
    intervention_config = {
        0: [[0, args.aligning_basis_n_per_variable]]
    }
elif args.aligning_var_n == 2:
    intervention_config = {
        0: [[0, args.aligning_basis_n_per_variable]],
        1: [[args.aligning_basis_n_per_variable, 2 * args.aligning_basis_n_per_variable]],
    }
else:
    raise ValueError(f"unsupported aligning_var_n: {args.aligning_var_n}")
logger.info(f"intervention_config = {intervention_config}")
logger.info(f"alignment_config = {alignment_config}")

if args.unit_test_mode:
    logger.info("Loading dummy model for testing ...")
    config = AutoConfig.from_pretrained("../alpaca_7b/")
    config.intermediate_size = 512
    config.hidden_size = 512
    config.num_attention_heads = 2
    config.num_hidden_layers = 32
    model = AlignableLlamaForCausalLM(
        config=config,
        alignment_config=alignment_config,
        intervention_config=intervention_config
    )
else:
    logger.info("Loading Alpaca 7B, takes ~2 minutes ...")
    model = AlignableLlamaForCausalLM.from_pretrained(
        "../alpaca_7b/",
        alignment_config=alignment_config,
        intervention_config=intervention_config
    )

device = "cuda"
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)
model.to(device)
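The two-branch construction of intervention_config above follows a single pattern: variable v gets the contiguous slice [v*k, (v+1)*k) of rotated-basis dimensions, where k = aligning_basis_n_per_variable. A minimal sketch of that pattern (the helper name is hypothetical, not from this commit):

    def build_intervention_config(n_vars, basis_per_var):
        # variable v claims rotated-basis dimensions [v*k, (v+1)*k)
        return {v: [[v * basis_per_var, (v + 1) * basis_per_var]]
                for v in range(n_vars)}

    # matches the notebook defaults (aligning_var_n=2, basis size 128)
    assert build_intervention_config(2, 128) == {0: [[0, 128]], 1: [[128, 256]]}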
# In[ ]:

# freeze everything except the learned rotation (the DAS parameters)
for name, param in model.named_parameters():
    if "rotate_layer" not in name:
        param.requires_grad = False

t_total = int(len(train_dataloader) * args.epochs)
warm_up_steps = int(args.warm_up * t_total)
# NB: this access assumes a single GPU; if the model was wrapped in
# DataParallel above, nn.DataParallel does not forward attribute access,
# so the rotation layer lives at model.module.model.rotate_layer instead.
optimizer = torch.optim.Adam(model.model.rotate_layer.parameters(), lr=args.lr)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=warm_up_steps,
    num_training_steps=t_total
)
"\n", | ||
"###################\n", | ||
"# trainer loading\n", | ||
"###################\n", | ||
"logger.info(f\"Aligning layer = {args.aligning_layer_n}\")\n", | ||
"model.model.alignment_config['layer'] = args.aligning_layer_n\n", | ||
"run_name = f\"alpaca-7B.task.{args.task_name}.config.{args.task_config}.\"\\\n", | ||
" f\"seed.{args.seed}.intl.{args.aligning_layer_n}.intr.{alignment_config['token_range'][0]}.\"\\\n", | ||
" f\"{alignment_config['token_range'][1]}\"\n", | ||
"\n", | ||
"is_master = True\n", | ||
"if not os.path.exists(args.output_dir) and is_master:\n", | ||
" os.mkdir(args.output_dir)\n", | ||
"os.environ[\"WANDB_PROJECT\"] = f\"ToM-DAS\"\n", | ||
"output_dir = os.path.join(args.output_dir, run_name)\n", | ||
"if not os.path.exists(output_dir) and is_master:\n", | ||
" os.mkdir(output_dir)\n", | ||
"\n", | ||
"aligner = AlpacaAligner(\n", | ||
" model,\n", | ||
" logger=logger,\n", | ||
" args=args,\n", | ||
" is_master=is_master,\n", | ||
" n_gpu=torch.cuda.device_count(),\n", | ||
" model_name=run_name,\n", | ||
" intervention_config=intervention_config,\n", | ||
" device=device\n", | ||
")\n", | ||
"\n", | ||
"# Train\n", | ||
"if args.do_align:\n", | ||
" aligner.train(\n", | ||
" train_dataloader, eval_dataloader,\n", | ||
" optimizer, scheduler, \n", | ||
" log_step=args.log_step, valid_steps=args.valid_steps,\n", | ||
" output_dir=output_dir, epochs=args.epochs, \n", | ||
" gradient_accumulation_steps=args.gradient_accumulation_steps,\n", | ||
" )" | ||
] | ||
} | ||
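As a quick sanity check after the freezing step above (a minimal sketch; it assumes nothing beyond the named_parameters call already used):

    trainable = [n for n, p in model.named_parameters() if p.requires_grad]
    print(f"{len(trainable)} trainable tensors, e.g. {trainable[:3]}")
    # every surviving parameter should belong to the rotation layer
    assert all("rotate_layer" in n for n in trainable)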
# notebook metadata: kernel Python 3 (ipykernel), Python 3.8.12, nbformat 4