From 91667694a5874f4f83382d6c00d75e337f19627a Mon Sep 17 00:00:00 2001 From: M98M Date: Mon, 12 Feb 2024 04:17:42 +0330 Subject: [PATCH] Add files via upload --- notebooks/ft_pquad.ipynb | 3366 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 3366 insertions(+) create mode 100644 notebooks/ft_pquad.ipynb diff --git a/notebooks/ft_pquad.ipynb b/notebooks/ft_pquad.ipynb new file mode 100644 index 0000000..ae052c5 --- /dev/null +++ b/notebooks/ft_pquad.ipynb @@ -0,0 +1,3366 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r_8q5PzFZ0QM", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e7f9c346-4842-42eb-ee8f-6642762d5154" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'transformers'...\n", + "remote: Enumerating objects: 181456, done.\u001b[K\n", + "remote: Counting objects: 100% (283/283), done.\u001b[K\n", + "remote: Compressing objects: 100% (159/159), done.\u001b[K\n", + "remote: Total 181456 (delta 139), reused 201 (delta 95), pack-reused 181173\u001b[K\n", + "Receiving objects: 100% (181456/181456), 201.51 MiB | 17.87 MiB/s, done.\n", + "Resolving deltas: 100% (127092/127092), done.\n" + ] + } + ], + "source": [ + "!git clone https://github.com/huggingface/transformers" + ] + }, + { + "cell_type": "code", + "source": [ + "%cd /content/transformers\n", + "!pip install . --quiet" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3i1GMKdu50ny", + "outputId": "4d0ee337-b88a-4983-bde7-0b50c30b7c88" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/transformers\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Building wheel for transformers (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install accelerate datasets evaluate --quiet" + ], + "metadata": { + "id": "P_u_R_685eLL", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b836e58f-c25c-4960-d9af-117e059067f3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/270.9 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━\u001b[0m \u001b[32m194.6/270.9 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m270.9/270.9 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "%cd /content/transformers/examples/pytorch/question-answering\n", + "!python run_qa.py \\\n", + " --model_name_or_path \"sbunlp/fabert\" \\\n", + " --dataset_name \"Gholamreza/pquad\" \\\n", + " --do_eval \\\n", + " --do_train \\\n", + " --do_predict \\\n", + " --version_2_with_negative \\\n", + " --per_device_train_batch_size 16 \\\n", + " --per_device_eval_batch_size 16 \\\n", + " --metric_for_best_model f1 \\\n", + " --learning_rate 5e-5 \\\n", + " --warmup_ratio 0 \\\n", + " --weight_decay 0 \\\n", + " --num_train_epochs 2 \\\n", + " --max_seq_length 512 \\\n", + " --save_strategy epoch \\\n", + " --evaluation_strategy epoch \\\n", + " --doc_stride 128 \\\n", + " --output_dir /tmp/pquad/" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OsdIqbMc35dQ", + "outputId": "47822b47-b848-485a-9788-aee8d10c1bfe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/transformers/examples/pytorch/question-answering\n", + "2024-02-06 12:42:21.417694: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-02-06 12:42:21.417749: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-02-06 12:42:21.419090: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", 
+ "2024-02-06 12:42:22.544593: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "02/06/2024 12:42:25 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False\n", + "02/06/2024 12:42:25 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "auto_find_batch_size=False,\n", + "bf16=False,\n", + "bf16_full_eval=False,\n", + "data_seed=None,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_persistent_workers=False,\n", + "dataloader_pin_memory=True,\n", + "dataloader_prefetch_factor=None,\n", + "ddp_backend=None,\n", + "ddp_broadcast_buffers=None,\n", + "ddp_bucket_cap_mb=None,\n", + "ddp_find_unused_parameters=None,\n", + "ddp_timeout=1800,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "dispatch_batches=None,\n", + "do_eval=True,\n", + "do_predict=True,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_delay=0,\n", + "eval_steps=None,\n", + "evaluation_strategy=epoch,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "fsdp=[],\n", + "fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},\n", + "fsdp_min_num_params=0,\n", + "fsdp_transformer_layer_cls_to_wrap=None,\n", + "full_determinism=False,\n", + "gradient_accumulation_steps=1,\n", + "gradient_checkpointing=False,\n", + "gradient_checkpointing_kwargs=None,\n", + "greater_is_better=True,\n", + "group_by_length=False,\n", + "half_precision_backend=auto,\n", + "hub_always_push=False,\n", + "hub_model_id=None,\n", + "hub_private_repo=False,\n", + "hub_strategy=every_save,\n", + "hub_token=,\n", + "ignore_data_skip=False,\n", + "include_inputs_for_metrics=False,\n", + "include_num_input_tokens_seen=False,\n", + "include_tokens_per_second=False,\n", + "jit_mode_eval=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=0,\n", + "log_level=passive,\n", + "log_level_replica=warning,\n", + "log_on_each_node=True,\n", + "logging_dir=/tmp/pquad/runs/Feb06_12-42-25_fbe3ab783ddc,\n", + "logging_first_step=False,\n", + "logging_nan_inf_filter=True,\n", + "logging_steps=500,\n", + "logging_strategy=steps,\n", + "lr_scheduler_kwargs={},\n", + "lr_scheduler_type=linear,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=f1,\n", + "mp_parameters=,\n", + "neftune_noise_alpha=None,\n", + "no_cuda=False,\n", + "num_train_epochs=2.0,\n", + "optim=adamw_torch,\n", + "optim_args=None,\n", + "output_dir=/tmp/pquad/,\n", + "overwrite_output_dir=False,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=16,\n", + "per_device_train_batch_size=16,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=None,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=,\n", + "ray_scope=last,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard'],\n", + "resume_from_checkpoint=None,\n", + "run_name=/tmp/pquad/,\n", + "save_on_each_node=False,\n", + "save_only_model=False,\n", + "save_safetensors=True,\n", + "save_steps=500,\n", + "save_strategy=epoch,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "skip_memory_metrics=True,\n", + 
"split_batches=False,\n", + "tf32=None,\n", + "torch_compile=False,\n", + "torch_compile_backend=None,\n", + "torch_compile_mode=None,\n", + "torchdynamo=None,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_cpu=False,\n", + "use_ipex=False,\n", + "use_legacy_prediction_loop=False,\n", + "use_mps_device=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "/usr/local/lib/python3.10/dist-packages/datasets/load.py:1429: FutureWarning: The repository for Gholamreza/pquad contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/Gholamreza/pquad\n", + "You can avoid this message in future by passing the argument `trust_remote_code=True`.\n", + "Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.\n", + " warnings.warn(\n", + "https://huggingface.co/datasets/Gholamreza/pquad/resolve/main/pquad.py not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/af60dd7822e88b2d43a064300a281221050d30163c1871c3d4c6add91f0acd4e.bda586981fe7784e05f748e15cab4632486930295ec402e6854ecdc3abcc6a8f.py.incomplete\n", + "02/06/2024 12:42:27 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/Gholamreza/pquad/resolve/main/pquad.py not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/af60dd7822e88b2d43a064300a281221050d30163c1871c3d4c6add91f0acd4e.bda586981fe7784e05f748e15cab4632486930295ec402e6854ecdc3abcc6a8f.py.incomplete\n", + "Downloading builder script: 100% 4.41k/4.41k [00:00<00:00, 17.9MB/s]\n", + "storing https://huggingface.co/datasets/Gholamreza/pquad/resolve/main/pquad.py in cache at /root/.cache/huggingface/datasets/downloads/af60dd7822e88b2d43a064300a281221050d30163c1871c3d4c6add91f0acd4e.bda586981fe7784e05f748e15cab4632486930295ec402e6854ecdc3abcc6a8f.py\n", + "02/06/2024 12:42:28 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/Gholamreza/pquad/resolve/main/pquad.py in cache at /root/.cache/huggingface/datasets/downloads/af60dd7822e88b2d43a064300a281221050d30163c1871c3d4c6add91f0acd4e.bda586981fe7784e05f748e15cab4632486930295ec402e6854ecdc3abcc6a8f.py\n", + "creating metadata file for /root/.cache/huggingface/datasets/downloads/af60dd7822e88b2d43a064300a281221050d30163c1871c3d4c6add91f0acd4e.bda586981fe7784e05f748e15cab4632486930295ec402e6854ecdc3abcc6a8f.py\n", + "02/06/2024 12:42:28 - INFO - datasets.utils.file_utils - creating metadata file for /root/.cache/huggingface/datasets/downloads/af60dd7822e88b2d43a064300a281221050d30163c1871c3d4c6add91f0acd4e.bda586981fe7784e05f748e15cab4632486930295ec402e6854ecdc3abcc6a8f.py\n", + "https://huggingface.co/datasets/Gholamreza/pquad/resolve/main/README.md not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/a7d7435bbe10abd639bc26e0a4eac35b7ad9c438fa25c43a75365226380fb510.e05912f0331f8f1e779148e2090e3b737c2ebb3e77c9928e4df62369fbe3b77a.incomplete\n", + "02/06/2024 12:42:28 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/Gholamreza/pquad/resolve/main/README.md not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/a7d7435bbe10abd639bc26e0a4eac35b7ad9c438fa25c43a75365226380fb510.e05912f0331f8f1e779148e2090e3b737c2ebb3e77c9928e4df62369fbe3b77a.incomplete\n", + "Downloading 
readme: 100% 5.15k/5.15k [00:00<00:00, 18.9MB/s]\n", + "storing https://huggingface.co/datasets/Gholamreza/pquad/resolve/main/README.md in cache at /root/.cache/huggingface/datasets/downloads/a7d7435bbe10abd639bc26e0a4eac35b7ad9c438fa25c43a75365226380fb510.e05912f0331f8f1e779148e2090e3b737c2ebb3e77c9928e4df62369fbe3b77a\n", + "02/06/2024 12:42:28 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/Gholamreza/pquad/resolve/main/README.md in cache at /root/.cache/huggingface/datasets/downloads/a7d7435bbe10abd639bc26e0a4eac35b7ad9c438fa25c43a75365226380fb510.e05912f0331f8f1e779148e2090e3b737c2ebb3e77c9928e4df62369fbe3b77a\n", + "creating metadata file for /root/.cache/huggingface/datasets/downloads/a7d7435bbe10abd639bc26e0a4eac35b7ad9c438fa25c43a75365226380fb510.e05912f0331f8f1e779148e2090e3b737c2ebb3e77c9928e4df62369fbe3b77a\n", + "02/06/2024 12:42:28 - INFO - datasets.utils.file_utils - creating metadata file for /root/.cache/huggingface/datasets/downloads/a7d7435bbe10abd639bc26e0a4eac35b7ad9c438fa25c43a75365226380fb510.e05912f0331f8f1e779148e2090e3b737c2ebb3e77c9928e4df62369fbe3b77a\n", + "No config specified, defaulting to the single config: pquad/pquad\n", + "02/06/2024 12:42:28 - INFO - datasets.builder - No config specified, defaulting to the single config: pquad/pquad\n", + "Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/Gholamreza--pquad/ce0e1bf6a7a67398a195ccb3a16263336e977abad6e67800390346e8d9530ab0\n", + "02/06/2024 12:42:28 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/Gholamreza--pquad/ce0e1bf6a7a67398a195ccb3a16263336e977abad6e67800390346e8d9530ab0\n", + "Generating dataset pquad (/root/.cache/huggingface/datasets/Gholamreza___pquad/pquad/1.0.0/ce0e1bf6a7a67398a195ccb3a16263336e977abad6e67800390346e8d9530ab0)\n", + "02/06/2024 12:42:28 - INFO - datasets.builder - Generating dataset pquad (/root/.cache/huggingface/datasets/Gholamreza___pquad/pquad/1.0.0/ce0e1bf6a7a67398a195ccb3a16263336e977abad6e67800390346e8d9530ab0)\n", + "Downloading and preparing dataset pquad/pquad to /root/.cache/huggingface/datasets/Gholamreza___pquad/pquad/1.0.0/ce0e1bf6a7a67398a195ccb3a16263336e977abad6e67800390346e8d9530ab0...\n", + "02/06/2024 12:42:28 - INFO - datasets.builder - Downloading and preparing dataset pquad/pquad to /root/.cache/huggingface/datasets/Gholamreza___pquad/pquad/1.0.0/ce0e1bf6a7a67398a195ccb3a16263336e977abad6e67800390346e8d9530ab0...\n", + "Dataset not on Hf google storage. Downloading and preparing it from source\n", + "02/06/2024 12:42:28 - INFO - datasets.builder - Dataset not on Hf google storage. 
Downloading and preparing it from source\n", + "https://raw.githubusercontent.com/AUT-NLP/PQuAD/main/Dataset/Train.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/e49d5f650d69a5999fe6ceb4438a023cccdcf3e6519abc4dabce736f91595591.incomplete\n", + "02/06/2024 12:42:31 - INFO - datasets.utils.file_utils - https://raw.githubusercontent.com/AUT-NLP/PQuAD/main/Dataset/Train.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/e49d5f650d69a5999fe6ceb4438a023cccdcf3e6519abc4dabce736f91595591.incomplete\n", + "Downloading data: 26.4MB [00:00, 83.6MB/s]\n", + "storing https://raw.githubusercontent.com/AUT-NLP/PQuAD/main/Dataset/Train.json in cache at /root/.cache/huggingface/datasets/downloads/e49d5f650d69a5999fe6ceb4438a023cccdcf3e6519abc4dabce736f91595591\n", + "02/06/2024 12:42:32 - INFO - datasets.utils.file_utils - storing https://raw.githubusercontent.com/AUT-NLP/PQuAD/main/Dataset/Train.json in cache at /root/.cache/huggingface/datasets/downloads/e49d5f650d69a5999fe6ceb4438a023cccdcf3e6519abc4dabce736f91595591\n", + "creating metadata file for /root/.cache/huggingface/datasets/downloads/e49d5f650d69a5999fe6ceb4438a023cccdcf3e6519abc4dabce736f91595591\n", + "02/06/2024 12:42:32 - INFO - datasets.utils.file_utils - creating metadata file for /root/.cache/huggingface/datasets/downloads/e49d5f650d69a5999fe6ceb4438a023cccdcf3e6519abc4dabce736f91595591\n", + "https://raw.githubusercontent.com/AUT-NLP/PQuAD/main/Dataset/Validation.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/ea42ddfa9db6f39bc3249a878c853a6f6b466f6217a360bbb8afbac9410d84cc.incomplete\n", + "02/06/2024 12:42:32 - INFO - datasets.utils.file_utils - https://raw.githubusercontent.com/AUT-NLP/PQuAD/main/Dataset/Validation.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/ea42ddfa9db6f39bc3249a878c853a6f6b466f6217a360bbb8afbac9410d84cc.incomplete\n", + "Downloading data: 3.49MB [00:00, 51.1MB/s] \n", + "storing https://raw.githubusercontent.com/AUT-NLP/PQuAD/main/Dataset/Validation.json in cache at /root/.cache/huggingface/datasets/downloads/ea42ddfa9db6f39bc3249a878c853a6f6b466f6217a360bbb8afbac9410d84cc\n", + "02/06/2024 12:42:33 - INFO - datasets.utils.file_utils - storing https://raw.githubusercontent.com/AUT-NLP/PQuAD/main/Dataset/Validation.json in cache at /root/.cache/huggingface/datasets/downloads/ea42ddfa9db6f39bc3249a878c853a6f6b466f6217a360bbb8afbac9410d84cc\n", + "creating metadata file for /root/.cache/huggingface/datasets/downloads/ea42ddfa9db6f39bc3249a878c853a6f6b466f6217a360bbb8afbac9410d84cc\n", + "02/06/2024 12:42:33 - INFO - datasets.utils.file_utils - creating metadata file for /root/.cache/huggingface/datasets/downloads/ea42ddfa9db6f39bc3249a878c853a6f6b466f6217a360bbb8afbac9410d84cc\n", + "https://raw.githubusercontent.com/AUT-NLP/PQuAD/main/Dataset/Test.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/d6ba3b80ff2a6d0333454fac286694b5e777518ea141e0dcd7c0558b71624882.incomplete\n", + "02/06/2024 12:42:34 - INFO - datasets.utils.file_utils - https://raw.githubusercontent.com/AUT-NLP/PQuAD/main/Dataset/Test.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/d6ba3b80ff2a6d0333454fac286694b5e777518ea141e0dcd7c0558b71624882.incomplete\n", + 
"Downloading data: 3.45MB [00:00, 53.6MB/s] \n", + "storing https://raw.githubusercontent.com/AUT-NLP/PQuAD/main/Dataset/Test.json in cache at /root/.cache/huggingface/datasets/downloads/d6ba3b80ff2a6d0333454fac286694b5e777518ea141e0dcd7c0558b71624882\n", + "02/06/2024 12:42:34 - INFO - datasets.utils.file_utils - storing https://raw.githubusercontent.com/AUT-NLP/PQuAD/main/Dataset/Test.json in cache at /root/.cache/huggingface/datasets/downloads/d6ba3b80ff2a6d0333454fac286694b5e777518ea141e0dcd7c0558b71624882\n", + "creating metadata file for /root/.cache/huggingface/datasets/downloads/d6ba3b80ff2a6d0333454fac286694b5e777518ea141e0dcd7c0558b71624882\n", + "02/06/2024 12:42:34 - INFO - datasets.utils.file_utils - creating metadata file for /root/.cache/huggingface/datasets/downloads/d6ba3b80ff2a6d0333454fac286694b5e777518ea141e0dcd7c0558b71624882\n", + "Downloading took 0.0 min\n", + "02/06/2024 12:42:34 - INFO - datasets.download.download_manager - Downloading took 0.0 min\n", + "Checksum Computation took 0.0 min\n", + "02/06/2024 12:42:34 - INFO - datasets.download.download_manager - Checksum Computation took 0.0 min\n", + "Generating train split\n", + "02/06/2024 12:42:34 - INFO - datasets.builder - Generating train split\n", + "Generating train split: 0% 0/63994 [00:00> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--sbunlp--fabert/snapshots/9e6604e667a0b96c281b6b6956a2757f534912f6/config.json\n", + "[INFO|configuration_utils.py:792] 2024-02-06 12:42:42,695 >> Model config BertConfig {\n", + " \"_name_or_path\": \"sbunlp/fabert\",\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"classifier_dropout\": null,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 0,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"torch_dtype\": \"float32\",\n", + " \"transformers_version\": \"4.38.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50000\n", + "}\n", + "\n", + "tokenizer_config.json: 100% 18.3k/18.3k [00:00<00:00, 45.1MB/s]\n", + "vocab.txt: 100% 552k/552k [00:00<00:00, 2.16MB/s]\n", + "tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 3.28MB/s]\n", + "special_tokens_map.json: 100% 125/125 [00:00<00:00, 689kB/s]\n", + "[INFO|tokenization_utils_base.py:2029] 2024-02-06 12:42:45,621 >> loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--sbunlp--fabert/snapshots/9e6604e667a0b96c281b6b6956a2757f534912f6/vocab.txt\n", + "[INFO|tokenization_utils_base.py:2029] 2024-02-06 12:42:45,621 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--sbunlp--fabert/snapshots/9e6604e667a0b96c281b6b6956a2757f534912f6/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2029] 2024-02-06 12:42:45,621 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2029] 2024-02-06 12:42:45,621 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--sbunlp--fabert/snapshots/9e6604e667a0b96c281b6b6956a2757f534912f6/special_tokens_map.json\n", + "[INFO|tokenization_utils_base.py:2029] 2024-02-06 12:42:45,621 >> 
loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--sbunlp--fabert/snapshots/9e6604e667a0b96c281b6b6956a2757f534912f6/tokenizer_config.json\n", + "model.safetensors: 100% 498M/498M [00:09<00:00, 53.4MB/s]\n", + "[INFO|modeling_utils.py:3337] 2024-02-06 12:42:55,601 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--sbunlp--fabert/snapshots/9e6604e667a0b96c281b6b6956a2757f534912f6/model.safetensors\n", + "[INFO|modeling_utils.py:4060] 2024-02-06 12:42:56,006 >> Some weights of the model checkpoint at sbunlp/fabert were not used when initializing BertForQuestionAnswering: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']\n", + "- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:4072] 2024-02-06 12:42:56,006 >> Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at sbunlp/fabert and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/63994 [00:00> ***** Running training *****\n", + "[INFO|trainer.py:1748] 2024-02-06 12:44:08,934 >> Num examples = 63,994\n", + "[INFO|trainer.py:1749] 2024-02-06 12:44:08,934 >> Num Epochs = 2\n", + "[INFO|trainer.py:1750] 2024-02-06 12:44:08,934 >> Instantaneous batch size per device = 16\n", + "[INFO|trainer.py:1753] 2024-02-06 12:44:08,934 >> Total train batch size (w. parallel, distributed & accumulation) = 16\n", + "[INFO|trainer.py:1754] 2024-02-06 12:44:08,934 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1755] 2024-02-06 12:44:08,934 >> Total optimization steps = 8,000\n", + "[INFO|trainer.py:1756] 2024-02-06 12:44:08,934 >> Number of trainable parameters = 123,852,290\n", + "{'loss': 3.2502, 'learning_rate': 4.6875e-05, 'epoch': 0.12}\n", + "{'loss': 2.4828, 'learning_rate': 4.375e-05, 'epoch': 0.25}\n", + "{'loss': 2.3835, 'learning_rate': 4.0625000000000005e-05, 'epoch': 0.38}\n", + "{'loss': 2.297, 'learning_rate': 3.7500000000000003e-05, 'epoch': 0.5}\n", + "{'loss': 2.2245, 'learning_rate': 3.4375e-05, 'epoch': 0.62}\n", + "{'loss': 1.9735, 'learning_rate': 3.125e-05, 'epoch': 0.75}\n", + "{'loss': 1.0372, 'learning_rate': 2.8125000000000003e-05, 'epoch': 0.88}\n", + "{'loss': 1.01, 'learning_rate': 2.5e-05, 'epoch': 1.0}\n", + " 50% 4000/8000 [1:41:56<1:30:43, 1.36s/it][INFO|trainer.py:737] 2024-02-06 14:26:05,386 >> The following columns in the evaluation set don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping. 
If example_id, offset_mapping are not expected by `BertForQuestionAnswering.forward`, you can safely ignore this message.\n", + "[INFO|trainer.py:3287] 2024-02-06 14:26:05,388 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:3289] 2024-02-06 14:26:05,388 >> Num examples = 7976\n", + "[INFO|trainer.py:3292] 2024-02-06 14:26:05,388 >> Batch size = 16\n", + "\n", + " 0% 0/499 [00:00> Saving model checkpoint to /tmp/pquad/tmp-checkpoint-4000\n", + "[INFO|configuration_utils.py:473] 2024-02-06 14:31:20,353 >> Configuration saved in /tmp/pquad/tmp-checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:2532] 2024-02-06 14:31:22,513 >> Model weights saved in /tmp/pquad/tmp-checkpoint-4000/model.safetensors\n", + "[INFO|tokenization_utils_base.py:2435] 2024-02-06 14:31:22,515 >> tokenizer config file saved in /tmp/pquad/tmp-checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2444] 2024-02-06 14:31:22,516 >> Special tokens file saved in /tmp/pquad/tmp-checkpoint-4000/special_tokens_map.json\n", + "{'loss': 0.7621, 'learning_rate': 2.1875e-05, 'epoch': 1.12}\n", + "{'loss': 0.7472, 'learning_rate': 1.8750000000000002e-05, 'epoch': 1.25}\n", + "{'loss': 0.7524, 'learning_rate': 1.5625e-05, 'epoch': 1.38}\n", + "{'loss': 0.7581, 'learning_rate': 1.25e-05, 'epoch': 1.5}\n", + "{'loss': 0.7334, 'learning_rate': 9.375000000000001e-06, 'epoch': 1.62}\n", + "{'loss': 0.7324, 'learning_rate': 6.25e-06, 'epoch': 1.75}\n", + "{'loss': 0.71, 'learning_rate': 3.125e-06, 'epoch': 1.88}\n", + "{'loss': 0.7157, 'learning_rate': 0.0, 'epoch': 2.0}\n", + "100% 8000/8000 [3:29:35<00:00, 1.37s/it][INFO|trainer.py:737] 2024-02-06 16:13:44,244 >> The following columns in the evaluation set don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping. If example_id, offset_mapping are not expected by `BertForQuestionAnswering.forward`, you can safely ignore this message.\n", + "[INFO|trainer.py:3287] 2024-02-06 16:13:44,246 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:3289] 2024-02-06 16:13:44,247 >> Num examples = 7976\n", + "[INFO|trainer.py:3292] 2024-02-06 16:13:44,247 >> Batch size = 16\n", + "\n", + " 0% 0/499 [00:00> Saving model checkpoint to /tmp/pquad/tmp-checkpoint-8000\n", + "[INFO|configuration_utils.py:473] 2024-02-06 16:18:59,731 >> Configuration saved in /tmp/pquad/tmp-checkpoint-8000/config.json\n", + "[INFO|modeling_utils.py:2532] 2024-02-06 16:19:01,939 >> Model weights saved in /tmp/pquad/tmp-checkpoint-8000/model.safetensors\n", + "[INFO|tokenization_utils_base.py:2435] 2024-02-06 16:19:01,941 >> tokenizer config file saved in /tmp/pquad/tmp-checkpoint-8000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2444] 2024-02-06 16:19:01,941 >> Special tokens file saved in /tmp/pquad/tmp-checkpoint-8000/special_tokens_map.json\n", + "[INFO|trainer.py:1988] 2024-02-06 16:19:06,401 >> \n", + "\n", + "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 12897.4673, 'train_samples_per_second': 9.923, 'train_steps_per_second': 0.62, 'train_loss': 1.4106296997070313, 'epoch': 2.0}\n", + "100% 8000/8000 [3:34:57<00:00, 1.61s/it]\n", + "[INFO|trainer.py:2981] 2024-02-06 16:19:06,404 >> Saving model checkpoint to /tmp/pquad/\n", + "[INFO|configuration_utils.py:473] 2024-02-06 16:19:06,405 >> Configuration saved in /tmp/pquad/config.json\n", + "[INFO|modeling_utils.py:2532] 2024-02-06 16:19:08,393 >> Model weights saved in /tmp/pquad/model.safetensors\n", + "[INFO|tokenization_utils_base.py:2435] 2024-02-06 16:19:08,396 >> tokenizer config file saved in /tmp/pquad/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2444] 2024-02-06 16:19:08,396 >> Special tokens file saved in /tmp/pquad/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 2.0\n", + " train_loss = 1.4106\n", + " train_runtime = 3:34:57.46\n", + " train_samples = 63994\n", + " train_samples_per_second = 9.923\n", + " train_steps_per_second = 0.62\n", + "02/06/2024 16:19:08 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:737] 2024-02-06 16:19:08,444 >> The following columns in the evaluation set don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping. If example_id, offset_mapping are not expected by `BertForQuestionAnswering.forward`, you can safely ignore this message.\n", + "[INFO|trainer.py:3287] 2024-02-06 16:19:08,447 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:3289] 2024-02-06 16:19:08,447 >> Num examples = 7976\n", + "[INFO|trainer.py:3292] 2024-02-06 16:19:08,447 >> Batch size = 16\n", + "100% 499/499 [04:17<00:00, 2.25it/s]02/06/2024 16:23:37 - INFO - utils_qa - Post-processing 7976 example predictions split into 7976 features.\n", + "\n", + " 0% 0/7976 [00:00> The following columns in the test set don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping. If example_id, offset_mapping are not expected by `BertForQuestionAnswering.forward`, you can safely ignore this message.\n", + "[INFO|trainer.py:3287] 2024-02-06 16:24:14,622 >> ***** Running Prediction *****\n", + "[INFO|trainer.py:3289] 2024-02-06 16:24:14,622 >> Num examples = 8002\n", + "[INFO|trainer.py:3292] 2024-02-06 16:24:14,622 >> Batch size = 16\n", + "100% 500/501 [04:18<00:00, 1.89it/s]02/06/2024 16:28:45 - INFO - utils_qa - Post-processing 8002 example predictions split into 8002 features.\n", + "\n", + " 0% 0/8002 [00:00> Dropping the following result as it does not have all the necessary fields:\n", + "{'task': {'name': 'Question Answering', 'type': 'question-answering'}, 'dataset': {'name': 'Gholamreza/pquad', 'type': 'Gholamreza/pquad', 'config': 'pquad', 'split': 'validation', 'args': 'pquad'}}\n", + "100% 501/501 [05:06<00:00, 1.63it/s]\n" + ] + } + ] + } + ] +} \ No newline at end of file
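The `--max_seq_length 512` / `--doc_stride 128` pair in the `run_qa.py` cell above controls how long contexts are windowed: each example is tokenized into overlapping features of at most 512 tokens, with consecutive windows sharing 128 tokens so an answer span is never lost at a window boundary. A minimal sketch of that windowing with the same tokenizer (the repeated Persian sentence is only there to force overflow):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("sbunlp/fabert")

question = "پایتخت ایران کجاست؟"                  # "Where is the capital of Iran?"
long_context = "تهران پایتخت ایران است. " * 400   # artificially long context

# run_qa.py builds one feature per overlapping window of the context.
features = tok(
    question,
    long_context,
    truncation="only_second",        # truncate only the context, never the question
    max_length=512,                  # --max_seq_length
    stride=128,                      # --doc_stride: overlap between windows
    return_overflowing_tokens=True,  # emit every window, not just the first
    return_offsets_mapping=True,     # char offsets, used later to recover answer text
)
print(len(features["input_ids"]))    # number of windows produced for this example
```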
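`--version_2_with_negative` tells `run_qa.py` that PQuAD, like SQuAD v2, contains unanswerable questions, so predictions are scored with the `squad_v2` metric from `evaluate` (the source of the `f1` named by `--metric_for_best_model`). A toy sketch of that scoring, with made-up ids and one answerable plus one unanswerable question:

```python
import evaluate

squad_v2 = evaluate.load("squad_v2")

# Hypothetical predictions: q1 answered, q2 correctly left unanswered.
predictions = [
    {"id": "q1", "prediction_text": "تهران", "no_answer_probability": 0.0},
    {"id": "q2", "prediction_text": "", "no_answer_probability": 1.0},
]
references = [
    {"id": "q1", "answers": {"text": ["تهران"], "answer_start": [0]}},
    {"id": "q2", "answers": {"text": [], "answer_start": []}},  # unanswerable
]
print(squad_v2.compute(predictions=predictions, references=references))
# -> includes 'exact' and 'f1', plus HasAns_/NoAns_ breakdowns
```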
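Finally, the run saves the fine-tuned model and tokenizer to `--output_dir /tmp/pquad/`, so the checkpoint can be loaded directly with the question-answering pipeline. A minimal inference sketch, assuming the training above has completed (the question/context pair is illustrative):

```python
from transformers import pipeline

# Load the fine-tuned FaBERT QA checkpoint written by run_qa.py above.
qa = pipeline("question-answering", model="/tmp/pquad/")

result = qa(
    question="پایتخت ایران کجاست؟",   # "Where is the capital of Iran?"
    context="تهران پایتخت ایران است.",  # "Tehran is the capital of Iran."
    handle_impossible_answer=True,  # allow "" for unanswerable questions (SQuAD-v2 style)
    max_seq_len=512,                # match --max_seq_length used in training
    doc_stride=128,                 # match --doc_stride
)
print(result)  # e.g. {'score': ..., 'start': ..., 'end': ..., 'answer': 'تهران'}
```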