commit for submission

zarzouram · Jan 10, 2023 · 6e890ac · 6e890ac
1 parent bdf557f
commit 6e890ac
Show file tree

Hide file tree

Showing 12 changed files with 297 additions and 531 deletions.
diff --git a/.gitignore b/.gitignore
@@ -407,4 +407,4 @@ data/models/*
 !data/models/slim_18-03-08h06_7485.pt
 .test
 .vscode
-paper
+
diff --git a/README.md b/README.md
@@ -24,11 +24,12 @@ reason.
 
 ## 2. Dataset
 
-Dataset information can be found [here](./data/README.md#1-general-description))
+Dataset information can be found
+[here](./data/README.md#1-general-description)
 
 ## 3. Code
 
-### 3.1. Requiremnts
+### 3.1. Requirements
 
 Code is tested using python 3.9. Use `conda create --name <env> --file
 requirements.txt` to create a virtual env and to install the required libraries.
@@ -38,7 +39,9 @@ requirements.txt` to create virual env and to install the required libraries.
 Dataset download and conversion can be found
 [here](./data/README.md#2-download)
 
-### 3.3 training the model
+Processed dataset files are under `/srv/data/zarzouram/lt2318/slim/turk_torch/`
+
+### 3.3. Training the model
 
 `run_train.py` expects the following arguments:
 
@@ -48,15 +51,32 @@ Dataset download and conversion can be found
 4. checkpoint_model: if train resuming is needed pass the checkpoint filename
 5. device: either gpu or cpu
 
-Loss and evaluation metrics are tracked using Tensorboard. The path to
+Losses are tracked using TensorBoard. The path to the
 TensorBoard files is `./logs`.
 
 ```bash
 python code/run_train.py [ARGUMENT]
 ```
 
+You do not need to create new datasets; run `python code/run_train.py` to use
+the default arguments.
+
+### 3.4. Testing
+
+Model testing is done in the `experiments.ipynb` notebook. The notebook is
+configured to load my previously saved test results from
+`/srv/data/zarzouram/lt2318/test_outputs`.
+
+If you want to re-test the model, change the `retest` value to `True` under
+the `Testing Model` section. Do not forget to also change the path in
+`save_data`; otherwise you will overwrite the saved test results.
+
+## 4. Results
+
+Please see the attached report under `paper`.
+
 
-## 4. Reference
+## 5. Reference
 
 <a id="1">[1]</a>  Ramalho, T., Kočiský, T., Besse, F., Eslami, S. M., Melis,
 G., Viola, F., ... & Hermann, K. M. (2018). Encoding spatial relations from

diff --git a/experiments.ipynb b/experiments.ipynb
@@ -270,8 +270,9 @@
     "from pathlib import Path\n",
     "import pandas as pd\n",
     "\n",
+    "retest = False\n",
     "save_data = Path(f\"{project_dir}/test_outputs/inference_outputs_1.pt\")\n",
-    "if save_data.is_file():\n",
+    "if save_data.is_file() and not retest:\n",
     "    data_dict = torch.load(save_data)\n",
     "else:\n",
     "    model_inference.eval()\n",
@@ -482,7 +483,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.12"
+   "version": "3.9.12 (main, Jun  1 2022, 11:38:51) \n[GCC 7.5.0]"
   },
   "metadata": {
    "interpreter": {

diff --git a/paper/PAPER.tex b/paper/PAPER.tex
diff --git a/paper/acl2020.bib b/paper/acl2020.bib
@@ -1,67 +1,131 @@
-@book{Aho:72,
-    author  = {Alfred V. Aho and Jeffrey D. Ullman},
-    title   = {The Theory of Parsing, Translation and Compiling},
-    year    = "1972",
-    volume  = "1",
-    publisher = {Prentice-Hall},
-    address = {Englewood Cliffs, NJ}
+@ARTICLE{ramalho2018encoding,
+       author = {{Ramalho}, Tiago and {Ko{\v{c}}isk{\'y}}, Tom{\'a}{\v{s}} and {Besse}, Frederic and {Eslami}, S.~M. Ali and {Melis}, G{\'a}bor and {Viola}, Fabio and {Blunsom}, Phil and {Hermann}, Karl Moritz},
+        title = "{Encoding Spatial Relations from Natural Language}",
+      journal = {arXiv e-prints},
+     keywords = {Computer Science - Computation and Language, Computer Science - Artificial Intelligence, Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning},
+         year = 2018,
+        month = jul,
+          eid = {arXiv:1807.01670},
+        pages = {arXiv:1807.01670},
+archivePrefix = {arXiv},
+       eprint = {1807.01670},
+ primaryClass = {cs.CL},
+       adsurl = {https://ui.adsabs.harvard.edu/abs/2018arXiv180701670R},
+      adsnote = {Provided by the SAO/NASA Astrophysics Data System}
 }
 
-@book{APA:83,
-    author  = {{American Psychological Association}},
-    title   = {Publications Manual},
-    year    = "1983",
-   publisher = {American Psychological Association},
-   address = {Washington, DC}
+
+
+
+@article{hudsonGQANewDataset2019,
+  title = {{{GQA}}: {{A New Dataset}} for {{Real}}-{{World Visual Reasoning}} and {{Compositional Question Answering}}},
+  shorttitle = {{{GQA}}},
+  author = {Hudson, Drew A. and Manning, Christopher D.},
+  year = {2019},
+  month = may,
+  journal = {arXiv:1902.09506 [cs]},
+  eprint = {1902.09506},
+  eprinttype = {arxiv},
+  primaryclass = {cs},
+  archiveprefix = {arXiv},
+  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
 }
 
-@article{Chandra:81,
-	author = {Ashok K. Chandra and Dexter C. Kozen and Larry J. Stockmeyer},
-	year = "1981",
-	title = {Alternation},
-	journal = {Journal of the Association for Computing Machinery},
-	volume = "28",
-	number = "1",
-	pages = "114--133",
-	doi = "10.1145/322234.322243",
+
+@ARTICLE{2016arXiv160408772G,
+       author = {{Gregor}, Karol and {Besse}, Frederic and {Jimenez Rezende}, Danilo and {Danihelka}, Ivo and {Wierstra}, Daan},
+        title = "{Towards Conceptual Compression}",
+      journal = {arXiv e-prints},
+     keywords = {Statistics - Machine Learning, Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning},
+         year = 2016,
+        month = apr,
+          eid = {arXiv:1604.08772},
+        pages = {arXiv:1604.08772},
+archivePrefix = {arXiv},
+       eprint = {1604.08772},
+ primaryClass = {stat.ML},
+       adsurl = {https://ui.adsabs.harvard.edu/abs/2016arXiv160408772G},
+      adsnote = {Provided by the SAO/NASA Astrophysics Data System}
 }
 
-@inproceedings{andrew2007scalable,
-  title={Scalable training of {L1}-regularized log-linear models},
-  author={Andrew, Galen and Gao, Jianfeng},
-  booktitle={Proceedings of the 24th International Conference on Machine Learning},
-  pages={33--40},
-  year={2007},
+
+@article{vaswani2017attention,
+  title={Attention is all you need},
+  author={Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
+  journal={Advances in neural information processing systems},
+  volume={30},
+  year={2017}
+}
+
+@ARTICLE{xiong2020layer,
+       author = {{Xiong}, Ruibin and {Yang}, Yunchang and {He}, Di and {Zheng}, Kai and {Zheng}, Shuxin and {Xing}, Chen and {Zhang}, Huishuai and {Lan}, Yanyan and {Wang}, Liwei and {Liu}, Tie-Yan},
+        title = "{On Layer Normalization in the Transformer Architecture}",
+      journal = {arXiv e-prints},
+     keywords = {Computer Science - Machine Learning, Computer Science - Computation and Language, Statistics - Machine Learning},
+         year = 2020,
+        month = feb,
+          eid = {arXiv:2002.04745},
+        pages = {arXiv:2002.04745},
+archivePrefix = {arXiv},
+       eprint = {2002.04745},
+ primaryClass = {cs.LG},
+       adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200204745X},
+      adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+
+
+@ARTICLE{2015arXiv150604214S,
+       author = {{Shi}, Xingjian and {Chen}, Zhourong and {Wang}, Hao and {Yeung}, Dit-Yan and {Wong}, Wai-kin and {Woo}, Wang-chun},
+        title = "{Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting}",
+      journal = {arXiv e-prints},
+     keywords = {Computer Science - Computer Vision and Pattern Recognition},
+         year = 2015,
+        month = jun,
+          eid = {arXiv:1506.04214},
+        pages = {arXiv:1506.04214},
+archivePrefix = {arXiv},
+       eprint = {1506.04214},
+ primaryClass = {cs.CV},
+       adsurl = {https://ui.adsabs.harvard.edu/abs/2015arXiv150604214S},
+      adsnote = {Provided by the SAO/NASA Astrophysics Data System}
 }
 
-@book{Gusfield:97,
-    author  = {Dan Gusfield},
-    title   = {Algorithms on Strings, Trees and Sequences},
-    year    = "1997",
-    publisher = {Cambridge University Press},
-    address = {Cambridge, UK}
+@ARTICLE{7508408,  
+    author={Greff, Klaus and Srivastava, Rupesh K. and Koutn{\'i}k, Jan and Steunebrink, Bas R. and Schmidhuber, J{\"u}rgen},
+    journal={IEEE Transactions on Neural Networks and Learning Systems},
+    title={{LSTM}: A Search Space Odyssey},
+    year={2017},  volume={28},  number={10},
+    pages={2222--2232},  doi={10.1109/TNNLS.2016.2582924}
 }
 
-@article{rasooli-tetrault-2015,
-    author    = {Mohammad Sadegh Rasooli and Joel R. Tetreault},
-    title     = {Yara Parser: {A} Fast and Accurate Dependency Parser},
-    journal   = {Computing Research Repository},
-    volume    = {arXiv:1503.06733},
-    year      = {2015},
-    url       = {http://arxiv.org/abs/1503.06733},
-    note    = {version 2}
+@ARTICLE{2013arXiv1312.6114K,
+       author = {{Kingma}, Diederik P and {Welling}, Max},
+        title = "{Auto-Encoding Variational Bayes}",
+      journal = {arXiv e-prints},
+     keywords = {Statistics - Machine Learning, Computer Science - Machine Learning},
+         year = 2013,
+        month = dec,
+          eid = {arXiv:1312.6114},
+        pages = {arXiv:1312.6114},
+archivePrefix = {arXiv},
+       eprint = {1312.6114},
+ primaryClass = {stat.ML},
+       adsurl = {https://ui.adsabs.harvard.edu/abs/2013arXiv1312.6114K},
+      adsnote = {Provided by the SAO/NASA Astrophysics Data System}
 }
 
-@article{Ando2005,
-	Acmid = {1194905},
-	Author = {Ando, Rie Kubota and Zhang, Tong},
-	Issn = {1532-4435},
-	Issue_Date = {12/1/2005},
-	Journal = {Journal of Machine Learning Research},
-	Month = dec,
-	Numpages = {37},
-	Pages = {1817--1853},
-	Publisher = {JMLR.org},
-	Title = {A Framework for Learning Predictive Structures from Multiple Tasks and Unlabeled Data},
-	Volume = {6},
-	Year = {2005}}
+@inproceedings{pennington-etal-2014-glove,
+    title = "{G}lo{V}e: Global Vectors for Word Representation",
+    author = "Pennington, Jeffrey  and
+      Socher, Richard  and
+      Manning, Christopher",
+    booktitle = "Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing ({EMNLP})",
+    month = oct,
+    year = "2014",
+    address = "Doha, Qatar",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/D14-1162",
+    doi = "10.3115/v1/D14-1162",
+    pages = "1532--1543",
+}
diff --git a/paper/acl2020.pdf b/paper/acl2020.pdf
diff --git a/paper/images/SLIMExample.png b/paper/images/SLIMExample.png
diff --git a/paper/images/SLIMModel.png b/paper/images/SLIMModel.png
diff --git a/paper/images/STD_Annlng.png b/paper/images/STD_Annlng.png
diff --git a/paper/images/output1.png b/paper/images/output1.png
diff --git a/paper/images/output2.png b/paper/images/output2.png
diff --git a/paper/paper.pdf b/paper/paper.pdf
-Original file line number
+Diff line change
@@ Expand Up / @@ -407,4 +407,4 @@ data/models/* @@
     !data/models/slim_18-03-08h06_7485.pt
     .test
     .vscode
-    paper