fastai · nthomsencph · Feb 6, 2022 · Feb 13, 2022 · Feb 14, 2022 · Feb 14, 2022
diff --git a/README.md b/README.md
@@ -84,7 +84,7 @@ See below for a more detailed list of features.
 
 ## Setup Instructions
 
-1.  Generate a copy of this repo by clicking [on this link](https://github.com/fastai/fastpages/generate). Make sure to sign in to your account, or you will see a 404 error. Name your repo anything you like **except** {your-username}.github.io.
+1.  Generate a copy of this repo by clicking [on this link](https://github.com/fastai/fastpages/generate). Make sure to sign in to your account, or you will see a 404 error. Furthermore, **do not check the `include all branches` checkbox**. Name your repo anything you like **except** {your-username}.github.io.
 
 2. **GitHub Actions will automatically open a PR** on your new repository ~ 30 seconds after the copy is created.  Follow the instructions in that PR to continue.
 

diff --git a/_action_files/action_entrypoint.sh b/_action_files/action_entrypoint.sh
@@ -6,6 +6,9 @@ export GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChec
 eval "$(ssh-agent -s)"
 
 ######## Run notebook/word converter ########
+
+# download gdocs as word docs
+python /fastpages/gdocs2word.py
 # word converter using pandoc
 /fastpages/word2post.sh
 # notebook converter using nbdev

diff --git a/_action_files/gdocs2word.py b/_action_files/gdocs2word.py
@@ -0,0 +1,56 @@
+"""Pulls specified google documents"""
+import json
+import re
+import requests
+from pathlib import Path
+from typing import Iterable
+
+
+def download_gdocs(
+    g_ids: Iterable[str],
+    format: str = "docx",
+    dir_path: str = "_word",
+    chunk_size: int = 8192,
+):
+
+    """ Downloads specified google docs into /_word as .docx files.
+
+    Args:
+        g_ids (list): A list of google doc ids e.g. 
+        format (str): Export format to request. Default: "docx".
+        dir_path (str): dir to download files to. Default: "_word".
+        chunk_size (int): bytes chunk size for stream. Default: 8192.
+    """
+
+    try:
+
+        session = requests.Session()
+        for g_id in g_ids:
+
+            dl_uri = "https://docs.google.com/document/export"
+            params = {"format": format, "id": g_id}
+
+            with session.get(dl_uri, params=params, stream=True) as r:
+                r.raise_for_status()
+
+                # get file name from headers
+                file_name = re.findall(
+                    'filename="(.+)"', r.headers["Content-Disposition"]
+                )[0]
+
+                local_file = Path(f"{dir_path}/{file_name}")
+                with local_file.open("wb") as f:
+                    for chunk in r.iter_content(chunk_size):
+                        f.write(chunk)
+
+    except requests.HTTPError:
+        pass
+
+
+if __name__ == "__main__":
+
+    with Path("_word/gdoc_ids.json").open() as j:
+        gdoc_ids = json.load(j)["gdoc_ids"]
+
+    if gdoc_ids:
+        download_gdocs(gdoc_ids)
diff --git a/_word/gdoc_ids.json b/_word/gdoc_ids.json
@@ -0,0 +1,5 @@
+{
+    "gdoc_ids": [
+
+    ]
+}