Skip to content
This repository has been archived by the owner on Nov 13, 2022. It is now read-only.

support for auto pulling gdocs #621

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ See below for a more detailed list of features.

## Setup Instructions

1. Generate a copy of this repo by clicking [on this link](https://github.com/fastai/fastpages/generate). Make sure to sign in to your account, or you will see a 404 error. Name your repo anything you like **except** {your-username}.github.io.
1. Generate a copy of this repo by clicking [on this link](https://github.com/fastai/fastpages/generate). Make sure to sign in to your account, or you will see a 404 error. Furthermore, **do not check the `include all branches` checkbox**. Name your repo anything you like **except** {your-username}.github.io.

2. **GitHub Actions will automatically open a PR** on your new repository ~ 30 seconds after the copy is created. Follow the instructions in that PR to continue.

Expand Down
3 changes: 3 additions & 0 deletions _action_files/action_entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ export GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChec
eval "$(ssh-agent -s)"

######## Run notebook/word converter ########

# Download the Google Docs listed in _word/gdoc_ids.json (path relative to the
# current working directory) as Word documents.
# NOTE(review): this runs before the Word converter below, presumably so the
# downloaded files are converted in the same pass -- confirm against word2post.sh.
python /fastpages/gdocs2word.py
# word converter using pandoc
/fastpages/word2post.sh
# notebook converter using nbdev
Expand Down
56 changes: 56 additions & 0 deletions _action_files/gdocs2word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Pulls specified google documents"""
import json
import re
import requests
from pathlib import Path
from typing import Iterable


def download_gdocs(
    g_ids: Iterable[str],
    format: str = "docx",
    dir_path: str = "_word",
    chunk_size: int = 8192,
) -> None:
    """Download Google Docs into ``dir_path`` through the export endpoint.

    Every document is fetched independently: an HTTP error for one id is
    reported on stderr and the remaining ids are still processed. The request
    carries no credentials.
    NOTE(review): presumably the documents must therefore be link-shared or
    public -- confirm.

    Args:
        g_ids: Google Doc ids, e.g. the ``<id>`` part of
            ``https://docs.google.com/document/d/<id>/edit``.
        format: Export format to request. Default: "docx".
        dir_path: Directory to download files to; created if missing.
            Default: "_word".
        chunk_size: Chunk size in bytes used when streaming the response
            body to disk. Default: 8192.
    """
    import sys  # local import: only needed for the warning output below

    doc_ids = list(g_ids)
    if not doc_ids:
        # Nothing requested: avoid opening a session or touching the disk.
        return

    dl_uri = "https://docs.google.com/document/export"
    target_dir = Path(dir_path)
    target_dir.mkdir(parents=True, exist_ok=True)

    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        for g_id in doc_ids:
            params = {"format": format, "id": g_id}
            fallback_name = f"{g_id}.{format}"

            # The try lives inside the loop so a single failing document
            # does not abort the downloads that follow it.
            try:
                with session.get(dl_uri, params=params, stream=True) as r:
                    r.raise_for_status()

                    # Take the file name from the response headers, falling
                    # back to "<id>.<format>" when the header is absent or
                    # does not carry a quoted filename.
                    disposition = r.headers.get("Content-Disposition", "")
                    match = re.search(r'filename="([^"]+)"', disposition)
                    raw_name = match.group(1) if match else fallback_name

                    # The name is server-controlled: keep only its final
                    # path component so it cannot escape ``target_dir``.
                    file_name = Path(raw_name).name
                    if file_name in ("", ".", ".."):
                        file_name = fallback_name

                    with (target_dir / file_name).open("wb") as f:
                        for chunk in r.iter_content(chunk_size):
                            f.write(chunk)
            except requests.HTTPError as err:
                # Best-effort download: report the failure instead of
                # silently dropping it, then continue with the next id.
                print(
                    f"warning: could not download gdoc {g_id!r}: {err}",
                    file=sys.stderr,
                )


if __name__ == "__main__":
    # The id list is opt-in configuration read relative to the current
    # working directory. A repository without the file (or without the
    # "gdoc_ids" key) simply has nothing to download, so that case is
    # treated as an empty list instead of crashing the script. Malformed
    # JSON is still allowed to raise, because that is a real config error.
    try:
        with Path("_word/gdoc_ids.json").open(encoding="utf-8") as j:
            gdoc_ids = json.load(j).get("gdoc_ids")
    except FileNotFoundError:
        gdoc_ids = None

    if gdoc_ids:
        download_gdocs(gdoc_ids)
5 changes: 5 additions & 0 deletions _word/gdoc_ids.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"gdoc_ids": [

]
}