-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathdocs.ex
More file actions
131 lines (106 loc) · 4.22 KB
/
docs.ex
File metadata and controls
131 lines (106 loc) · 4.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
defmodule GoogleSheets.Loader.Docs do
  @moduledoc """
  Implements the `GoogleSheets.Loader` behaviour by fetching a spreadsheet through the Google
  spreadsheet API.

  The only configuration value required is `:url`, which should point to the Atom feed of the
  spreadsheet. See [README](extra-readme.html) how to publish a spreadsheet and find the URL.

  Optional configuration values:

    * `:sheets` - list of worksheet names to load. When the key is missing, `nil` or an empty
      list, every worksheet listed in the feed is loaded.
    * `:ignored_sheets` - list of worksheet names that are removed from `:sheets` before loading.

  The loader first requests the Atom feed and parses the URLs pointing to CSV data for each
  individual worksheet and the `updated` time stamp of the spreadsheet. A version string is
  derived from the URL, the requested sheet names and that time stamp.

  If the version is equal to the one passed as `previous_version`, the loader stops and returns
  `{:ok, :unchanged}`. If not, it filters the found CSV URLs and keeps only those whose title is
  listed in the requested sheets (or all of them when no sheets were requested).

  After requesting all URLs, the loader checks that every requested sheet was actually loaded and
  returns `{:ok, version, worksheets}`, where `worksheets` is a list of `GoogleSheets.WorkSheet`
  structs in the order the worksheets appear in the feed. If a requested sheet is missing,
  `{:error, reason}` is returned.

  If there are any errors during HTTP requests and/or parsing, it will most likely raise an
  exception. If you use this loader in code which is not crash resistant, do handle the
  exceptions.
  """

  import SweetXml
  require Logger

  @behaviour GoogleSheets.Loader

  # Timeouts in milliseconds, handed to HTTPoison as `timeout:` and `recv_timeout:`.
  @connect_timeout 2_000
  @receive_timeout 120_000

  @doc """
  Load spreadsheet from Google sheets using the URL specified in config[:url] key.

  `previous_version` is the version returned by an earlier load (or `nil`); `config` is a keyword
  list, see the module documentation for the supported keys.

  Returns one of:

    * `{:ok, version, worksheets}` - the spreadsheet was loaded.
    * `{:ok, :unchanged}` - the computed version equals `previous_version`.
    * `{:error, reason}` - at least one requested sheet was not found in the feed.

  Raises `KeyError` when `:url` is missing from `config` and `MatchError` when an HTTP request
  does not succeed with status code 200.
  """
  def load(previous_version, _id, config) when is_list(config) do
    url = Keyword.fetch!(config, :url)

    # `|| []` also covers keys that are present but explicitly set to nil, which the
    # three-argument Keyword.get/3 default would not.
    ignored_sheets = Keyword.get(config, :ignored_sheets) || []
    requested = Keyword.get(config, :sheets) || []

    # NOTE(review): ignored sheets are only removed from the explicit :sheets list. With an empty
    # list every worksheet in the feed is loaded, ignored names included - confirm this is intended.
    sheets = Enum.reject(requested, fn sheet -> sheet in ignored_sheets end)

    load_spreadsheet(previous_version, url, sheets)
  catch
    # Any value thrown further down the call chain is handed back to the caller as the result.
    result -> result
  end

  # Fetch the Atom feed describing the spreadsheet and request the individual sheets unless the
  # computed version matches previous_version.
  defp load_spreadsheet(previous_version, url, sheets) do
    feed = fetch_body!(url, [])
    version = feed_version(feed, url, sheets)

    # version is always a binary, so this can never be true for a nil previous_version.
    if version == previous_version do
      {:ok, :unchanged}
    else
      worksheets =
        feed
        |> feed_entries()
        |> filter_entries(sheets)
        |> Enum.map(&load_worksheet/1)

      verify_loaded(version, sheets, worksheets)
    end
  end

  # Lowercase hex SHA-1 over the feed URL, the requested sheet names and the feed's updated stamp.
  defp feed_version(feed, url, sheets) do
    updated =
      feed
      |> xpath(~x"//feed/updated/text()")
      |> List.to_string()
      |> String.trim()

    :sha
    |> :crypto.hash(url <> Enum.join(sheets) <> updated)
    |> Base.encode16(case: :lower)
  end

  # Extracts {title, csv_url} string pairs for every feed entry, in feed order.
  defp feed_entries(feed) do
    feed
    |> xpath(
      ~x"//feed/entry"l,
      title: ~x"./title/text()",
      url: ~x"./link[@type='text/csv']/@href"
    )
    |> Enum.map(fn entry ->
      {List.to_string(entry[:title]), List.to_string(entry[:url])}
    end)
  end

  # Keeps only entries whose title is in sheets; an empty sheets list accepts all entries.
  defp filter_entries(entries, []), do: entries

  defp filter_entries(entries, sheets) do
    Enum.filter(entries, fn {title, _url} -> title in sheets end)
  end

  # Requests the CSV data of one worksheet and wraps it in a WorkSheet struct.
  defp load_worksheet({title, url}) do
    %GoogleSheets.WorkSheet{name: title, csv: fetch_body!(url, follow_redirect: false)}
  end

  # Succeeds only when every requested sheet name has a loaded worksheet with the same name.
  defp verify_loaded(version, sheets, worksheets) do
    loaded_names = Enum.map(worksheets, fn ws -> ws.name end)

    if Enum.all?(sheets, fn sheetname -> sheetname in loaded_names end) do
      {:ok, version, worksheets}
    else
      loaded = Enum.join(loaded_names, ",")

      {:error,
       "All requested sheets not loaded, expected: #{Enum.join(sheets, ",")} loaded: #{loaded}"}
    end
  end

  # Performs a GET request and returns the response body. The assertive match raises MatchError
  # for transport errors and for any status code other than 200.
  defp fetch_body!(url, extra_options) do
    options = [timeout: @connect_timeout, recv_timeout: @receive_timeout] ++ extra_options

    {:ok, %HTTPoison.Response{status_code: 200, body: body}} = HTTPoison.get(url, [], options)

    body
  end
end