Skip to content

Commit c3d06f9

Browse files
committed
Script to publish persistent UDFs
1 parent d6e3529 commit c3d06f9

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed

script/publish_persistent_udfs

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
This script publishes all user-defined functions in udf/ as persistent UDFs in the udf dataset.
5+
6+
The udf_ prefix will be stripped from names of published UDFs.
7+
"""
8+
9+
from argparse import ArgumentParser
10+
import os
11+
import sys
12+
import re
13+
from google.cloud import bigquery
14+
15+
# sys.path needs to be modified to enable package imports from parent
16+
# and sibling directories. Also see:
17+
# https://stackoverflow.com/questions/6323860/sibling-package-imports/23542795#23542795
18+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
19+
from bigquery_etl.parse_udf import (
20+
read_udf_dir,
21+
udf_usages_in_file,
22+
accumulate_dependencies,
23+
)
24+
25+
26+
# Matches a reference to a temporary UDF (``udf_<name>``) and captures
# ``<name>`` so the prefix can be swapped for a dataset qualifier.
# The class must be ``A-Z``: the ASCII range ``A-z`` additionally admits
# ``[``, ``\``, ``]``, ``^`` and the backtick, which would leak trailing
# punctuation (e.g. from ``udf_foo[0]``) into the captured name.
UDF_RE = re.compile(r"udf_([a-zA-Z0-9_]+)")
27+
28+
29+
# Command-line interface. The module docstring doubles as the --help
# description; both options have defaults, so the script runs without
# any arguments.
parser = ArgumentParser(description=__doc__)
parser.add_argument(
    "--dataset",
    default="udf",
    help="The name of the dataset the persistent UDFs will be stored in.",
)
parser.add_argument(
    "--udf-dir",
    default="udf/",
    help="The directory where declarations of temporary UDFs are stored.",
)
40+
41+
42+
def main():
    """Publish every UDF found in ``--udf-dir`` as a persistent UDF.

    For each UDF, the names returned by ``accumulate_dependencies`` are
    published first, then the UDF itself. Each name is published at most
    once per run, and one BigQuery client is shared by all publications.
    """
    args = parser.parse_args()
    client = bigquery.Client()

    raw_udfs = {x.name: x for x in read_udf_dir(args.udf_dir)}

    # Names already published in this run. Shared dependencies would
    # otherwise be re-created once for every UDF that uses them.
    published = set()

    for udf_name in raw_udfs:
        # NOTE(review): assumes accumulate_dependencies yields names in an
        # order where prerequisites come first -- confirm in parse_udf.
        for dep in accumulate_dependencies([], raw_udfs, udf_name):
            if dep not in published:
                publish_persistent_udf(raw_udfs[dep], args.dataset, client)
                published.add(dep)

        if udf_name not in published:
            publish_persistent_udf(raw_udfs[udf_name], args.dataset, client)
            published.add(udf_name)


# BigQuery accepts both TEMP and TEMPORARY, and keywords in any letter case.
_CREATE_TEMP_FUNCTION_RE = re.compile(
    r"CREATE\s+TEMP(?:ORARY)?\s+FUNCTION", re.IGNORECASE
)


def publish_persistent_udf(raw_udf, dataset, client=None):
    """Rewrite a temporary UDF as a persistent one and publish it.

    Every ``udf_<name>`` reference in each definition is replaced with
    ``<dataset>.<name>`` and the ``CREATE TEMP FUNCTION`` header is turned
    into ``CREATE OR REPLACE FUNCTION`` before the statement is executed.

    Args:
        raw_udf: object exposing ``definitions``, an iterable of SQL
            ``CREATE TEMP FUNCTION`` statements.
        dataset: name of the dataset that will hold the persistent UDFs.
        client: BigQuery client used to run the statements; a new
            ``bigquery.Client()`` is created when omitted.

    Raises:
        Whatever ``QueryJob.result()`` raises when a statement fails.
    """
    if client is None:
        client = bigquery.Client()

    for definition in raw_udf.definitions:
        # A callable replacement keeps the dataset name literal; a template
        # string would interpret any backslash it happened to contain.
        renamed = UDF_RE.sub(lambda m: dataset + "." + m.group(1), definition)
        query = _CREATE_TEMP_FUNCTION_RE.sub("CREATE OR REPLACE FUNCTION", renamed)

        # Block until the job finishes: query() only submits it, so without
        # result() failures go unnoticed and a dependent UDF could be
        # submitted before the function it references exists.
        client.query(query).result()
68+
69+
70+
# Run the publisher only when executed as a script, never on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)