From e9574ff74d9388e69a931afb654a2bb663d9431b Mon Sep 17 00:00:00 2001 From: Tom Gillam Date: Fri, 29 Dec 2023 15:14:50 +0000 Subject: [PATCH] Artifact rework (#47) * Not building artifacts centrally any more * Switch to defining new artifacts at runtime * Bump version * Add compat * Determine releases from GitHub release API. * Remove heavy dep * Add test for _get_artifact_path --- .gitignore | 1 + Artifacts.toml | 47 ----------------- Project.toml | 10 ++-- artifact_build/Manifest.toml | 10 ---- artifact_build/Project.toml | 2 - artifact_build/README.md | 5 -- artifact_build/create.jl | 49 ------------------ src/TimeZoneFinder.jl | 98 +++++++++++++++++++++++++++++++----- test/runtests.jl | 19 ++++++- 9 files changed, 108 insertions(+), 133 deletions(-) delete mode 100644 Artifacts.toml delete mode 100644 artifact_build/Manifest.toml delete mode 100644 artifact_build/Project.toml delete mode 100644 artifact_build/README.md delete mode 100644 artifact_build/create.jl diff --git a/.gitignore b/.gitignore index e02b3c5..d84d15c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ /Manifest.toml /docs/Manifest.toml /docs/build/ +/Artifacts.toml diff --git a/Artifacts.toml b/Artifacts.toml deleted file mode 100644 index 36ae971..0000000 --- a/Artifacts.toml +++ /dev/null @@ -1,47 +0,0 @@ -[timezone-boundary-builder-2021c] -git-tree-sha1 = "6418fde407d1c917005a96b9615ba4758ef170cb" -lazy = true - - [[timezone-boundary-builder-2021c.download]] - sha256 = "297ff27da3e50b1f2b1bcb75009ed1d8aac616fec56ff1ac081021b610017d0d" - url = "https://github.com/tpgillam/TimeZoneFinder.jl/releases/download/timezone-boundary-builder-2021c/timezone-boundary-builder-2021c.tar.gz" - -[timezone-boundary-builder-2022b] -git-tree-sha1 = "fe7eee9ec0f981eb34fd268e17150273d1442dac" -lazy = true - - [[timezone-boundary-builder-2022b.download]] - sha256 = "7abdf814632a20399bcc688de3e69a38e0c9f6d3b08d67f5584caa27cf5cb86b" - url = 
"https://github.com/tpgillam/TimeZoneFinder.jl/releases/download/timezone-boundary-builder-2022b/timezone-boundary-builder-2022b.tar.gz" - -[timezone-boundary-builder-2022d] -git-tree-sha1 = "928337e2a168ac0efbde0f95e414dc991c741e95" -lazy = true - - [[timezone-boundary-builder-2022d.download]] - sha256 = "2d4affa97700cbeae9d6166a225ec899dc4896c1ff32fe35a916cecf7dfcda08" - url = "https://github.com/tpgillam/TimeZoneFinder.jl/releases/download/timezone-boundary-builder-2022d/timezone-boundary-builder-2022d.tar.gz" - -[timezone-boundary-builder-2022f] -git-tree-sha1 = "651b8f30bc4a601937df84d2d9c8f1029dcd47e6" -lazy = true - - [[timezone-boundary-builder-2022f.download]] - sha256 = "50a41b7f574d12429e5591a3583777a38434395af1e9dd6ee28bef019a841fff" - url = "https://github.com/tpgillam/TimeZoneFinder.jl/releases/download/timezone-boundary-builder-2022f/timezone-boundary-builder-2022f.tar.gz" - -[timezone-boundary-builder-2022g] -git-tree-sha1 = "b396105d1c4b1be5d3d5f9d1f6cb7cd13cd4e4c7" -lazy = true - - [[timezone-boundary-builder-2022g.download]] - sha256 = "c9f558d204071aa28202a3c9a613e0b20e09a951ce879825030e892bcc2c37b7" - url = "https://github.com/tpgillam/TimeZoneFinder.jl/releases/download/timezone-boundary-builder-2022g/timezone-boundary-builder-2022g.tar.gz" - -[timezone-boundary-builder-2023b] -git-tree-sha1 = "062c3da562b49ed8cebb528ed29582f3f52aa17f" -lazy = false - - [[timezone-boundary-builder-2023b.download]] - sha256 = "0883cafe32408169d29a3fe3c57da3e185b40e1b20e783fc7924e249f481d9cf" - url = "https://github.com/tpgillam/TimeZoneFinder.jl/releases/download/timezone-boundary-builder-2023b/timezone-boundary-builder-2023b.tar.gz" diff --git a/Project.toml b/Project.toml index 03353c3..e088c67 100644 --- a/Project.toml +++ b/Project.toml @@ -1,27 +1,27 @@ name = "TimeZoneFinder" uuid = "3ccf6684-3f25-4581-8c58-114637dcab4a" authors = ["Tom Gillam "] -version = "0.5.2" +version = "0.6.0" [deps] +Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" JSON3 = 
"0f8b85d8-7281-11e9-16c2-39a750bddbf1" -LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" Memoize = "c03570c3-d221-55d1-a50c-7939bbd78826" Meshes = "eacbb407-ea5a-433e-ab97-5258b1ca43fa" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Scratch = "6c6a2e73-6563-6170-7368-637461726353" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53" +ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c" [compat] +Downloads = "1" JSON3 = "1" -LazyArtifacts = "1" Memoize = "0.4" Meshes = "0.32,0.33,0.34,0.35,0.36, 0.37, 0.38, 0.39" -PrecompileTools = "1" Scratch = "1" TimeZones = "1.10" +ZipArchives = "1" julia = "1.6" [extras] diff --git a/artifact_build/Manifest.toml b/artifact_build/Manifest.toml deleted file mode 100644 index a3adda5..0000000 --- a/artifact_build/Manifest.toml +++ /dev/null @@ -1,10 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -julia_version = "1.8.0" -manifest_format = "2.0" -project_hash = "51bf0e60bc4d1e97a273a20c75a70a9d30d38d7e" - -[[deps.Inflate]] -git-tree-sha1 = "5cd07aab533df5170988219191dfad0519391428" -uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" -version = "0.1.3" diff --git a/artifact_build/Project.toml b/artifact_build/Project.toml deleted file mode 100644 index d24b32d..0000000 --- a/artifact_build/Project.toml +++ /dev/null @@ -1,2 +0,0 @@ -[deps] -Inflate = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" diff --git a/artifact_build/README.md b/artifact_build/README.md deleted file mode 100644 index d8eb22a..0000000 --- a/artifact_build/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Artifact creation - -This environment is used for building the artifacts that are then added to the main package. - -Only package maintainers should need to look at this. 
diff --git a/artifact_build/create.jl b/artifact_build/create.jl deleted file mode 100644 index 38a3e8f..0000000 --- a/artifact_build/create.jl +++ /dev/null @@ -1,49 +0,0 @@ -using Downloads: download -using Inflate -using Pkg.Artifacts -using SHA -using Tar - -# This script assumes that the `unzip` command is available on the command line. - -release = "2023b" - -# This is the name of the artifact that we're creating. -artifact_name = "timezone-boundary-builder-$release" - -url = "https://github.com/evansiroky/timezone-boundary-builder/releases/download/$release/timezones-with-oceans.geojson.zip" - -working_dir = mktempdir() -zip_path = joinpath(working_dir, basename(url)) - -download(url, zip_path) -run(`unzip $zip_path -d $working_dir`) -rm(zip_path) - -# This creates an artifact directory: `.julia/artifacts//` -hash = create_artifact() do artifact_dir - rm(artifact_dir) - cp(working_dir, artifact_dir) -end - -# Archive artifact to a tarball, which will get copied into the current directory.. -tarball_name = "$(artifact_name).tar.gz" -tarball_path = joinpath(@__DIR__, tarball_name) -tarball_hash = archive_artifact(hash, tarball_path) - -tarball_url = "https://github.com/tpgillam/TimeZoneFinder.jl/releases/download/$(artifact_name)/$tarball_name" -@info("Please release $tarball_path on github as $tarball_url") -@warn( - "If the tarball ends up at a path other than $tarball_url, " * - "Artifacts.toml should be edited accordingly." -) - -# Bind artifact to an Artifacts.toml file in the package directory. 
-bind_artifact!( - joinpath(@__DIR__, "../Artifacts.toml"), - artifact_name, - hash; - download_info=[(tarball_url, tarball_hash)], - lazy=true, - force=true, -) diff --git a/src/TimeZoneFinder.jl b/src/TimeZoneFinder.jl index fa27384..6e51455 100644 --- a/src/TimeZoneFinder.jl +++ b/src/TimeZoneFinder.jl @@ -2,15 +2,16 @@ module TimeZoneFinder export timezone_at, timezones_at +using Downloads: download using JSON3 -using LazyArtifacts using Memoize using Meshes +using Pkg.Artifacts using Pkg.TOML -using PrecompileTools using Scratch using Serialization using TimeZones +using ZipArchives: ZipBufferReader, zip_names, zip_openentry """Get points that form a closed loop. @@ -72,12 +73,60 @@ function Base.in(point::Point, bpa::BoundedPolyArea) return in(point, bpa.polyarea) end +""" + _get_artifact_path(version) -> String + +Get the path to the artifact for `version`, e.g. "2023b". + +This will download the necessary data if it doesn't already exist. +""" +function _get_artifact_path(version::AbstractString) + artifacts_toml = joinpath(dirname(@__DIR__), "Artifacts.toml") + artifact_name = "timezone-boundary-builder-$version" + hash = artifact_hash(artifact_name, artifacts_toml) + + if !isnothing(hash) && artifact_exists(hash) + # The artifact is known, and exists on-disk, we can use it. + return artifact_path(hash) + end + + # We need to download and extract the dataset. + # We aren't going to keep the zip archive around, so download to memory only, then + # decompress + hash = create_artifact() do artifact_dir + url = ( + "https://github.com/evansiroky/timezone-boundary-builder/releases/download/" * + "$version/timezones-with-oceans.geojson.zip" + ) + reader = ZipBufferReader(take!(download(url, IOBuffer()))) + # We expect this archive to contain a single file, which we will + # extract into `artifact_dir`. + filename = only(zip_names(reader)) + # We use `basename` here, since sometimes the archive includes an additional + # level of indirection. e.g. 
2018d contains: + # dist/combined-with-oceans.json + # whereas more recent releases contain: + # combined-with-oceans.json + output_path = joinpath(artifact_dir, basename(filename)) + zip_openentry(reader, filename) do io + open(output_path, "w") do f + write(f, read(io)) + end + end + end + + # We are happy to overwrite any existing mapping; this means that we set + # `force` to be true. (Otherwise we would fail here if e.g. the artifacts + # directory had been emptied). + bind_artifact!(artifacts_toml, artifact_name, hash; force=true) + return artifact_path(hash) +end + """ Generate the timezone map data from the artifact identified by `version`. """ function generate_data(version::AbstractString) - artifact_name = "timezone-boundary-builder-$version" - dir = LazyArtifacts.@artifact_str(artifact_name) + dir = _get_artifact_path(version) obj = open(JSON3.read, joinpath(dir, "combined-with-oceans.json")) # Vectors that will be populated in the loop below. @@ -147,16 +196,42 @@ Julia process. end end +function _read_gh_api_paginated(url::AbstractString, per_page::Int64, page::Int64) + return JSON3.read( + take!(download("$(url)?per_page=$(per_page)&page=$(page)", IOBuffer())) + ) +end + +function _read_gh_api_paginated(url::AbstractString) + responses = [] + # TODO: This is the maximum per-page limit, at least for the "releases" command + per_page = 100 + page = 1 + while isempty(responses) || length(responses[end]) > 0 + response = _read_gh_api_paginated(url, per_page, page) + push!(responses, response) + page += 1 + end + return reduce(vcat, responses) +end + """ _get_boundary_builder_versions() -Get a list of versions for we have boundary data. Will be e.g. `["2022a", "2023b"]`. +Get a list of versions for which we have boundary data. -The list will be sorted in order of increasing versions. +Will be e.g. `["2022a", "2023b"]`. The list will be sorted in order of increasing versions. 
""" -function _get_boundary_builder_versions() - toml = TOML.parsefile(find_artifacts_toml(@__FILE__)) - return sort!([last(split(name, "-")) for name in keys(toml)]) +@memoize function _get_boundary_builder_versions() + # TODO: There are some older versions than 2018d (back to 2016d), but these provide a differently named + # zip file. We could aim to support these if there is demand. + + # NOTE: we are doing this manually to avoid a moderately heavy dependency on GitHub.jl + release_data = _read_gh_api_paginated( + "https://api.github.com/repos/evansiroky/timezone-boundary-builder/releases" + ) + all_tags = [x[:tag_name] for x in release_data] + return sort(filter(tag -> tag >= "2018d", all_tags)) end """ @@ -170,7 +245,7 @@ the `TimeZones` package. The map from tzdata version -> boundary version is memo This is determined by the rules in the "note" in the docstring for [`timezone_at`](@ref). """ -@memoize function _timezone_boundary_builder_version(tzdata_version::AbstractString) +function _timezone_boundary_builder_version(tzdata_version::AbstractString) boundary_builder_versions = _get_boundary_builder_versions() i = searchsortedlast(boundary_builder_versions, tzdata_version) @@ -254,7 +329,4 @@ function timezone_at(latitude::Real, longitude::Real) return only(tzs) end -# Precompile the primary API. -@compile_workload timezone_at(1.0, 1.0) - end diff --git a/test/runtests.jl b/test/runtests.jl index 8f6db3b..3bb524d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,8 @@ using Memoize using Test using TimeZoneFinder -using TimeZoneFinder: _timezone_boundary_builder_version +using TimeZoneFinder: + _get_artifact_path, _get_boundary_builder_versions, _timezone_boundary_builder_version using TimeZones """ @@ -197,7 +198,7 @@ const TEST_LOCATIONS = @testset "old tzdata versions" begin # Run for several tzdata versions that we should be able to support. 
- for version in ["2021c", "2022d", "2022f"] + for version in ["2018d", "2021c", "2022d", "2022f"] tzdata_context(version) do @test timezone_at(52.5061, 13.358) == TimeZone("Europe/Berlin") end @@ -211,4 +212,18 @@ const TEST_LOCATIONS = @test timezone_at(50.438114, 30.5179595) == TimeZone("Europe/Kyiv") end end + + @testset "_get_artifact_path" begin + dir = _get_artifact_path("2023b") + @test isfile(joinpath(dir, "combined-with-oceans.json")) + dir2 = _get_artifact_path("2023b") + @test dir == dir2 + end + + @testset "_get_boundary_builder_versions" begin + versions = _get_boundary_builder_versions() + @test sort(versions) == versions + @test versions[1] == "2018d" + @test length(versions) >= 10 + end end