Skip to content

quarto: adjust manifest construction #554

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
`starlette>=0.35.0`. When deploying to these servers, the starlette version
is now automatically set to `starlette<0.35.0`.

### Fixed

- Quarto content is marked as a "site" only when there are multiple input
files. (#552)

- Quarto content automatically ignores `name.html` and `name_files` when
`name.md`, `name.ipynb`, `name.Rmd`, or `name.qmd` is an input. (#553)

- Patterns provided to `--exclude` allow NT-style paths on Windows. (#320)

### Removed

- Python 3.7 support.
Expand Down
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,15 @@ The following shows an example of an extra file taking precedence:
rsconnect deploy dash --exclude "*.csv" dash-app/ important_data.csv
```

The "`**`" glob pattern will recursively match all files and directories,
while "`*`" only matches files. The "`**`" pattern is useful with complicated
project hierarchies where enumerating the _included_ files is simpler than
listing the _exclusions_.

```bash
rsconnect deploy quarto . _quarto.yml index.qmd requirements.txt --exclude "**"
```

Some directories are excluded by default, to prevent bundling and uploading files that are not needed or might interfere with the deployment process:

```
Expand Down
40 changes: 21 additions & 19 deletions rsconnect/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,10 @@ def __init__(
"version": quarto_inspection.get("quarto", {}).get("version", "99.9.9"),
"engines": quarto_inspection.get("engines", []),
}
project_config = quarto_inspection.get("config", {}).get("project", {})
render_targets = project_config.get("render", [])
if len(render_targets):
self.data["metadata"]["primary_rmd"] = render_targets[0]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we not need to set primary_rmd?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not used by Connect for Quarto content, today.

With R Markdown and Jupyter content, we tell the tool the specific file that we want to render, and the output file is written to the .index file.

This path only applies for projects, and we want to always rely on the Quarto discovery rules in that case. Only projects have the config.project.render structure in the output from quarto inspect.

For Quarto standalone documents, there is a stronger case for communicating the primary document/entrypoint. Connect currently transforms content into a project and then has Quarto use its default discovery rules. If the primary file is deployed along with secondary Markdown files, those would need to be named _filename.md to be ignored during rendering. Otherwise, Quarto could render both primary.qmd and do-not-render.qmd. They could also deploy a _quarto.yml file alongside the file, and deploy it as a simple "project". Both of those options feel OK, right now.

We may want to eventually annotate the manifest with metadata.entrypoint, but I'd like to see if we can avoid it.

project_type = project_config.get("type", None)
if project_type or len(render_targets) > 1:

files_data = quarto_inspection.get("files", {})
files_input_data = files_data.get("input", [])
if len(files_input_data) > 1:
self.data["metadata"]["content_category"] = "site"

if environment:
Expand Down Expand Up @@ -325,12 +323,10 @@ def make_source_manifest(
"version": quarto_inspection.get("quarto", {}).get("version", "99.9.9"),
"engines": quarto_inspection.get("engines", []),
}
project_config = quarto_inspection.get("config", {}).get("project", {})
render_targets = project_config.get("render", [])
if len(render_targets):
manifest["metadata"]["primary_rmd"] = render_targets[0]
project_type = project_config.get("type", None)
if project_type or len(render_targets) > 1:

files_data = quarto_inspection.get("files", {})
files_input_data = files_data.get("input", [])
if len(files_input_data) > 1:
manifest["metadata"]["content_category"] = "site"

if environment:
Expand Down Expand Up @@ -1303,13 +1299,19 @@ def make_quarto_manifest(
output_dir = project_config.get("output-dir", None)
if output_dir:
excludes = excludes + [output_dir]
else:
render_targets = project_config.get("render", [])
for target in render_targets:
t, _ = splitext(target)
# TODO: Single-file inspect would give inspect.formats.html.pandoc.output-file
# For foo.qmd, we would get an output-file=foo.html, but foo_files is not available.
excludes = excludes + [t + ".html", t + "_files"]

files_data = quarto_inspection.get("files", {})
files_input_data = files_data.get("input", [])
# files.input is a list of absolute paths to input (rendered)
# files. Automatically ignore the most common derived files for
# those inputs.
#
# These files are ignored even when the project has an output
# directory, as Quarto may create these files while a render is
# in-flight.
for each in files_input_data:
t, _ = splitext(os.path.relpath(each, file_or_directory))
excludes = excludes + [t + ".html", t + "_files/**/*"]

# relevant files don't need to include requirements.txt file because it is
# always added to the manifest (as a buffer) from the environment contents
Expand Down
10 changes: 7 additions & 3 deletions rsconnect/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Data models
"""

import os
import pathlib
import re

import fnmatch
Expand Down Expand Up @@ -163,6 +163,7 @@ class GlobMatcher(object):
"""

def __init__(self, pattern):
pattern = pathlib.PurePath(pattern).as_posix()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 for Pathlib!

if pattern.endswith("/**/*"):
# Note: the index used here makes sure the pattern has a trailing
# slash. We want that.
Expand All @@ -185,7 +186,8 @@ def _to_parts_list(pattern):
:return: a list of pattern pieces and the index of the special '**' pattern.
The index will be None if `**` is never found.
"""
parts = pattern.split(os.path.sep)
# Incoming pattern is ALWAYS a Posix-style path.
parts = pattern.split("/")
depth_wildcard_index = None
for index, name in enumerate(parts):
if name == "**":
Expand All @@ -197,10 +199,12 @@ def _to_parts_list(pattern):
return parts, depth_wildcard_index

def _match_with_starts_with(self, path):
path = pathlib.PurePath(path).as_posix()
return path.startswith(self._pattern)

def _match_with_list_parts(self, path):
parts = path.split(os.path.sep)
path = pathlib.PurePath(path).as_posix()
parts = path.split("/")

def items_match(i1, i2):
if i2 >= len(parts):
Expand Down
188 changes: 160 additions & 28 deletions tests/test_bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,23 @@
from .utils import get_dir, get_manifest_path


def create_fake_quarto_rendered_output(target_dir, name):
    """
    Create stand-ins for the files a render of `name` would leave behind.

    Writes `<name>.html` and a `<name>_files` directory holding a single
    `resource.js` beneath `target_dir`. Tests use these to confirm that
    derived output is kept out of a bundle.

    :param target_dir: an existing directory that receives the fake output.
    :param name: the input file stem (no extension) the output is derived from.
    """
    # Explicit encoding keeps the fixture identical regardless of the locale.
    with open(join(target_dir, f"{name}.html"), "w", encoding="utf-8") as fp:
        fp.write(f"<html><body>fake rendering: {name}</body></html>\n")

    files_dir = join(target_dir, f"{name}_files")
    # exist_ok lets the helper be called again for the same name without
    # raising FileExistsError.
    os.makedirs(files_dir, exist_ok=True)
    with open(join(files_dir, "resource.js"), "w", encoding="utf-8") as fp:
        fp.write("// fake resource.js\n")


class TestBundle(TestCase):
@staticmethod
def python_version():
return ".".join(map(str, sys.version_info[:3]))

def setUp(self):
    # Set once for every test in the class instead of per test. With
    # maxDiff set to None, unittest does not truncate the diff it prints
    # when an equality assertion fails.
    self.maxDiff = None

def test_to_bytes(self):
self.assertEqual(to_bytes(b"abc123"), b"abc123")
self.assertEqual(to_bytes(b"\xc3\xa5bc123"), b"\xc3\xa5bc123")
Expand All @@ -64,7 +76,6 @@ def test_to_bytes(self):
self.assertEqual(to_bytes("åbc123"), b"\xc3\xa5bc123")

def test_make_notebook_source_bundle1(self):
self.maxDiff = 5000
directory = get_dir("pip1")
nb_path = join(directory, "dummy.ipynb")

Expand Down Expand Up @@ -135,7 +146,6 @@ def test_make_notebook_source_bundle1(self):
)

def test_make_notebook_source_bundle2(self):
self.maxDiff = 5000
directory = get_dir("pip2")
nb_path = join(directory, "dummy.ipynb")

Expand Down Expand Up @@ -221,24 +231,27 @@ def test_make_notebook_source_bundle2(self):
},
)

def test_make_quarto_source_bundle_from_project(self):
def test_make_quarto_source_bundle_from_simple_project(self):
temp_proj = tempfile.mkdtemp()

# add project files
fp = open(join(temp_proj, "myquarto.qmd"), "w")
fp.write("---\n")
fp.write("title: myquarto\n")
fp.write("jupyter: python3\n")
fp.write("---\n\n")
fp.write("```{python}\n")
fp.write("1 + 1\n")
fp.write("```\n")
fp.close()

fp = open(join(temp_proj, "_quarto.yml"), "w")
fp.write("project:\n")
fp.write(' title: "myquarto"\n')
fp.write("editor: visual\n")
# This is a simple project; it has a _quarto.yml and one Markdown file.
with open(join(temp_proj, "_quarto.yml"), "w") as fp:
fp.write("project:\n")
fp.write(' title: "project with one rendered file"\n')

with open(join(temp_proj, "myquarto.qmd"), "w") as fp:
fp.write("---\n")
fp.write("title: myquarto\n")
fp.write("jupyter: python3\n")
fp.write("---\n\n")
fp.write("```{python}\n")
fp.write("1 + 1\n")
fp.write("```\n")

# Create some files that should not make it into the manifest; they
# should be automatically ignored because myquarto.qmd is a project
# input file.
create_fake_quarto_rendered_output(temp_proj, "myquarto")

environment = detect_environment(temp_proj)

Expand Down Expand Up @@ -299,6 +312,122 @@ def test_make_quarto_source_bundle_from_project(self):
},
)

def test_make_quarto_source_bundle_from_complex_project(self):
    """
    Bundle a multi-input Quarto website and check the bundle and manifest.

    The project has a `_quarto.yml` (type `website`) and two inputs,
    `index.qmd` and `about.qmd`. Fake rendered output is created next to the
    inputs and beneath `_site`; the test asserts that none of it is bundled
    and that the manifest is categorized as a "site".
    """
    import shutil

    temp_proj = tempfile.mkdtemp()
    # mkdtemp() never removes the directory it creates; clean up ourselves.
    self.addCleanup(shutil.rmtree, temp_proj, ignore_errors=True)

    # This is a complex project; it has a _quarto.yml and multiple
    # Markdown files.
    with open(join(temp_proj, "_quarto.yml"), "w") as fp:
        fp.write("project:\n")
        fp.write("  type: website\n")
        fp.write('  title: "myquarto"\n')

    with open(join(temp_proj, "index.qmd"), "w") as fp:
        fp.write("---\n")
        fp.write("title: home\n")
        fp.write("jupyter: python3\n")
        fp.write("---\n\n")
        fp.write("```{python}\n")
        fp.write("1 + 1\n")
        fp.write("```\n")

    with open(join(temp_proj, "about.qmd"), "w") as fp:
        fp.write("---\n")
        fp.write("title: about\n")
        fp.write("---\n\n")
        fp.write("math, math, math.\n")

    # Create some files that should not make it into the manifest; they
    # should be automatically ignored because index.qmd and about.qmd are
    # project input files.
    #
    # Create files both in the current directory and beneath _site (the
    # implicit output-dir for websites).
    create_fake_quarto_rendered_output(temp_proj, "index")
    create_fake_quarto_rendered_output(temp_proj, "about")
    site_dir = join(temp_proj, "_site")
    os.mkdir(site_dir)
    create_fake_quarto_rendered_output(site_dir, "index")
    create_fake_quarto_rendered_output(site_dir, "about")

    environment = detect_environment(temp_proj)

    # Mock the result of running `quarto inspect <project_dir>`.
    inspect = {
        "quarto": {"version": "1.3.433"},
        "dir": temp_proj,
        "engines": [
            "markdown",
            "jupyter",
        ],
        "config": {
            "project": {
                "type": "website",
                "output-dir": "_site",
            },
        },
        "files": {
            # Absolute paths, built with the platform's own separator.
            "input": [
                join(temp_proj, "index.qmd"),
                join(temp_proj, "about.qmd"),
            ],
            "resources": [],
            "config": [join(temp_proj, "_quarto.yml")],
            "configResources": [],
        },
    }

    with make_quarto_source_bundle(
        temp_proj, inspect, AppModes.STATIC_QUARTO, environment, [], [], None
    ) as bundle, tarfile.open(mode="r:gz", fileobj=bundle) as tar:
        names = sorted(tar.getnames())
        self.assertEqual(
            names,
            [
                "_quarto.yml",
                "about.qmd",
                "index.qmd",
                "manifest.json",
                "requirements.txt",
            ],
        )

        reqs = tar.extractfile("requirements.txt").read()
        self.assertIsNotNone(reqs)

        manifest = json.loads(tar.extractfile("manifest.json").read().decode("utf-8"))

        # noinspection SpellCheckingInspection
        self.assertEqual(
            manifest,
            {
                "version": 1,
                "locale": mock.ANY,
                "metadata": {
                    "appmode": "quarto-static",
                    "content_category": "site",
                },
                "python": {
                    "version": self.python_version(),
                    "package_manager": {
                        "name": "pip",
                        "package_file": "requirements.txt",
                        "version": mock.ANY,
                    },
                },
                "quarto": {
                    "engines": ["markdown", "jupyter"],
                    "version": mock.ANY,
                },
                "files": {
                    "_quarto.yml": {"checksum": mock.ANY},
                    "index.qmd": {"checksum": mock.ANY},
                    "about.qmd": {"checksum": mock.ANY},
                    "requirements.txt": {"checksum": mock.ANY},
                },
            },
        )

def test_make_quarto_source_bundle_from_project_with_requirements(self):
temp_proj = tempfile.mkdtemp()

Expand Down Expand Up @@ -385,14 +514,19 @@ def test_make_quarto_source_bundle_from_project_with_requirements(self):
def test_make_quarto_source_bundle_from_file(self):
temp_proj = tempfile.mkdtemp()

filename = join(temp_proj, "myquarto.qmd")
# add single qmd file with markdown engine
fp = open(join(temp_proj, "myquarto.qmd"), "w")
fp.write("---\n")
fp.write("title: myquarto\n")
fp.write("engine: markdown\n")
fp.write("---\n\n")
fp.write("### This is a test\n")
fp.close()
with open(filename, "w") as fp:
fp.write("---\n")
fp.write("title: myquarto\n")
fp.write("engine: markdown\n")
fp.write("---\n\n")
fp.write("### This is a test\n")

# Create some files that should not make it into the manifest; they
# should be automatically ignored because myquarto.qmd is the input
# file.
create_fake_quarto_rendered_output(temp_proj, "myquarto")

# mock the result of running `quarto inspect <qmd_file>`
inspect = {
Expand All @@ -401,7 +535,7 @@ def test_make_quarto_source_bundle_from_file(self):
}

with make_quarto_source_bundle(
temp_proj, inspect, AppModes.STATIC_QUARTO, None, [], [], None
filename, inspect, AppModes.STATIC_QUARTO, None, [], [], None
) as bundle, tarfile.open(mode="r:gz", fileobj=bundle) as tar:
names = sorted(tar.getnames())
self.assertEqual(
Expand Down Expand Up @@ -469,7 +603,6 @@ def test_html_bundle2(self):
self.do_test_html_bundle(get_dir("pip2"))

def do_test_html_bundle(self, directory):
self.maxDiff = 5000
nb_path = join(directory, "dummy.ipynb")

bundle = make_notebook_html_bundle(
Expand Down Expand Up @@ -521,7 +654,6 @@ def test_keep_manifest_specified_file(self):
self.assertFalse(keep_manifest_specified_file(".Rproj.user/bogus.file"))

def test_manifest_bundle(self):
self.maxDiff = 5000
# noinspection SpellCheckingInspection
manifest_path = join(dirname(__file__), "testdata", "R", "shinyapp", "manifest.json")

Expand Down
Loading