Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read options from a manifest file in importcontent #9467

Merged
98 changes: 95 additions & 3 deletions kolibri/core/content/management/commands/importcontent.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import argparse
import concurrent.futures
import logging
import os
Expand All @@ -12,6 +13,7 @@
from kolibri.core.content.errors import InvalidStorageFilenameError
from kolibri.core.content.models import ChannelMetadata
from kolibri.core.content.models import ContentNode
from kolibri.core.content.utils.content_manifest import ContentManifest
from kolibri.core.content.utils.file_availability import LocationError
from kolibri.core.content.utils.import_export_content import compare_checksums
from kolibri.core.content.utils.import_export_content import get_import_export_data
Expand Down Expand Up @@ -61,6 +63,24 @@ def add_arguments(self, parser):
# command to be given, where we'll expect a channel.

# However, some optional arguments apply to both groups. Add them here!

manifest_help_text = """
Specify a path to a manifest file. Content specified in this manifest file will be imported.

e.g.

kolibri manage importcontent --manifest /path/to/KOLIBRI_DATA/content/manifest.json disk
"""
parser.add_argument(
"--manifest",
# Open the given path as a readable text file (argparse.FileType treats "-" as stdin)
dylanmccall marked this conversation as resolved.
Show resolved Hide resolved
type=argparse.FileType("r"),
default=None,
required=False,
dest="manifest",
help=manifest_help_text,
)

node_ids_help_text = """
Specify one or more node IDs to import. Only the files associated to those node IDs will be imported.

Expand All @@ -72,7 +92,7 @@ def add_arguments(self, parser):
"--node_ids",
"-n",
# Split the comma separated string we get, into a list of strings
type=lambda x: x.split(","),
type=lambda x: x.split(",") if x else [],
default=None,
required=False,
dest="node_ids",
Expand All @@ -89,7 +109,7 @@ def add_arguments(self, parser):
parser.add_argument(
"--exclude_node_ids",
# Split the comma-separated string we get into a list of strings
type=lambda x: x.split(","),
type=lambda x: x.split(",") if x else [],
default=None,
required=False,
dest="exclude_node_ids",
Expand Down Expand Up @@ -170,18 +190,25 @@ def add_arguments(self, parser):
name="disk", cmd=self, help="Copy the content from the given folder."
)
disk_subparser.add_argument("channel_id", type=str)
disk_subparser.add_argument("directory", type=str)
disk_subparser.add_argument("directory", type=str, nargs="?")
disk_subparser.add_argument("--drive_id", type=str, dest="drive_id", default="")
disk_subparser.add_argument(
"--content_dir",
type=str,
default=paths.get_content_dir_path(),
help="Copy the content to the given content dir.",
)
disk_subparser.add_argument(
"--no_detect_manifest",
dest="detect_manifest",
action="store_false",
default=True,
)

def download_content(
self,
channel_id,
manifest_file=None,
node_ids=None,
exclude_node_ids=None,
baseurl=None,
Expand All @@ -195,6 +222,7 @@ def download_content(
self._transfer(
DOWNLOAD_METHOD,
channel_id,
manifest_file=manifest_file,
node_ids=node_ids,
exclude_node_ids=exclude_node_ids,
baseurl=baseurl,
Expand All @@ -210,7 +238,9 @@ def copy_content(
self,
channel_id,
path,
manifest_file=None,
drive_id=None,
detect_manifest=True,
node_ids=None,
exclude_node_ids=None,
renderable_only=True,
Expand All @@ -222,6 +252,8 @@ def copy_content(
COPY_METHOD,
channel_id,
path=path,
manifest_file=manifest_file,
detect_manifest=detect_manifest,
drive_id=drive_id,
node_ids=node_ids,
exclude_node_ids=exclude_node_ids,
Expand All @@ -235,6 +267,8 @@ def _transfer( # noqa: max-complexity=16
self,
method,
channel_id,
manifest_file=None,
detect_manifest=None,
path=None,
drive_id=None,
node_ids=None,
Expand All @@ -247,6 +281,29 @@ def _transfer( # noqa: max-complexity=16
timeout=transfer.Transfer.DEFAULT_TIMEOUT,
content_dir=None,
):
if manifest_file and not path:
# If manifest_file is stdin, its name will be "<stdin>" and path
# will become "". This feels clumsy, but the resulting behaviour
# is reasonable.
manifest_dir = os.path.dirname(manifest_file.name)
path = os.path.dirname(manifest_dir)

content_manifest = ContentManifest()
use_content_manifest = False

if manifest_file:
content_manifest.read_file(manifest_file)
use_content_manifest = True
elif path and detect_manifest and not (node_ids or exclude_node_ids):
manifest_path = os.path.join(path, "content", "manifest.json")
if content_manifest.read(manifest_path):
use_content_manifest = True
logging.info("Using node_ids from {}".format(manifest_path))

if use_content_manifest:
node_ids = _node_ids_from_content_manifest(content_manifest, channel_id)
exclude_node_ids = None

try:
if not import_updates:
(
Expand Down Expand Up @@ -570,6 +627,11 @@ def _start_file_transfer(self, f, filetransfer):
return FILE_TRANSFERRED, data_transferred

def handle_async(self, *args, **options):
if options["manifest"] and (options["node_ids"] or options["exclude_node_ids"]):
raise CommandError(
"The --manifest option must not be combined with --node_ids or --exclude_node_ids."
)

try:
ChannelMetadata.objects.get(id=options["channel_id"])
except ValueError:
Expand All @@ -580,9 +642,11 @@ def handle_async(self, *args, **options):
raise CommandError(
"Must import a channel with importchannel before importing content."
)

if options["command"] == "network":
self.download_content(
options["channel_id"],
manifest_file=options["manifest"],
node_ids=options["node_ids"],
exclude_node_ids=options["exclude_node_ids"],
baseurl=options["baseurl"],
Expand All @@ -594,9 +658,16 @@ def handle_async(self, *args, **options):
content_dir=options["content_dir"],
)
elif options["command"] == "disk":
if not options["directory"] and not options["manifest"]:
raise CommandError(
"Either a directory or a manifest file must be provided."
)

self.copy_content(
options["channel_id"],
options["directory"],
manifest_file=options["manifest"],
detect_manifest=options["detect_manifest"],
drive_id=options["drive_id"],
node_ids=options["node_ids"],
exclude_node_ids=options["exclude_node_ids"],
Expand All @@ -612,3 +683,24 @@ def handle_async(self, *args, **options):
options["command"]
)
)


def _node_ids_from_content_manifest(content_manifest, channel_id):
    """Gather the include node IDs a content manifest holds for one channel.

    The installed channel is looked up by ``channel_id``. For every channel
    version the manifest reports for that channel, the manifest's include
    node IDs are appended to the result. A version that differs from the
    installed channel's version is only logged as a warning; its node IDs
    are still included.

    :param content_manifest: object providing ``get_channel_versions`` and
        ``get_include_node_ids`` (a ``ContentManifest`` at the call site).
    :param channel_id: ID of the channel whose manifest entries are read.
    :returns: a list of node IDs, concatenated across versions in the order
        the manifest yields them. Duplicates are not removed.
    """
    # Resolve the installed channel first; the lookup propagates whatever
    # ChannelMetadata.objects.get raises when the channel is missing.
    installed_version = ChannelMetadata.objects.get(id=channel_id).version

    collected = []
    for manifest_version in content_manifest.get_channel_versions(channel_id):
        if manifest_version != installed_version:
            # A mismatch is reported but does not exclude the entry.
            logger.warning(
                "Manifest entry for {channel_id} has a different version ({manifest_version}) than the installed channel ({local_version})".format(
                    channel_id=channel_id,
                    manifest_version=manifest_version,
                    local_version=installed_version,
                )
            )
        collected += content_manifest.get_include_node_ids(
            channel_id, manifest_version
        )

    return collected