forked from nv-morpheus/Morpheus
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Limit which lfs assets are pulled by default (nv-morpheus#139)
* Adds helper script `scripts/fetch_data.py` to pull additional assets. * Update `README.md` & `CONTRIBUTING.md` docs to reflect changes * Updated the hammah unittest to no longer depend on training data. * ~~Pinned Neo dep to 22.04.00a to work-around incompatible neo version~~ ``` $ scripts/fetch_data.py -h usage: Fetches data not included in the repository by default [-h] {fetch,check} ... optional arguments: -h, --help show this help message and exit Subcommands: valid subcommands {fetch,check} fetch Fetch datasets check Check download status of large files. Displays a True/False whether all files are downloaded. ``` Closes nv-morpheus#88 Authors: - David Gardner (https://github.com/dagardner-nv) - Michael Demoret (https://github.com/mdemoret-nv) Approvers: - Bartley Richardson (https://github.com/BartleyR) - Michael Demoret (https://github.com/mdemoret-nv) URL: nv-morpheus#139
- Loading branch information
1 parent
55edc6a
commit 1a1be1a
Showing
8 changed files
with
179 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[lfs] | ||
fetchinclude = morpheus/data/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
#!/usr/bin/env python3 | ||
# SPDX-FileCopyrightText: Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import argparse | ||
import logging | ||
import os | ||
import subprocess | ||
import sys | ||
import time | ||
|
||
LFS_DATASETS = { | ||
'all': '**', | ||
'examples': 'examples/**', | ||
'models': 'models/**', | ||
'tests': 'tests/**', | ||
'validation': 'models/datasets/validation-data/**' | ||
} | ||
|
||
|
||
def lfsPull(include_paths, poll_interval=0.1): | ||
""" | ||
Performs a git lfs pull. | ||
""" | ||
cmd = 'git lfs pull -I "{}"'.format(','.join(include_paths)) | ||
env = os.environ.copy() | ||
|
||
# Instruct git lfs to not supress progress output. Fetching the models can | ||
# take over a minute to complete, so we want our users to receive feedback. | ||
env['GIT_LFS_FORCE_PROGRESS'] = '1' | ||
popen = subprocess.Popen(cmd, | ||
env=env, | ||
shell=True, | ||
universal_newlines=True, | ||
stderr=subprocess.STDOUT, | ||
stdout=subprocess.PIPE) | ||
|
||
outpipe = popen.stdout | ||
returncode = None | ||
all_out = [] | ||
while returncode is None: | ||
time.sleep(poll_interval) | ||
out = outpipe.readline() | ||
if out.rstrip() != '': | ||
logging.info(out.rstrip()) | ||
all_out.append(out) | ||
|
||
returncode = popen.poll() | ||
|
||
# Check if we have any additional output written to the pipe before our last poll | ||
out = outpipe.read() | ||
if out != '': | ||
all_out.append(out) | ||
|
||
output = ''.join(all_out).rstrip("\n") | ||
if returncode != 0: | ||
logging.error(output) | ||
raise subprocess.CalledProcessError(returncode=returncode, cmd=cmd, output=output) | ||
|
||
return output | ||
|
||
|
||
def lfsCheck(list_all=False): | ||
output = subprocess.check_output('git lfs ls-files', shell=True, universal_newlines=True) | ||
output_lines = output.splitlines() | ||
|
||
# Output lines are in the format of: | ||
# <oid> [-|*] <file name> | ||
# where '-' indicates a file pointer and '*' indicates a downloaded file | ||
# https://github.com/git-lfs/git-lfs/blob/main/docs/man/git-lfs-ls-files.1.ronn | ||
missing_files = [] | ||
for file_status in output_lines: | ||
parts = file_status.split() | ||
downloaded = parts[1] == '*' | ||
filename = ' '.join(parts[2:]) | ||
|
||
if not downloaded: | ||
missing_files.append(filename) | ||
|
||
if list_all: | ||
# the join on 2: is needed to handle file names that contain a blank space | ||
logging.info('%s - %s', filename, downloaded) | ||
|
||
if not list_all: | ||
if len(missing_files): | ||
logging.error("Missing the following LFS files:\n%s", "\n".join(missing_files)) | ||
else: | ||
logging.info("All LFS files downloaded") | ||
|
||
if len(missing_files): | ||
sys.exit(1) | ||
|
||
|
||
def parse_args(): | ||
argparser = argparse.ArgumentParser("Fetches data not included in the repository by default") | ||
subparsers = argparser.add_subparsers(title='Subcommands', | ||
description='valid subcommands', | ||
required=True, | ||
dest='subcommand') | ||
|
||
fetch_parser = subparsers.add_parser('fetch', help='Fetch datasets') | ||
fetch_parser.add_argument("data_set", nargs='*', choices=list(LFS_DATASETS.keys()), help="Data set to fetch") | ||
|
||
check_parser = subparsers.add_parser('check', | ||
help=('Check download status of large files. Exits with a status of 0 if all ' | ||
'large files have been downloaded, 1 otherwise.')) | ||
check_parser.add_argument("-l", | ||
"--list", | ||
action="store_true", | ||
default=False, | ||
dest='list_all', | ||
help="List all missing files") | ||
|
||
args = argparser.parse_args() | ||
return args | ||
|
||
|
||
def main(): | ||
args = parse_args() | ||
logging.basicConfig(level=logging.INFO, format="%(message)s") | ||
|
||
if args.subcommand == 'fetch': | ||
include_paths = [LFS_DATASETS[p] for p in args.data_set] | ||
lfsPull(include_paths) | ||
else: | ||
lfsCheck(list_all=args.list_all) | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters