Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ingest): SageMaker jobs and models #2830

Merged
merged 71 commits into from
Jul 8, 2021
Merged
Changes from 1 commit
Commits
Show all changes
71 commits
Select commit Hold shift + click to select a range
5fbcd31
Add get_model utils
kevinhu Jun 30, 2021
0c5aee0
Add job listing commands
kevinhu Jun 30, 2021
459a5fc
Init sagemaker processors
kevinhu Jun 30, 2021
98ca20b
Add process_training_job
kevinhu Jun 30, 2021
df90714
Add tuning jobs
kevinhu Jun 30, 2021
6eea1b3
Create SageMakerJob intermediate
kevinhu Jun 30, 2021
bf49145
Reorganize URN generators
kevinhu Jun 30, 2021
8763515
Construct arn-name translator
kevinhu Jul 1, 2021
233c00f
Refactor SageMaker job processors into class
kevinhu Jul 1, 2021
a089b2d
Merge branch 'master' of github.com:kevinhu/datahub into sagemaker-mo…
kevinhu Jul 1, 2021
4cb0e5a
Switch to tuple-indexed ARNs
kevinhu Jul 1, 2021
02d3437
Add input/outputs for process_transform_job
kevinhu Jul 1, 2021
6807382
Comment out unsupported aspects
kevinhu Jul 1, 2021
c282a6b
process_labeling_job datasets
kevinhu Jul 1, 2021
b05e66b
process_training_job datasets
kevinhu Jul 1, 2021
a51b79f
Init status enums
kevinhu Jul 1, 2021
386f46d
Merge branch 'master' of github.com:kevinhu/datahub into sagemaker-mo…
kevinhu Jul 1, 2021
d003991
Update models
kevinhu Jul 1, 2021
925b72f
Add unknown status enum
kevinhu Jul 1, 2021
854bf4b
Revise source report
kevinhu Jul 1, 2021
82d4425
Init job stubs
kevinhu Jul 2, 2021
b99025f
Init model stubs
kevinhu Jul 2, 2021
abaab8b
Add list-models stub
kevinhu Jul 2, 2021
5abd576
Set names and ARNs for job stubs
kevinhu Jul 2, 2021
77e9b9e
Add list-jobs stubs
kevinhu Jul 2, 2021
67442aa
Refactor stubbed job names and arns
kevinhu Jul 2, 2021
ad02711
Refactor job types
kevinhu Jul 2, 2021
8cd750e
Refactor job MCE constructor
kevinhu Jul 2, 2021
2cd0b85
Add feast and sagemaker dataplatforms
kevinhu Jul 2, 2021
77209d8
Furnish S3 paths in stubs
kevinhu Jul 2, 2021
bf63192
Add stubber responses
kevinhu Jul 2, 2021
856b251
Refactor and fix stub validation errors
kevinhu Jul 2, 2021
f570003
Setup model stubs
kevinhu Jul 2, 2021
a59ab89
Refactor feature group yielding
kevinhu Jul 2, 2021
2432357
Set up model ingestion
kevinhu Jul 2, 2021
1502e01
Parse in model creation times
kevinhu Jul 2, 2021
996c818
Merge branch 'master' of github.com:kevinhu/datahub into sagemaker-mo…
kevinhu Jul 2, 2021
bfb46eb
Regenerate snapshots
kevinhu Jul 2, 2021
5449c7e
Move custom properties
kevinhu Jul 2, 2021
44b18fc
Generate model custom properties
kevinhu Jul 2, 2021
0fc259d
Fix job stubbing order
kevinhu Jul 2, 2021
f72d776
Working jobs ingestion
kevinhu Jul 2, 2021
7f30e86
Add custom properties
kevinhu Jul 2, 2021
3559015
Switch to sets for i/o jobs
kevinhu Jul 2, 2021
d204b76
Ingest input jobs
kevinhu Jul 2, 2021
ef8568b
Add job filtering options
kevinhu Jul 3, 2021
0ed1d73
Fix jobs filter and sort datasets
kevinhu Jul 3, 2021
e4797d0
Ingest datasets
kevinhu Jul 3, 2021
884631c
Ingest custom dataset properties
kevinhu Jul 3, 2021
d193e67
Typo fixes
kevinhu Jul 3, 2021
75f174a
Refactor reports
kevinhu Jul 6, 2021
8def468
Refactor out s3 URN constructor
kevinhu Jul 6, 2021
cba59c2
Merge branch 'master' of github.com:kevinhu/datahub into sagemaker-mo…
kevinhu Jul 6, 2021
27e921a
Remove unused
kevinhu Jul 6, 2021
ddb696a
Add env to models
kevinhu Jul 6, 2021
23c7541
Add umbrella flow and fix job envs
kevinhu Jul 6, 2021
bc80bff
Fix edge packaging stub
kevinhu Jul 6, 2021
24ab430
Set model sort order
kevinhu Jul 6, 2021
571e2b6
Comments for jobs
kevinhu Jul 6, 2021
e10592d
Fix time zones in stubs
kevinhu Jul 6, 2021
3584cfc
Create dataflow for each job
kevinhu Jul 6, 2021
4c44910
Set flows and migrate from azkaban enum
kevinhu Jul 7, 2021
8018022
Update rest sink test
kevinhu Jul 7, 2021
6675dd5
Set browse paths
kevinhu Jul 7, 2021
8e8eecd
Revert file to rest recipe
kevinhu Jul 7, 2021
d3b2e17
Merge branch 'master' of github.com:kevinhu/datahub into sagemaker-mo…
kevinhu Jul 7, 2021
5331ba0
Browse paths for feature tables
kevinhu Jul 8, 2021
83fb6fd
Refactor make_s3_urn to aws_common
kevinhu Jul 8, 2021
76c7c7e
Add comment for deprecated azkaban types
kevinhu Jul 8, 2021
df56a61
Resolve merge conflict
kevinhu Jul 8, 2021
f98553b
Update schema_classes
kevinhu Jul 8, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Set names and ARNs for job stubs
  • Loading branch information
kevinhu committed Jul 2, 2021
commit 5abd576af27769f7e34d4d2794c23aec5dbce517
33 changes: 16 additions & 17 deletions metadata-ingestion/tests/unit/test_sagemaker_source_stubs.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@
}

describe_auto_ml_job_response = {
"AutoMLJobName": "string",
"AutoMLJobArn": "string",
"AutoMLJobName": "an-auto-ml-job",
"AutoMLJobArn": "arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job",
"InputDataConfig": [
{
"DataSource": {
Expand Down Expand Up @@ -226,8 +226,8 @@
}

describe_compilation_job_response = {
"CompilationJobName": "string",
"CompilationJobArn": "string",
"CompilationJobName": "a-compilation-job",
"CompilationJobArn": "arn:aws:sagemaker:us-west-2:123412341234:compilation-job/a-compilation-job",
"CompilationJobStatus": "INPROGRESS", # 'INPROGRESS'|'COMPLETED'|'FAILED'|'STARTING'|'STOPPING'|'STOPPED'
"CompilationStartTime": datetime(2015, 1, 1),
"CompilationEndTime": datetime(2015, 1, 1),
Expand Down Expand Up @@ -267,8 +267,8 @@
}

describe_edge_packaging_job_response = {
"EdgePackagingJobArn": "string",
"EdgePackagingJobName": "string",
"EdgePackagingJobArn": "an-edge-packaging-job",
"EdgePackagingJobName": "arn:aws:sagemaker:us-west-2:123412341234:edge-packaging-job/an-edge-packaging-job",
"CompilationJobName": "string",
"ModelName": "string",
"ModelVersion": "string",
Expand All @@ -294,10 +294,9 @@
},
}


describe_hyper_parameter_tuning_job_response = {
"HyperParameterTuningJobName": "string",
"HyperParameterTuningJobArn": "string",
"HyperParameterTuningJobName": "a-hyper-parameter-tuning-job",
"HyperParameterTuningJobArn": "arn:aws:sagemaker:us-west-2:123412341234:hyper-parameter-tuning-job/a-hyper-parameter-tuning-job",
"HyperParameterTuningJobConfig": {
"Strategy": "Bayesian", # 'Bayesian'|'Random'
"HyperParameterTuningJobObjective": {
Expand Down Expand Up @@ -589,8 +588,8 @@
"CreationTime": datetime(2015, 1, 1),
"LastModifiedTime": datetime(2015, 1, 1),
"JobReferenceCode": "string",
"LabelingJobName": "string",
"LabelingJobArn": "string",
"LabelingJobName": "a-labeling-job",
"LabelingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:labeling-job/a-labeling-job",
"LabelAttributeName": "string",
"InputConfig": {
"DataSource": {
Expand Down Expand Up @@ -703,7 +702,7 @@
],
"KmsKeyId": "string",
},
"ProcessingJobName": "string",
"ProcessingJobName": "a-processing-job",
"ProcessingResources": {
"ClusterConfig": {
"InstanceCount": 123,
Expand Down Expand Up @@ -741,7 +740,7 @@
"TrialName": "string",
"TrialComponentDisplayName": "string",
},
"ProcessingJobArn": "string",
"ProcessingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:processing-job/a-processing-job",
"ProcessingJobStatus": "InProgress", # 'InProgress'|'Completed'|'Failed'|'Stopping'|'Stopped'
"ExitMessage": "string",
"FailureReason": "string",
Expand All @@ -755,8 +754,8 @@
}

describe_training_job_response = {
"TrainingJobName": "string",
"TrainingJobArn": "string",
"TrainingJobName": "a-training-job",
"TrainingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job",
"TuningJobArn": "string",
"LabelingJobArn": "string",
"AutoMLJobArn": "string",
Expand Down Expand Up @@ -903,8 +902,8 @@
}

describe_transform_job_response = {
"TransformJobName": "string",
"TransformJobArn": "string",
"TransformJobName": "a-transform-job",
"TransformJobArn": "arn:aws:sagemaker:us-west-2:123412341234:transform-job/a-transform-job",
"TransformJobStatus": "InProgress",
# 'InProgress' |'Completed'|'Failed'|'Stopping'|'Stopped'
"FailureReason": "string",
Expand Down