feat(backup): Add restore indices and restore backup tasks #2779

Merged
2 changes: 2 additions & 0 deletions build.gradle
@@ -62,6 +62,7 @@ project.ext.externalDependency = [
    'guice': 'com.google.inject:guice:4.2.2',
    'guava': 'com.google.guava:guava:27.0.1-jre',
    'h2': 'com.h2database:h2:1.4.196',
    'hadoopClient': 'org.apache.hadoop:hadoop-client:3.1.0',
Collaborator: Just to confirm: have we run any security scans since adding these dependencies?

Contributor (author): Good point.

Contributor (author): Looks good.

    'hibernateCore': 'org.hibernate:hibernate-core:5.2.16.Final',
    'httpClient': 'org.apache.httpcomponents:httpclient:4.5.9',
    'iStackCommons': 'com.sun.istack:istack-commons-runtime:4.0.1',
@@ -87,6 +88,7 @@ project.ext.externalDependency = [
    'neo4jHarness': 'org.neo4j.test:neo4j-harness:3.4.11',
    'neo4jJavaDriver': 'org.neo4j.driver:neo4j-java-driver:4.0.1',
    'parseqTest': 'com.linkedin.parseq:parseq:3.0.7:test',
    'parquet': 'org.apache.parquet:parquet-avro:1.12.0',
    'picocli': 'info.picocli:picocli:4.5.0',
    'playCache': 'com.typesafe.play:play-cache_2.11:2.6.18',
    'playDocs': 'com.typesafe.play:play-docs_2.11:2.6.18',
@@ -85,6 +85,9 @@ spec:
name: "{{ .password.secretRef }}"
key: "{{ .password.secretKey }}"
{{- end }}
- name: GRAPH_SERVICE_IMPL
value: {{ .Values.global.graph_service_impl }}
{{- if eq .Values.global.graph_service_impl "neo4j" }}
- name: NEO4J_HOST
value: "{{ .Values.global.neo4j.host }}"
- name: NEO4J_URI
@@ -96,6 +99,7 @@
                    secretKeyRef:
                      name: "{{ .Values.global.neo4j.password.secretRef }}"
                      key: "{{ .Values.global.neo4j.password.secretKey }}"
                {{- end }}
                {{- with .Values.datahubUpgrade.extraEnvs }}
                {{- toYaml . | nindent 16 }}
                {{- end }}
@@ -0,0 +1,119 @@
{{- if .Values.datahubUpgrade.enabled -}}
Collaborator: Is this correct? Why does this depend on the datahubUpgrade flag?

Contributor (author): All k8s files under datahub-upgrade are enabled by the same flag.

Contributor (author): Enabling it doesn't do much until you actually run the job, since these cron jobs are suspended by default.

# Job template for restoring indices by sending MAE corresponding to all entities in the local db.
# Creates a suspended cronJob that you can use to create an ad hoc job when you are ready to run the restore.
# Run the following command to do so:
# kubectl create job --from=cronjob/<<release-name>>-datahub-restore-indices-job-template datahub-restore-indices-job
apiVersion: batch/v1beta1
kind: CronJob
metadata:
  name: {{ .Release.Name }}-datahub-restore-indices-job-template
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    app.kubernetes.io/version: {{ .Chart.AppVersion }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
spec:
  schedule: "* * * * *"
  suspend: true
  jobTemplate:
    spec:
      template:
        spec:
          {{- with .Values.global.hostAliases }}
          hostAliases:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.datahubUpgrade.serviceAccount }}
          serviceAccountName: {{ . }}
          {{- end }}
          {{- with .Values.imagePullSecrets }}
          imagePullSecrets:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          volumes:
            {{- with .Values.datahubUpgrade.extraVolumes }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          restartPolicy: Never
          securityContext:
            runAsUser: 1000
            fsGroup: 1000
          containers:
            - name: datahub-upgrade-job
              image: "{{ .Values.datahubUpgrade.image.repository }}:{{ .Values.datahubUpgrade.image.tag }}"
              args:
                - "-u"
                - "RestoreIndices"
              env:
                - name: DATAHUB_GMS_HOST
                  value: {{ printf "%s-%s" .Release.Name "datahub-gms" }}
                - name: DATAHUB_GMS_PORT
                  value: "{{ .Values.global.datahub.gms.port }}"
                - name: DATAHUB_MAE_CONSUMER_HOST
                  value: {{ printf "%s-%s" .Release.Name "datahub-mae-consumer" }}
                - name: DATAHUB_MAE_CONSUMER_PORT
                  value: "{{ .Values.global.datahub.mae_consumer.port }}"
                - name: EBEAN_DATASOURCE_USERNAME
                  value: "{{ .Values.global.sql.datasource.username }}"
                - name: EBEAN_DATASOURCE_PASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: "{{ .Values.global.sql.datasource.password.secretRef }}"
                      key: "{{ .Values.global.sql.datasource.password.secretKey }}"
                - name: EBEAN_DATASOURCE_HOST
                  value: "{{ .Values.global.sql.datasource.host }}"
                - name: EBEAN_DATASOURCE_URL
                  value: "{{ .Values.global.sql.datasource.url }}"
                - name: EBEAN_DATASOURCE_DRIVER
                  value: "{{ .Values.global.sql.datasource.driver }}"
                - name: KAFKA_BOOTSTRAP_SERVER
                  value: "{{ .Values.global.kafka.bootstrap.server }}"
                - name: KAFKA_SCHEMAREGISTRY_URL
                  value: "{{ .Values.global.kafka.schemaregistry.url }}"
                - name: ELASTICSEARCH_HOST
                  value: {{ .Values.global.elasticsearch.host | quote }}
                - name: ELASTICSEARCH_PORT
                  value: {{ .Values.global.elasticsearch.port | quote }}
                {{- with .Values.global.elasticsearch.useSSL }}
                - name: ELASTICSEARCH_USE_SSL
                  value: {{ . | quote }}
                {{- end }}
                {{- with .Values.global.elasticsearch.auth }}
                - name: ELASTICSEARCH_USERNAME
                  value: {{ .username }}
                - name: ELASTICSEARCH_PASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: "{{ .password.secretRef }}"
                      key: "{{ .password.secretKey }}"
                {{- end }}
                - name: GRAPH_SERVICE_IMPL
                  value: {{ .Values.global.graph_service_impl }}
                {{- if eq .Values.global.graph_service_impl "neo4j" }}
                - name: NEO4J_HOST
                  value: "{{ .Values.global.neo4j.host }}"
                - name: NEO4J_URI
                  value: "{{ .Values.global.neo4j.uri }}"
                - name: NEO4J_USERNAME
                  value: "{{ .Values.global.neo4j.username }}"
                - name: NEO4J_PASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: "{{ .Values.global.neo4j.password.secretRef }}"
                      key: "{{ .Values.global.neo4j.password.secretKey }}"
                {{- end }}
                {{- with .Values.datahubUpgrade.extraEnvs }}
                {{- toYaml . | nindent 16 }}
                {{- end }}
              volumeMounts:
                {{- with .Values.datahubUpgrade.extraVolumeMounts }}
                {{- toYaml . | nindent 16 }}
                {{- end }}
              resources:
                limits:
                  cpu: 500m
                  memory: 512Mi
                requests:
                  cpu: 300m
                  memory: 256Mi
{{- end -}}
@@ -91,6 +91,9 @@ spec:
name: "{{ .password.secretRef }}"
key: "{{ .password.secretKey }}"
{{- end }}
- name: GRAPH_SERVICE_IMPL
value: {{ .Values.global.graph_service_impl }}
{{- if eq .Values.global.graph_service_impl "neo4j" }}
- name: NEO4J_HOST
value: "{{ .Values.global.neo4j.host }}"
- name: NEO4J_URI
@@ -102,6 +105,7 @@
                secretKeyRef:
                  name: "{{ .Values.global.neo4j.password.secretRef }}"
                  key: "{{ .Values.global.neo4j.password.secretKey }}"
            {{- end }}
            {{- with .Values.datahubUpgrade.extraEnvs }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
3 changes: 3 additions & 0 deletions datahub-upgrade/build.gradle
@@ -7,10 +7,13 @@ dependencies {
    compile project(':metadata-io')
    compile project(':gms:impl')
    compile project(':gms:factories')
    compile project(':gms:client')

    compile externalDependency.javaxInject
    compile externalDependency.hadoopClient
    compile externalDependency.lombok
    compile externalDependency.picocli
    compile externalDependency.parquet
    compile externalDependency.springBeans
    compile externalDependency.springBootAutoconfigure
    compile externalDependency.springCore
@@ -2,8 +2,10 @@

import com.linkedin.datahub.upgrade.impl.DefaultUpgradeManager;
import com.linkedin.datahub.upgrade.nocode.NoCodeUpgrade;
import java.util.List;
import com.linkedin.datahub.upgrade.nocodecleanup.NoCodeCleanupUpgrade;
import com.linkedin.datahub.upgrade.restorebackup.RestoreBackup;
import com.linkedin.datahub.upgrade.restoreindices.RestoreIndices;
import java.util.List;
import javax.inject.Inject;
import javax.inject.Named;
import lombok.extern.slf4j.Slf4j;
@@ -34,10 +36,20 @@ private static final class Args {
@Named("noCodeCleanup")
private NoCodeCleanupUpgrade noCodeCleanup;

@Inject
@Named("restoreIndices")
private RestoreIndices restoreIndices;

@Inject
@Named("restoreBackup")
private RestoreBackup restoreBackup;

@Override
public void run(String... cmdLineArgs) {
_upgradeManager.register(noCodeUpgrade);
_upgradeManager.register(noCodeCleanup);
_upgradeManager.register(restoreIndices);
_upgradeManager.register(restoreBackup);

final Args args = new Args();
new CommandLine(args).setCaseInsensitiveEnumValuesAllowed(true).parseArgs(cmdLineArgs);
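
These registrations line up with the container args in the job templates above ("-u", "RestoreIndices"). As a minimal, self-contained sketch of that dispatch path (the class and field names below are illustrative, not the PR's actual Args class):

import picocli.CommandLine;
import picocli.CommandLine.Option;

public class UpgradeArgsSketch {
  // Hypothetical mirror of the "-u" flag passed by the k8s job template.
  @Option(names = "-u", description = "Id of the upgrade to run")
  String upgradeId;

  public static void main(String[] cmdLineArgs) {
    UpgradeArgsSketch args = new UpgradeArgsSketch();
    // Same parse call as UpgradeCli.run above, with the cron job's args.
    new CommandLine(args)
        .setCaseInsensitiveEnumValuesAllowed(true)
        .parseArgs("-u", "RestoreIndices");
    // The real UpgradeCli would now look up the upgrade registered under this
    // id (restoreIndices or restoreBackup, alongside the existing two).
    System.out.println("Selected upgrade: " + args.upgradeId);
  }
}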
@@ -7,7 +7,7 @@

@SuppressWarnings("checkstyle:HideUtilityClassConstructor")
@SpringBootApplication(exclude = {RestClientAutoConfiguration.class}, scanBasePackages = {
"com.linkedin.gms.factory.common", "com.linkedin.datahub.upgrade.config", "com.linkedin.gms.factory.entity"})
"com.linkedin.gms.factory.common", "com.linkedin.gms.factory.search", "com.linkedin.datahub.upgrade.config", "com.linkedin.gms.factory.entity"})
public class UpgradeCliApplication {
public static void main(String[] args) {
new SpringApplicationBuilder(UpgradeCliApplication.class, UpgradeCli.class).web(WebApplicationType.NONE).run(args);
@@ -2,6 +2,7 @@

import java.util.function.Function;


/**
 * Represents a single executable step in an {@link Upgrade}.
 */
@@ -31,4 +32,10 @@ default boolean isOptional() {
    return false;
  }

  /**
   * Returns whether to skip the step based on the UpgradeContext.
   */
  default boolean skip(UpgradeContext context) {
Collaborator (@jjoyce0510, Jun 29, 2021): Question: why do we need this API if the step can simply check the context and no-op inside its "execute" method?
Contributor (author): Because I wanted to mention in the logs that it was skipped. Right now it says that DeleteAspectV2TableStep ran successfully, which to me implies that the table was deleted. But it actually wasn't; it was just skipped. I think it's good to make this explicit.

Collaborator: Okay, makes sense. Agreed, it's useful to know.

    return false;
  }
}
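
To make the motivation in the thread above concrete, here is a minimal sketch of how an executor can use skip; the SkippingExecutorSketch class is illustrative, not the PR's actual DefaultUpgradeManager code:

import com.linkedin.datahub.upgrade.UpgradeContext;
import com.linkedin.datahub.upgrade.UpgradeStep;
import com.linkedin.datahub.upgrade.UpgradeStepResult;
import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult;

class SkippingExecutorSketch {
  UpgradeStepResult executeStep(UpgradeContext context, UpgradeStep step) {
    if (step.skip(context)) {
      // Report the skip explicitly instead of a misleading "ran successfully".
      context.report().addLine(String.format("Skipping Step: %s", step.id()));
      return new DefaultUpgradeStepResult(step.id(), UpgradeStepResult.Result.SUCCEEDED);
    }
    // Otherwise run the step's executable as usual.
    return step.executable().apply(context);
  }
}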
@@ -0,0 +1,58 @@
package com.linkedin.datahub.upgrade.common.steps;

import com.linkedin.datahub.upgrade.UpgradeContext;
import com.linkedin.datahub.upgrade.UpgradeStep;
import com.linkedin.datahub.upgrade.UpgradeStepResult;
import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult;
import com.linkedin.datahub.upgrade.nocode.NoCodeUpgrade;
import com.linkedin.metadata.graph.GraphService;
import java.util.function.Function;


public class ClearGraphServiceStep implements UpgradeStep {

  private final String deletePattern = ".*";

  private final GraphService _graphService;
  private final boolean _alwaysRun;

  public ClearGraphServiceStep(final GraphService graphService, final boolean alwaysRun) {
    _graphService = graphService;
    _alwaysRun = alwaysRun;
  }

  @Override
  public String id() {
    return "ClearGraphServiceStep";
  }

  @Override
  public boolean skip(UpgradeContext context) {
    if (_alwaysRun) {
      return false;
    }
    if (context.parsedArgs().containsKey(NoCodeUpgrade.CLEAN_ARG_NAME)) {
Collaborator: Should we add a "cleanup has been requested" report line here?

Contributor (author): That log is added in the execute function. I can move it here?

Collaborator: No, let's just keep that.

      return false;
    }
    context.report().addLine("Cleanup has not been requested.");
    return true;
  }

  @Override
  public int retryCount() {
    return 1;
  }

  @Override
  public Function<UpgradeContext, UpgradeStepResult> executable() {
    return (context) -> {
      try {
        _graphService.clear();
      } catch (Exception e) {
        context.report().addLine(String.format("Failed to clear graph indices: %s", e.toString()));
        return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.FAILED);
      }
      return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.SUCCEEDED);
    };
  }
}
@@ -0,0 +1,56 @@
package com.linkedin.datahub.upgrade.common.steps;

import com.linkedin.datahub.upgrade.UpgradeContext;
import com.linkedin.datahub.upgrade.UpgradeStep;
import com.linkedin.datahub.upgrade.UpgradeStepResult;
import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult;
import com.linkedin.datahub.upgrade.nocode.NoCodeUpgrade;
import com.linkedin.metadata.search.SearchService;
import java.util.function.Function;


public class ClearSearchServiceStep implements UpgradeStep {

  private final SearchService _searchService;
  private final boolean _alwaysRun;

  public ClearSearchServiceStep(final SearchService searchService, final boolean alwaysRun) {
    _searchService = searchService;
    _alwaysRun = alwaysRun;
  }

  @Override
  public String id() {
    return "ClearSearchServiceStep";
  }

  @Override
  public boolean skip(UpgradeContext context) {
    if (_alwaysRun) {
      return false;
    }
    if (context.parsedArgs().containsKey(NoCodeUpgrade.CLEAN_ARG_NAME)) {
      return false;
    }
    context.report().addLine("Cleanup has not been requested.");
    return true;
  }

  @Override
  public int retryCount() {
    return 1;
  }

  @Override
  public Function<UpgradeContext, UpgradeStepResult> executable() {
    return (context) -> {
      try {
        _searchService.clear();
      } catch (Exception e) {
        context.report().addLine(String.format("Failed to clear search service: %s", e.toString()));
        return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.FAILED);
      }
      return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.SUCCEEDED);
    };
  }
}
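
ClearSearchServiceStep mirrors ClearGraphServiceStep above; both take an alwaysRun flag so different upgrades can share them. As a hypothetical wiring sketch (the RestoreStepsSketch class is illustrative, not taken from the PR), a restore-style upgrade would pass alwaysRun = true to clear both indices unconditionally, while NoCodeUpgrade would pass false so the steps only run when NoCodeUpgrade.CLEAN_ARG_NAME is supplied:

import com.linkedin.datahub.upgrade.UpgradeStep;
import com.linkedin.datahub.upgrade.common.steps.ClearGraphServiceStep;
import com.linkedin.datahub.upgrade.common.steps.ClearSearchServiceStep;
import com.linkedin.metadata.graph.GraphService;
import com.linkedin.metadata.search.SearchService;
import java.util.Arrays;
import java.util.List;

public class RestoreStepsSketch {
  // Illustrative only: the shared "clear the derived indices" prefix that a
  // restore upgrade could run before re-populating from the source of truth.
  public static List<UpgradeStep> buildSteps(GraphService graphService, SearchService searchService) {
    return Arrays.asList(
        new ClearGraphServiceStep(graphService, /* alwaysRun */ true),
        new ClearSearchServiceStep(searchService, /* alwaysRun */ true));
  }
}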