Skip to content

Commit

Permalink
[#1616] feat(fileset): Add gravitino hadoop file system support (#1700)
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

This PR proposes to add the code skeleton for the Gravitino Hadoop file
system, which proxies the HDFS file system.

### Why are the changes needed?

Fix: #1616 

### How was this patch tested?

Add unit tests to cover the main interface methods.

---------

Co-authored-by: xiaojiebao <xiaojiebao@xiaomi.com>
  • Loading branch information
xloya and xiaojiebao authored Mar 22, 2024
1 parent d2ed24f commit 11a423e
Show file tree
Hide file tree
Showing 11 changed files with 1,394 additions and 3 deletions.
5 changes: 3 additions & 2 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ subprojects {
options.locale = "en_US"

val projectName = project.name
if (projectName == "common" || projectName == "api" || projectName == "client-java") {
if (projectName == "common" || projectName == "api" || projectName == "client-java" || projectName == "filesystem-hadoop3") {
options {
(this as CoreJavadocOptions).addStringOption("Xwerror", "-quiet")
isFailOnError = true
Expand Down Expand Up @@ -641,7 +641,7 @@ tasks {
register("copySubprojectDependencies", Copy::class) {
subprojects.forEach() {
if (!it.name.startsWith("catalog") &&
!it.name.startsWith("client") && it.name != "trino-connector" &&
!it.name.startsWith("client") && !it.name.startsWith("filesystem") && it.name != "trino-connector" &&
it.name != "integration-test" && it.name != "bundled-catalog" && it.name != "spark-connector"
) {
from(it.configurations.runtimeClasspath)
Expand All @@ -654,6 +654,7 @@ tasks {
subprojects.forEach() {
if (!it.name.startsWith("catalog") &&
!it.name.startsWith("client") &&
!it.name.startsWith("filesystem") &&
it.name != "trino-connector" &&
it.name != "spark-connector" &&
it.name != "integration-test" &&
Expand Down
31 changes: 31 additions & 0 deletions clients/filesystem-hadoop3-runtime/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
 * Copyright 2024 Datastrato Pvt Ltd.
 * This software is licensed under the Apache License version 2.
 */

import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar

// Builds the shaded (fat) runtime jar for the Gravitino Hadoop3 filesystem client,
// so deployments need only a single artifact on the Hadoop classpath.
plugins {
`maven-publish`
id("java")
alias(libs.plugins.shadow)
}

dependencies {
implementation(project(":clients:filesystem-hadoop3"))
}

tasks.withType<ShadowJar>(ShadowJar::class.java) {
// Zip64 permits archives with more than 65535 entries / larger than 4 GB.
isZip64 = true
// Bundle the full runtime classpath into the shadow jar.
configurations = listOf(project.configurations.runtimeClasspath.get())
// Empty classifier: publish the shaded jar as the primary artifact.
archiveClassifier.set("")

// Relocate dependencies to avoid conflicts with the versions already present
// in the consumer's Hadoop environment.
relocate("com.google", "com.datastrato.gravitino.shaded.com.google")
relocate("com.github.benmanes.caffeine", "com.datastrato.gravitino.shaded.com.github.benmanes.caffeine")
}

// The plain jar would otherwise collide with the shadow jar's empty classifier,
// so give it the "empty" classifier and build the shadow jar first.
tasks.jar {
dependsOn(tasks.named("shadowJar"))
archiveClassifier.set("empty")
}
35 changes: 35 additions & 0 deletions clients/filesystem-hadoop3/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
 * Copyright 2024 Datastrato Pvt Ltd.
 * This software is licensed under the Apache License version 2.
 */

plugins {
`maven-publish`
id("java")
id("idea")
}

dependencies {
// Hadoop is supplied by the deployment environment, so compile against it only.
compileOnly(libs.hadoop3.common)
implementation(project(":clients:client-java-runtime", configuration = "shadow"))
implementation(libs.caffeine)

// Tests need a concrete Hadoop on the classpath (main source set is compileOnly).
testImplementation(libs.hadoop3.common)
testImplementation(libs.junit.jupiter.api)
testImplementation(libs.junit.jupiter.params)
testImplementation(libs.mockito.core)
testImplementation(libs.mockserver.netty) {
// NOTE(review): presumably excluded to avoid a Guava version clash with the
// shaded client runtime — confirm.
exclude("com.google.guava", "guava")
}
testRuntimeOnly(libs.junit.jupiter.engine)
}

// Make `build` run javadoc so doc errors fail the build (this module is in the
// root build's javadoc-checked list).
tasks.build {
dependsOn("javadoc")
}

tasks.javadoc {
// Include the client runtime sources so cross-module Javadoc references resolve.
dependsOn(":clients:client-java-runtime:javadoc")
source = sourceSets["main"].allJava +
project(":clients:client-java-runtime").sourceSets["main"].allJava
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Copyright 2024 Datastrato Pvt Ltd.
* This software is licensed under the Apache License version 2.
*/
package com.datastrato.gravitino.filesystem.hadoop3;

import com.datastrato.gravitino.NameIdentifier;
import com.datastrato.gravitino.file.Fileset;
import com.datastrato.gravitino.shaded.com.google.common.base.Preconditions;
import com.google.common.base.Objects;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Per-operation context for {@link GravitinoVirtualFileSystem}: bundles the fileset's
 * {@link NameIdentifier}, the {@link Fileset} metadata, the backing {@link FileSystem},
 * and the resolved actual storage {@link Path}.
 */
class FilesetContext {
  private NameIdentifier identifier;
  private Fileset fileset;
  private FileSystem fileSystem;
  private Path actualPath;

  // Instances are created only via the Builder, which guarantees all four
  // fields are populated before a context escapes.
  private FilesetContext() {}

  public NameIdentifier getIdentifier() {
    return identifier;
  }

  public Fileset getFileset() {
    return fileset;
  }

  public FileSystem getFileSystem() {
    return fileSystem;
  }

  public Path getActualPath() {
    return actualPath;
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (!(o instanceof FilesetContext)) {
      return false;
    }
    FilesetContext other = (FilesetContext) o;
    return Objects.equal(identifier, other.identifier)
        && Objects.equal(fileset, other.fileset)
        && Objects.equal(fileSystem, other.fileSystem)
        && Objects.equal(actualPath, other.actualPath);
  }

  @Override
  public int hashCode() {
    return Objects.hashCode(identifier, fileset, fileSystem, actualPath);
  }

  /** @return a new {@link Builder} for assembling a {@link FilesetContext} */
  public static Builder builder() {
    return new Builder();
  }

  /** A builder class for {@link FilesetContext}. */
  public static class Builder {

    private final FilesetContext result;

    private Builder() {
      this.result = new FilesetContext();
    }

    public Builder withIdentifier(NameIdentifier identifier) {
      result.identifier = identifier;
      return this;
    }

    public Builder withFileSystem(FileSystem fileSystem) {
      result.fileSystem = fileSystem;
      return this;
    }

    public Builder withFileset(Fileset fileset) {
      result.fileset = fileset;
      return this;
    }

    public Builder withActualPath(Path actualPath) {
      result.actualPath = actualPath;
      return this;
    }

    /**
     * Validates that every required field was supplied and returns the context.
     *
     * @return the fully populated {@link FilesetContext}
     * @throws IllegalArgumentException if any required field is missing
     */
    public FilesetContext build() {
      Preconditions.checkArgument(result.identifier != null, "Identifier is required");
      Preconditions.checkArgument(result.fileset != null, "Fileset is required");
      Preconditions.checkArgument(result.fileSystem != null, "FileSystem is required");
      Preconditions.checkArgument(result.actualPath != null, "ActualPath is required");
      return result;
    }
  }
}
Loading

0 comments on commit 11a423e

Please sign in to comment.