Skip to content

Commit

Permalink
Merge the dev branch.
Browse files Browse the repository at this point in the history
  • Loading branch information
shaomengwang committed Oct 31, 2022
1 parent 33acb21 commit 0d8f2b5
Show file tree
Hide file tree
Showing 223 changed files with 13,003 additions and 542 deletions.
2 changes: 1 addition & 1 deletion connectors/connector-datahub/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connectors</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/connector-hbase/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connectors</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/connector-hive/hive-bridge/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connectors_hive</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/connector-hive/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connectors</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/connector-jdbc/connector-jdbc-derby/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connector_jdbc</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/connector-jdbc/connector-jdbc-mysql/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connector_jdbc</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/connector-jdbc/connector-jdbc-sqlite/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connector_jdbc</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/connector-jdbc/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connectors</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/connector-kafka/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connectors</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/connector-odps/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connectors</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/connector-parquet-0.11/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connectors</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/connector-redis/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connectors</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_hadoop_fs</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/filesystem/hadoop-fs/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_filesystem</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/filesystem/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink_connectors</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion connectors/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
10 changes: 5 additions & 5 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>alink</artifactId>
<groupId>com.alibaba.alink</groupId>
<version>1.5-SNAPSHOT</version>
<version>1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down Expand Up @@ -204,7 +204,7 @@
<dependency>
<groupId>com.alibaba.alink</groupId>
<artifactId>shaded_protobuf_java</artifactId>
<version>3.6.1-0.5</version>
<version>3.6.1-0.6</version>
</dependency>

<!-- lazy-api dependency -->
Expand Down Expand Up @@ -270,7 +270,7 @@
<dependency>
<groupId>com.alibaba.alink</groupId>
<artifactId>shaded_flink_ai_extended_tf2</artifactId>
<version>0.3.0-SNAPSHOT-0.5</version>
<version>0.3.0-SNAPSHOT-0.6</version>
<exclusions>
<exclusion>
<groupId>*</groupId>
Expand All @@ -282,7 +282,7 @@
<dependency>
<groupId>com.alibaba.alink</groupId>
<artifactId>shaded_tensorflow_core_api</artifactId>
<version>0.2.0-0.5</version>
<version>0.2.0-0.6</version>
<exclusions>
<exclusion>
<groupId>*</groupId>
Expand All @@ -306,7 +306,7 @@
<dependency>
<groupId>com.alibaba.alink</groupId>
<artifactId>shaded_alink_metadata_def</artifactId>
<version>3.21.1-0.5</version>
<version>3.21.1-0.6</version>
</dependency>

<!-- unit test dependency -->
Expand Down
18 changes: 14 additions & 4 deletions core/src/main/java/com/alibaba/alink/common/MTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

Expand Down Expand Up @@ -199,28 +200,29 @@ public MTable select(int... colIndexes) {
public TableSummary summary(String... selectedColNames) {
TableSchema schema = getSchema();
TableSummarizer srt = new TableSummarizer(
//selectedColNames.length == 0 ? schema.getFieldNames() : selectedColNames,
schema.getFieldNames(),
TableUtil.findColIndicesWithAssertAndHint(schema, getCalcCols(schema, selectedColNames)), true);
for (Row row : this.rows) {
srt.visit(row);
}
return srt.toSummary();
return srt.toSummary(selectedColNames);
}

//summary for data from fromId line to endId line, include fromId and exclude endId.
public TableSummary subSummary(String[] selectedColNames, int fromId, int endId) {
TableSchema schema = getSchema();
TableSummarizer srt = new TableSummarizer(
// selectedColNames.length == 0 ? schema.getFieldNames() : selectedColNames,
schema.getFieldNames(),
TableUtil.findColIndicesWithAssertAndHint(schema, getCalcCols(schema, selectedColNames)), true);
for (int i = Math.max(fromId, 0); i < Math.min(endId, this.getNumRow()); i++) {
srt.visit(this.rows.get(i));
}
return srt.toSummary();
return srt.toSummary(selectedColNames);
}

/**
* exclude columns that are not supported types and not in selected columns
*/
private static String[] getCalcCols(TableSchema tableSchema, String[] selectedColNames) {
ArrayList <String> calcCols = new ArrayList <>();
String[] inColNames = selectedColNames.length == 0 ? tableSchema.getFieldNames() : selectedColNames;
Expand Down Expand Up @@ -276,6 +278,14 @@ public MTable sampleWithSize(int numSamples, Random rnd) {
return new MTable(q.stream().map(item -> item.f1).collect(Collectors.toList()), schemaStr);
}

public MTable sampleWithSizeReplacement(int numSamples, Random rnd) {
List <Row> chosenRows = IntStream.range(0, numSamples)
.map(d -> rnd.nextInt() % numSamples)
.mapToObj(rows::get)
.collect(Collectors.toList());
return new MTable(chosenRows, schemaStr);
}

/**
* Ascending order
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import com.alibaba.alink.common.dl.DLEnvConfig.Version;
import com.alibaba.alink.common.utils.JsonConverter;
import com.alibaba.alink.operator.batch.BatchOperator;
import com.alibaba.alink.operator.batch.dataproc.ShuffleBatchOp;
import com.alibaba.alink.operator.batch.dataproc.RebalanceBatchOp;
import com.alibaba.alink.params.dl.BaseDLTableModelTrainParams;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
Expand Down Expand Up @@ -70,7 +70,7 @@ public T linkFrom(BatchOperator <?>... inputs) {
input = input.select(getSelectedCols());
}

input = new ShuffleBatchOp().linkFrom(input);
input = new RebalanceBatchOp().linkFrom(input);

ExternalFilesConfig externalFiles = getUserFiles()
.addFilePaths(resPyFiles)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import com.google.gson.JsonSerializationContext;
import com.google.gson.JsonSerializer;

import java.io.Serializable;
import java.lang.reflect.Type;

public class BaseDaysFeaturesAdapter<T> implements JsonSerializer <T>, JsonDeserializer <T> {
Expand Down Expand Up @@ -42,7 +41,7 @@ public T deserialize(JsonElement json, Type typeOfT,
klass = Class.forName(className);
} catch (ClassNotFoundException e) {
e.printStackTrace();
throw new AkIllegalStateException(e.getMessage());
throw new AkIllegalStateException(String.format("Failed to find class %s", className), e);
}
return context.deserialize(jsonObject.get(INSTANCE), klass);
}
Expand Down
18 changes: 13 additions & 5 deletions core/src/main/java/com/alibaba/alink/common/fe/udaf/SumUdaf.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ public SumData createAccumulator() {
}

public static class SumData {
public final double[][][] mat;
public final Double[][][] mat;
public final int rows;
public final int cols;
public final int numCondition;
Expand All @@ -140,11 +140,15 @@ public SumData(int numCondition, int rows, int cols) {
this.rows = rows;
this.cols = cols;
this.numCondition = numCondition;
this.mat = new double[numCondition][rows][cols];
this.mat = new Double[numCondition][rows][cols];
}

public void addData(int conditionIndex, int rowIndex, int windowIdx, double val) {
mat[conditionIndex][rowIndex][windowIdx] += val;
if (mat[conditionIndex][rowIndex][windowIdx] == null) {
mat[conditionIndex][rowIndex][windowIdx] = val;
} else {
mat[conditionIndex][rowIndex][windowIdx] += val;
}
}

public void retract(int conditionIndex, int rowIndex, int minLevel, double val) {
Expand All @@ -157,7 +161,7 @@ public void reset() {
for (int ic = 0; ic < numCondition; ic++) {
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
mat[ic][i][j] = 0;
mat[ic][i][j] = null;
}
}
}
Expand All @@ -167,7 +171,11 @@ public void merge(SumData data) {
for (int ic = 0; ic < numCondition; ic++) {
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
mat[ic][i][j] += data.mat[ic][i][j];
if (mat[ic][i][j] == null) {
mat[ic][i][j] = data.mat[ic][i][j];
} else if (mat[ic][i][j] != null && data.mat[ic][i][j] != null) {
mat[ic][i][j] += data.mat[ic][i][j];
}
}
}
}
Expand Down
12 changes: 12 additions & 0 deletions core/src/main/java/com/alibaba/alink/common/io/xls/XlsReader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.alibaba.alink.common.io.xls;

import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.ml.api.misc.param.Params;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.types.Row;

public interface XlsReader {
Tuple2 <RichInputFormat <Row, FileInputSplit>, TableSchema> create(Params params);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package com.alibaba.alink.common.io.xls;

import com.alibaba.alink.common.exceptions.AkPluginErrorException;
import com.alibaba.alink.common.io.plugin.ClassLoaderContainer;
import com.alibaba.alink.common.io.plugin.ClassLoaderFactory;
import com.alibaba.alink.common.io.plugin.PluginDistributeCache;
import com.alibaba.alink.common.io.plugin.RegisterKey;
import com.alibaba.alink.common.io.plugin.TemporaryClassLoaderContext;

import java.util.Iterator;
import java.util.ServiceLoader;

public class XlsReaderClassLoader extends ClassLoaderFactory {
private final static String XLS_NAME = "xls";

public XlsReaderClassLoader(String version) {
super(new RegisterKey(XLS_NAME, version), PluginDistributeCache.createDistributeCache(XLS_NAME, version));
}

public static XlsReader create(XlsReaderClassLoader factory) {
ClassLoader classLoader = factory.create();

try (TemporaryClassLoaderContext context = TemporaryClassLoaderContext.of(classLoader)) {
Iterator <XlsReader> iter = ServiceLoader
.load(XlsReader.class, classLoader)
.iterator();
if (iter.hasNext()) {
return iter.next();
} else {
throw new AkPluginErrorException("Could not find the class factory in classloader.");
}
}
}

@Override
public ClassLoader create() {
return ClassLoaderContainer.getInstance().create(
registerKey,
distributeCache,
XlsReader.class,
xlsReader -> true,
descriptor -> registerKey.getVersion()
);
}
}
Loading

0 comments on commit 0d8f2b5

Please sign in to comment.