diff --git a/common/pom.xml b/common/pom.xml
index 2f2a7211e4..265c56739b 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -118,6 +118,10 @@
${janino-version}
test
+
+ edu.ucar
+ cdm-core
+
src/main/java
diff --git a/common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java b/common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java
index 0cc65b5d11..e19df957b9 100644
--- a/common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java
+++ b/common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java
@@ -15,6 +15,7 @@
import org.apache.sedona.common.FunctionsGeoTools;
import org.apache.sedona.common.raster.inputstream.ByteArrayImageInputStream;
+import org.apache.sedona.common.raster.netcdf.NetCdfReader;
import org.apache.sedona.common.utils.RasterUtils;
import org.geotools.coverage.grid.GridCoverage2D;
import org.geotools.coverage.grid.GridEnvelope2D;
@@ -28,6 +29,7 @@
import org.geotools.geometry.jts.JTS;
import org.geotools.geometry.jts.ReferencedEnvelope;
import org.geotools.process.vector.VectorToRasterProcess;
+import org.geotools.referencing.CRS;
import org.geotools.referencing.crs.DefaultEngineeringCRS;
import org.geotools.referencing.operation.transform.AffineTransform2D;
import org.geotools.util.factory.Hints;
@@ -38,13 +40,15 @@
import org.opengis.referencing.crs.CoordinateReferenceSystem;
import org.opengis.referencing.datum.PixelInCell;
import org.opengis.referencing.operation.MathTransform;
+import ucar.nc2.NetcdfFile;
+import ucar.nc2.NetcdfFiles;
import javax.media.jai.RasterFactory;
import java.awt.image.WritableRaster;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
+import java.util.Map;
public class RasterConstructors
{
@@ -58,6 +62,25 @@ public static GridCoverage2D fromGeoTiff(byte[] bytes) throws IOException {
return geoTiffReader.read(null);
}
+ public static GridCoverage2D fromNetCDF(byte[] bytes, String variableName, String lonDimensionName, String latDimensionName) throws IOException, FactoryException {
+ NetcdfFile netcdfFile = openNetCdfBytes(bytes);
+ return NetCdfReader.getRaster(netcdfFile, variableName, latDimensionName, lonDimensionName);
+ }
+
+ public static GridCoverage2D fromNetCDF(byte[] bytes, String recordVariableName) throws IOException, FactoryException {
+ NetcdfFile netcdfFile = openNetCdfBytes(bytes);
+ return NetCdfReader.getRaster(netcdfFile, recordVariableName);
+ }
+
+ public static String getRecordInfo(byte[] bytes) throws IOException {
+ NetcdfFile netcdfFile = openNetCdfBytes(bytes);
+ return NetCdfReader.getRecordInfo(netcdfFile);
+ }
+
+ private static NetcdfFile openNetCdfBytes(byte[] bytes) throws IOException {
+ return NetcdfFiles.openInMemory("", bytes);
+ }
+
/**
* Returns a raster that is converted from the geometry provided.
* @param geom The geometry to convert
@@ -310,7 +333,31 @@ public static GridCoverage2D makeEmptyRaster(int numBand, String bandDataType, i
new GridEnvelope2D(0, 0, widthInPixel, heightInPixel),
PixelInCell.CELL_CORNER,
transform, crs, null);
- return RasterUtils.create(raster, gridGeometry, null);
+ return RasterUtils.create(raster, gridGeometry, null, null);
+ }
+
+ public static GridCoverage2D makeNonEmptyRaster(int numBand, int widthInPixel, int heightInPixel, double upperLeftX, double upperLeftY, double scaleX, double scaleY, double skewX, double skewY, int srid, double[][] data, Map> properties, Double noDataValue, PixelInCell anchor) throws FactoryException {
+ CoordinateReferenceSystem crs;
+ if (srid == 0) {
+ crs = DefaultEngineeringCRS.GENERIC_2D;
+ } else {
+ // Create the CRS from the srid
+ // Longitude first, Latitude second
+ crs = CRS.decode("EPSG:" + srid, true);
+ }
+
+ // Create a new empty raster
+ WritableRaster raster = RasterFactory.createBandedRaster(5, widthInPixel, heightInPixel, numBand, null); //create a raster with double values
+ for (int i = 0; i < numBand; i++) {
+ raster.setSamples(0, 0, widthInPixel, heightInPixel, i, data[i]);
+ }
+ //raster.setPixels(0, 0, widthInPixel, heightInPixel, data);
+ MathTransform transform = new AffineTransform2D(scaleX, skewY, skewX, scaleY, upperLeftX, upperLeftY);
+ GridGeometry2D gridGeometry = new GridGeometry2D(
+ new GridEnvelope2D(0, 0, widthInPixel, heightInPixel),
+ anchor,
+ transform, crs, null);
+ return RasterUtils.create(raster, gridGeometry, null, noDataValue, properties);
}
public static GridCoverage2D makeNonEmptyRaster(int numBands, String bandDataType, int widthInPixel, int heightInPixel, double upperLeftX, double upperLeftY, double scaleX, double scaleY, double skewX, double skewY, int srid, double[][] rasterValues) {
diff --git a/common/src/main/java/org/apache/sedona/common/raster/netcdf/NetCdfConstants.java b/common/src/main/java/org/apache/sedona/common/raster/netcdf/NetCdfConstants.java
new file mode 100644
index 0000000000..e05155352c
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/raster/netcdf/NetCdfConstants.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.raster.netcdf;
+
+public class NetCdfConstants {
+ public static final String VALID_MIN = "valid_min";
+ public static final String VALID_MAX = "valid_max";
+ public static final String MISSING_VALUE = "missing_value";
+ public static final String VALID_RANGE = "valid_range";
+ public static final String SCALE_FACTOR = "scale_factor";
+ public static final String ADD_OFFSET = "add_offset";
+ public static final String LONG_NAME = "long_name";
+ public static final String UNITS = "units";
+ public static final String GLOBAL_ATTR_PREFIX = "GLOBAL_ATTRIBUTES";
+ public static final String VAR_ATTR_PREFIX = "VAR_";
+ public static final String BAND_ATTR_PREFIX = "BAND_";
+
+ public static final String INSUFFICIENT_DIMENSIONS_VARIABLE = "Provided variable has less than two dimensions";
+
+ public static final String NON_NUMERIC_VALUE = "An attribute expected to have numeric values does not have numeric value";
+
+ public static final String INVALID_LON_NAME = "Provided longitude variable short name is invalid";
+
+ public static final String INVALID_LAT_NAME = "Provided latitude variable short name is invalid";
+
+ public static final String INVALID_LAT_SHAPE = "Shape of latitude variable is supposed to be 1";
+
+ public static final String INVALID_LON_SHAPE = "Shape of longitude variable is supposed to be 1";
+
+ public static final String INVALID_RECORD_NAME = "Provided record variable short name is invalid";
+
+ public static final String COORD_VARIABLE_NOT_FOUND = "Corresponding coordinate variable not found for dimension %s";
+
+ public static final String COORD_IDX_NOT_FOUND = "Given record variable does not contain one of the latitude or longitude dimensions as the participating dimensions";
+}
diff --git a/common/src/main/java/org/apache/sedona/common/raster/netcdf/NetCdfReader.java b/common/src/main/java/org/apache/sedona/common/raster/netcdf/NetCdfReader.java
new file mode 100644
index 0000000000..ad8f423701
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/raster/netcdf/NetCdfReader.java
@@ -0,0 +1,376 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.raster.netcdf;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.sedona.common.raster.RasterConstructors;
+import org.geotools.coverage.grid.GridCoverage2D;
+import org.opengis.referencing.FactoryException;
+import org.opengis.referencing.datum.PixelInCell;
+import ucar.ma2.Array;
+import ucar.nc2.*;
+
+import java.io.IOException;
+import java.util.*;
+
+public class NetCdfReader {
+ /*
+ ==================================== Sedona NetCDF Public APIS ====================================
+ */
+
+ /**
+ * Return a string depicting information about record variables in the provided netcdf file along with its dimensions
+ * @param netcdfFile NetCDF file to retrieve info of
+ * @return a string containing information about all record variables present within the file
+ */
+ public static String getRecordInfo(NetcdfFile netcdfFile) {
+ ImmutableList recordVariables = getRecordVariables(netcdfFile);
+ if (Objects.isNull(recordVariables) || recordVariables.isEmpty()) return "";
+ int numRecordVariables = recordVariables.size();
+ StringBuilder recordInfo = new StringBuilder();
+ for (int i = 0; i < numRecordVariables; i++) {
+ Variable variable = recordVariables.get(i);
+ recordInfo.append(variable.getNameAndDimensions(false)); //make strict true to get short name
+ if (i != numRecordVariables - 1) recordInfo.append("\n\n");
+ }
+ return recordInfo.toString();
+ }
+
+ /**
+ * Return a raster containing the record variable data, assuming the last two dimensions of the variable as Y and X respectively
+ * @param netcdfFile NetCDF file to read
+ * @param variableShortName short name of the variable to read
+ * @return GridCoverage2D object
+ * @throws FactoryException
+ * @throws IOException
+ */
+ public static GridCoverage2D getRaster(NetcdfFile netcdfFile, String variableShortName) throws FactoryException, IOException {
+ Variable recordVariable = getRecordVariableFromShortName(netcdfFile, variableShortName);
+ ImmutableList variableDimensions = recordVariable.getDimensions();
+ int numDimensions = variableDimensions.size();
+ if (numDimensions < 2) {
+ throw new IllegalArgumentException(NetCdfConstants.INSUFFICIENT_DIMENSIONS_VARIABLE);
+ }
+ //Assume ... Y, X dimension list in the variable
+ String latDimensionShortName = variableDimensions.get(numDimensions - 2).getShortName();
+ String lonDimensionShortName = variableDimensions.get(numDimensions - 1).getShortName();
+
+ Variable[] coordVariablePair = getCoordVariables(netcdfFile, latDimensionShortName, lonDimensionShortName);
+ Variable latVariable = coordVariablePair[1];
+ Variable lonVariable = coordVariablePair[0];
+ return NetCdfReader.getRasterHelper(netcdfFile, recordVariable, lonVariable, latVariable, false);
+ }
+
+ /**
+ *
+ * @param netcdfFile NetCDF file to read
+ * @param variableShortName short name of the variable to read
+ * @param latDimShortName short name of the variable that serves as the lat dimension of the record variable
+ * @param lonDimShortName short name of the variable that serves as the lon dimension of the record variable
+ * @return GridCoverage2D object
+ * @throws FactoryException
+ * @throws IOException
+ */
+ public static GridCoverage2D getRaster(NetcdfFile netcdfFile, String variableShortName, String latDimShortName, String lonDimShortName) throws FactoryException, IOException {
+ Variable recordVariable = getRecordVariableFromShortName(netcdfFile, variableShortName);
+
+ Variable[] coordVariablePair = getCoordVariables(netcdfFile, latDimShortName, lonDimShortName);
+ Variable latVariable = coordVariablePair[1];
+ Variable lonVariable = coordVariablePair[0];
+ return NetCdfReader.getRasterHelper(netcdfFile, recordVariable, lonVariable, latVariable, true);
+ }
+
+ /**
+ * Return the properties map of the raster which contains metadata information about a netCDF file
+ * @param raster GridCoverage2D raster created from a NetCDF file
+ * @return a map of containing metadata
+ */
+ public static Map getRasterMetadata(GridCoverage2D raster) {
+ return raster.getProperties();
+ }
+
+ /*
+ ==================================== Sedona NetCDF Helper APIS ====================================
+ */
+
+ private static Variable getRecordVariableFromShortName(NetcdfFile netcdfFile, String variableShortName) {
+ Variable recordVariable = findVariableRecursive(variableShortName, netcdfFile.getRootGroup());
+ ensureRecordVar(recordVariable);
+ return recordVariable;
+ }
+
+ private static Variable[] getCoordVariables(NetcdfFile netcdfFile, String latVariableShortName, String lonVariableShortName) {
+ Group rootGroup = netcdfFile.getRootGroup();
+
+ //TODO: optimize to get all variables in 1 tree traversal?
+ Variable lonVariable = findVariableRecursive(lonVariableShortName, rootGroup);
+ Variable latVariable = findVariableRecursive(latVariableShortName, rootGroup);
+ ensureCoordVar(lonVariable, latVariable);
+ return new Variable[]{lonVariable, latVariable};
+ }
+
+ private static ImmutableList getRecordVariables(NetcdfFile netcdfFile) {
+ ImmutableList variables = netcdfFile.getVariables();
+ List recordVariables = null;
+ for (Variable variable: variables) {
+ ImmutableList variableDimensions = variable.getDimensions();
+ if (variableDimensions.size() < 2) continue; //record variables have least 2 dimensions (lat and lon)
+ if (Objects.isNull(recordVariables)) recordVariables = new ArrayList<>();
+ //ASSUMPTION: A record is not restricted to having an unlimited dimension
+ recordVariables.add(variable);
+ }
+ if (Objects.isNull(recordVariables)) return null;
+ return ImmutableList.copyOf(recordVariables);
+ }
+ private static GridCoverage2D getRasterHelper(NetcdfFile netcdfFile, Variable recordVariable, Variable lonVariable, Variable latVariable, boolean findCoordIndices) throws IOException, FactoryException {
+ //assert(netcdfFile.getFileTypeVersion().equalsIgnoreCase("1")); //CDF - 1
+ ImmutableList globalAttributes = netcdfFile.getGlobalAttributes();
+ Group rootGroup = netcdfFile.getRootGroup();
+
+ Array lonData, latData;
+ double minX = 0, maxY = 0, translateX, translateY, skewX = 0, skewY = 0;
+ int width, height;
+ boolean isIncreasing = false;
+ HashMap> varMetadata = new HashMap<>();
+
+
+ //========================= Set Raster geo-reference (width, height, scaleX, scaleY =========================
+ lonData = lonVariable.read();
+ width = lonData.getShape()[0];
+ double firstVal, lastVal;
+ firstVal = getElement(lonData, 0);
+ lastVal = getElement(lonData, width - 1);
+ isIncreasing = firstVal < lastVal;
+ minX = isIncreasing ? firstVal : lastVal;
+ translateX = Math.abs(lastVal - firstVal) / (width - 1);
+
+ latData = latVariable.read();
+ height = latData.getShape()[0];
+ firstVal = getElement(latData, 0);
+ lastVal = getElement(latData, height - 1);
+ isIncreasing = firstVal < lastVal;
+ maxY = isIncreasing ? lastVal : firstVal;
+ translateY = Math.abs(lastVal - firstVal) / (height - 1);
+ //======================================================================================================
+
+ String lonVariableName = lonVariable.getShortName();
+ String latVariableName = latVariable.getShortName();
+
+ List recordDimensions = recordVariable.getDimensions();
+ int numRecordDimensions = recordDimensions.size();
+
+ //==================== Re-check existence of dimensions if necessary ====================
+ int lonIndex = numRecordDimensions - 1;
+ int latIndex = numRecordDimensions - 2;
+ if (findCoordIndices) {
+ lonIndex = recordVariable.findDimensionIndex(lonVariableName);
+ latIndex = recordVariable.findDimensionIndex(latVariableName);
+ if (latIndex == -1 || lonIndex == -1) {
+ throw new IllegalArgumentException(NetCdfConstants.COORD_IDX_NOT_FOUND);
+ }
+ }
+
+ //read recordVariable data
+ Array recordData = recordVariable.read();
+
+ // Set noDataValue if available, null otherwise
+ Double noDataValue = null;
+ if (recordVariable.attributes().hasAttribute(NetCdfConstants.MISSING_VALUE)) {
+ Attribute noDataAttribute = recordVariable.findAttribute(NetCdfConstants.MISSING_VALUE);
+ if (!Objects.isNull(noDataAttribute))
+ noDataValue = getAttrDoubleValue(noDataAttribute);
+ }
+ int[] strides = recordData.getIndex().getShape();
+ int[] pointers = recordData.getIndex().getCurrentCounter();
+ int numBands = 1;
+ int numValuesPerBand = 1;
+ for (int i = 0; i < numRecordDimensions; i++) {
+ Dimension recordDimension = recordDimensions.get(i);
+ String dimensionShortName = recordDimension.getShortName();
+ //TODO: Maybe replace this string comparison with index comparison since we already have lon and lat dimension indices stored?
+ if (dimensionShortName.equalsIgnoreCase(latVariableName) || dimensionShortName.equalsIgnoreCase(lonVariableName)) {
+ numValuesPerBand *= strides[i];
+ }else {
+ numBands *= strides[i];
+ }
+ }
+ Array[] dimensionVars = new Array[numRecordDimensions];
+ double scaleFactor = 1, offset = 0;
+ for (int i = 0; i < numRecordDimensions; i++) {
+ if (i == lonIndex) dimensionVars[i] = lonData;
+ else if (i == latIndex) dimensionVars[i] = latData;
+ else {
+ Dimension recordDimension = recordDimensions.get(i);
+ String dimensionShortName = recordDimension.getShortName();
+ //TODO: This leads to tree traversal for dimensions, can we do this in one go?
+ Variable recordDimVar = findVariableRecursive(dimensionShortName, rootGroup);//rootGroup.findVariableLocal(dimensionShortName);
+ if (Objects.isNull(recordDimVar)) throw new IllegalArgumentException(String.format(NetCdfConstants.COORD_VARIABLE_NOT_FOUND, dimensionShortName));
+ AttributeContainer recordDimAttrs = recordDimVar.attributes();
+ varMetadata.computeIfAbsent(dimensionShortName, k -> new ArrayList<>());
+ for (Attribute attr: recordDimAttrs) {
+ varMetadata.get(dimensionShortName).add(attr.getStringValue());
+ if (attr.getShortName().equalsIgnoreCase(NetCdfConstants.SCALE_FACTOR)) {
+ Array values = attr.getValues();
+ if (!Objects.isNull(values))
+ scaleFactor = getElement(attr.getValues(), 0);
+ }
+ if (attr.getShortName().equalsIgnoreCase(NetCdfConstants.ADD_OFFSET)) {
+ Array values = attr.getValues();
+ if (!Objects.isNull(values))
+ offset = getElement(attr.getValues(), 0);
+ }
+ }
+ dimensionVars[i] = recordDimVar.read();
+
+ }
+ }
+ double[][] allBandValues = new double[numBands][numValuesPerBand];
+ //check for offset in the record variable
+ List> bandMetaData = new ArrayList<>();
+ int currBandNum = 0;
+ while (currBandNum < numBands) {
+ if (dimensionVars.length > 2) {
+ //start from the bottom 3rd dimension going up to form metadata
+ List currBandMetadata = new ArrayList<>();
+ for (int metadataDim = dimensionVars.length - 3; metadataDim >= 0; metadataDim--) {
+ double data = getElement(dimensionVars[metadataDim], pointers[metadataDim]);
+ String metadata = recordDimensions.get(metadataDim).getShortName() + " : " + data;
+ currBandMetadata.add(metadata);
+ }
+ bandMetaData.add(currBandMetadata);
+ }
+ //int dataIndex = currBandNum;
+ for (int j = height - 1; j >= 0; j--) {
+ for (int i = 0; i < width; i++) {
+ int index = (j * width + i);
+ int dataIndex = currBandNum * (width * height) + ((height - 1 - j) * width + i);
+ double currRecord = scaleFactor * getElement(recordData, dataIndex) + offset;
+ allBandValues[currBandNum][index] = currRecord;
+ }
+ }
+ boolean addCarry = true;
+ for (int currDim = dimensionVars.length - 3; currDim >= 0; currDim--) {
+ int maxIndex = strides[currDim];
+ if (addCarry) {
+ pointers[currDim]++;
+ addCarry = false;
+ }
+ if (pointers[currDim] == maxIndex) {
+ pointers[currDim] = 0;
+ addCarry = true;
+ }
+ }
+ currBandNum++;
+ }
+ Map> rasterProperties = getRasterProperties(globalAttributes, varMetadata, bandMetaData);
+ return RasterConstructors.makeNonEmptyRaster(numBands, width, height, minX, maxY, translateX, -translateY, skewX, skewY, 0, allBandValues, rasterProperties, noDataValue, PixelInCell.CELL_CENTER);
+ }
+
+ private static Variable findVariableRecursive(String shortName, Group currGroup) {
+ if (!Objects.isNull(currGroup.findVariableLocal(shortName))) {
+ return currGroup.findVariableLocal(shortName);
+ }else {
+ for (Group group: currGroup.getGroups()) {
+ Variable ret = findVariableRecursive(shortName, group);
+ if (!Objects.isNull(ret)) return ret;
+ }
+ return null;
+ }
+ }
+
+ private static Map> getRasterProperties(ImmutableList globalAttrs, Map> varAttrs, List> bandAttrs) {
+ Map> res = new HashMap<>(varAttrs);
+ if (!Objects.isNull(globalAttrs) && !globalAttrs.isEmpty()) {
+ List globalAttrString = new ArrayList<>();
+ for (Attribute globalAttr: globalAttrs) {
+ globalAttrString.add(globalAttr.getStringValue());
+ }
+ res.put(NetCdfConstants.GLOBAL_ATTR_PREFIX, globalAttrString);
+ }
+ for(int band = 0; band < bandAttrs.size(); band++) {
+ int currBandNum = band + 1;
+ res.put(NetCdfConstants.BAND_ATTR_PREFIX + currBandNum, bandAttrs.get(band));
+ }
+ return res;
+ }
+
+ private static Double getElement(Array array, int index) {
+ double res;
+ switch (array.getDataType()) {
+ case INT:
+ res = array.getInt(index);
+ break;
+ case LONG:
+ res = array.getLong(index);
+ break;
+ case FLOAT:
+ res = array.getFloat(index);
+ break;
+ case DOUBLE:
+ res = array.getDouble(index);
+ break;
+ case BYTE:
+ res = array.getByte(index);
+ break;
+ case SHORT:
+ case USHORT:
+ res = array.getShort(index);
+ break;
+ default:
+ throw new IllegalArgumentException("Error casting dimension data to double");
+ }
+ return res;
+ }
+
+ private static Double getAttrDoubleValue(Attribute attr) {
+ Number numericValue = attr.getNumericValue();
+ if (!Objects.isNull(numericValue)) {
+ return numericValue.doubleValue();
+ }else {
+ throw new IllegalArgumentException(NetCdfConstants.NON_NUMERIC_VALUE);
+ }
+ }
+
+
+ private static void ensureCoordVar(Variable lonVar, Variable latVar) throws NullPointerException, IllegalArgumentException {
+ if (latVar == null) {
+ throw new NullPointerException(NetCdfConstants.INVALID_LAT_NAME);
+ }
+
+ if (lonVar == null) {
+ throw new NullPointerException(NetCdfConstants.INVALID_LON_NAME);
+ }
+
+ if (latVar.getShape().length > 1) {
+ throw new IllegalArgumentException(NetCdfConstants.INVALID_LAT_SHAPE);
+ }
+
+ if (lonVar.getShape().length > 1) {
+ throw new IllegalArgumentException(NetCdfConstants.INVALID_LON_SHAPE);
+ }
+ }
+
+ private static void ensureRecordVar(Variable recordVar) throws NullPointerException {
+ if (recordVar == null) {
+ throw new IllegalArgumentException(NetCdfConstants.INVALID_RECORD_NAME);
+ }
+ }
+
+
+}
diff --git a/common/src/main/java/org/apache/sedona/common/utils/RasterUtils.java b/common/src/main/java/org/apache/sedona/common/utils/RasterUtils.java
index d5d4f1dbf5..db60411f94 100644
--- a/common/src/main/java/org/apache/sedona/common/utils/RasterUtils.java
+++ b/common/src/main/java/org/apache/sedona/common/utils/RasterUtils.java
@@ -195,6 +195,10 @@ public static GridCoverage2D clone(RenderedImage image, GridGeometry2D gridGeome
* @return A new GridCoverage2D object.
*/
public static GridCoverage2D create(WritableRaster raster, GridGeometry2D gridGeometry, GridSampleDimension[] bands, Double noDataValue) {
+ return create(raster, gridGeometry, bands, noDataValue, null);
+ }
+
+ public static GridCoverage2D create(WritableRaster raster, GridGeometry2D gridGeometry, GridSampleDimension[] bands, Double noDataValue, Map properties) {
int numBand = raster.getNumBands();
int rasterDataType = raster.getDataBuffer().getDataType();
@@ -220,7 +224,7 @@ public static GridCoverage2D create(WritableRaster raster, GridGeometry2D gridGe
}
final RenderedImage image = new BufferedImage(colorModel, raster, false, null);
- return gridCoverageFactory.create("genericCoverage", image, gridGeometry, bands, null, null);
+ return gridCoverageFactory.create("genericCoverage", image, gridGeometry, bands, null, properties);
}
public static GridCoverage2D create(RenderedImage image, GridGeometry2D gridGeometry, GridSampleDimension[] bands, Double noDataValue) {
diff --git a/common/src/test/java/org/apache/sedona/common/raster/RasterAccessorsTest.java b/common/src/test/java/org/apache/sedona/common/raster/RasterAccessorsTest.java
index 5dac999c9f..d8f52fca8c 100644
--- a/common/src/test/java/org/apache/sedona/common/raster/RasterAccessorsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/raster/RasterAccessorsTest.java
@@ -18,10 +18,12 @@
*/
package org.apache.sedona.common.raster;
+import org.apache.sedona.common.utils.RasterUtils;
import org.geotools.coverage.grid.GridCoordinates2D;
import org.geotools.coverage.grid.GridCoverage2D;
import org.geotools.geometry.jts.JTS;
import org.geotools.referencing.CRS;
+import org.geotools.referencing.operation.transform.AffineTransform2D;
import org.junit.Test;
import org.locationtech.jts.geom.*;
import org.opengis.referencing.FactoryException;
diff --git a/common/src/test/java/org/apache/sedona/common/raster/RasterConstructorsTest.java b/common/src/test/java/org/apache/sedona/common/raster/RasterConstructorsTest.java
index 2487fa75a8..d49c7b3970 100644
--- a/common/src/test/java/org/apache/sedona/common/raster/RasterConstructorsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/raster/RasterConstructorsTest.java
@@ -20,12 +20,14 @@
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.io.ParseException;
import org.opengis.referencing.FactoryException;
+import org.opengis.referencing.operation.TransformException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
public class RasterConstructorsTest
extends RasterTestBase {
@@ -236,4 +238,44 @@ public void makeEmptyRaster() throws FactoryException {
assertEquals("SIGNED_32BITS", gridCoverage2D.getSampleDimension(0).getSampleDimensionType().name());
}
+ @Test
+ public void testNetCdfClassic() throws FactoryException, IOException, TransformException {
+ GridCoverage2D testRaster = RasterConstructors.fromNetCDF(testNc, "O3");
+ double[] expectedMetadata = {4.9375, 50.9375, 80, 48, 0.125, -0.125, 0, 0, 0, 4};
+ double[] actualMetadata = RasterAccessors.metadata(testRaster);
+ for (int i = 0; i < expectedMetadata.length; i++) {
+ assertEquals(expectedMetadata[i], actualMetadata[i], 1e-5);
+ }
+
+ double actualFirstGridVal = PixelFunctions.value(testRaster, 0, 0, 1);
+ double expectedFirstGridVal = 60.95357131958008;
+ assertEquals(expectedFirstGridVal, actualFirstGridVal, 1e-6);
+ }
+
+ @Test
+ public void testNetCdfClassicLongForm() throws FactoryException, IOException, TransformException {
+ GridCoverage2D testRaster = RasterConstructors.fromNetCDF(testNc, "O3", "lon", "lat");
+ double[] expectedMetadata = {4.9375, 50.9375, 80, 48, 0.125, -0.125, 0, 0, 0, 4};
+ double[] actualMetadata = RasterAccessors.metadata(testRaster);
+ for (int i = 0; i < expectedMetadata.length; i++) {
+ assertEquals(expectedMetadata[i], actualMetadata[i], 1e-5);
+ }
+
+ double actualFirstGridVal = PixelFunctions.value(testRaster, 0, 0, 1);
+ double expectedFirstGridVal = 60.95357131958008;
+ assertEquals(expectedFirstGridVal, actualFirstGridVal, 1e-6);
+ }
+
+
+ @Test
+ public void testRecordInfo() throws IOException {
+ String actualRecordInfo = RasterConstructors.getRecordInfo(testNc);
+ String expectedRecordInfo = "O3(time=2, z=2, lat=48, lon=80)\n" +
+ "\n" +
+ "NO2(time=2, z=2, lat=48, lon=80)";
+ assertEquals(expectedRecordInfo, actualRecordInfo);
+ }
+
+
+
}
diff --git a/common/src/test/java/org/apache/sedona/common/raster/RasterTestBase.java b/common/src/test/java/org/apache/sedona/common/raster/RasterTestBase.java
index 88eebd58c6..e1f1a9ee95 100644
--- a/common/src/test/java/org/apache/sedona/common/raster/RasterTestBase.java
+++ b/common/src/test/java/org/apache/sedona/common/raster/RasterTestBase.java
@@ -13,7 +13,7 @@
*/
package org.apache.sedona.common.raster;
-import org.geotools.coverage.grid.GridCoordinates2D;
+ import org.geotools.coverage.grid.GridCoordinates2D;
import org.geotools.coverage.grid.GridCoverage2D;
import org.geotools.coverage.grid.GridCoverageFactory;
import org.geotools.gce.geotiff.GeoTiffReader;
@@ -33,14 +33,14 @@
import org.opengis.referencing.operation.TransformException;
import javax.media.jai.RasterFactory;
-import java.awt.Color;
+import java.awt.*;
import java.awt.image.BufferedImage;
import java.awt.image.WritableRaster;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
-import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
public class RasterTestBase {
String arc = "NCOLS 2\nNROWS 2\nXLLCORNER 378922\nYLLCORNER 4072345\nCELLSIZE 30\nNODATA_VALUE 0\n0 1 2 3\n";
@@ -52,6 +52,9 @@ public class RasterTestBase {
GridCoverage2D oneBandRaster;
GridCoverage2D multiBandRaster;
byte[] geoTiff;
+ byte[] testNc;
+ String ncFile = resourceFolder + "raster/netcdf/test.nc";
+
@Before
public void setup() throws IOException {
@@ -60,6 +63,8 @@ public void setup() throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
new GeoTiffWriter(bos).write(multiBandRaster, new GeneralParameterValue[]{});
geoTiff = bos.toByteArray();
+ File file = new File(ncFile);
+ testNc = Files.readAllBytes(file.toPath());
}
GridCoverage2D createEmptyRaster(int numBands)
diff --git a/core/src/test/resources/raster/netcdf/test.nc b/core/src/test/resources/raster/netcdf/test.nc
new file mode 100644
index 0000000000..2f7b67d87f
Binary files /dev/null and b/core/src/test/resources/raster/netcdf/test.nc differ
diff --git a/docs/api/sql/Raster-loader.md b/docs/api/sql/Raster-loader.md
index 595348c39e..0f9b8ff4aa 100644
--- a/docs/api/sql/Raster-loader.md
+++ b/docs/api/sql/Raster-loader.md
@@ -169,3 +169,63 @@ Output:
| GridCoverage2D["g...|
+------------------------------------------------------------------+
```
+
+### RS_FromNetCDF
+
+
+Introduction: Returns a raster geometry representing the given record variable short name from a NetCDF file.
+This API reads the array data of the record variable *in memory* along with all its dimensions
+Since the netCDF format has many variants, the reader might not work for your test case, if that is so, please report this using the public forums.
+
+This API has been tested for netCDF classic (NetCDF 1, 2, 5) and netCDF4/HDF5 files.
+
+This API requires the name of the record variable. It is assumed that a variable of the given name exists, and its last 2 dimensions are 'lat' and 'lon' dimensions *respectively*.
+
+If this assumption does not hold true for your case, you can choose to pass the lonDimensionName and latDimensionName explicitly.
+
+You can use [RS_NetCDFInfo](./#rs_netcdfinfo) to get the details of the passed netCDF file (variables and its dimensions).
+
+Format 1: `RS_FromNetCDF(netCDF: ARRAY[Byte], recordVariableName: String)`
+
+Format 2: `RS_FromNetCDF(netCDF: ARRAY[Byte], recordVariableName: String, lonDimensionName: String, latDimensionName: String)`
+
+Since: `v1.5.1`
+
+Spark Example:
+
+```scala
+val df = sedona.read.format("binaryFile").load("/some/path/test.nc")
+df = df.withColumn("raster", f.expr("RS_FromNetCDF(content, 'O3')"))
+```
+
+
+```scala
+val df = sedona.read.format("binaryFile").load("/some/path/test.nc")
+df = df.withColumn("raster", f.expr("RS_FromNetCDF(content, 'O3', 'lon', 'lat')"))
+```
+
+
+### RS_NetCDFInfo
+
+Introduction: Returns a string containing names of the variables in a given netCDF file along with its dimensions.
+
+Format: `RS_NetCDFInfo(netCDF: ARRAY[Byte])`
+
+Since: `1.5.1`
+
+Spark Example:
+
+```scala
+val df = sedona.read.format("binaryFile").load("/some/path/test.nc")
+recordInfo = df.selectExpr("RS_NetCDFInfo(content) as record_info").first().getString(0)
+print(recordInfo)
+```
+
+
+Output:
+
+```text
+O3(time=2, z=2, lat=48, lon=80)
+
+NO2(time=2, z=2, lat=48, lon=80)
+```
diff --git a/pom.xml b/pom.xml
index 72f4c9eb72..b17004f4d3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -129,6 +129,10 @@
org.apache.logging.log4j
log4j-core
+
+ edu.ucar
+ cdm-core
+
org.apache.logging.log4j
log4j-slf4j-impl
diff --git a/spark/common/src/main/java/org/apache/sedona/core/formatMapper/netcdfParser/SerNetCDFUtils.java b/spark/common/src/main/java/org/apache/sedona/core/formatMapper/netcdfParser/SerNetCDFUtils.java
index e5118ca73c..0c23fb39e1 100644
--- a/spark/common/src/main/java/org/apache/sedona/core/formatMapper/netcdfParser/SerNetCDFUtils.java
+++ b/spark/common/src/main/java/org/apache/sedona/core/formatMapper/netcdfParser/SerNetCDFUtils.java
@@ -32,7 +32,6 @@
package org.apache.sedona.core.formatMapper.netcdfParser;
import ucar.ma2.Array;
-import ucar.ma2.DataType;
import ucar.ma2.Index;
import ucar.nc2.Dimension;
import ucar.nc2.NetcdfFile;
@@ -201,4 +200,5 @@ public static Integer getDimensionSize(NetcdfDataset netcdfFile, String rowDim)
if (dimSize < 0) {throw new IllegalStateException("Dimension does not exist!!!");}
return dimSize;
}
+
}
diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index b49706d700..0b2a069300 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -256,7 +256,9 @@ object Catalog {
function[RS_AsImage](),
function[RS_ZonalStats](),
function[RS_ZonalStatsAll](),
- function[RS_Resample]()
+ function[RS_Resample](),
+ function[RS_FromNetCDF](),
+ function[RS_NetCDFInfo]()
)
val aggregateExpressions: Seq[Aggregator[Geometry, Geometry, Geometry]] = Seq(
diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterConstructors.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterConstructors.scala
index f7880aafe2..29c8ab1116 100644
--- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterConstructors.scala
+++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterConstructors.scala
@@ -62,3 +62,22 @@ case class RS_MakeEmptyRaster(inputExpressions: Seq[Expression])
copy(inputExpressions = newChildren)
}
}
+
+
+case class RS_FromNetCDF(inputExpressions: Seq[Expression])
+ extends InferredExpression(
+ inferrableFunction2(RasterConstructors.fromNetCDF), inferrableFunction4(RasterConstructors.fromNetCDF)) {
+
+ override def foldable: Boolean = false
+ protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
+ copy(inputExpressions = newChildren)
+ }
+}
+
+case class RS_NetCDFInfo(inputExpressions: Seq[Expression])
+ extends InferredExpression(RasterConstructors.getRecordInfo _) {
+
+ protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
+ copy(inputExpressions = newChildren)
+ }
+}
diff --git a/spark/common/src/test/resources/raster/netcdf/test.nc b/spark/common/src/test/resources/raster/netcdf/test.nc
new file mode 100644
index 0000000000..2f7b67d87f
Binary files /dev/null and b/spark/common/src/test/resources/raster/netcdf/test.nc differ
diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
index 61ebc13163..5776949024 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
@@ -1429,5 +1429,44 @@ class rasteralgebraTest extends TestBaseScala with BeforeAndAfter with GivenWhen
assertEquals(expectedMetadata(i), rasterMetadata(i), 1e-6)
}
}
+
+ it("Passed RS_FromNetCDF with NetCDF classic") {
+ val df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster/netcdf/test.nc")
+ val rasterDf = df.selectExpr("RS_FromNetCDF(content, 'O3') as raster")
+ val expectedMetadata = Seq(4.9375, 50.9375, 80, 48, 0.125, -0.125, 0, 0, 0, 4)
+ val actualMetadata = rasterDf.selectExpr("RS_Metadata(raster) as metadata").first().getSeq[Double](0)
+
+ for (i <- expectedMetadata.indices) {
+ assertEquals(expectedMetadata(i), actualMetadata(i), 1e-6)
+ }
+
+ val expectedFirstVal = 60.95357131958008
+ val actualFirstVal = rasterDf.selectExpr("RS_Value(raster, 0, 0, 1) as raster").first().getDouble(0)
+ assertEquals(expectedFirstVal, actualFirstVal, 1e-6)
+ }
+
+ it("Passed RS_FromNetCDF with NetCDF classic long form") {
+ val df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster/netcdf/test.nc")
+ val rasterDf = df.selectExpr("RS_FromNetCDF(content, 'O3', 'lon', 'lat') as raster")
+ val expectedMetadata = Seq(4.9375, 50.9375, 80, 48, 0.125, -0.125, 0, 0, 0, 4)
+ val actualMetadata = rasterDf.selectExpr("RS_Metadata(raster) as metadata").first().getSeq[Double](0)
+
+ for (i <- expectedMetadata.indices) {
+ assertEquals(expectedMetadata(i), actualMetadata(i), 1e-6)
+ }
+
+ val expectedFirstVal = 60.95357131958008
+ val actualFirstVal = rasterDf.selectExpr("RS_Value(raster, 0, 0, 1) as raster").first().getDouble(0)
+ assertEquals(expectedFirstVal, actualFirstVal, 1e-6)
+ }
+
+ it("Passed RS_NetCDFInfo") {
+ val df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster/netcdf/test.nc")
+ val recordInfo = df.selectExpr("RS_NetCDFInfo(content) as record_info").first().getString(0)
+ val expectedRecordInfo = "O3(time=2, z=2, lat=48, lon=80)\n" +
+ "\n" +
+ "NO2(time=2, z=2, lat=48, lon=80)"
+ assertEquals(expectedRecordInfo, recordInfo)
+ }
}
}