1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Improving column name validation to only run once per source [#849](https://github.com/ie3-institute/PowerSystemDataModel/issues/849)
- Refactored and abstracted `EntitySource`s and `EntityData` creation [#969](https://github.com/ie3-institute/PowerSystemDataModel/issues/969)
- Updated contributing.md [#737](https://github.com/ie3-institute/PowerSystemDataModel/issues/737)
- `CsvDataSource` throws exceptions on error [#954](https://github.com/ie3-institute/PowerSystemDataModel/issues/954)

## [4.1.0] - 2023-11-02

src/main/java/edu/ie3/datamodel/io/connectors/CsvFileConnector.java
@@ -159,28 +159,7 @@ public synchronized <C extends UniqueEntity> void closeEntityWriter(Class<C> clz
}

/**
* Initializes a file reader for the given class that should be read in. The expected file name is
* determined based on the {@link FileNamingStrategy} of this {@link CsvFileConnector} instance
*
* @param clz the class of the entity that should be read
* @return the reader that contains information about the file to be read in
* @throws FileNotFoundException If the matching file cannot be found
*/
public BufferedReader initReader(Class<? extends UniqueEntity> clz)
throws FileNotFoundException, ConnectorException {
Path filePath =
fileNamingStrategy
.getFilePath(clz)
.orElseThrow(
() ->
new ConnectorException(
"Cannot find a naming strategy for class '" + clz.getSimpleName() + "'."));
return initReader(filePath);
}

/**
* Initializes a file reader for the given file name. Use {@link
* CsvFileConnector#initReader(Class)} for files that actually correspond to concrete entities.
* Initializes a file reader for the given file name.
*
* @param filePath path of file starting from base folder, including file name but not file
* extension
@@ -247,18 +226,6 @@ private Set<Path> getIndividualTimeSeriesFilePaths() {
}
}

/**
* Initialises a reader to get a grip on the file that contains mapping information between
* coordinate id and actual coordinate
*
* @return A {@link BufferedReader}
* @throws FileNotFoundException If the file is not present
*/
public BufferedReader initIdCoordinateReader() throws FileNotFoundException {
Path filePath = Path.of(fileNamingStrategy.getIdCoordinateEntityName());
return initReader(filePath);
}

/**
* Builds a new file definition consisting of file name and head line elements
*
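With initReader(Class) and initIdCoordinateReader() removed from the connector, callers now resolve the file path via the FileNamingStrategy themselves and hand it to initReader(Path). A minimal sketch of the new call pattern (the helper method and its error handling are illustrative, not part of this PR):

import edu.ie3.datamodel.exceptions.SourceException;
import edu.ie3.datamodel.io.connectors.CsvFileConnector;
import edu.ie3.datamodel.io.naming.FileNamingStrategy;
import edu.ie3.datamodel.models.UniqueEntity;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.nio.file.Path;

// Illustrative helper: resolve the path first, then open the reader.
BufferedReader openReaderFor(
    Class<? extends UniqueEntity> entityClass,
    FileNamingStrategy fileNamingStrategy,
    CsvFileConnector connector)
    throws SourceException, FileNotFoundException {
  Path filePath =
      fileNamingStrategy
          .getFilePath(entityClass) // Optional<Path>, empty if no naming strategy matches
          .orElseThrow(
              () ->
                  new SourceException(
                      "Cannot find a naming strategy for class '"
                          + entityClass.getSimpleName()
                          + "'."));
  return connector.initReader(filePath); // throws FileNotFoundException if the file is absent
}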
3 changes: 2 additions & 1 deletion src/main/java/edu/ie3/datamodel/io/source/DataSource.java
@@ -23,5 +23,6 @@ Optional<Set<String>> getSourceFields(Class<? extends UniqueEntity> entityClass)
throws SourceException;

/** Creates a stream of maps that represent the rows in the database */
Stream<Map<String, String>> getSourceData(Class<? extends UniqueEntity> entityClass);
Stream<Map<String, String>> getSourceData(Class<? extends UniqueEntity> entityClass)
throws SourceException;
}
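getSourceData turning into a throwing method is a source-incompatible change for implementors and callers alike: the checked SourceException must now be handled explicitly. A consumer sketch, where dataSource, log and the NodeInput entity class are placeholders standing in for any concrete setup:

try (Stream<Map<String, String>> rows = dataSource.getSourceData(NodeInput.class)) {
  rows.forEach(fieldsToValues -> log.debug("row: {}", fieldsToValues));
} catch (SourceException e) {
  // a missing/unreadable file or duplicate primary keys now surfaces here
}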
11 changes: 8 additions & 3 deletions src/main/java/edu/ie3/datamodel/io/source/EntitySource.java
@@ -384,9 +384,14 @@ protected static Stream<Try<AssetInputEntityData, SourceException>> assetInputEn
*/
protected Stream<Try<EntityData, SourceException>> buildEntityData(
Class<? extends UniqueEntity> entityClass) {
return dataSource
.getSourceData(entityClass)
.map(fieldsToAttributes -> new Success<>(new EntityData(fieldsToAttributes, entityClass)));

return Try.of(() -> dataSource.getSourceData(entityClass), SourceException.class)
.convert(
data ->
data.map(
fieldsToAttributes ->
new Success<>(new EntityData(fieldsToAttributes, entityClass))),
exception -> Stream.of(Failure.of(exception)));
}

protected static <S extends UniqueEntity> Map<UUID, S> unpackMap(
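The Try.of(...).convert(...) chain above folds a failed read into the element stream itself: on success every row maps to a Success, on failure the stream consists of exactly one Failure. An equivalent imperative sketch, assuming the Try semantics used throughout this diff:

Stream<Try<EntityData, SourceException>> entityData;
try {
  // happy path: every csv row becomes a Success-wrapped EntityData
  entityData =
      dataSource
          .getSourceData(entityClass)
          .map(fieldsToAttributes -> new Success<>(new EntityData(fieldsToAttributes, entityClass)));
} catch (SourceException e) {
  // failure path: the whole stream collapses to a single Failure element
  entityData = Stream.of(Failure.of(e));
}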
src/main/java/edu/ie3/datamodel/io/source/TimeSeriesMappingSource.java
@@ -33,7 +33,7 @@ protected TimeSeriesMappingSource() {
*
* @return That mapping
*/
public Map<UUID, UUID> getMapping() {
public Map<UUID, UUID> getMapping() throws SourceException {
return getMappingSourceData()
.map(this::createMappingEntry)
.filter(Try::isSuccess)
@@ -48,7 +48,7 @@ public Map<UUID, UUID> getMapping() {
* @param modelIdentifier Identifier of the model
* @return An {@link Optional} to the time series identifier
*/
public Optional<UUID> getTimeSeriesUuid(UUID modelIdentifier) {
public Optional<UUID> getTimeSeriesUuid(UUID modelIdentifier) throws SourceException {
return Optional.ofNullable(getMapping().get(modelIdentifier));
}

@@ -57,7 +57,7 @@ public Optional<UUID> getTimeSeriesUuid(UUID modelIdentifier) {
*
* @return Stream of maps
*/
public abstract Stream<Map<String, String>> getMappingSourceData();
public abstract Stream<Map<String, String>> getMappingSourceData() throws SourceException;

/** Returns the option for fields found in the source */
public abstract Optional<Set<String>> getSourceFields() throws SourceException;
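Both lookups now propagate read failures instead of silently returning an empty mapping. A caller sketch, where mappingSource, modelUuid and log are placeholders:

try {
  Optional<UUID> timeSeriesUuid = mappingSource.getTimeSeriesUuid(modelUuid);
  timeSeriesUuid.ifPresentOrElse(
      uuid -> log.debug("Model {} is linked to time series {}", modelUuid, uuid),
      () -> log.debug("No time series mapped for model {}", modelUuid));
} catch (SourceException e) {
  // the underlying mapping source could not be read at all
}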
128 changes: 73 additions & 55 deletions src/main/java/edu/ie3/datamodel/io/source/csv/CsvDataSource.java
@@ -5,12 +5,14 @@
*/
package edu.ie3.datamodel.io.source.csv;

import edu.ie3.datamodel.exceptions.ConnectorException;
import edu.ie3.datamodel.exceptions.SourceException;
import edu.ie3.datamodel.io.connectors.CsvFileConnector;
import edu.ie3.datamodel.io.naming.FileNamingStrategy;
import edu.ie3.datamodel.io.source.DataSource;
import edu.ie3.datamodel.models.UniqueEntity;
import edu.ie3.datamodel.utils.Try;
import edu.ie3.datamodel.utils.Try.Failure;
import edu.ie3.datamodel.utils.Try.Success;
import edu.ie3.datamodel.utils.validation.ValidationUtils;
import edu.ie3.util.StringUtils;
import java.io.BufferedReader;
@@ -46,6 +48,8 @@ public class CsvDataSource implements DataSource {
protected final String csvSep;
protected final CsvFileConnector connector;

private final FileNamingStrategy fileNamingStrategy;

/**
* @deprecated ensures downward compatibility with old csv data format. Can be removed when
* support for old csv format is removed. *
@@ -56,41 +60,38 @@
public CsvDataSource(String csvSep, Path folderPath, FileNamingStrategy fileNamingStrategy) {
this.csvSep = csvSep;
this.connector = new CsvFileConnector(folderPath, fileNamingStrategy);
this.fileNamingStrategy = fileNamingStrategy;
}

@Override
public Optional<Set<String>> getSourceFields(Class<? extends UniqueEntity> entityClass)
throws SourceException {
return getSourceFields(() -> connector.initReader(entityClass));
return getSourceFields(getFilePath(entityClass).getOrThrow());
}

public Optional<Set<String>> getSourceFields(ReaderSupplier readerSupplier)
throws SourceException {
try (BufferedReader reader = readerSupplier.get()) {
/**
* @param filePath path of file starting from base folder, including file name but not file
* extension
* @return The source field names as a set, if file exists
* @throws SourceException on error while reading the source file
*/
public Optional<Set<String>> getSourceFields(Path filePath) throws SourceException {
try (BufferedReader reader = connector.initReader(filePath)) {
return Optional.of(
Arrays.stream(parseCsvRow(reader.readLine(), csvSep)).collect(Collectors.toSet()));
} catch (FileNotFoundException e) {
// A file not existing can be acceptable in many cases, and is handled elsewhere.
log.debug("The source for the given entity couldn't be found! Cause: {}", e.getMessage());
return Optional.empty();
} catch (ConnectorException | IOException e) {
} catch (IOException e) {
throw new SourceException("Error while trying to read source", e);
}
}

public interface ReaderSupplier {
BufferedReader get() throws FileNotFoundException, ConnectorException;
}

@Override
public Stream<Map<String, String>> getSourceData(Class<? extends UniqueEntity> entityClass) {
return buildStreamWithFieldsToAttributesMap(entityClass, connector);
}

// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

public BufferedReader createReader(Path filePath) throws FileNotFoundException {
return connector.initReader(filePath);
public Stream<Map<String, String>> getSourceData(Class<? extends UniqueEntity> entityClass)
throws SourceException {
return buildStreamWithFieldsToAttributesMap(entityClass, true).getOrThrow();
}

// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
@@ -279,25 +280,24 @@ protected <T extends UniqueEntity> Predicate<Optional<T>> isPresentCollectIfNot(
};
}

public FileNamingStrategy getNamingStrategy() {
return fileNamingStrategy;
}

/**
* Tries to open a file reader from the connector based on the provided entity class and hands it
* over for further processing.
* Tries to open a file reader based on the provided entity class and hands it over for further
* processing.
*
* @param entityClass the entity class that should be built and that is used to get the
* corresponding reader
* @param connector the connector that should be used to get the reader from
* @return a parallel stream of maps, where each map represents one row of the csv file with the
* mapping (fieldName to fieldValue)
*/
protected Stream<Map<String, String>> buildStreamWithFieldsToAttributesMap(
Class<? extends UniqueEntity> entityClass, CsvFileConnector connector) {
try {
return buildStreamWithFieldsToAttributesMap(entityClass, connector.initReader(entityClass));
} catch (FileNotFoundException | ConnectorException e) {
log.warn(
"Unable to find file for entity '{}': {}", entityClass.getSimpleName(), e.getMessage());
}
return Stream.empty();
protected Try<Stream<Map<String, String>>, SourceException> buildStreamWithFieldsToAttributesMap(
Class<? extends UniqueEntity> entityClass, boolean allowFileNotExisting) {
return getFilePath(entityClass)
.flatMap(
path -> buildStreamWithFieldsToAttributesMap(entityClass, path, allowFileNotExisting));
}

/**
@@ -306,13 +306,13 @@ protected Stream<Map<String, String>> buildStreamWithFieldsToAttributesMap(
* the returned stream is a parallel stream, the order of the elements cannot be guaranteed.
*
* @param entityClass the entity class that should be built
* @param bufferedReader the reader to use
* @return a parallel stream of maps, where each map represents one row of the csv file with the
* mapping (fieldName to fieldValue)
* @param filePath the path of the file to read
* @return a try containing either a parallel stream of maps, where each map represents one row of
* the csv file with the mapping (fieldName to fieldValue) or an exception
*/
protected Stream<Map<String, String>> buildStreamWithFieldsToAttributesMap(
Class<? extends UniqueEntity> entityClass, BufferedReader bufferedReader) {
try (BufferedReader reader = bufferedReader) {
protected Try<Stream<Map<String, String>>, SourceException> buildStreamWithFieldsToAttributesMap(
Class<? extends UniqueEntity> entityClass, Path filePath, boolean allowFileNotExisting) {
try (BufferedReader reader = connector.initReader(filePath)) {
final String[] headline = parseCsvRow(reader.readLine(), csvSep);

// by default try-with-resources closes the reader directly when we leave this method (which
@@ -322,14 +322,31 @@ protected Stream<Map<String, String>> buildStreamWithFieldsToAttributesMap(
Collection<Map<String, String>> allRows = csvRowFieldValueMapping(reader, headline);

return distinctRowsWithLog(
allRows, fieldToValues -> fieldToValues.get("uuid"), entityClass.getSimpleName(), "UUID")
.parallelStream();
allRows,
fieldToValues -> fieldToValues.get("uuid"),
entityClass.getSimpleName(),
"UUID")
.map(Set::parallelStream);
} catch (FileNotFoundException e) {
if (allowFileNotExisting) {
log.warn("Unable to find file '{}': {}", filePath, e.getMessage());
return Success.of(Stream.empty());
} else {
return Failure.of(new SourceException("Unable to find file '" + filePath + "'.", e));
}
} catch (IOException e) {
log.warn(
"Cannot read file to build entity '{}': {}", entityClass.getSimpleName(), e.getMessage());
return Failure.of(
new SourceException(
"Cannot read file to build entity '" + entityClass.getSimpleName() + "'", e));
}
}

return Stream.empty();
private Try<Path, SourceException> getFilePath(Class<? extends UniqueEntity> entityClass) {
return Try.from(
fileNamingStrategy.getFilePath(entityClass),
() ->
new SourceException(
"Cannot find a naming strategy for class '" + entityClass.getSimpleName() + "'."));
}

protected List<Map<String, String>> csvRowFieldValueMapping(
Expand Down Expand Up @@ -358,10 +375,10 @@ protected List<Map<String, String>> csvRowFieldValueMapping(
* debug String)
* @param keyDescriptor Colloquial descriptor of the key, that is meant to be unique (for debug
* String)
* @return either a set containing only unique rows or an empty set if at least two rows with the
* same UUID but different field values exist
* @return a try of either a set containing only unique rows or an exception if at least two rows
* with the same UUID but different field values exist
*/
protected Set<Map<String, String>> distinctRowsWithLog(
protected Try<Set<Map<String, String>>, SourceException> distinctRowsWithLog(
Collection<Map<String, String>> allRows,
final Function<Map<String, String>, String> keyExtractor,
String entityDescriptor,
@@ -385,18 +402,19 @@ protected Set<Map<String, String>> distinctRowsWithLog(
allRowsSet.removeAll(distinctIdSet);
String affectedCoordinateIds =
allRowsSet.stream().map(keyExtractor).collect(Collectors.joining(",\n"));
log.error(
"""
'{}' entities with duplicated {} key, but different field values found! Please review the corresponding input file!
Affected primary keys:
{}""",
entityDescriptor,
keyDescriptor,
affectedCoordinateIds);
// if this happens, we return an empty set to prevent further processing
return new HashSet<>();

// if this happens, we return a failure
return Failure.of(
new SourceException(
"'"
+ entityDescriptor
+ "' entities with duplicated "
+ keyDescriptor
+ " key, but different field "
+ "values found! Please review the corresponding input file! Affected primary keys: "
+ affectedCoordinateIds));
}

return allRowsSet;
return Success.of(allRowsSet);
}
}
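Previously, duplicate primary keys with diverging field values were only logged and swallowed into an empty set; they are now a hard Failure. A test-style sketch of the new contract (the rows are made up, and calling the protected method assumes access from a test or subclass in the same package):

Collection<Map<String, String>> rows =
    List.of(
        Map.of("uuid", "00000000-0000-0000-0000-000000000000", "id", "node_a"),
        Map.of("uuid", "00000000-0000-0000-0000-000000000000", "id", "node_b"));
Try<Set<Map<String, String>>, SourceException> result =
    source.distinctRowsWithLog(rows, row -> row.get("uuid"), "NodeInput", "UUID");
assert !result.isSuccess(); // same uuid, different 'id' -> Failure instead of empty set

Missing files, by contrast, stay tolerated wherever buildStreamWithFieldsToAttributesMap is invoked with allowFileNotExisting = true, which turns a FileNotFoundException into an empty Success rather than a Failure.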