Skip to content
This repository has been archived by the owner on Apr 14, 2023. It is now read-only.

Commit

Permalink
fix(#1710): Create file specific instance of distributed list
Browse files Browse the repository at this point in the history
... to try and detangle where a common distributed list is actually needed.
  • Loading branch information
matthewdunsdon committed Dec 21, 2020
1 parent 497eb5a commit a6ccb29
Show file tree
Hide file tree
Showing 16 changed files with 92 additions and 94 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ Feature: User can specify that a field value belongs to a set of predetermined o
| "Wales" | "Cardiff" | 2 | "two" |


Scenario: Running an 'inMap' with text a restriction
Scenario: Running an 'inMap' with invalid typed data
Given the following non nullable fields exist:
| HomeNation |
| Population |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@
*/
package com.scottlogic.datahelix.generator.orchestrator.cucumber.testframework.utils;

import com.scottlogic.datahelix.generator.common.whitelist.DistributedList;
import com.scottlogic.datahelix.generator.common.whitelist.WeightedElement;
import com.scottlogic.datahelix.generator.profile.reader.CsvInputStreamReaderFactory;
import com.scottlogic.datahelix.generator.profile.reader.FileReader;

import javax.inject.Inject;
import java.io.File;
import java.util.List;
import java.util.stream.Collectors;

public class CucumberFileReader extends FileReader {
private final CucumberTestState testState;
Expand All @@ -32,8 +34,8 @@ public CucumberFileReader(CucumberTestState testState) {
}

@Override
public DistributedList<String> listFromMapFile(File file, String key) {
return DistributedList.uniform(testState.getValuesFromMap(file.getName(), key));
public List<String> listFromMapFile(File file, String key) {
return testState.getValuesFromMap(file.getName(), key);
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
package com.scottlogic.datahelix.generator.profile.reader;

import com.scottlogic.datahelix.generator.common.ValidationException;
import com.scottlogic.datahelix.generator.common.whitelist.DistributedList;
import com.scottlogic.datahelix.generator.common.whitelist.WeightedElement;

import java.io.*;
import java.util.List;

public class CsvFileInputReader implements CsvInputReader {
private final File path;
Expand All @@ -28,17 +29,17 @@ public CsvFileInputReader(File path) {
this.path = path;
}

public DistributedList<String> retrieveLines() {
public List<WeightedElement<String>> retrieveLines() {
try (InputStream stream = createStream()) {
return new CsvStreamInputReader(stream, path.getName()).retrieveLines();
} catch (IOException exc){
throw new UncheckedIOException(exc);
}
}

public DistributedList<String> retrieveLines(String key) {
public List<String> retrieveLinesForColumn(String key) {
try (InputStream stream = createStream()) {
return new CsvStreamInputReader(stream, path.getName()).retrieveLines(key);
return new CsvStreamInputReader(stream, path.getName()).retrieveLinesForColumn(key);
} catch (IOException exc){
throw new UncheckedIOException(exc);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@

package com.scottlogic.datahelix.generator.profile.reader;

import com.scottlogic.datahelix.generator.common.whitelist.DistributedList;
import com.scottlogic.datahelix.generator.common.whitelist.WeightedElement;

import java.util.List;

public interface CsvInputReader{
DistributedList<String> retrieveLines();
DistributedList<String> retrieveLines(String key);
List<WeightedElement<String>> retrieveLines();
List<String> retrieveLinesForColumn(String key);
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
package com.scottlogic.datahelix.generator.profile.reader;

import com.scottlogic.datahelix.generator.common.ValidationException;
import com.scottlogic.datahelix.generator.common.whitelist.DistributedList;
import com.scottlogic.datahelix.generator.common.whitelist.WeightedElement;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

import java.io.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Optional;
Expand All @@ -38,25 +39,24 @@ public CsvStreamInputReader(InputStream stream, String file) {
this.file = file;
}

public DistributedList<String> retrieveLines() {
public List<WeightedElement<String>> retrieveLines() {
List<CSVRecord> records = parse(stream);
return new DistributedList<>(records.stream()
return records.stream()
.map(this::createWeightedElementFromRecord)
.collect(Collectors.toList()));
.collect(Collectors.toList());
}

public DistributedList<String> retrieveLines(String key) {
public List<String> retrieveLinesForColumn(String key) {
List<CSVRecord> records = parse(stream);

int index = getIndexForKey(records.get(0), key);

//Remove the header
records.remove(0);

return new DistributedList<>(records.stream()
return records.stream()
.map(record -> record.get(index))
.map(record -> createWeightedElement(record, Optional.empty()))
.collect(Collectors.toList()));
.collect(Collectors.toList());
}

private static int getIndexForKey(CSVRecord header, String key) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@
package com.scottlogic.datahelix.generator.profile.reader;

import com.google.inject.Inject;
import com.scottlogic.datahelix.generator.common.whitelist.DistributedList;
import com.scottlogic.datahelix.generator.common.whitelist.WeightedElement;

import java.io.File;
import java.util.stream.Collectors;
import java.util.List;

public class FileReader {
private final CsvInputStreamReaderFactory csvReaderFactory;
Expand All @@ -30,24 +29,11 @@ public FileReader(CsvInputStreamReaderFactory csvReaderFactory) {
this.csvReaderFactory = csvReaderFactory;
}

public DistributedList<Object> setFromFile(File file) {
CsvInputReader reader = csvReaderFactory.getReaderForFile(file);
DistributedList<String> names = reader.retrieveLines();

return new DistributedList<>(
names.distributedList().stream()
.map(holder -> new WeightedElement<>((Object) holder.element(), holder.weight()))
.distinct()
.collect(Collectors.toList()));
public List<WeightedElement<String>> setFromFile(File file) {
return csvReaderFactory.getReaderForFile(file).retrieveLines();
}

public DistributedList<String> listFromMapFile(File file, String key) {
CsvInputReader reader = csvReaderFactory.getReaderForFile(file);
DistributedList<String> names = reader.retrieveLines(key);

return new DistributedList<>(
names.distributedList().stream()
.map(holder -> new WeightedElement<>(holder.element(), holder.weight()))
.collect(Collectors.toList()));
public List<String> listFromMapFile(File file, String key) {
return csvReaderFactory.getReaderForFile(file).retrieveLinesForColumn(key);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class ConstraintDeserializer extends JsonDeserializer<ConstraintDTO> {
private final FileReader fileReader;
Expand Down Expand Up @@ -75,7 +74,8 @@ public ConstraintDTO deserialize(JsonParser jsonParser, DeserializationContext c

private InMapConstraintDTO map(InMapFromFileConstraintDTO dto)
{
List<Object> values = fileReader.listFromMapFile(getFile(dto.file), dto.key).stream().collect(Collectors.toList());
List<Object> values = new ArrayList<>(fileReader.listFromMapFile(getFile(dto.file), dto.key));

InMapConstraintDTO inMapConstraintDTO = new InMapConstraintDTO();
inMapConstraintDTO.field = dto.field;
inMapConstraintDTO.otherField = dto.file;
Expand All @@ -85,7 +85,7 @@ private InMapConstraintDTO map(InMapFromFileConstraintDTO dto)

private InSetConstraintDTO map(InSetFromFileConstraintDTO dto)
{
List<Object> values = new ArrayList<>(fileReader.setFromFile(getFile(dto.file)).distributedList());
List<Object> values = new ArrayList<>(fileReader.setFromFile(getFile(dto.file)));
InSetConstraintDTO inSetConstraintDTO = new InSetConstraintDTO();
inSetConstraintDTO.field = dto.field;
inSetConstraintDTO.values = values;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import com.google.inject.Inject;
import com.scottlogic.datahelix.generator.common.profile.*;
import com.scottlogic.datahelix.generator.common.whitelist.DistributedList;
import com.scottlogic.datahelix.generator.core.profile.constraints.Constraint;
import com.scottlogic.datahelix.generator.core.profile.constraints.atomic.*;
import com.scottlogic.datahelix.generator.core.profile.constraints.grammatical.AndConstraint;
Expand Down Expand Up @@ -88,13 +89,13 @@ public ConstraintService(CustomConstraintFactory customConstraintFactory, NameRe
field -> new MatchesStandardConstraint(field, StandardConstraintTypes.RIC));
fieldTypeToConstraint.put(
StandardSpecificFieldType.FIRST_NAME.getType(),
field -> new InSetConstraint(field, nameRetrievalService.loadNamesFromFile(NameConstraintTypes.FIRST)));
field -> new InSetConstraint(field, new DistributedList(nameRetrievalService.loadNamesFromFile(NameConstraintTypes.FIRST))));
fieldTypeToConstraint.put(
StandardSpecificFieldType.LAST_NAME.getType(),
field -> new InSetConstraint(field, nameRetrievalService.loadNamesFromFile(NameConstraintTypes.LAST)));
field -> new InSetConstraint(field, new DistributedList(nameRetrievalService.loadNamesFromFile(NameConstraintTypes.LAST))));
fieldTypeToConstraint.put(
StandardSpecificFieldType.FULL_NAME.getType(),
field -> new InSetConstraint(field, nameRetrievalService.loadNamesFromFile(NameConstraintTypes.FULL)));
field -> new InSetConstraint(field, new DistributedList(nameRetrievalService.loadNamesFromFile(NameConstraintTypes.FULL))));
fieldTypeToConstraint.put(
StandardSpecificFieldType.FAKER.getType(),
field -> new FakerConstraint(field, field.getSpecificType().getFakerMethod()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
package com.scottlogic.datahelix.generator.profile.services;

import com.google.inject.Inject;
import com.scottlogic.datahelix.generator.common.whitelist.DistributedList;
import com.scottlogic.datahelix.generator.common.whitelist.WeightedElement;
import com.scottlogic.datahelix.generator.core.profile.constraints.atomic.NameConstraintTypes;
import com.scottlogic.datahelix.generator.profile.reader.CsvInputStreamReaderFactory;

import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.List;
import java.util.stream.Collectors;

import static com.scottlogic.datahelix.generator.core.profile.constraints.atomic.NameConstraintTypes.*;
Expand All @@ -38,7 +38,7 @@ public NameRetrievalService(CsvInputStreamReaderFactory csvReaderFactory) {
this.csvReaderFactory = csvReaderFactory;
}

public DistributedList<Object> loadNamesFromFile(NameConstraintTypes configuration) {
public List<WeightedElement<Object>> loadNamesFromFile(NameConstraintTypes configuration) {
if (configuration == FULL) {
return downcastToObject(combineFirstWithLastNames(
generateNamesFromSingleFile(FIRST.getFilePath()),
Expand All @@ -48,15 +48,14 @@ public DistributedList<Object> loadNamesFromFile(NameConstraintTypes configurati
}
}

private static <T> DistributedList<Object> downcastToObject(DistributedList<T> higher) {
return new DistributedList<>(
higher.distributedList().stream()
private static <T> List<WeightedElement<Object>> downcastToObject(List<WeightedElement<T>> higher) {
return higher.stream()
.map(holder -> new WeightedElement<Object>(holder.element(), holder.weight()))
.distinct()
.collect(Collectors.toList()));
.collect(Collectors.toList());
}

private DistributedList<String> generateNamesFromSingleFile(String source) {
private List<WeightedElement<String>> generateNamesFromSingleFile(String source) {
try (InputStream stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(source))
{
return csvReaderFactory.getReaderForStream(stream, source).retrieveLines();
Expand All @@ -65,14 +64,14 @@ private DistributedList<String> generateNamesFromSingleFile(String source) {
}
}

private static DistributedList<String> combineFirstWithLastNames(DistributedList<String> firstNames,
DistributedList<String> lastNames) {
return new DistributedList<>(firstNames.distributedList().stream()
private static List<WeightedElement<String>> combineFirstWithLastNames(List<WeightedElement<String>> firstNames,
List<WeightedElement<String>> lastNames) {
return firstNames.stream()
.flatMap(
first -> lastNames.distributedList().stream()
first -> lastNames.stream()
.map(last -> mergeFrequencies(first, last)))
.distinct()
.collect(Collectors.toList()));
.collect(Collectors.toList());
}

private static WeightedElement<String> mergeFrequencies(WeightedElement<String> first,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import com.scottlogic.datahelix.generator.common.profile.FieldType;
import com.scottlogic.datahelix.generator.common.profile.NumericGranularity;
import com.scottlogic.datahelix.generator.common.util.FileUtils;
import com.scottlogic.datahelix.generator.common.whitelist.DistributedList;
import com.scottlogic.datahelix.generator.common.whitelist.WeightedElement;
import com.scottlogic.datahelix.generator.core.profile.Profile;
import com.scottlogic.datahelix.generator.core.profile.constraints.Constraint;
import com.scottlogic.datahelix.generator.core.profile.constraints.atomic.*;
Expand Down Expand Up @@ -52,31 +52,34 @@
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;

import static com.scottlogic.datahelix.generator.common.util.Defaults.DEFAULT_DATE_FORMATTING;
import static com.scottlogic.datahelix.generator.common.whitelist.WeightedElement.withDefaultWeight;
import static java.util.Collections.singletonList;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.core.IsNull.nullValue;


public class JsonProfileReaderTests {
private DistributedList<Object> inSetReaderReturnValue = DistributedList.singleton("test");
private DistributedList<String> fromFileReaderReturnValue = DistributedList.singleton("test");
private final List<WeightedElement<String>> inSetReaderReturnValue = singletonList(withDefaultWeight("test"));
private final List<String> fromFileReaderReturnValue = singletonList("test");

private class MockFromFileReader extends FileReader {
public MockFromFileReader() {
super(null);
}

@Override
public DistributedList<Object> setFromFile(File file)
public List<WeightedElement<String>> setFromFile(File file)
{
return inSetReaderReturnValue;
}

@Override
public DistributedList<String> listFromMapFile(File file, String Key)
public List<String> listFromMapFile(File file, String Key)
{
return fromFileReaderReturnValue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
*/
package com.scottlogic.datahelix.generator.profile.reader.file;

import com.scottlogic.datahelix.generator.common.whitelist.DistributedList;
import com.scottlogic.datahelix.generator.common.whitelist.WeightedElement;
import com.scottlogic.datahelix.generator.profile.reader.CsvInputReader;
import com.scottlogic.datahelix.generator.profile.reader.CsvStreamInputReader;
import org.junit.jupiter.api.Test;

import java.io.InputStream;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
Expand All @@ -35,11 +35,14 @@ public void testReadingLinesFromNames() {
final InputStream is = loader.getResourceAsStream("names/firstname.csv");
final CsvInputReader reader = new CsvStreamInputReader(is, "names/firstname.csv");

final DistributedList<String> names = reader.retrieveLines();
final List<WeightedElement<String>> nameElements = reader.retrieveLines();
final Set<String> names = nameElements.stream()
.map(WeightedElement::element)
.collect(Collectors.toSet());

final Set<String> sampleNames = Stream.of("Rory", "Kyle", "Grace").collect(Collectors.toSet());

assertTrue(names.list().containsAll(sampleNames));
assertTrue(names.containsAll(sampleNames));
}

@Test
Expand All @@ -48,13 +51,13 @@ public void testReadingLinesFromFileWithoutFrequencies() {
final InputStream is = loader.getResourceAsStream("csv/without-frequencies.csv");
final CsvInputReader reader = new CsvStreamInputReader(is, "csv/without-frequencies.csv");

final DistributedList<String> set = reader.retrieveLines();
final List<WeightedElement<String>> set = reader.retrieveLines();

assertTrue(checkAllWeightsAreEquals(set));
}

private <T> boolean checkAllWeightsAreEquals(DistributedList<T> set) {
return set.distributedList().stream()
private <T> boolean checkAllWeightsAreEquals(List<WeightedElement<T>> set) {
return set.stream()
.map(WeightedElement::weight)
.distinct()
.limit(2).count() <= 1;
Expand Down
Loading

0 comments on commit a6ccb29

Please sign in to comment.