Skip to content

Commit 1dba89f

Browse files
authored
Updates default constraints of data generators. (#41)
1 parent b19eebf commit 1dba89f

File tree

3 files changed

+39
-13
lines changed

3 files changed

+39
-13
lines changed

src/com/amazon/ion/benchmark/DataConstructor.java

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,12 @@ class DataConstructor {
7070
// The constants defined below are used as placeholders in the method WriteRandomIonValues.writeRequestedSizeFile.
7171
final static private IonSystem SYSTEM = IonSystemBuilder.standard().build();
7272
final static private List<Integer> DEFAULT_RANGE = Arrays.asList(0, 0x10FFFF);
73-
final static public Timestamp.Precision[] PRECISIONS = Timestamp.Precision.values();
7473
final static public IonStruct NO_CONSTRAINT_STRUCT = null;
7574
final static private int DEFAULT_PRECISION = 20;
75+
// The ASCII_CODE_LOWERCASE_A represents the ASCII code of character "a".
76+
final static private int ASCII_CODE_LOWERCASE_A = 97;
77+
// The ASCII_CODE_UPPERCASE_A represents the ASCII code of character "A".
78+
final static private int ASCII_CODE_UPPERCASE_A = 65;
7679
final static private int DEFAULT_SCALE_LOWER_BOUND = -20;
7780
final static private int DEFAULT_SCALE_UPPER_BOUND = 20;
7881
final static private int DEFAULT_CONTAINER_LENGTH = 20;
@@ -390,18 +393,19 @@ public static String constructString(Map<String, ReparsedConstraint> constraintM
390393
}
391394

392395
/**
393-
* Generate unicode codepoint randomly.
396+
* Randomly generate a Unicode code point that corresponds to a character in [A-Z] or [a-z].
394397
* @return generated codepoint.
395398
*/
396399
private static int getCodePoint() {
397-
Random random = new Random();
398-
int type;
399-
int codePoint;
400-
do {
401-
codePoint = random.nextInt(DEFAULT_RANGE.get(1) - DEFAULT_RANGE.get(0) + 1) + DEFAULT_RANGE.get(0);
402-
type = Character.getType(codePoint);
403-
} while (type == Character.PRIVATE_USE || type == Character.SURROGATE || type == Character.UNASSIGNED);
404-
return codePoint;
400+
int index = ThreadLocalRandom.current().nextInt(20);
401+
int randomIndex = ThreadLocalRandom.current().nextInt(26);
402+
if (index < 10) {
403+
// Randomly generate the code point of a character from [A-Z].
404+
return randomIndex + ASCII_CODE_UPPERCASE_A;
405+
} else {
406+
// Randomly generate the code point of a character from [a-z].
407+
return randomIndex + ASCII_CODE_LOWERCASE_A;
408+
}
405409
}
406410

407411
/**
@@ -497,7 +501,15 @@ public static long constructInt(Map<String, ReparsedConstraint> constraintMapClo
497501
return validValues.getRange().getRandomQuantifiableValueFromRange().longValue();
498502
} else {
499503
// If there is no constraint provided, the generator will construct a random value.
500-
return ThreadLocalRandom.current().nextLong();
504+
// Randomly generate integers from a distribution in which more than 80% of the integers are smaller than 1024.
505+
// This makes the generated integers more similar to real-world data.
506+
Random random = new Random();
507+
int index = random.nextInt(20);
508+
if (index < 16) {
509+
return ThreadLocalRandom.current().nextInt(1024);
510+
} else {
511+
return ThreadLocalRandom.current().nextLong();
512+
}
501513
}
502514
}
503515

@@ -512,8 +524,8 @@ public static Timestamp constructTimestamp(Map<String, ReparsedConstraint> const
512524
Range range = DEFAULT_TIMESTAMP_IN_MILLIS_DECIMAL_RANGE;
513525
// Preset the local offset.
514526
Integer localOffset = localOffset(random);
515-
// Preset the default precision.
516-
Timestamp.Precision precision = PRECISIONS[random.nextInt(PRECISIONS.length)];
527+
// Preset the default precision as 'Day'.
528+
Timestamp.Precision precision = Timestamp.Precision.DAY;
517529
TimestampPrecision timestampPrecision = (TimestampPrecision) constraintMapClone.remove("timestamp_precision");
518530
ValidValues validValues = (ValidValues) constraintMapClone.remove("valid_values");
519531
if (!constraintMapClone.isEmpty()) {

tst/com/amazon/ion/benchmark/DataGeneratorTest.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ public class DataGeneratorTest {
5050
private final static String INPUT_SCHEMA_CONTAINS_CODEPOINT_LENGTH = "./tst/com/amazon/ion/benchmark/testStringCodepointLength.isl";
5151
private final static String INPUT_ION_STRUCT_SCHEMA_CONTAINS_ELEMENT_FILE_PATH = "./tst/com/amazon/ion/benchmark/testSchemaContainsElement.isl";
5252
private final static String INPUT_ION_SEXP_FILE_PATH = "./tst/com/amazon/ion/benchmark/testSexp.isl";
53+
private final static String INPUT_ION_INT_SCHEMA = "./tst/com/amazon/ion/benchmark/testIntWithoutConstraint.isl";
5354
private final static String INPUT_TEST_ELEMENT_SCHEMA = "./tst/com/amazon/ion/benchmark/testElement.isl";
5455
private final static String INPUT_ION_CLOB_FILE_PATH = "./tst/com/amazon/ion/benchmark/testClob.isl";
5556
private final static String INPUT_ION_BLOB_FILE_PATH = "./tst/com/amazon/ion/benchmark/testBlob.isl";
@@ -222,6 +223,15 @@ public void testViolationOfNestedIonList() throws Exception {
222223
DataGeneratorTest.violationDetect(INPUT_NESTED_ION_LIST_PATH);
223224
}
224225

226+
/**
227+
* Test if there's a violation detected when generating IonInt from ISL without constraints.
228+
* @throws Exception if error occurs during the violation detecting process.
229+
*/
230+
@Test
231+
public void testViolationOfIonInt() throws Exception {
232+
DataGeneratorTest.violationDetect(INPUT_ION_INT_SCHEMA);
233+
}
234+
225235
/**
226236
* Test if there's violation when generating IonList based on ISL that specifies constraint 'element' without specifying 'container_length'.
227237
* @throws Exception if error occurs during the violation detecting process.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
type::{
2+
name: Int,
3+
type: int,
4+
}

0 commit comments

Comments
 (0)