Skip to content

Commit

Permalink
more checkstyle changes
Browse files Browse the repository at this point in the history
  • Loading branch information
jmalkin committed Feb 14, 2024
1 parent d9166ff commit c33333e
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,10 @@ class EbppsItemsSample<T> {
// does NOT copy the incoming ArrayList since this is an internal
// class's package-private constructor, not something directly
// taking user data
EbppsItemsSample(ArrayList<T> data, T partialItem, final double c) {
if (c < 0.0 || Double.isNaN(c) || Double.isInfinite(c))
EbppsItemsSample(final ArrayList<T> data, final T partialItem, final double c) {
if (c < 0.0 || Double.isNaN(c) || Double.isInfinite(c)) {
throw new SketchesArgumentException("C must be nonnegative and finite. Found: " + c);
}

c_ = c;
partialItem_ = partialItem;
Expand All @@ -72,8 +73,9 @@ class EbppsItemsSample<T> {
// rand_ is not set since it is not expected to be used from
// this object
void replaceContent(final T item, final double theta) {
if (theta < 0.0 || theta > 1.0 || Double.isNaN(theta))
if (theta < 0.0 || theta > 1.0 || Double.isNaN(theta)) {
throw new SketchesArgumentException("Theta must be in the range [0.0, 1.0]. Found: " + theta);
}

c_ = theta;
if (theta == 1.0) {
Expand Down Expand Up @@ -101,15 +103,18 @@ ArrayList<T> getSample() {
final boolean includePartial = partialItem_ != null && rand_.nextDouble() < cFrac;
final int resultSize = (data_ != null ? data_.size() : 0) + (includePartial ? 1 : 0);

if (resultSize == 0)
if (resultSize == 0) {
return null;
}

ArrayList<T> result = new ArrayList<>(resultSize);
if (data_ != null)
final ArrayList<T> result = new ArrayList<>(resultSize);
if (data_ != null) {
result.addAll(data_);
}

if (includePartial)
if (includePartial) {
result.add(partialItem_);
}

return result;
}
Expand All @@ -126,8 +131,9 @@ T[] getAllSamples(final Class<?> clazz) {
}
}
}
if (partialItem_ != null)
if (partialItem_ != null) {
itemsArray[i] = partialItem_; // no need to increment i again
}

return itemsArray;
}
Expand All @@ -147,18 +153,18 @@ T getPartialItem() {
boolean hasPartialItem() { return partialItem_ != null; }

// for testing to allow setting the seed
void replaceRandom(Random r) {
void replaceRandom(final Random r) {
rand_ = r;
}

void downsample(final double theta) {
if (theta >= 1.0) return;
if (theta >= 1.0) { return; }

double newC = theta * c_;
double newCInt = Math.floor(newC);
double newCFrac = newC % 1;
double cInt = Math.floor(c_);
double cFrac = c_ % 1;
final double newC = theta * c_;
final double newCInt = Math.floor(newC);
final double newCFrac = newC % 1;
final double cInt = Math.floor(c_);
final double cFrac = c_ % 1;

if (newCInt == 0.0) {
// no full items retained
Expand All @@ -168,7 +174,7 @@ void downsample(final double theta) {
data_.clear();
} else if (newCInt == cInt) {
// no items deleted
if (rand_.nextDouble() > (1 - theta * cFrac)/(1 - newCFrac)) {
if (rand_.nextDouble() > (1 - theta * cFrac) / (1 - newCFrac)) {
swapWithPartialItem();
}
} else {
Expand All @@ -184,22 +190,24 @@ void downsample(final double theta) {
}
}

if (newC == newCInt)
if (newC == newCInt) {
partialItem_ = null;
}

c_ = newC;
}

void merge(final EbppsItemsSample<T> other) {
//double cInt = Math.floor(c_);
double cFrac = c_ % 1;
double otherCFrac = other.c_ % 1;
final double cFrac = c_ % 1;
final double otherCFrac = other.c_ % 1;

// update c_ here but do NOT recompute fractional part yet
c_ += other.c_;

if (other.data_ != null)
if (other.data_ != null) {
data_.addAll(other.data_);
}

// This modifies the original algorithm slightly due to numeric
// precision issues. Specifically, the test if cFrac + otherCFrac == 1.0
Expand Down Expand Up @@ -243,13 +251,15 @@ public String toString() {

sb.append(" sample:").append(LS);
int idx = 0;
for (T item : data_)
for (T item : data_) {
sb.append("\t").append(idx++).append(":\t").append(item.toString()).append(LS);
}
sb.append(" partial: ");
if (partialItem_ != null)
if (partialItem_ != null) {
sb.append(partialItem_).append(LS);
else
} else {
sb.append("NULL").append(LS);
}

return sb.toString();
}
Expand All @@ -261,13 +271,13 @@ void subsample(final int numSamples) {
// point from anywhere in the initial array would be eligible
// to end up in the final subsample.

if (numSamples == data_.size()) return;
if (numSamples == data_.size()) { return; }

int dataLen = data_.size();
final int dataLen = data_.size();
for (int i = 0; i < numSamples; ++i) {
int j = i + rand_.nextInt(dataLen - i);
final int j = i + rand_.nextInt(dataLen - i);
// swap i and j
T tmp = data_.get(i);
final T tmp = data_.get(i);
data_.set(i, data_.get(j));
data_.set(j, tmp);
}
Expand All @@ -280,19 +290,19 @@ void swapWithPartialItem() {
if (partialItem_ == null) {
moveOneToPartialItem();
} else {
int idx = rand_.nextInt(data_.size());
T tmp = partialItem_;
final int idx = rand_.nextInt(data_.size());
final T tmp = partialItem_;
partialItem_ = data_.get(idx);
data_.set(idx, tmp);
}
}

void moveOneToPartialItem() {
int idx = rand_.nextInt(data_.size());
final int idx = rand_.nextInt(data_.size());
// swap selected item to end so we can delete it easily
int lastIdx = data_.size() - 1;
final int lastIdx = data_.size() - 1;
if (idx != lastIdx) {
T tmp = data_.get(idx);
final T tmp = data_.get(idx);
data_.set(idx, data_.get(lastIdx));
partialItem_ = tmp;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@
/**
* An implementation of an Exact and Bounded Sampling Proportional to Size sketch.
*
* From: "Exact PPS Sampling with Bounded Sample Size",
* <p>From: "Exact PPS Sampling with Bounded Sample Size",
* B. Hentschel, P. J. Haas, Y. Tian. Information Processing Letters, 2023.
*
* This sketch samples data from a stream of items proportional to the weight of each item.
* <p>This sketch samples data from a stream of items proportional to the weight of each item.
* The sample guarantees the presence of an item in the result is proportional to that item's
* portion of the total weight seen by the sketch, and returns a sample no larger than size k.
*
* The sample may be smaller than k and the resulting size of the sample potentially includes
* <p>The sample may be smaller than k and the resulting size of the sample potentially includes
* a probabilistic component, meaning the resulting sample size is not always constant.
*
* @author Jon Malkin
Expand Down Expand Up @@ -77,7 +77,7 @@ public EbppsItemsSketch(final int k) {
}

// private copy constructor
private EbppsItemsSketch(EbppsItemsSketch<T> other) {
private EbppsItemsSketch(final EbppsItemsSketch<T> other) {
k_ = other.k_;
n_ = other.n_;
rho_ = other.rho_;
Expand Down Expand Up @@ -151,8 +151,9 @@ public static <T> EbppsItemsSketch<T> heapify(final Memory srcMem,
+ "and less than " + MAX_K + ". Found: " + k);
}

if (isEmpty)
if (isEmpty) {
return new EbppsItemsSketch<>(k);
}

final long n = PreambleUtil.extractN(srcMem);
if (n < 0) {
Expand Down Expand Up @@ -182,17 +183,18 @@ public static <T> EbppsItemsSketch<T> heapify(final Memory srcMem,
}

// extract items
int numTotalItems = (int) Math.ceil(c);
int numFullItems = (int) Math.floor(c); // floor() not strictly necessary
final int numTotalItems = (int) Math.ceil(c);
final int numFullItems = (int) Math.floor(c); // floor() not strictly necessary
final int offsetBytes = EBPPS_ITEMS_START;
final T[] rawItems = serDe.deserializeFromMemory(
srcMem.region(offsetBytes, srcMem.getCapacity() - offsetBytes), 0, numTotalItems);
final List<T> itemsList = Arrays.asList(rawItems);
final ArrayList<T> data;
final T partialItem;
if (hasPartialItem) {
if (numFullItems >= numTotalItems)
if (numFullItems >= numTotalItems) {
throw new SketchesArgumentException("Possible Corruption: Expected partial item but none found");
}

data = new ArrayList<>(itemsList.subList(0, numFullItems));
partialItem = itemsList.get(numFullItems); // 0-based, so last item
Expand All @@ -201,7 +203,7 @@ public static <T> EbppsItemsSketch<T> heapify(final Memory srcMem,
partialItem = null; // just to be explicit
}

EbppsItemsSample<T> sample = new EbppsItemsSample<>(data, partialItem, c);
final EbppsItemsSample<T> sample = new EbppsItemsSample<>(data, partialItem, c);

return new EbppsItemsSketch<>(sample, k, n, cumWt, maxWt, rho);
}
Expand All @@ -220,18 +222,21 @@ public void update(final T item) {
* @param weight the weight of the item
*/
public void update(final T item, final double weight) {
if (weight < 0.0 || Double.isNaN(weight) || Double.isInfinite(weight))
if (weight < 0.0 || Double.isNaN(weight) || Double.isInfinite(weight)) {
throw new SketchesArgumentException("Item weights must be nonnegative and finite. "
+ "Found: " + weight);
if (weight == 0.0)
}
if (weight == 0.0) {
return;
}

final double newCumWt = cumulativeWt_ + weight;
final double newWtMax = Math.max(wtMax_, weight);
final double newRho = Math.min(1.0 / newWtMax, k_ / newCumWt);

if (cumulativeWt_ > 0.0)
if (cumulativeWt_ > 0.0) {
sample_.downsample((newRho / rho_));
}

tmp_.replaceContent(item, newRho * weight);
sample_.merge(tmp_);
Expand Down Expand Up @@ -267,12 +272,12 @@ public void update(final T item, final double weight) {
* @param other the sketch to merge into the current object
*/
public void merge(final EbppsItemsSketch<T> other) {
if (other.getCumulativeWeight() == 0.0)
if (other.getCumulativeWeight() == 0.0) {
return;
else if (other.getCumulativeWeight() > cumulativeWt_) {
} else if (other.getCumulativeWeight() > cumulativeWt_) {
// need to swap this with other
// make a copy of other, merge into it, and take the result
EbppsItemsSketch<T> copy = new EbppsItemsSketch<>(other);
final EbppsItemsSketch<T> copy = new EbppsItemsSketch<>(other);
copy.internalMerge(this);
k_ = copy.k_;
n_ = copy.n_;
Expand All @@ -286,7 +291,7 @@ else if (other.getCumulativeWeight() > cumulativeWt_) {
}

// merge implementation called exclusively from public merge()
private void internalMerge(EbppsItemsSketch<T> other) {
private void internalMerge(final EbppsItemsSketch<T> other) {
// assumes that other.cumulativeWt_ <= cumulativeWt_
// which must be checked before calling this

Expand All @@ -305,15 +310,16 @@ private void internalMerge(EbppsItemsSketch<T> other) {
// it as a full item would be correct on average but would
// introduce bias for any specific merge operation.
final double avgWt = other.cumulativeWt_ / other.getC();
ArrayList<T> items = other.sample_.getFullItems();
final ArrayList<T> items = other.sample_.getFullItems();
if (items != null) {
for (T item : items) {
// newWtMax is pre-computed
final double newCumWt = cumulativeWt_ + avgWt;
final double newRho = Math.min(1.0 / newWtMax, k_ / newCumWt);

if (cumulativeWt_ > 0.0)
if (cumulativeWt_ > 0.0) {
sample_.downsample(newRho / rho_);
}

tmp_.replaceContent(item, newRho * avgWt);
sample_.merge(tmp_);
Expand All @@ -329,8 +335,9 @@ private void internalMerge(EbppsItemsSketch<T> other) {
final double newCumWt = cumulativeWt_ + (otherCFrac * avgWt);
final double newRho = Math.min(1.0 / newWtMax, k_ / newCumWt);

if (cumulativeWt_ > 0.0)
if (cumulativeWt_ > 0.0) {
sample_.downsample(newRho / rho_);
}

tmp_.replaceContent(other.sample_.getPartialItem(), newRho * otherCFrac * avgWt);
sample_.merge(tmp_);
Expand Down Expand Up @@ -385,8 +392,8 @@ public String toString() {
* getResult(). The number is a floating point value, where the
* fractional portion represents the probability of including a
* "partial item" from the sample.
*
* The value C should be no larger than the sketch's configured
*
* <p>The value C should be no larger than the sketch's configured
* value of k, although numerical precision limitations mean it
* may exceed k by double precision floating point error margins
* in certain cases.
Expand Down Expand Up @@ -418,12 +425,13 @@ public void reset() {
* @return the length of a byte array representation of this sketch
*/
public int getSerializedSizeBytes(final ArrayOfItemsSerDe<? super T> serDe) {
if (isEmpty())
if (isEmpty()) {
return Family.EBPPS.getMinPreLongs() << 3;
else if (sample_.getC() < 1.0)
} else if (sample_.getC() < 1.0) {
return getSerializedSizeBytes(serDe, sample_.getPartialItem().getClass());
else
} else {
return getSerializedSizeBytes(serDe, sample_.getSample().get(0).getClass());
}
}

/**
Expand All @@ -435,8 +443,9 @@ else if (sample_.getC() < 1.0)
* @return the length of a byte array representation of this sketch
*/
public int getSerializedSizeBytes(final ArrayOfItemsSerDe<? super T> serDe, final Class<?> clazz) {
if (n_ == 0)
if (n_ == 0) {
return Family.EBPPS.getMinPreLongs() << 3;
}

final int preLongs = Family.EBPPS.getMaxPreLongs();
final byte[] itemBytes = serDe.serializeToByteArray(sample_.getAllSamples(clazz));
Expand All @@ -451,13 +460,14 @@ public int getSerializedSizeBytes(final ArrayOfItemsSerDe<? super T> serDe, fina
* @return a byte array representation of this sketch
*/
public byte[] toByteArray(final ArrayOfItemsSerDe<? super T> serDe) {
if (n_ == 0)
if (n_ == 0) {
// null class is ok since empty -- no need to call serDe
return toByteArray(serDe, null);
else if (sample_.getC() < 1.0)
} else if (sample_.getC() < 1.0) {
return toByteArray(serDe, sample_.getPartialItem().getClass());
else
} else {
return toByteArray(serDe, sample_.getSample().get(0).getClass());
}
}

/**
Expand Down Expand Up @@ -512,7 +522,8 @@ public byte[] toByteArray(final ArrayOfItemsSerDe<? super T> serDe, final Class<
}

private static void checkK(final int k) {
if (k <= 0 || k > MAX_K)
if (k <= 0 || k > MAX_K) {
throw new SketchesArgumentException("k must be strictly positive and less than " + MAX_K);
}
}
}
Loading

0 comments on commit c33333e

Please sign in to comment.