forked from facebookresearch/dlrm
-
Notifications
You must be signed in to change notification settings - Fork 2
/
dlrm_utils.py
62 lines (50 loc) · 1.85 KB
/
dlrm_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import time, zlib
import torch
# Wrapper so the weighted/unweighted decision is not re-checked on every
# iteration of the training loop below.
def unpack_batch(b):
    """Unpack a batch tuple, substituting all-ones per-sample weights.

    Returns a 6-tuple (X, lS_o, lS_i, T, W, CBPP): the first four entries
    of `b` unchanged, a ones tensor shaped like the targets as W
    (unweighted-samples experiment), and None for CBPP.
    """
    dense_x, offsets, indices, targets = b[0], b[1], b[2], b[3]
    sample_weights = torch.ones(targets.size())
    return dense_x, offsets, indices, targets, sample_weights, None
def div_round_up(x, y):
    """Return ceil(x / y) using integer arithmetic only (for positive y)."""
    # Adding (y - 1) before floor division rounds any nonzero remainder up.
    adjusted = x + y - 1
    return adjusted // y
def time_wrap(use_gpu):
    """Return the current wall-clock time.

    When `use_gpu` is true, first block until all queued CUDA kernels
    finish so the timestamp reflects completed GPU work, not launch time.
    """
    if not use_gpu:
        return time.time()
    torch.cuda.synchronize()
    return time.time()
def get_reuse_factor(indices):
    """Histogram of index reuse within a lookup batch.

    Counts how many times each distinct index occurs, then buckets those
    multiplicities into 17 bins with exponentially growing upper bounds:
    (0,1], (1,2], (2,4], ..., (2**14, 2**15], and a final (2**15, inf)
    overflow bin. Returns the bins as fractions of the number of distinct
    multiplicity levels; all zeros when `indices` is empty.
    """
    _, multiplicity = torch.unique(indices, return_counts=True)
    levels, level_freq = torch.unique(multiplicity, return_counts=True)
    total = level_freq.sum().item()
    if total == 0:
        # Empty batch: nothing to histogram.
        return [0.0] * 17
    lo, hi = 0, 1
    fractions = []
    for _ in range(16):
        in_bin = level_freq[(levels > lo) & (levels <= hi)].sum().item()
        fractions.append(in_bin / total)
        lo, hi = hi, hi * 2
    # Overflow bin: multiplicities above the last power-of-two boundary.
    fractions.append(level_freq[levels > lo].sum().item() / total)
    return fractions
def get_bin_counts_and_Ls(offsets, indices, T, batch_size):
    """Per-table reuse histograms and average pooling factors.

    `offsets` is assumed to be laid out as `batch_size` entries per table
    followed by a final sentinel — TODO confirm against caller. For each
    table, slices that table's span of `indices` and records its
    reuse-factor histogram plus its mean lookups-per-sample L.
    A 0-dim `offsets` tensor yields all-zero placeholders for T tables.
    """
    if offsets.dim() == 0:
        # Degenerate batch: no offset vector at all.
        return [[0.0] * 17 for _ in range(T)], [0.0 for _ in range(T)]
    bin_counts, Ls = [], []
    for table_start in range(0, len(offsets) - 1, batch_size):  # per table
        lo = offsets[table_start]
        hi = offsets[table_start + batch_size]
        bin_counts.append(get_reuse_factor(indices[lo:hi]))
        Ls.append((hi - lo).item() / batch_size)
    return bin_counts, Ls
def get_encoded_info(Es, Ds):
    """Compress table sizes (Es) and dims (Ds) into one escaped string.

    Joins each list with '-', joins the two halves with '/', zlib-compresses
    the result, then escapes backslashes and double quotes in the bytes repr
    so it can be embedded inside a double-quoted string literal.
    """
    es_part = '-'.join(str(e) for e in Es)
    ds_part = '-'.join(str(d) for d in Ds)
    compressed = zlib.compress(f'{es_part}/{ds_part}'.encode())
    return str(compressed).replace('\\', '\\\\').replace('"', '\\"')