Improve Test Vector Generation #8

Merged 4 commits on Nov 4, 2024
68 changes: 53 additions & 15 deletions PyITA/ITA.py
@@ -26,7 +26,8 @@
 from .gelu import gelu_requantize, i_gelu_requantized, get_i_gelu_constants, get_i_gelu_requantized_constants
 from .util import (generate_matrix_mem, pack_8b_to_word, pack_array_8b_to_word, pack_hex_24b, pack_multihead_8b_to_word,
                    pack_multihead_24b_to_word, random_shuffled_tensor, requantize, split_matrix, to_hex, write_matrix,
-                   write_matrix_mem, write_matrix_mem_hex, write_vector_mem_hex, get_almost_symmetric_scaling_factor)
+                   write_matrix_mem, write_matrix_mem_hex, write_vector_mem_hex, get_almost_symmetric_scaling_factor,
+                   error_MAEP)
 
 
 class Transformer:
@@ -133,35 +134,35 @@ def _initialize_tensors(self, Q, V, Wq, Wk, Wv, Wo, Bq, Bk, Bv, Bo, FF_in, Wff,
 
         self.exp_sum = np.zeros(self.S, dtype = np.int32)
 
-        self.Q_in = random_shuffled_tensor((self.S, self.E), self.WI - 1) if Q is None else Q
+        self.Q_in = random_shuffled_tensor((self.S, self.E), self.WI) if Q is None else Q
         self.Q = np.pad(self.Q_in, ((0, self.S_ITA - self.S), (0, self.E_ITA - self.E)))
 
-        self.V_in = random_shuffled_tensor((self.S, self.E), self.WI - 1) if V is None else V
+        self.V_in = random_shuffled_tensor((self.S, self.E), self.WI) if V is None else V
         self.V = np.pad(self.V_in, ((0, self.S_ITA - self.S), (0, self.E_ITA - self.E)))
 
         # WIESEP: K is the same as V because we do cross-attention
         self.K_in = self.V_in
         self.K = self.V
 
-        self.FF_in = random_shuffled_tensor((self.S, self.E), self.WI - 1) if FF_in is None else FF_in
+        self.FF_in = random_shuffled_tensor((self.S, self.E), self.WI) if FF_in is None else FF_in
         self.FF = np.pad(self.FF_in, ((0, self.S_ITA - self.S), (0, self.E_ITA - self.E)))
 
         #### Weight matrices ####
-        self.Wq_in = random_shuffled_tensor((self.H, self.E, self.P), self.WI - 1) if Wq is None else Wq
+        self.Wq_in = random_shuffled_tensor((self.H, self.E, self.P), self.WI) if Wq is None else Wq
         self.Wq = np.pad(self.Wq_in, ((0, 0), (0, self.E_ITA - self.E), (0, self.P_ITA - self.P)))
 
-        self.Wk_in = random_shuffled_tensor((self.H, self.E, self.P), self.WI - 1) if Wk is None else Wk
+        self.Wk_in = random_shuffled_tensor((self.H, self.E, self.P), self.WI) if Wk is None else Wk
         self.Wk = np.pad(self.Wk_in, ((0, 0), (0, self.E_ITA - self.E), (0, self.P_ITA - self.P)))
 
-        self.Wv_in = random_shuffled_tensor((self.H, self.E, self.P), self.WI - 1) if Wv is None else Wv
+        self.Wv_in = random_shuffled_tensor((self.H, self.E, self.P), self.WI) if Wv is None else Wv
         self.Wv = np.pad(self.Wv_in, ((0, 0), (0, self.E_ITA - self.E), (0, self.P_ITA - self.P)))
 
-        self.Wo_in = random_shuffled_tensor((self.H, self.P, self.E), self.WI - 1) if Wo is None else Wo
+        self.Wo_in = random_shuffled_tensor((self.H, self.P, self.E), self.WI) if Wo is None else Wo
         self.Wo = np.pad(self.Wo_in, ((0, 0), (0, self.P_ITA - self.P), (0, self.E_ITA - self.E)))
 
-        self.Wff_in = random_shuffled_tensor((1, self.E, self.F), self.WI - 1) if Wff is None else Wff
+        self.Wff_in = random_shuffled_tensor((1, self.E, self.F), self.WI) if Wff is None else Wff
         self.Wff = np.pad(self.Wff_in, ((0, 0), (0, self.E_ITA - self.E), (0, self.F_ITA - self.F)))
-        self.Wff2_in = random_shuffled_tensor((1, self.F, self.E), self.WI - 1) if Wff2 is None else Wff2
+        self.Wff2_in = random_shuffled_tensor((1, self.F, self.E), self.WI) if Wff2 is None else Wff2
         self.Wff2 = np.pad(self.Wff2_in, ((0, 0), (0, self.F_ITA - self.F), (0, self.E_ITA - self.E)))
 
         #### Bias matrices ####
@@ -258,7 +259,7 @@ def _initialize_quantization_parameters(self):
             elif i == 3: # QK
                 max_bit_width = np.log2(self.requant_eps_mult[i, :].astype(np.uint32) * self.P * 2**8).astype(np.uint32)
             elif i == 4: # AV
-                max_bit_width = np.log2(self.requant_eps_mult[i, :].astype(np.uint32) * self.S * 2**8).astype(np.uint32)
+                max_bit_width = np.log2(self.requant_eps_mult[i, :].astype(np.uint32) * self.S * 2**5).astype(np.uint32)
             elif i == 5: # OW
                 max_bit_width = np.log2(self.requant_eps_mult[i, :].astype(np.uint32) * self.E * 2**9).astype(np.uint32)
             elif i == 6: # Sum OW
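
For intuition, here is a minimal sketch of the bound computed in this hunk, with invented numbers (in the real code, the multiplier and dimensions come from the Transformer instance; the drop from 2**8 to 2**5 in the AV case presumably reflects the smaller value range of the softmax activations):

```python
import numpy as np

# Invented values for illustration only.
eps_mult = np.uint32(100)   # hypothetical requant_eps_mult entry
P = 64                      # hypothetical projection dimension

# Worst-case accumulator magnitude for QK is bounded by mult * P * 2**8;
# the uint32 cast truncates log2, giving the bit width of that bound.
max_bit_width = np.log2(eps_mult * P * 2**8).astype(np.uint32)
print(max_bit_width)  # log2(100 * 64 * 256) ≈ 20.6 -> 20
```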
Expand Down Expand Up @@ -996,17 +997,53 @@ def generateTestVectors(path, **kwargs):
     acc1.export_hwpe()
     acc1.export_numpy()
 
-    def print_tensor_stats(tensor):
+    def calculate_tensor_stats(tensor, name, tol = 1e-1):
+        # Calculate the similarity of elements within one row and over all columns
+        similarity_row = np.mean(np.abs(np.diff(tensor, axis = -2)))
+        similarity_column = np.mean(np.abs(np.diff(tensor, axis = -1)))
+
+        if (similarity_row < tol) or (similarity_column < tol):
+            if name is not None:
+                print(f"WARNING: {name} is constant!")
+                print(f"{name} Mean-Squared Difference (row)   : {similarity_row:5.1f}")
+                print(f"{name} Mean-Squared Difference (column): {similarity_column:5.1f}")
+                raise ValueError(f"Tensor {name} is constant! This is a bad test vector!")
+            else:
+                print("  WARNING: Tensor is constant!")
+                print(f"  Mean-Squared Difference (row)   : {similarity_row:5.1f}")
+                print(f"  Mean-Squared Difference (column): {similarity_column:5.1f}")
+
+        return similarity_row, similarity_column
+
+    def print_tensor_stats(tensor, name = None):
         print(f"  Min: {np.min(tensor)}")
         print(f"  Max: {np.max(tensor)}")
 
-        # Calculate the simmilarty of elements witin one row and over all comumns
-        similarity_row = np.mean(np.abs(np.diff(tensor, axis = -2)))
-        similarity_column = np.mean(np.abs(np.diff(tensor, axis = -1)))
+        similarity_row, similarity_column = calculate_tensor_stats(tensor, name)
 
         print(f"  Mean-Squared Difference (row)   : {similarity_row:5.1f}")
         print(f"  Mean-Squared Difference (column): {similarity_column:5.1f}")

+    # Calculate all tensor statistics
+    tensors = {
+        "Qp": acc1.Qp_requant,
+        "Kp": acc1.Kp_requant,
+        "Vp": acc1.Vp_requant,
+        "A": acc1.A_requant,
+        "A_soft": acc1.A_partial_softmax,
+        "O_soft": acc1.O_soft_requant,
+        "Out_soft": acc1.Out_soft_requant,
+        "Out_soft_sum": acc1.Out_soft_sum_requant
+    }
+
+    for name, tensor in tensors.items():
+        calculate_tensor_stats(tensor, name)
+
+    # Check if softmax is sufficiently precise
+    maep_softmax = error_MAEP(acc1.A_partial_softmax, acc1.A_real_softmax)
+    if maep_softmax > 5:
+        print(f"WARNING: Softmax is not precise enough! MAEP Error to Integer Softmax: {maep_softmax:.2f}%")

     if kwargs['verbose'] > 1:
         print("=> Qp")
         print_tensor_stats(acc1.Qp_requant)
@@ -1038,6 +1075,7 @@ def print_tensor_stats(tensor):

print("=> A (partial softmax)")
print_tensor_stats(acc1.A_partial_softmax)
print(f" MAEP Error to Integer Softmax: {maep_softmax:.2f}%")
if kwargs['verbose'] > 3:
print(acc1.A_partial_softmax)

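As a usage sketch of the new guard (assuming calculate_tensor_stats from this diff is in scope), a constant tensor now fails fast instead of silently producing a weak test vector:

```python
import numpy as np

rng = np.random.default_rng(0)

# A well-mixed tensor passes: average row/column differences far exceed tol.
good = rng.integers(-128, 128, size = (8, 8))
calculate_tensor_stats(good, "good")

# A constant tensor trips the tolerance check and raises.
bad = np.full((8, 8), 42)
try:
    calculate_tensor_stats(bad, "bad")
except ValueError as err:
    print(err)  # Tensor bad is constant! This is a bad test vector!
```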
2 changes: 0 additions & 2 deletions PyITA/softmax.py
@@ -157,8 +157,6 @@ def streamingPartialSoftmax(x, integerize = True):
 
     ## STAGE 2: Calculate the softmax activation
     # Invert the partial sum
-    # WIESEP: Scale Softmax to 127
-    # The Softmax values are maximum 127 as sumdotp modules can only do signed-signed operations for now. This is a temporary fix until sumdotp is fixed.
     if integerize:
         exp_partial_sum_inverse = np.floor((2**8 - 1) * 2**8 / exp_partial_sum).astype(np.int32)
     else:
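For reference, a small numeric sketch of the integerized reciprocal retained above (the input value 1024 is invented, and reading the constants as an 8-bit fixed-point scale is an interpretation, not something the diff states):

```python
import numpy as np

# Hypothetical accumulated sum of exponentials for one softmax row.
exp_partial_sum = np.int32(1024)

# floor(255 * 256 / sum): a reciprocal pre-scaled by 2**8, so a later
# multiply-and-shift can map each row into roughly the 0..255 range.
exp_partial_sum_inverse = np.floor((2**8 - 1) * 2**8 / exp_partial_sum).astype(np.int32)
print(exp_partial_sum_inverse)  # 63, and 63 / 2**8 ≈ 0.246 ≈ 255 / 1024
```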
18 changes: 18 additions & 0 deletions PyITA/util.py
@@ -517,3 +517,21 @@ def almost_symmetric_dequantize(quantized_activations: np.ndarray, clip_lo: f32,
     S, _ = get_almost_symmetric_scaling_factor(clip_lo, n_bits)
     activations = quantized_activations * S
     return activations
+
+
+def error_MAEP(a: np.ndarray, b: np.ndarray):
+    """
+    Compute the mean absolute error percentage (MAEP) between two tensors.
+    A value of 0 indicates that the two tensors are equal.
+    A value of 100 indicates that the mean absolute difference equals the larger of the two tensors' value ranges (|max| + |min|).
+
+    Parameters:
+        a (np.ndarray): The first tensor.
+        b (np.ndarray): The second tensor.
+
+    Returns:
+        np.ndarray: The mean absolute error percentage between the two tensors.
+    """
+    return 100 * np.mean(np.abs(a - b)) / max(
+        np.abs(np.max(a)) + np.abs(np.min(a)),
+        np.abs(np.max(b)) + np.abs(np.min(b)))
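
A quick worked example (values invented; assumes error_MAEP above is in scope). The mean absolute difference is normalized by the larger tensor's value range, so small integer perturbations yield a small percentage:

```python
import numpy as np

ideal = np.array([[0, 64, 127], [-128, -64, 0]], dtype = np.int32)
approx = ideal + np.array([[1, -2, 0], [3, 0, -1]], dtype = np.int32)

# mean |diff| = 7/6 ≈ 1.17, range of `ideal` = |127| + |-128| = 255,
# so MAEP = 100 * 1.17 / 255 ≈ 0.46%, well under the 5% threshold in ITA.py.
print(f"{error_MAEP(ideal, approx):.2f}%")  # 0.46%
```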