Add real Python 2 support #35

Open · wants to merge 5 commits into base: master
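Taken together, the changes below replace the Python 3.6 f-strings in these modules with %-style formatting (and reorder the keyword-only arguments of fused_lstm_gates) so the files at least parse under Python 2. A minimal sketch of the formatting conversion, with made-up values rather than code from this repo:

```python
mask_shape = (1, 64)   # hypothetical values, for illustration only
x_shape = (32, 64)

# Python 3.6+ only:
#   msg = f"incompatible mask_shape: {mask_shape} x.shape: {x_shape}"
# Parses and runs on both Python 2.7 and 3.x:
msg = "incompatible mask_shape: %s x.shape: %s" % (mask_shape, x_shape)
print(msg)  # incompatible mask_shape: (1, 64) x.shape: (32, 64)
```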
3 changes: 1 addition & 2 deletions blocksparse/ewops.py
@@ -221,7 +221,7 @@ def dropout(x, keep_prob, mask=None, mask_shape=None):
size = 1
for m_dim, x_dim in zip(mask_shape, x.shape.as_list()):
# we don't currently support placeholder dims when broadcasting the dropout mask
assert m_dim == 1 or m_dim == x_dim, f"incompatible mask_shape: {mask_shape} x.shape: {x.shape}"
assert m_dim == 1 or m_dim == x_dim, "incompatible mask_shape: %s x.shape: %s" % (mask_shape, x.shape)
size *= m_dim
else:
size = 0
@@ -439,4 +439,3 @@ def assign_add(y, x, name=None):
# f8 = (1 + frac) * 2**(exp - ebias)
# l8 = 2**(exp + frac - ebias)
# print("%2d %.3f %9.5f %9.5f" % (exp-ebias, frac, f8, l8))

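For context on the dropout hunk above: the assertion allows the mask to be broadcast, so each entry of mask_shape must be either 1 or equal to the matching dimension of x. A small self-contained illustration with made-up shapes, mirroring the visible check rather than reusing code from the repo:

```python
def check_mask_shape(mask_shape, x_shape):
    # Each mask dim must be 1 (broadcast over that axis) or match x's dim,
    # as in the assertion inside blocksparse.ewops.dropout above.
    size = 1
    for m_dim, x_dim in zip(mask_shape, x_shape):
        assert m_dim == 1 or m_dim == x_dim, \
            "incompatible mask_shape: %s x.shape: %s" % (mask_shape, x_shape)
        size *= m_dim
    return size

print(check_mask_shape([1, 64, 256], [32, 64, 256]))  # 16384, mask shared across dim 0
# check_mask_shape([2, 64, 256], [32, 64, 256])       # would raise AssertionError
```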
7 changes: 3 additions & 4 deletions blocksparse/grads.py
@@ -211,7 +211,7 @@ def gradients(ys, xs, grad_ys=None, stop_grads=None, group_aggregations=8, custo
for i, dy in enumerate(grad_ys):
if dy is None:
# float grads start at ones by default
grad_ys[i] = tf.fill(tf.shape(ys[i]), tf.constant(1.0, dtype=ys[i].dtype, name=f"grad_ys_{i}"))
grad_ys[i] = tf.fill(tf.shape(ys[i]), tf.constant(1.0, dtype=ys[i].dtype, name="grad_ys_%s" % (i)))

ys_ops = [t.op for t in ys]
xs_ops = [t.op for t in xs]
@@ -261,7 +261,7 @@ def gradients(ys, xs, grad_ys=None, stop_grads=None, group_aggregations=8, custo
else:
grad_fn = ops.get_gradient_function(op)
except LookupError:
raise LookupError(f"No gradient defined for operation '{op.name}' (op type: {op.type})")
raise LookupError("No gradient defined for operation '%s' (op type: %s)" % (op.name, op.type))

# for any missing input grads, build a zero input of the right dtype/shape
for i, dy in enumerate(dys):
@@ -273,7 +273,7 @@ def gradients(ys, xs, grad_ys=None, stop_grads=None, group_aggregations=8, custo
dxs = _AsList(grad_fn(op, *dys))

if len(dxs) != len(op.inputs):
raise ValueError(f"Num gradients {len(dxs)} generated for op {op.node_def} do not match num inputs {len(op.inputs)}")
raise ValueError("Num gradients %s generated for op %s do not match num inputs %s" % (len(dxs), op.node_def, len(op.inputs)))

#_LogOpGradients(op, dys, dxs)
else:
@@ -316,4 +316,3 @@ def gradients(ys, xs, grad_ys=None, stop_grads=None, group_aggregations=8, custo

return [_GetGrad(grads, x) for x in xs]


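One note on the %-substitutions above and in matmul.py below: "grad_ys_%s" % (i) and "%s/BlocksparseMatmulDG" % (scope) format a single bare value, which works here but breaks as soon as the value is itself a tuple; the one-element-tuple spelling is the defensive form. A short sketch with made-up values:

```python
i = 3
scope = "grouped_lstm"
print("grad_ys_%s" % (i,))                  # grad_ys_3
print("%s/BlocksparseMatmulDG" % (scope,))  # grouped_lstm/BlocksparseMatmulDG

shape = (64, 64)
# "shape: %s" % shape    -> TypeError: not all arguments converted
print("shape: %s" % (shape,))               # shape: (64, 64)
```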
4 changes: 2 additions & 2 deletions blocksparse/lstm.py
@@ -19,7 +19,7 @@
lstm_gates4_grad_op = _op_module.lstm_gates4_grad
bias_grad_op = _op_module.bias_grad

def fused_lstm_gates(c, *args, bias=None, forget_bias=1.0, name=None):
def fused_lstm_gates(c, bias=None, forget_bias=1.0, name=None, *args):
# returns c_next, h_next

dev = args[0].op.device.lower()
@@ -297,4 +297,4 @@ def group_lstm_grads(grads, params, scope="grouped_lstm", group_size=None):
# with tf.variable_scope(bias_scope, reuse=bias_reuse):
# b = tf.get_variable('bias', shape=[4 * width])
# if layernorm:
# g = tf.get_variable('gain', shape=[4 * width])
# g = tf.get_variable('gain', shape=[4 * width])
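A caution on the fused_lstm_gates signature change above: in the Python 3 version, bias, forget_bias, and name were keyword-only, so all positional gate tensors landed in *args. With *args moved after the defaulted parameters, a positional call such as fused_lstm_gates(c, gates) binds gates to bias and leaves args empty, so args[0] raises. If call sites pass the gates positionally, a **kwargs-based signature preserves the original behaviour on both interpreters; a minimal, self-contained sketch (fused_lstm_gates_py2 is a hypothetical name, not part of this PR):

```python
def fused_lstm_gates_py2(c, *args, **kwargs):
    # Python 2/3 compatible emulation of keyword-only arguments:
    # gate tensors still arrive positionally in *args, as before.
    bias = kwargs.pop("bias", None)
    forget_bias = kwargs.pop("forget_bias", 1.0)
    name = kwargs.pop("name", None)
    if kwargs:
        raise TypeError("unexpected keyword arguments: %s" % (sorted(kwargs),))
    return c, args, bias, forget_bias, name

# Positional gates plus keyword options behave identically on 2.7 and 3.x:
print(fused_lstm_gates_py2("c", "gates", forget_bias=2.0))
```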
10 changes: 5 additions & 5 deletions blocksparse/matmul.py
@@ -46,7 +46,7 @@ def get_constant(lut, name):
#print(name, lut.size)
#tf_entry = tf.constant(lut, name=name+"_lut")
with tf.control_dependencies(None):
tf_entry = tf.get_variable(f"{name}_lut_{g_lut_idx}", initializer=lut.view(np.int64), trainable=False)
tf_entry = tf.get_variable("%s_lut_%s" % (name, g_lut_idx), initializer=lut.view(np.int64), trainable=False)
g_lut_idx += 1

g_lookup_cache[name].append( (lut, tf_entry) )
@@ -736,14 +736,14 @@ def group_dg_grads(bsmm_dw_op, dw, scope):
# that takes in the final accumulated dw value
dg_op = bsmm_dw_op.outputs[0].consumers()[0]
assert dg_op.type == "BlocksparseMatmulDG"
dw, dg = blocksparse_matmul_dg(dw, *dg_op.inputs[1:], name=f"{scope}/BlocksparseMatmulDG")
dw, dg = blocksparse_matmul_dg(dw, *dg_op.inputs[1:], name="%s/BlocksparseMatmulDG" % (scope))

# splice old add_n op out of graph
addn_op = dg_op.outputs[1].consumers()[0]
addn_ops = list()
addn_ops.append(addn_op)
if addn_op.type[0:3] != "Add":
raise ValueError(f"bad type: {addn_ops[0].type} Cause: this segment does not share a broadcasted gate.")
raise ValueError("bad type: %s Cause: this segment does not share a broadcasted gate." % (addn_ops[0].type))
elif addn_op.type == "AddN8":
while True:
addn_op = addn_op.outputs[0].consumers()[0]
@@ -768,12 +768,12 @@ def group_dg_grads(bsmm_dw_op, dw, scope):
for i, t in enumerate(dg_consumer.inputs):
#print(i, t.name)
if t is addn:
#print(f"splicing dg into: {dg_consumer.name} at {i}")
#print("splicing dg into: %s at %s" % (dg_consumer.name, i))
dg_consumer._update_input(i, dg)
found = True
break
if not found:
print(f"splice failed for {dg_consumer.name}")
print("splice failed for %s" % (dg_consumer.name))
return dw


12 changes: 6 additions & 6 deletions blocksparse/utils.py
@@ -222,10 +222,10 @@ def bst_conv_layout(input_h=1, input_w=1, filter_h=1, filter_w=1, stride=1, blk_
break
assert pad_s >= 0, "Even size filters only work with stride 2."

print(f"P:{P} Q:{Q} H:{H} W:{W} R:{R} S:{S} std:{stride} pad_r:{pad_r} pad_s:{pad_s}")
print("P:%s Q:%s H:%s W:%s R:%s S:%s std:%s pad_r:%s pad_s:%s" % (P, Q, H, W, R, S, stride, pad_r, pad_s))

assert P*Q % blk_size == 0, f"P:{P} Q:{Q}"
assert H*W % blk_size == 0, f"H:{H} W:{W}"
assert P*Q % blk_size == 0, "P:%s Q:%s" % (P, Q)
assert H*W % blk_size == 0, "H:%s W:%s" % (H, W)

mask_set = set()
layout = np.zeros((P*Q//blk_size, H*W//blk_size), dtype=np.bool)
@@ -294,10 +294,10 @@ def bst_deconv_layout(output_h=1, output_w=1, filter_h=1, filter_w=1, stride=1,
break
assert pad_s >= 0, "Even size filters only work with stride 2."

print(f"P:{P} Q:{Q} H:{H} W:{W} R:{R} S:{S} std:{stride} pad_r:{pad_r} pad_s:{pad_s}")
print("P:%s Q:%s H:%s W:%s R:%s S:%s std:%s pad_r:%s pad_s:%s" % (P, Q, H, W, R, S, stride, pad_r, pad_s))

assert P*Q % blk_size == 0, f"P:{P} Q:{Q}"
assert H*W % blk_size == 0, f"H:{H} W:{W}"
assert P*Q % blk_size == 0, "P:%s Q:%s" % (P, Q)
assert H*W % blk_size == 0, "H:%s W:%s" % (H, W)

mask_set = set()
layout = np.zeros((H*W//blk_size, P*Q//blk_size), dtype=np.bool)
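Finally, the converted print calls in utils.py pass a single string, so they print the same text under Python 2's print statement as under Python 3's print function. If any of these modules ever print multiple arguments, the __future__ import keeps the output identical on both interpreters; a small sketch, not part of this diff:

```python
from __future__ import print_function

P, Q = 16, 16
# Single-string prints like the one in bst_conv_layout are safe either way:
print("P:%s Q:%s" % (P, Q))
# Multi-argument prints only match across 2.7 and 3.x with the import above:
print("P:", P, "Q:", Q)
```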