This repository was archived by the owner on Mar 2, 2025. It is now read-only.

Making Upsample native ops #118

Draft · wants to merge 16 commits into base: main
1 change: 1 addition & 0 deletions basalt/__init__.mojo
@@ -5,3 +5,4 @@ from basalt.utils.collection import Collection
alias dtype = DType.float32
alias nelts = 2 * simdwidthof[dtype]()
alias seed = 42
alias epsilon = 1e-12
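For context: a module-wide epsilon like this is a numerical guard for backward passes that divide by, take the log of, or raise to a negative power a value that can be exactly zero. A minimal sketch of the pattern (illustration only; the POW kernel in basics.mojo further down defines its own local epsilon with the same value rather than importing this alias):

```mojo
from math import log
from basalt import epsilon  # the alias added above, 1e-12

# Sketch: typical uses of a global guard like this in backward kernels.
fn safe_reciprocal(x: Float64) -> Float64:
    return 1.0 / (x + epsilon)  # avoids a division by exactly 0

fn safe_log(x: Float64) -> Float64:
    return log(x + epsilon)     # avoids log(0) = -inf
```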
25 changes: 8 additions & 17 deletions basalt/autograd/attributes.mojo
@@ -1,4 +1,5 @@
from collections import Optional, OptionalReg
from utils.static_tuple import StaticTuple

from basalt.nn.tensor import Tensor, TensorShape, MAX_RANK
from basalt.utils.bytes import Bytes, scalar_to_bytes, bytes_to_scalar
@@ -45,9 +46,8 @@ struct AttributeVector(Sized, Stringable, CollectionElement):
var attributes: StaticTuple[Attribute, MAX_ATTRS]
var size: Int

@always_inline("nodebug")
fn __init__(inout self, *attributes: Attribute):
self.attributes = StaticTuple[Attribute, MAX_ATTRS]()
self.attributes = StaticTuple[Attribute, MAX_ATTRS](Attribute("", ""))
self.size = len(attributes)
for i in range(self.size):
self.attributes[i] = attributes[i]
@@ -67,7 +67,10 @@ struct AttributeVector(Sized, Stringable, CollectionElement):
return self.attributes[i]
return None

@always_inline("nodebug")
fn append(inout self, attribute: Attribute):
self.attributes[self.size] = attribute
self.size += 1

fn __str__(self) -> String:
var s: String = "["
for i in range(self.size):
@@ -85,15 +88,13 @@ struct Attribute(Stringable, CollectionElement):
var type: AttributeType
var size: Int

@always_inline("nodebug")
fn __init__(inout self, name: String, value: String):
self.data_shape = StaticIntTuple[MAX_RANK]()
self.name = Bytes[MAX_NAME_CHARS](name)
self.data = Bytes[MAX_DATA_BYTES](value)
self.type = AttributeType.STRING
self.size = len(value)

@always_inline("nodebug")
fn __init__(inout self, name: String, value: TensorShape):
self.data_shape = StaticIntTuple[MAX_RANK]()
self.name = Bytes[MAX_NAME_CHARS](name)
@@ -104,7 +105,6 @@
for i in range(self.size):
self.data_shape[i] = value._shape[i]

@always_inline("nodebug")
fn __init__[N: Int](inout self, name: String, value: StaticIntTuple[N]):
constrained[N < MAX_RANK, "Attribute rank must be less than MAX_RANK."]()

@@ -115,9 +115,8 @@ struct Attribute(Stringable, CollectionElement):
self.size = N

for i in range(self.size):
self.data[i] = value[i]
self.data_shape[i] = value[i]

@always_inline("nodebug")
fn __init__[dtype: DType](inout self, name: String, value: Scalar[dtype]):
constrained[dtype.is_numeric(), "Attribute value must be numeric."]()

@@ -127,46 +126,38 @@ struct Attribute(Stringable, CollectionElement):
self.type = AttributeType(dtype)
self.size = 1

@always_inline("nodebug")
fn __init__(inout self, name: String, value: Int):
self.__init__(name, Int64(value))
self.data_shape[0] = 1

@always_inline("nodebug")
fn __init__(inout self, name: String, value: FloatLiteral):
self.__init__(name, Float64(value))
self.data_shape[0] = 1

@always_inline("nodebug")
fn __str__(self) -> String:
return "Attribute(" + str(self.name) + ", " + "..." + ")"

@always_inline("nodebug")
fn to_string(self) -> String:
return str(self.data)

@always_inline("nodebug")
fn to_shape(self) -> TensorShape:
return TensorShape(rank=self.size, shape=self.data_shape)

@always_inline("nodebug")
fn to_static[N: Int](self) -> StaticIntTuple[N]:
constrained[N < MAX_RANK, "Attribute rank must be less than MAX_RANK."]()

var result = StaticIntTuple[N]()

for i in range(N):
result[i] = int(self.data[i])
result[i] = int(self.data_shape[i])

return result

@always_inline("nodebug")
fn to_scalar[dtype: DType](self) -> Scalar[dtype]:
constrained[dtype.is_numeric(), "Attribute value must be numeric."]()

return bytes_to_scalar[dtype](self.data)

@always_inline("nodebug")
fn to_int(self) -> Int:
return int(self.to_scalar[DType.int64]())

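A short usage sketch of the API touched above: the new `append` on `AttributeVector`, and `to_static`, which now reads back from `data_shape` (where the `StaticIntTuple` constructor stores its values) instead of the raw byte buffer. The access pattern mirrors `attributes["dim"].value().to_int()` used in dynamics.mojo further down; the import paths and values are illustrative assumptions:

```mojo
from utils.index import StaticIntTuple  # assumed import path for this Mojo version
from basalt.autograd.attributes import Attribute, AttributeVector

fn example_attributes():
    # Start with one attribute, then grow the vector with the new append().
    var attrs = AttributeVector(Attribute("dim", 0))
    attrs.append(Attribute("scales", StaticIntTuple[2](2, 2)))

    # to_static() now reads data_shape, so the round trip preserves the tuple.
    var scales = attrs["scales"].value().to_static[2]()  # StaticIntTuple[2](2, 2)
    var dim = attrs["dim"].value().to_int()               # 0
```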
116 changes: 58 additions & 58 deletions basalt/autograd/graph.mojo
@@ -11,7 +11,6 @@ from basalt import seed, dtype
from basalt import Tensor, TensorShape


@value
struct Graph:
var inputs: List[Symbol]
var params: ParamDict
@@ -28,41 +27,42 @@ struct Graph:
self.loss_out = None
self.symbol_count = 0

fn input(inout self, shape: TensorShape, trainable: Bool = False) -> Symbol:
var inp = Symbol(self.symbol_count, dtype, shape, trainable)
self.inputs.append(inp)
self.symbol_count += 1
return inp
fn __moveinit__(inout self, owned other: Graph):
self.inputs = other.inputs^
self.params = other.params^
self.nodes = other.nodes^
self.outputs = other.outputs^
self.loss_out = other.loss_out
self.symbol_count = other.symbol_count

fn param(
inout self, shape: TensorShape, init: Param, trainable: Bool = True
) -> Symbol:
var param_id = Symbol(self.symbol_count, dtype, shape, trainable)
self.params.put(param_id, init)
fn create_symbol(inout self, shape: TensorShape, data: Optional[Param] = None, trainable: Bool = False, is_input: Bool = False) -> Symbol:
var symbol = Symbol(self.symbol_count, dtype, shape, trainable)
self.symbol_count += 1
return param_id

if is_input:
self.inputs.append(symbol)
else:
if data is not None:
self.params.put(symbol, data.value()[])
else:
self.params.put(symbol)

return symbol

fn input(inout self, shape: TensorShape, trainable: Bool = False) -> Symbol:
return self.create_symbol(shape, trainable=trainable, is_input=True)

fn param(inout self, shape: TensorShape, init: Param, trainable: Bool = True) -> Symbol:
return self.create_symbol(shape, init, trainable)

fn param(inout self, shape: TensorShape, trainable: Bool = True) -> Symbol:
var param_id = Symbol(self.symbol_count, dtype, shape, trainable)
self.params.put(param_id)
self.symbol_count += 1
return param_id
return self.create_symbol(shape, trainable=trainable)

fn scalar(inout self, value: Scalar[dtype]) -> Symbol:
var scal = Param(value)
var scalar_id = Symbol(
self.symbol_count, dtype, TensorShape(1), trainable=False
)
self.params.put(scalar_id, scal)
self.symbol_count += 1
return scalar_id
return self.create_symbol(TensorShape(1), Param(value), trainable=False)

fn constant(inout self, shape: TensorShape, data: List[Scalar[dtype]]) -> Symbol:
var cst = Param(data)
var constant_id = Symbol(self.symbol_count, dtype, shape, trainable=False)
self.params.put(constant_id, cst)
self.symbol_count += 1
return constant_id
return self.create_symbol(shape, Param(data), trainable=False)

fn out(inout self, symbol: Symbol):
self.outputs.append(symbol)
@@ -77,14 +77,15 @@ struct Graph:
attributes: AttributeVector = AttributeVector(),
) -> Symbol:
var res_shape = static_result_shape(op, operands, attributes)
var res = Symbol(
self.symbol_count, dtype, res_shape, self.result_trainable(operands)
)
var res = Symbol(self.symbol_count, dtype, res_shape, self.result_trainable(operands))
self.symbol_count += 1

var inputs = List[Symbol]()
inputs.reserve(len(operands))

for operand in operands:
inputs.append(operand)

self.nodes.append(Node(op, inputs, List[Symbol](res), attributes))
return res

@@ -95,8 +96,7 @@ struct Graph:
operand_2: Float64,
attributes: AttributeVector = AttributeVector(),
) -> Symbol:
var operand_2_symbol = self.scalar(operand_2)
return self.op(op, operand_1, operand_2_symbol, attributes=attributes)
return self.op(op, operand_1, self.scalar(operand_2), attributes=attributes)

fn op(
inout self,
@@ -105,43 +105,43 @@ struct Graph:
operand_2: Symbol,
attributes: AttributeVector = AttributeVector(),
) -> Symbol:
var operand_1_symbol = self.scalar(operand_1)
return self.op(op, operand_1_symbol, operand_2, attributes=attributes)
return self.op(op, self.scalar(operand_1), operand_2, attributes=attributes)

fn create_symbols(inout self, shapes: List[TensorShape], trainable: Bool = False) -> List[Symbol]:
var symbols = List[Symbol]()
symbols.reserve(len(shapes))

for shape in shapes:
symbols.append(Symbol(self.symbol_count, dtype, shape[], trainable))
self.symbol_count += 1

return symbols

fn add_node(inout self, op: OP, inputs: List[Symbol], outputs: List[Symbol], attributes: AttributeVector):
self.nodes.append(Node(op, inputs, outputs, attributes))

# Dynamic ops
fn concat(inout self, *operands: Symbol, dim: Int = 0) -> Symbol:
# NOTE: Concat could fit into g.op() if a different static_result_shape were used
var attributes = AttributeVector(Attribute("dim", dim))

var res_shape = dynamic_result_shape(OP.CONCAT, operands, attributes)[0]
var res = Symbol(
self.symbol_count, dtype, res_shape, self.result_trainable(operands)
)
self.symbol_count += 1
var res_symbols = self.create_symbols(List[TensorShape](res_shape), self.result_trainable(operands))

var inputs = List[Symbol]()
var operand_list = List[Symbol]()
operand_list.reserve(len(operands))
for operand in operands:
inputs.append(operand)
self.nodes.append(Node(OP.CONCAT, inputs, List[Symbol](res), attributes))
return res
operand_list.append(operand)

self.add_node(OP.CONCAT, operand_list, res_symbols, attributes)
return res_symbols[0]

fn split(
inout self, operand: Symbol, sections: List[Int], dim: Int = 0
) -> List[Symbol]:
var attributes = AttributeVector(
Attribute("sections", TensorShape(sections)), Attribute("dim", dim)
)
var attributes = AttributeVector(Attribute("sections", TensorShape(sections)), Attribute("dim", dim))
var res_shapes = dynamic_result_shape(OP.SPLIT, operand, attributes)
var trainable = self.result_trainable(operand)

var results = List[Symbol]()
for i in range(len(res_shapes)):
var symbol = Symbol(self.symbol_count, dtype, res_shapes[i], trainable)
results.append(symbol)
self.symbol_count += 1

self.nodes.append(Node(OP.SPLIT, List[Symbol](operand), results, attributes))
return results
var result_symbols = self.create_symbols(res_shapes, trainable)
self.add_node(OP.SPLIT, List[Symbol](operand), result_symbols, attributes)
return result_symbols

@staticmethod
fn result_trainable(operands: VariadicList[Symbol]) -> Bool:
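The user-facing `Graph` API is unchanged by this refactor: `input`, `param`, `scalar`, and `constant` now delegate to `create_symbol`, while the dynamic ops (`concat`, `split`) go through `create_symbols` plus `add_node`. A hedged usage sketch; the import paths, shapes, and section sizes are illustrative assumptions:

```mojo
from basalt import TensorShape
from basalt.autograd import Graph

fn build_graph() -> Graph:
    var g = Graph()

    # Routed through create_symbol(..., is_input=True).
    var x = g.input(TensorShape(1, 2, 4, 4))

    # Dynamic ops: result symbols come from create_symbols + add_node.
    var halves = g.split(x, sections=List[Int](2, 2), dim=3)
    var y = g.concat(halves[0], halves[1], dim=3)

    g.out(y)
    return g^
```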
21 changes: 15 additions & 6 deletions basalt/autograd/ops/basics.mojo
@@ -1,11 +1,15 @@
from math import add, sub, mul, div, log, exp
from math import log, exp
from algorithm import vectorize
from memory import memcpy
from utils.numerics import isinf

from basalt import Tensor, TensorShape
from basalt.nn.tensor import MAX_RANK
from basalt.utils.tensorutils import *
from basalt.autograd.attributes import Attribute, AttributeVector
from basalt.autograd.ops.matmul import dot, dot_transpose_t1, dot_transpose_t2
from basalt.utils.math_util import add, sub, mul, div


"""
Implement forward and backward operations for basic tensor manipulations.
@@ -315,28 +319,33 @@ struct POW:
# d(x^y) / dx = y * x^(y-1)
# d(x^y) / dy = sum( x^y * log(x) )
var res_grad: Tensor[dtype]
var a = int(t2[0])
var a = t2[0]

alias epsilon = 1e-12

@parameter
if tensor_id == 0:
res_grad = Tensor[dtype](t1_shape)

@parameter
fn vec_pow_bw_x[nelts: Int](i: Int):
res_grad.store[nelts](
i, a * (t1.load[nelts](i) ** (a - 1)) * ug.load[nelts](i)
)
res_grad.store[nelts](i, a * ((t1.load[nelts](i) + epsilon) ** (a - 1)) * ug.load[nelts](i))

vectorize[vec_pow_bw_x, nelts](t1_shape.num_elements())

else:
# Gradient of the exponent
res_grad = Tensor[dtype](t2_shape) # t2_shape == TensorShape(1)

@parameter
fn vec_pow_bw_y[nelts: Int](i: Int):
# Guard the case where the value passed to log is 0.0 (log(0) = -inf)
var temp_log = log(t1.load[nelts](i))
var temp_log_is_inf = isinf(temp_log)
temp_log = temp_log_is_inf.select(0, temp_log)
res_grad[0] += (
(t1.load[nelts](i) ** a)
* log(t1.load[nelts](i))
* temp_log
* ug.load[nelts](i)
).reduce_add()

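Written out for a single scalar element, the two guards added to POW's backward pass look like the sketch below (illustration only, not the vectorized kernel; `log` and `isinf` come from the same imports the diff adds):

```mojo
from math import log
from utils.numerics import isinf

alias epsilon = 1e-12

# Gradient w.r.t. the base: d(x^a)/dx = a * x^(a-1) * upstream.
# epsilon keeps a base of exactly 0 from producing inf/nan when a < 1.
fn pow_grad_base(x: Float32, a: Float32, ug: Float32) -> Float32:
    return a * ((x + epsilon) ** (a - 1)) * ug

# Gradient w.r.t. the exponent: d(x^a)/da = x^a * log(x) * upstream.
# log(0) = -inf would poison the reduce_add, so it is masked to 0,
# mirroring the isinf/select pair in the vectorized kernel above.
fn pow_grad_exp(x: Float32, a: Float32, ug: Float32) -> Float32:
    var lx = log(x)
    if isinf(lx):
        lx = 0
    return (x ** a) * lx * ug
```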
2 changes: 0 additions & 2 deletions basalt/autograd/ops/conv.mojo
@@ -1,9 +1,7 @@
from basalt import Tensor, TensorShape
from basalt.autograd.attributes import AttributeVector
from basalt.utils.tensorutils import dot, dot_transpose_t1, dot_transpose_t2

from algorithm import parallelize, vectorize, tile
from math import divmod
from utils.loop import unroll


8 changes: 4 additions & 4 deletions basalt/autograd/ops/dynamics.mojo
@@ -33,7 +33,7 @@ struct CONCAT:
fn forward[attributes: AttributeVector](
inputs: List[Symbol],
outputs: List[Symbol],
parameters: Parameters,
inout parameters: Parameters,
):
alias dim = attributes["dim"].value().to_int() if attributes["dim"] else 0
var n_chunks = Self.calc_chunks(inputs[0].shape, dim)
@@ -58,7 +58,7 @@ struct CONCAT:
fn backward[input_id: Int, attributes: AttributeVector](
inputs: List[Symbol],
outputs: List[Symbol],
parameters: Parameters,
inout parameters: Parameters,
) -> Tensor[dtype]:
alias dim = attributes["dim"].value().to_int() if attributes["dim"] else 0
var n_chunks = Self.calc_chunks(inputs[0].shape, dim)
@@ -113,7 +113,7 @@ struct SPLIT:
fn forward[attributes: AttributeVector](
inputs: List[Symbol],
outputs: List[Symbol],
parameters: Parameters,
inout parameters: Parameters,
):
alias dim = attributes["dim"].value().to_int() if attributes["dim"] else 0
alias sections = attributes["sections"].value().to_shape()
@@ -139,7 +139,7 @@ struct SPLIT:
fn backward[input_id: Int, attributes: AttributeVector](
inputs: List[Symbol],
outputs: List[Symbol],
parameters: Parameters,
inout parameters: Parameters,
) -> Tensor[dtype]:
alias dim = attributes["dim"].value().to_int() if attributes["dim"] else 0
alias sections = attributes["sections"].value().to_shape()
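The only change in this file is the argument convention: `parameters` becomes `inout` because CONCAT and SPLIT write their results into tensors held by `Parameters`, and a Mojo `fn` argument is an immutable borrow unless marked `inout`. A minimal standalone illustration of that rule (nothing below is Basalt API):

```mojo
struct Counter:
    var value: Int

    fn __init__(inout self):
        self.value = 0

fn bump(inout c: Counter):
    # Legal only because `c` is declared inout; with the default borrowed
    # convention this mutation would not compile.
    c.value += 1
```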