diff --git a/README.md b/README.md deleted file mode 100644 index 2611af2..0000000 --- a/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Final2X - diff --git a/doc b/doc new file mode 100644 index 0000000..3d762ef Binary files /dev/null and b/doc differ diff --git a/doc/Micrograd_泛读报告.docx b/doc/Micrograd_泛读报告.docx deleted file mode 100644 index fa6ff33..0000000 Binary files a/doc/Micrograd_泛读报告.docx and /dev/null differ diff --git a/src/Micrograd_Core_Analysis.py b/src/Micrograd_Core_Analysis.py deleted file mode 100644 index 26a38af..0000000 --- a/src/Micrograd_Core_Analysis.py +++ /dev/null @@ -1,167 +0,0 @@ -import math - -# ============================================================================= -# Micrograd Core Engine -# Author: Andrej Karpathy -# Analysis Copy for Educational Purpose -# ============================================================================= - -class Value: - """ - 核心类:Value - 它封装了一个标量(scalar)值,并跟踪其梯度(gradient)以及生成它的计算历史。 - 这实际上就是 PyTorch 中 Tensor 的微缩版。 - """ - - def __init__(self, data, _children=(), _op=''): - self.data = data # 存储实际的数值 - self.grad = 0 # 存储关于最终损失函数的导数(梯度) - - # --- 自动微分图构建所需的内部变量 --- - # _backward 是一个函数闭包,存储了如何求该节点梯度的逻辑 - self._backward = lambda: None - # _prev 存储了产生当前节点的父节点集合(用于构建计算图 DAG) - self._prev = set(_children) - # _op 记录了生成该节点的操作符(如 +, *, ReLU 等),用于调试可视化 - self._op = _op - - def __add__(self, other): - """实现加法: self + other""" - other = other if isinstance(other, Value) else Value(other) - - # 前向传播 (Forward Pass): 计算结果值 - out = Value(self.data + other.data, (self, other), '+') - - # 反向传播 (Backward Pass): 定义局部梯度计算逻辑 - # 链式法则: out = a + b - # d(out)/da = 1, d(out)/db = 1 - # 所以局部梯度直接传递给父节点 - def _backward(): - self.grad += 1.0 * out.grad - other.grad += 1.0 * out.grad - out._backward = _backward - - return out - - def __mul__(self, other): - """实现乘法: self * other""" - other = other if isinstance(other, Value) else Value(other) - - # 前向传播 - out = Value(self.data * other.data, (self, other), '*') - - # 反向传播 - # 链式法则: out = a * b - # d(out)/da = b - # d(out)/db = a - def _backward(): - self.grad += other.data * out.grad - other.grad += self.data * out.grad - out._backward = _backward - - return out - - def __pow__(self, other): - """实现幂运算: self ** other""" - assert isinstance(other, (int, float)), "only supporting int/float powers for now" - out = Value(self.data**other, (self,), f'**{other}') - - # 链式法则: out = a^b - # d(out)/da = b * a^(b-1) - def _backward(): - self.grad += (other * self.data**(other-1)) * out.grad - out._backward = _backward - - return out - - def relu(self): - """实现激活函数 ReLU: max(0, x)""" - out = Value(0 if self.data < 0 else self.data, (self,), 'ReLU') - - # 链式法则: - # 如果 x > 0, 导数为 1; 否则为 0 - def _backward(): - self.grad += (out.data > 0) * out.grad - out._backward = _backward - - return out - - def backward(self): - """ - 核心调度函数:执行反向传播 - 从当前节点(通常是 Loss)开始,自动计算所有前驱节点的梯度 - """ - - # 1. 拓扑排序 (Topological Sort) - # 必须确保在计算节点 v 的梯度前,所有依赖 v 的节点的梯度都已经计算完毕 - topo = [] - visited = set() - def build_topo(v): - if v not in visited: - visited.add(v) - for child in v._prev: - build_topo(child) - topo.append(v) - build_topo(self) - - # 2. 
按照拓扑序的逆序应用链式法则 - self.grad = 1 # 最终输出对自己的导数永远是 1 - for v in reversed(topo): - v._backward() - - # --- 以下是辅助运算符重载,方便像数学公式一样写代码 --- - - def __neg__(self): # -self - return self * -1 - - def __radd__(self, other): # other + self - return self + other - - def __sub__(self, other): # self - other - return self + (-other) - - def __rsub__(self, other): # other - self - return other + (-self) - - def __rmul__(self, other): # other * self - return self * other - - def __truediv__(self, other): # self / other - return self * other**-1 - - def __rtruediv__(self, other): # other / self - return other * self**-1 - - def __repr__(self): - return f"Value(data={self.data}, grad={self.grad})" - -# ============================================================================= -# 使用示例 -# ============================================================================= -if __name__ == '__main__': - # 定义简单的计算图 - # inputs - x1 = Value(2.0, _op='x1') - x2 = Value(0.0, _op='x2') - # weights - w1 = Value(-3.0, _op='w1') - w2 = Value(1.0, _op='w2') - # bias - b = Value(6.8813735870195432, _op='b') - - # x1*w1 + x2*w2 + b - x1w1 = x1*w1; x1w1._op = 'x1*w1' - x2w2 = x2*w2; x2w2._op = 'x2*w2' - x1w1x2w2 = x1w1 + x2w2; x1w1x2w2._op = 'x1*w1 + x2*w2' - n = x1w1x2w2 + b; n._op = 'n' - - # activation function - o = n.relu(); o._op = 'o' - - # backward pass - o.backward() - - print("Execution Graph Result:") - print(f"Output (o): {o.data}") - print(f"Gradient w1: {w1.grad}") - print(f"Gradient x1: {x1.grad}") diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..101a66c --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,28 @@ +from .datasets import datasets # noqa: F401,F403 +from .testing import MathTest, MathTestVariable # noqa: F401,F403 + +# MODULE 0 +from .module import * # noqa: F401,F403 + + +# MODULE 1 +from .scalar import * # noqa: F401,F403 +from .autodiff import * # noqa: F401,F403 + +# MODULE 2 +from .tensor import * # noqa: F401,F403 +from .tensor_data import * # noqa: F401,F403 +from .tensor_functions import * # noqa: F401,F403 +from .tensor_ops import * # noqa: F401,F403 + +# MODULE 3 +from .fast_ops import * # noqa: F401,F403 +from .cuda_ops import * # noqa: F401,F403 + +# MODULE 4 +from .nn import * # noqa: F401,F403 +from .fast_conv import * # noqa: F401,F403 + +from .optim import * # noqa: F401,F403 + +version = "0.2" diff --git a/src/autodiff.py b/src/autodiff.py new file mode 100644 index 0000000..370f9ba --- /dev/null +++ b/src/autodiff.py @@ -0,0 +1,346 @@ +from collections import defaultdict + +variable_count = 1 + + +# ## Module 1 + +# Variable is the main class for autodifferentiation logic for scalars +# and tensors. + + +class Variable: + """ + Attributes: + history (:class:`History` or None) : the Function calls that created this variable or None if constant + derivative (variable type): the derivative with respect to this variable + grad (variable type) : alias for derivative, used for tensors + name (string) : a globally unique name of the variable + """ + + def __init__(self, history, name=None): + global variable_count + assert history is None or isinstance(history, History), history + + self.history = history + self._derivative = None + + # This is a bit simplistic, but make things easier. + variable_count += 1 + self.unique_id = "Variable" + str(variable_count) + + # For debugging can have a name. 
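        # Fall back to the auto-generated unique id when no explicit name is
        # given, so every Variable has a printable name.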
+ if name is not None: + self.name = name + else: + self.name = self.unique_id + self.used = 0 + + def requires_grad_(self, val): + """ + Set the requires_grad flag to `val` on variable. + + Ensures that operations on this variable will trigger + backpropagation. + + Args: + val (bool): whether to require grad + """ + self.history = History() + + def backward(self, d_output=None): + """ + Calls autodiff to fill in the derivatives for the history of this object. + + Args: + d_output (number, opt): starting derivative to backpropagate through the model + (typically left out, and assumed to be 1.0). + """ + if d_output is None: + d_output = 1.0 + backpropagate(self, d_output) + + @property + def derivative(self): + return self._derivative + + def is_leaf(self): + "True if this variable created by the user (no `last_fn`)" + return self.history.last_fn is None + + def accumulate_derivative(self, val): + """ + Add `val` to the the derivative accumulated on this variable. + Should only be called during autodifferentiation on leaf variables. + + Args: + val (number): value to be accumulated + """ + assert self.is_leaf(), "Only leaf variables can have derivatives." + if self._derivative is None: + self._derivative = self.zeros() + self._derivative += val + + def zero_derivative_(self): # pragma: no cover + """ + Reset the derivative on this variable. + """ + self._derivative = self.zeros() + + def zero_grad_(self): # pragma: no cover + """ + Reset the derivative on this variable. + """ + self.zero_derivative_() + + def expand(self, x): + "Placeholder for tensor variables" + return x + + # Helper functions for children classes. + + def __radd__(self, b): + return self + b + + def __rmul__(self, b): + return self * b + + def zeros(self): + return 0.0 + + +# Some helper functions for handling optional tuples. + + +def wrap_tuple(x): + "Turn a possible value into a tuple" + if isinstance(x, tuple): + return x + return (x,) + + +def unwrap_tuple(x): + "Turn a singleton tuple into a value" + if len(x) == 1: + return x[0] + return x + + +# Classes for Functions. + + +class Context: + """ + Context class is used by `Function` to store information during the forward pass. + + Attributes: + no_grad (bool) : do not save gradient information + saved_values (tuple) : tuple of values saved for backward pass + saved_tensors (tuple) : alias for saved_values + """ + + def __init__(self, no_grad=False): + self._saved_values = None + self.no_grad = no_grad + + def save_for_backward(self, *values): + """ + Store the given `values` if they need to be used during backpropagation. + + Args: + values (list of values) : values to save for backward + """ + if self.no_grad: + return + self._saved_values = values + + @property + def saved_values(self): + assert not self.no_grad, "Doesn't require grad" + assert self._saved_values is not None, "Did you forget to save values?" + return unwrap_tuple(self._saved_values) + + @property + def saved_tensors(self): # pragma: no cover + return self.saved_values + + +class History: + """ + `History` stores the history of `Function` operations that was + used to construct the current Variable. + + Attributes: + last_fn (:class:`FunctionBase`) : The last Function that was called. + ctx (:class:`Context`): The context for that Function. + inputs (list of inputs) : The inputs that were given when `last_fn.forward` was called. 
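
    Example (illustrative sketch, not part of the original code; it uses the
    `Scalar` and `Add` classes from scalar.py)::

        x = Scalar(1.0)
        out = x + 2.0            # internally runs Add.apply(x, 2.0)
        out.history.last_fn      # Add
        out.history.inputs       # (x, 2.0)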
+ + """ + + def __init__(self, last_fn=None, ctx=None, inputs=None): + self.last_fn = last_fn + self.ctx = ctx + self.inputs = inputs + + def backprop_step(self, d_output): + """ + Run one step of backpropagation by calling chain rule. + + Args: + d_output : a derivative with respect to this variable + + Returns: + list of numbers : a derivative with respect to `inputs` + """ + + return self.last_fn.chain_rule(self.ctx, self.inputs, d_output) + + +class FunctionBase: + """ + A function that can act on :class:`Variable` arguments to + produce a :class:`Variable` output, while tracking the internal history. + + Call by :func:`FunctionBase.apply`. + + """ + + @staticmethod + def variable(raw, history): + # Implement by children class. + raise NotImplementedError() + + @classmethod + def apply(cls, *vals): + """ + Apply is called by the user to run the Function. + Internally it does three things: + + a) Creates a Context for the function call. + b) Calls forward to run the function. + c) Attaches the Context to the History of the new variable. + + There is a bit of internal complexity in our implementation + to handle both scalars and tensors. + + Args: + vals (list of Variables or constants) : The arguments to forward + + Returns: + `Variable` : The new variable produced + + """ + # Go through the variables to see if any needs grad. + raw_vals = [] + need_grad = False + for v in vals: + if isinstance(v, Variable): + if v.history is not None: + need_grad = True + v.used += 1 + raw_vals.append(v.get_data()) + else: + raw_vals.append(v) + + # Create the context. + ctx = Context(not need_grad) + + # Call forward with the variables. + c = cls.forward(ctx, *raw_vals) + assert isinstance(c, cls.data_type), "Expected return typ %s got %s" % ( + cls.data_type, + type(c), + ) + + # Create a new variable from the result with a new history. + back = None + if need_grad: + back = History(cls, ctx, vals) + return cls.variable(cls.data(c), back) + + @classmethod + def chain_rule(cls, ctx, inputs, d_output): + """ + Implement the derivative chain-rule. + + Args: + ctx (:class:`Context`) : The context from running forward + inputs (list of args) : The args that were passed to :func:`FunctionBase.apply` (e.g. :math:`x, y`) + d_output (number) : The `d_output` value in the chain rule. + + Returns: + list of (`Variable`, number) : A list of non-constant variables with their derivatives + (see `is_constant` to remove unneeded variables) + + """ + # Tip: Note when implementing this function that + # cls.backward may return either a value or a tuple. + + d = cls.backward(ctx, d_output) + for i, input in enumerate(inputs): + if not is_constant(input): + yield (input, d[i] if isinstance(d, tuple) else d) + + +# Algorithms for backpropagation + + +def is_constant(val): + return not isinstance(val, Variable) or val.history is None + + +def topological_sort(variable): + """ + Computes the topological order of the computation graph. + + Args: + variable (:class:`Variable`): The right-most variable + + Returns: + list of Variables : Non-constant Variables in topological order + starting from the right. 
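
    Example (illustrative sketch, not part of the original code; it uses the
    `Scalar` class from scalar.py)::

        x = Scalar(2.0)
        y = x * x + x
        order = topological_sort(y)
        # order[0] is y (the right-most variable); x comes after every
        # variable that was computed from it.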
+ """ + + sorted = [] + visited = set() + + def visit(var): + if var.unique_id in visited: + return + if not var.is_leaf(): + for input in var.history.inputs: + if not is_constant(input): + visit(input) + visited.add(var.unique_id) + sorted.insert(0, var) + + visit(variable) + return sorted + + +def backpropagate(variable, deriv): + """ + Runs backpropagation on the computation graph in order to + compute derivatives for the leave nodes. + + See :doc:`backpropagate` for details on the algorithm. + + Args: + variable (:class:`Variable`): The right-most variable + deriv (number) : Its derivative that we want to propagate backward to the leaves. + + No return. Should write to its results to the derivative values of each leaf through `accumulate_derivative`. + """ + + sorted = topological_sort(variable) + + d_dict = defaultdict(float) + d_dict[variable.unique_id] = deriv + for var in sorted: + d = d_dict[var.unique_id] + if not var.is_leaf(): + for v, d_part in var.history.backprop_step(d): + d_dict[v.unique_id] += d_part + else: + var.accumulate_derivative(d) + diff --git a/src/cuda_conv.py b/src/cuda_conv.py new file mode 100644 index 0000000..e69de29 diff --git a/src/cuda_ops.py b/src/cuda_ops.py new file mode 100644 index 0000000..7022b93 --- /dev/null +++ b/src/cuda_ops.py @@ -0,0 +1,512 @@ +# Credit (not tested): https://github.com/ethanglaser/minitorch/blob/8bfdac956d/module4/minitorch/cuda_ops.py + +from numba import cuda +import numba +from .tensor_data import ( + to_index, + index_to_position, + TensorData, + broadcast_index, + shape_broadcast, + MAX_DIMS, +) + +# This code will CUDA compile fast versions your tensor_data functions. +# If you get an error, read the docs for NUMBA as to what is allowed +# in these functions. + +to_index = cuda.jit(device=True)(to_index) +index_to_position = cuda.jit(device=True)(index_to_position) +broadcast_index = cuda.jit(device=True)(broadcast_index) + +THREADS_PER_BLOCK = 32 + + +def tensor_map(fn): + """ + CUDA higher-order tensor map function. :: + + fn_map = tensor_map(fn) + fn_map(out, ... ) + + Args: + fn: function mappings floats-to-floats to apply. + out (array): storage for out tensor. + out_shape (array): shape for out tensor. + out_strides (array): strides for out tensor. + out_size (array): size for out tensor. + in_storage (array): storage for in tensor. + in_shape (array): shape for in tensor. + in_strides (array): strides for in tensor. + + Returns: + None : Fills in `out` + """ + + def _map(out, out_shape, out_strides, out_size, in_storage, in_shape, in_strides): + out_i = numba.cuda.blockIdx.x * THREADS_PER_BLOCK + numba.cuda.threadIdx.x + if out_i < out.size: + out_index = numba.cuda.local.array(MAX_DIMS, numba.int32) + in_index = numba.cuda.local.array(MAX_DIMS, numba.int32) + to_index(out_i, out_shape, out_index) + broadcast_index(out_index, out_shape, in_shape, in_index) + in_position = index_to_position(in_index, in_strides) + out_position = index_to_position(out_index, out_strides) + out[out_position] = fn(in_storage[in_position]) + + return cuda.jit()(_map) + + +def map(fn): + # CUDA compile your kernel + f = tensor_map(cuda.jit(device=True)(fn)) + + def ret(a, out=None): + if out is None: + out = a.zeros(a.shape) + + # Instantiate and run the cuda kernel. 
+ threadsperblock = THREADS_PER_BLOCK + blockspergrid = (out.size + THREADS_PER_BLOCK - 1) // THREADS_PER_BLOCK + f[blockspergrid, threadsperblock](*out.tuple(), out.size, *a.tuple()) + return out + + return ret + + +def tensor_zip(fn): + """ + CUDA higher-order tensor zipWith (or map2) function :: + + fn_zip = tensor_zip(fn) + fn_zip(out, ...) + + Args: + fn: function mappings two floats to float to apply. + out (array): storage for `out` tensor. + out_shape (array): shape for `out` tensor. + out_strides (array): strides for `out` tensor. + out_size (array): size for `out` tensor. + a_storage (array): storage for `a` tensor. + a_shape (array): shape for `a` tensor. + a_strides (array): strides for `a` tensor. + b_storage (array): storage for `b` tensor. + b_shape (array): shape for `b` tensor. + b_strides (array): strides for `b` tensor. + + Returns: + None : Fills in `out` + """ + + def _zip( + out, + out_shape, + out_strides, + out_size, + a_storage, + a_shape, + a_strides, + b_storage, + b_shape, + b_strides, + ): + + out_i = numba.cuda.blockIdx.x * THREADS_PER_BLOCK + numba.cuda.threadIdx.x + if out_i < out.size: + out_index = numba.cuda.local.array(MAX_DIMS, numba.int32) + a_index = numba.cuda.local.array(MAX_DIMS, numba.int32) + b_index = numba.cuda.local.array(MAX_DIMS, numba.int32) + + to_index(out_i, out_shape, out_index) + broadcast_index(out_index, out_shape, a_shape, a_index) + broadcast_index(out_index, out_shape, b_shape, b_index) + a_position, b_position = ( + index_to_position(a_index, a_strides), + index_to_position(b_index, b_strides), + ) + out_position = index_to_position(out_index, out_strides) + out[out_position] = fn(a_storage[a_position], b_storage[b_position]) + + return cuda.jit()(_zip) + + +def zip(fn): + f = tensor_zip(cuda.jit(device=True)(fn)) + + def ret(a, b): + c_shape = shape_broadcast(a.shape, b.shape) + out = a.zeros(c_shape) + threadsperblock = THREADS_PER_BLOCK + blockspergrid = (out.size + (threadsperblock - 1)) // threadsperblock + f[blockspergrid, threadsperblock]( + *out.tuple(), out.size, *a.tuple(), *b.tuple() + ) + return out + + return ret + + +def _sum_practice(out, a, size): + """ + This is a practice sum kernel to prepare for reduce. + + Given an array of length :math:`n` and out of size :math:`n // blockDIM` + it should sum up each blockDim values into an out cell. + + [a_1, a_2, ..., a_100] + + | + + [a_1 +...+ a_32, a_32 + ... + a_64, ... ,] + + Note: Each block must do the sum using shared memory! + + Args: + out (array): storage for `out` tensor. + a (array): storage for `a` tensor. + size (int): length of a. 
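
    Example (illustrative, untested sketch -- the kernel above is credited as
    untested, and `rand` from tensor_functions.py is assumed here)::

        a = rand((50,))
        out = sum_practice(a)
        # each cell of `out` holds the partial sum of one 32-wide block of `a`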
+ + """ + BLOCK_DIM = 32 + local_idx = numba.cuda.threadIdx.x + block_idx = numba.cuda.blockIdx.x + shared_block = numba.cuda.shared.array(BLOCK_DIM, numba.float64) + offset = 1 + + if block_idx * THREADS_PER_BLOCK + local_idx < size: + shared_block[local_idx] = a[block_idx * THREADS_PER_BLOCK + local_idx] + else: + shared_block[local_idx] = 0 + + while offset < BLOCK_DIM: + numba.cuda.syncthreads() + if local_idx % (offset * 2) == 0: + shared_block[local_idx] += shared_block[local_idx + offset] + offset *= 2 + + out[block_idx] = shared_block[0] + + +jit_sum_practice = cuda.jit()(_sum_practice) + + +def sum_practice(a): + (size,) = a.shape + threadsperblock = THREADS_PER_BLOCK + blockspergrid = (size // THREADS_PER_BLOCK) + 1 + out = TensorData([0.0 for i in range(2)], (2,)) + out.to_cuda_() + jit_sum_practice[blockspergrid, threadsperblock]( + out.tuple()[0], a._tensor._storage, size + ) + return out + + +def tensor_reduce(fn): + """ + CUDA higher-order tensor reduce function. + + Args: + fn: reduction function maps two floats to float. + out (array): storage for `out` tensor. + out_shape (array): shape for `out` tensor. + out_strides (array): strides for `out` tensor. + out_size (array): size for `out` tensor. + a_storage (array): storage for `a` tensor. + a_shape (array): shape for `a` tensor. + a_strides (array): strides for `a` tensor. + reduce_dim (int): dimension to reduce out + + Returns: + None : Fills in `out` + """ + + def _reduce( + out, + out_shape, + out_strides, + out_size, + a_storage, + a_shape, + a_strides, + reduce_dim, + reduce_value, + ): + BLOCK_DIM = 1024 + reduce_size = a_shape[reduce_dim] + local_idx = numba.cuda.threadIdx.x + block_idx = numba.cuda.blockIdx.x + shared_block = numba.cuda.shared.array(BLOCK_DIM, numba.float64) + offset = 1 + + out_index = numba.cuda.local.array(MAX_DIMS, numba.int32) + to_index(block_idx, out_shape, out_index) + out_position = index_to_position(out_index, out_strides) + + if local_idx < reduce_size: + out_index[reduce_dim] = local_idx + shared_block[local_idx] = a_storage[index_to_position(out_index, a_strides)] + else: + shared_block[local_idx] = reduce_value + + while offset < BLOCK_DIM: + numba.cuda.syncthreads() + if local_idx % (offset * 2) == 0: + shared_block[local_idx] = fn( + shared_block[local_idx], shared_block[local_idx + offset] + ) + offset *= 2 + + numba.cuda.syncthreads() + if local_idx == 0: + out[out_position] = shared_block[local_idx] + + return cuda.jit()(_reduce) + + +def reduce(fn, start=0.0): + """ + Higher-order tensor reduce function. :: + + fn_reduce = reduce(fn) + out = fn_reduce(a, dim) + + Simple version :: + + for j: + out[1, j] = start + for i: + out[1, j] = fn(out[1, j], a[i, j]) + + + Args: + fn: function from two floats-to-float to apply + a (:class:`Tensor`): tensor to reduce over + dim (int): int of dim to reduce + + Returns: + :class:`Tensor` : new tensor + """ + f = tensor_reduce(cuda.jit(device=True)(fn)) + + def ret(a, dim): + out_shape = list(a.shape) + out_shape[dim] = (a.shape[dim] - 1) // 1024 + 1 + out_a = a.zeros(tuple(out_shape)) + + threadsperblock = 1024 + blockspergrid = out_a.size + f[blockspergrid, threadsperblock]( + *out_a.tuple(), out_a.size, *a.tuple(), dim, start + ) + + return out_a + + return ret + + +def _mm_practice(out, a, b, size): + """ + This is a practice square MM kernel to prepare for matmul. + + Given a storage `out` and two storage `a` and `b`. Where we know + both are shape [size, size] with strides [size, 1]. + + Size is always < 32. 
+ + Requirements: + + * All data must be first moved to shared memory. + * Only read each cell in `a` and `b` once. + * Only write to global memory once per kernel. + + Compute :: + + for i: + for j: + for k: + out[i, j] += a[i, k] * b[k, j] + + Args: + out (array): storage for `out` tensor. + a (array): storage for `a` tensor. + b (array): storage for `a` tensor. + size (int): size of the square + + """ + BLOCK_DIM = 32 + shared_a = numba.cuda.shared.array((BLOCK_DIM, BLOCK_DIM), numba.float64) + shared_b = numba.cuda.shared.array((BLOCK_DIM, BLOCK_DIM), numba.float64) + + y = numba.cuda.threadIdx.y + x = numba.cuda.threadIdx.x + if x < size and y < size: + shared_a[y, x] = a[y * size + x] + shared_b[y, x] = b[y * size + x] + else: + shared_a[y, x] = 0 + shared_b[y, x] = 0 + numba.cuda.syncthreads() + + if y < size and x < size: + temp = 0 + for val in range(size): + temp += shared_a[y, val] * shared_b[val, x] + out[y * size + x] = temp + + +jit_mm_practice = cuda.jit()(_mm_practice) + + +def mm_practice(a, b): + + (size, _) = a.shape + threadsperblock = (THREADS_PER_BLOCK, THREADS_PER_BLOCK) + blockspergrid = 1 + out = TensorData([0.0 for i in range(size * size)], (size, size)) + out.to_cuda_() + jit_mm_practice[blockspergrid, threadsperblock]( + out.tuple()[0], a._tensor._storage, b._tensor._storage, size + ) + return out + + +@cuda.jit() +def tensor_matrix_multiply( + out, + out_shape, + out_strides, + out_size, + a_storage, + a_shape, + a_strides, + b_storage, + b_shape, + b_strides, +): + """ + CUDA tensor matrix multiply function. + + Requirements: + + * All data must be first moved to shared memory. + * Only read each cell in `a` and `b` once. + * Only write to global memory once per kernel. + + Should work for any tensor shapes that broadcast as long as :: + + assert a_shape[-1] == b_shape[-2] + + Args: + out (array): storage for `out` tensor + out_shape (array): shape for `out` tensor + out_strides (array): strides for `out` tensor + out_size (array): size for `out` tensor. 
+ a_storage (array): storage for `a` tensor + a_shape (array): shape for `a` tensor + a_strides (array): strides for `a` tensor + b_storage (array): storage for `b` tensor + b_shape (array): shape for `b` tensor + b_strides (array): strides for `b` tensor + + Returns: + None : Fills in `out` + """ + a_batch_stride = a_strides[0] if a_shape[0] > 1 else 0 + b_batch_stride = b_strides[0] if b_shape[0] > 1 else 0 + BLOCK_DIM = 32 + shared_a = numba.cuda.shared.array((BLOCK_DIM, BLOCK_DIM), numba.float64) + shared_b = numba.cuda.shared.array((BLOCK_DIM, BLOCK_DIM), numba.float64) + + y = numba.cuda.threadIdx.y + x = numba.cuda.threadIdx.x + block_x = numba.cuda.blockIdx.x * BLOCK_DIM + block_y = numba.cuda.blockIdx.y * BLOCK_DIM + z = numba.cuda.blockIdx.z + + temp = 0 + for block_index in range((a_shape[-1] + (BLOCK_DIM - 1)) // BLOCK_DIM): + block_mid = block_index * BLOCK_DIM + if (block_mid + x) < a_shape[-1] and (block_y + y) < a_shape[-2]: + shared_a[y, x] = a_storage[ + z * a_batch_stride + + (block_mid + x) * a_strides[-1] + + (block_y + y) * a_strides[-2] + ] + else: + shared_a[y, x] = 0 + if (block_x + x) < b_shape[-1] and (block_mid + y) < b_shape[-2]: + shared_b[y, x] = b_storage[ + z * b_batch_stride + + (block_x + x) * b_strides[-1] + + (block_mid + y) * b_strides[-2] + ] + else: + shared_b[y, x] = 0 + numba.cuda.syncthreads() + + for val in range(BLOCK_DIM): + temp += shared_a[y, val] * shared_b[val, x] + + if (block_y + y) < out_shape[-2] and (block_x + x) < out_shape[-1]: + out[ + z * out_strides[0] + + (block_y + y) * out_strides[-2] + + (block_x + x) * out_strides[-1] + ] = temp + + +def matrix_multiply(a, b): + """ + Tensor matrix multiply + Should work for any tensor shapes that broadcast in the first n-2 dims and + have :: + assert a.shape[-1] == b.shape[-2] + + Args: + a (:class:`Tensor`): tensor a + b (:class:`Tensor`): tensor b + + Returns: + :class:`Tensor` : new tensor + """ + + # Make these always be a 3 dimensional multiply + both_2d = 0 + if len(a.shape) == 2: + a = a.contiguous().view(1, a.shape[0], a.shape[1]) + both_2d += 1 + if len(b.shape) == 2: + b = b.contiguous().view(1, b.shape[0], b.shape[1]) + both_2d += 1 + both_2d = both_2d == 2 + + ls = list(shape_broadcast(a.shape[:-2], b.shape[:-2])) + ls.append(a.shape[-2]) + ls.append(b.shape[-1]) + assert a.shape[-1] == b.shape[-2] + out = a.zeros(tuple(ls)) + + # One block per batch, extra rows, extra col + blockspergrid = ( + (out.shape[1] + (THREADS_PER_BLOCK - 1)) // THREADS_PER_BLOCK, + (out.shape[2] + (THREADS_PER_BLOCK - 1)) // THREADS_PER_BLOCK, + out.shape[0], + ) + threadsperblock = (THREADS_PER_BLOCK, THREADS_PER_BLOCK, 1) + + tensor_matrix_multiply[blockspergrid, threadsperblock]( + *out.tuple(), out.size, *a.tuple(), *b.tuple() + ) + + # Undo 3d if we added it. 
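    # If both inputs were originally 2-D, drop the temporary leading batch
    # dimension of size 1 so callers get a 2-D result back.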
+ if both_2d: + out = out.view(out.shape[1], out.shape[2]) + return out + + +class CudaOps: + map = map + zip = zip + reduce = reduce + matrix_multiply = matrix_multiply \ No newline at end of file diff --git a/src/datasets.py b/src/datasets.py new file mode 100644 index 0000000..0a2f9fd --- /dev/null +++ b/src/datasets.py @@ -0,0 +1,94 @@ +from dataclasses import dataclass +import random +import math + + +def make_pts(N): + X = [] + for i in range(N): + x_1 = random.random() + x_2 = random.random() + X.append((x_1, x_2)) + return X + + +@dataclass +class Graph: + N: int + X: list + y: list + + +def simple(N): + X = make_pts(N) + y = [] + for x_1, x_2 in X: + y1 = 1 if x_1 < 0.5 else 0 + y.append(y1) + return Graph(N, X, y) + + +def diag(N): + X = make_pts(N) + y = [] + for x_1, x_2 in X: + y1 = 1 if x_1 + x_2 < 0.5 else 0 + y.append(y1) + return Graph(N, X, y) + + +def split(N): + X = make_pts(N) + y = [] + for x_1, x_2 in X: + y1 = 1 if x_1 < 0.2 or x_1 > 0.8 else 0 + y.append(y1) + return Graph(N, X, y) + + +def xor(N): + X = make_pts(N) + y = [] + for x_1, x_2 in X: + y1 = 1 if ((x_1 < 0.5 and x_2 > 0.5) or (x_1 > 0.5 and x_2 < 0.5)) else 0 + y.append(y1) + return Graph(N, X, y) + + +def circle(N): + X = make_pts(N) + y = [] + for x_1, x_2 in X: + x1, x2 = (x_1 - 0.5, x_2 - 0.5) + y1 = 1 if x1 * x1 + x2 * x2 > 0.1 else 0 + y.append(y1) + return Graph(N, X, y) + + +def spiral(N): + def x(t): + return t * math.cos(t) / 20.0 + + def y(t): + return t * math.sin(t) / 20.0 + + X = [ + (x(10.0 * (float(i) / (N // 2))) + 0.5, y(10.0 * (float(i) / (N // 2))) + 0.5) + for i in range(5 + 0, 5 + N // 2) + ] + X = X + [ + (y(-10.0 * (float(i) / (N // 2))) + 0.5, x(-10.0 * (float(i) / (N // 2))) + 0.5) + for i in range(5 + 0, 5 + N // 2) + ] + y = [0] * (N // 2) + [1] * (N // 2) + return Graph(N, X, y) + + +datasets = { + "Simple": simple, + "Diag": diag, + "Split": split, + "Xor": xor, + "Circle": circle, + "Spiral": spiral, +} diff --git a/src/fast_conv.py b/src/fast_conv.py new file mode 100644 index 0000000..a012b26 --- /dev/null +++ b/src/fast_conv.py @@ -0,0 +1,333 @@ +import numpy as np +from .tensor_data import ( + to_index, + index_to_position, + broadcast_index, + MAX_DIMS, +) +from .tensor_functions import Function +from numba import njit, prange + + +# This code will JIT compile fast versions your tensor_data functions. +# If you get an error, read the docs for NUMBA as to what is allowed +# in these functions. +to_index = njit(inline="always")(to_index) +index_to_position = njit(inline="always")(index_to_position) +broadcast_index = njit(inline="always")(broadcast_index) + + +@njit(parallel=True) +def tensor_conv1d( + out, + out_shape, + out_strides, + out_size, + input, + input_shape, + input_strides, + weight, + weight_shape, + weight_strides, + reverse, +): + """ + 1D Convolution implementation. + + Given input tensor of + + `batch, in_channels, width` + + and weight tensor + + `out_channels, in_channels, k_width` + + Computes padded output of + + `batch, out_channels, width` + + `reverse` decides if weight is anchored left (False) or right. + (See diagrams) + + Args: + out (array): storage for `out` tensor. + out_shape (array): shape for `out` tensor. + out_strides (array): strides for `out` tensor. + out_size (int): size of the `out` tensor. + input (array): storage for `input` tensor. + input_shape (array): shape for `input` tensor. + input_strides (array): strides for `input` tensor. + weight (array): storage for `input` tensor. + weight_shape (array): shape for `input` tensor. 
+ weight_strides (array): strides for `input` tensor. + reverse (bool): anchor weight at left or right + """ + batch_, out_channels, out_width = out_shape + batch, in_channels, width = input_shape + out_channels_, in_channels_, kw = weight_shape + + assert ( + batch == batch_ + and in_channels == in_channels_ + and out_channels == out_channels_ + ) + s1 = input_strides + s2 = weight_strides + for out_i in prange(out_size): + out_index = np.zeros(3, dtype=np.int32) + to_index(out_i, out_shape, out_index) + current_batch, current_out_channels, current_width = out_index + val = 0 + for current_in_channels in prange(in_channels): + for current_kw in range(kw): + i = current_kw + if reverse: + i = kw - current_kw - 1 + w = weight[ + s2[0] * current_out_channels + + s2[1] * current_in_channels + + s2[2] * i + ] + inc = 0 + if reverse: + if current_width - i >= 0: + inc = input[ + current_batch * s1[0] + + current_in_channels * s1[1] + + (current_width - i) * s1[2] + ] + else: + if i + current_width < width: + inc = input[ + current_batch * s1[0] + + current_in_channels * s1[1] + + (i + current_width) * s1[2] + ] + val += w * inc + out[ + current_batch * out_strides[0] + + current_out_channels * out_strides[1] + + current_width * out_strides[2] + ] = val + + +class Conv1dFun(Function): + @staticmethod + def forward(ctx, input, weight): + """ + Compute a 1D Convolution + + Args: + ctx : Context + input (:class:`Tensor`) : batch x in_channel x h x w + weight (:class:`Tensor`) : out_channel x in_channel x kh x kw + + Returns: + (:class:`Tensor`) : batch x out_channel x h x w + """ + ctx.save_for_backward(input, weight) + batch, in_channels, w = input.shape + out_channels, in_channels2, kw = weight.shape + assert in_channels == in_channels2 + + # Run convolution + output = input.zeros((batch, out_channels, w)) + tensor_conv1d( + *output.tuple(), output.size, *input.tuple(), *weight.tuple(), False + ) + return output + + @staticmethod + def backward(ctx, grad_output): + input, weight = ctx.saved_values + batch, in_channels, w = input.shape + out_channels, in_channels, kw = weight.shape + grad_weight = grad_output.zeros((in_channels, out_channels, kw)) + new_input = input.permute(1, 0, 2) + new_grad_output = grad_output.permute(1, 0, 2) + tensor_conv1d( + *grad_weight.tuple(), + grad_weight.size, + *new_input.tuple(), + *new_grad_output.tuple(), + False, + ) + grad_weight = grad_weight.permute(1, 0, 2) + + grad_input = input.zeros((batch, in_channels, w)) + new_weight = weight.permute(1, 0, 2) + tensor_conv1d( + *grad_input.tuple(), + grad_input.size, + *grad_output.tuple(), + *new_weight.tuple(), + True, + ) + return grad_input, grad_weight + + +conv1d = Conv1dFun.apply + + +@njit(parallel=True, fastmath=True) +def tensor_conv2d( + out, + out_shape, + out_strides, + out_size, + input, + input_shape, + input_strides, + weight, + weight_shape, + weight_strides, + reverse, +): + """ + 2D Convolution implementation. + + Given input tensor of + + `batch, in_channels, height, width` + + and weight tensor + + `out_channels, in_channels, k_height, k_width` + + Computes padded output of + + `batch, out_channels, height, width` + + `Reverse` decides if weight is anchored top-left (False) or bottom-right. + (See diagrams) + + + Args: + out (array): storage for `out` tensor. + out_shape (array): shape for `out` tensor. + out_strides (array): strides for `out` tensor. + out_size (int): size of the `out` tensor. + input (array): storage for `input` tensor. + input_shape (array): shape for `input` tensor. 
+ input_strides (array): strides for `input` tensor. + weight (array): storage for `input` tensor. + weight_shape (array): shape for `input` tensor. + weight_strides (array): strides for `input` tensor. + reverse (bool): anchor weight at top-left or bottom-right + """ + batch_, out_channels, _, _ = out_shape + batch, in_channels, height, width = input_shape + out_channels_, in_channels_, kh, kw = weight_shape + + assert ( + batch == batch_ + and in_channels == in_channels_ + and out_channels == out_channels_ + ) + + s1 = input_strides + s2 = weight_strides + # inners + s10, s11, s12, s13 = s1[0], s1[1], s1[2], s1[3] + s20, s21, s22, s23 = s2[0], s2[1], s2[2], s2[3] + + for out_i in prange(out_size): + out_index = np.zeros(4, dtype=np.int32) + to_index(out_i, out_shape, out_index) + current_batch, current_out_channels, current_height, current_width = out_index + val = 0 + for current_in_channels in prange(in_channels): + for current_kh in range(kh): + for current_kw in range(kw): + i = current_kh + j = current_kw + if reverse: + j = kw - current_kw - 1 + i = kh - current_kh - 1 + w = weight[ + s20 * current_out_channels + + s21 * current_in_channels + + s22 * i + + s23 * j + ] + inc = 0 + if reverse: + if current_height - i >= 0 and current_width - j >= 0: + inc = input[ + current_batch * s10 + + current_in_channels * s11 + + (current_height - i) * s12 + + (current_width - j) * s13 + ] + else: + if i + current_height < height and j + current_width < width: + inc = input[ + current_batch * s10 + + current_in_channels * s11 + + (i + current_height) * s12 + + (j + current_width) * s13 + ] + val += w * inc + out[ + current_batch * out_strides[0] + + current_out_channels * out_strides[1] + + current_height * out_strides[2] + + current_width * out_strides[3] + ] = val + + +class Conv2dFun(Function): + @staticmethod + def forward(ctx, input, weight): + """ + Compute a 2D Convolution + + Args: + ctx : Context + input (:class:`Tensor`) : batch x in_channel x h x w + weight (:class:`Tensor`) : out_channel x in_channel x kh x kw + + Returns: + (:class:`Tensor`) : batch x out_channel x h x w + """ + ctx.save_for_backward(input, weight) + batch, in_channels, h, w = input.shape + out_channels, in_channels2, kh, kw = weight.shape + assert in_channels == in_channels2 + output = input.zeros((batch, out_channels, h, w)) + tensor_conv2d( + *output.tuple(), output.size, *input.tuple(), *weight.tuple(), False + ) + return output + + @staticmethod + def backward(ctx, grad_output): + input, weight = ctx.saved_values + batch, in_channels, h, w = input.shape + out_channels, in_channels, kh, kw = weight.shape + + grad_weight = grad_output.zeros((in_channels, out_channels, kh, kw)) + new_input = input.permute(1, 0, 2, 3) + new_grad_output = grad_output.permute(1, 0, 2, 3) + tensor_conv2d( + *grad_weight.tuple(), + grad_weight.size, + *new_input.tuple(), + *new_grad_output.tuple(), + False, + ) + grad_weight = grad_weight.permute(1, 0, 2, 3) + + grad_input = input.zeros((batch, in_channels, h, w)) + new_weight = weight.permute(1, 0, 2, 3) + tensor_conv2d( + *grad_input.tuple(), + grad_input.size, + *grad_output.tuple(), + *new_weight.tuple(), + True, + ) + return grad_input, grad_weight + + +conv2d = Conv2dFun.apply diff --git a/src/fast_ops.py b/src/fast_ops.py new file mode 100644 index 0000000..3576611 --- /dev/null +++ b/src/fast_ops.py @@ -0,0 +1,364 @@ +import numpy as np +from numba import njit, prange + +from .tensor_data import (MAX_DIMS, broadcast_index, index_to_position, + shape_broadcast, to_index) + +# 
TIP: Use `NUMBA_DISABLE_JIT=1 pytest tests/ -m task3_1` to run these tests without JIT. + +# This code will JIT compile fast versions your tensor_data functions. +# If you get an error, read the docs for NUMBA as to what is allowed +# in these functions. +to_index = njit(inline="always")(to_index) +index_to_position = njit(inline="always")(index_to_position) +broadcast_index = njit(inline="always")(broadcast_index) + + +def tensor_map(fn): + """ + NUMBA low_level tensor_map function. See `tensor_ops.py` for description. + + Optimizations: + + * Main loop in parallel + * All indices use numpy buffers + * When `out` and `in` are stride-aligned, avoid indexing + + Args: + fn: function mappings floats-to-floats to apply. + out (array): storage for out tensor. + out_shape (array): shape for out tensor. + out_strides (array): strides for out tensor. + in_storage (array): storage for in tensor. + in_shape (array): shape for in tensor. + in_strides (array): strides for in tensor. + + Returns: + None : Fills in `out` + """ + + def _map(out, out_shape, out_strides, in_storage, in_shape, in_strides): + in_index = np.zeros(len(in_shape), dtype=int) + out_index = np.zeros(len(out_shape), dtype=int) + + for i in prange(len(out)): + to_index(i, out_shape, out_index) + broadcast_index(out_index, out_shape, in_shape, in_index) + + x = in_storage[index_to_position(in_index, in_strides)] + index = index_to_position(out_index, out_strides) + out[index] = fn(x) + + return njit(parallel=True)(_map) + + +def map(fn): + """ + Higher-order tensor map function :: + + fn_map = map(fn) + fn_map(a, out) + out + + Args: + fn: function from float-to-float to apply. + a (:class:`Tensor`): tensor to map over + out (:class:`Tensor`): optional, tensor data to fill in, + should broadcast with `a` + + Returns: + :class:`Tensor` : new tensor + """ + + # This line JIT compiles your tensor_map + f = tensor_map(njit()(fn)) + + def ret(a, out=None): + if out is None: + out = a.zeros(a.shape) + f(*out.tuple(), *a.tuple()) + return out + + return ret + + +def tensor_zip(fn): + """ + NUMBA higher-order tensor zip function. See `tensor_ops.py` for description. + + + Optimizations: + + * Main loop in parallel + * All indices use numpy buffers + * When `out`, `a`, `b` are stride-aligned, avoid indexing + + Args: + fn: function maps two floats to float to apply. + out (array): storage for `out` tensor. + out_shape (array): shape for `out` tensor. + out_strides (array): strides for `out` tensor. + a_storage (array): storage for `a` tensor. + a_shape (array): shape for `a` tensor. + a_strides (array): strides for `a` tensor. + b_storage (array): storage for `b` tensor. + b_shape (array): shape for `b` tensor. + b_strides (array): strides for `b` tensor. + + Returns: + None : Fills in `out` + """ + + def _zip( + out, + out_shape, + out_strides, + a_storage, + a_shape, + a_strides, + b_storage, + b_shape, + b_strides, + ): + out_index = np.zeros(len(out_shape), dtype=int) + a_in = np.zeros(len(a_shape), dtype=int) + b_in = np.zeros(len(b_shape), dtype=int) + + for i in prange(len(out)): + to_index(i, out_shape, out_index) + index = index_to_position(out_index, out_strides) + + broadcast_index(out_index, out_shape, a_shape, a_in) + a = a_storage[index_to_position(a_in, a_strides)] + + broadcast_index(out_index, out_shape, b_shape, b_in) + b = b_storage[index_to_position(b_in, b_strides)] + + out[index] = fn(a, b) + + return njit(parallel=True)(_zip) + + +def zip(fn): + """ + Higher-order tensor zip function. 
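
    The result has the broadcasted shape of `a` and `b`, as computed by
    `shape_broadcast`. Usage::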
+ + fn_zip = zip(fn) + c = fn_zip(a, b) + + Args: + fn: function from two floats-to-float to apply + a (:class:`Tensor`): tensor to zip over + b (:class:`Tensor`): tensor to zip over + + Returns: + :class:`Tensor` : new tensor data + """ + f = tensor_zip(njit()(fn)) + + def ret(a, b): + c_shape = shape_broadcast(a.shape, b.shape) + out = a.zeros(c_shape) + f(*out.tuple(), *a.tuple(), *b.tuple()) + return out + + return ret + + +def tensor_reduce(fn): + """ + NUMBA higher-order tensor reduce function. See `tensor_ops.py` for description. + + Optimizations: + + * Main loop in parallel + * All indices use numpy buffers + * Inner-loop should not call any functions or write non-local variables + + Args: + fn: reduction function mapping two floats to float. + out (array): storage for `out` tensor. + out_shape (array): shape for `out` tensor. + out_strides (array): strides for `out` tensor. + a_storage (array): storage for `a` tensor. + a_shape (array): shape for `a` tensor. + a_strides (array): strides for `a` tensor. + reduce_dim (int): dimension to reduce out + + Returns: + None : Fills in `out` + + """ + + def _reduce(out, out_shape, out_strides, a_storage, a_shape, a_strides, reduce_dim): + out_index = np.zeros(len(a_shape), dtype=int) + for i in prange(len(out)): + to_index(i, out_shape, out_index) + index = index_to_position(out_index, out_strides) + + for j in range(a_shape[reduce_dim]): + a_index = out_index.copy() + a_index[reduce_dim] = j + + out[index] = fn( + a_storage[index_to_position(a_index, a_strides)], out[index] + ) + + return njit(parallel=True)(_reduce) + + +def reduce(fn, start=0.0): + """ + Higher-order tensor reduce function. :: + + fn_reduce = reduce(fn) + out = fn_reduce(a, dim) + + + Args: + fn: function from two floats-to-float to apply + a (:class:`Tensor`): tensor to reduce over + dim (int): int of dim to reduce + + Returns: + :class:`Tensor` : new tensor + """ + + f = tensor_reduce(njit()(fn)) + + def ret(a, dim): + out_shape = list(a.shape) + out_shape[dim] = 1 + + # Other values when not sum. + out = a.zeros(tuple(out_shape)) + out._tensor._storage[:] = start + + f(*out.tuple(), *a.tuple(), dim) + return out + + return ret + + +@njit(parallel=True, fastmath=True) +def tensor_matrix_multiply( + out, + out_shape, + out_strides, + a_storage, + a_shape, + a_strides, + b_storage, + b_shape, + b_strides, +): + """ + NUMBA tensor matrix multiply function. + + Should work for any tensor shapes that broadcast as long as :: + + assert a_shape[-1] == b_shape[-2] + + Optimizations: + + * Outer loop in parallel + * No index buffers or function calls + * Inner loop should have no global writes, 1 multiply. 
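
    Batch broadcasting note: when `a` or `b` has a batch dimension of size 1,
    its batch stride is treated as 0 below, so the same matrix is reused for
    every batch of the output.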
+ + + Args: + out (array): storage for `out` tensor + out_shape (array): shape for `out` tensor + out_strides (array): strides for `out` tensor + a_storage (array): storage for `a` tensor + a_shape (array): shape for `a` tensor + a_strides (array): strides for `a` tensor + b_storage (array): storage for `b` tensor + b_shape (array): shape for `b` tensor + b_strides (array): strides for `b` tensor + + Returns: + None : Fills in `out` + """ + a_batch_stride = a_strides[0] if a_shape[0] > 1 else 0 + b_batch_stride = b_strides[0] if b_shape[0] > 1 else 0 + + assert a_shape[-1] == b_shape[-2] + + for i in prange(len(out)): + out_0 = i // (out_shape[-1] * out_shape[-2]) + out_1 = (i % (out_shape[-1] * out_shape[-2])) // out_shape[-1] + out_2 = i % out_shape[-1] + + out_i = ( + out_0 * out_strides[0] + + out_1 * out_strides[1] + + out_2 * out_strides[2] + ) + + a_start = out_0 * a_batch_stride + out_1 * a_strides[1] + b_start = out_0 * b_batch_stride + out_2 * a_strides[2] + + temp = 0 + for position in range(a_shape[-1]): + temp += ( + a_storage[a_start + position * a_strides[2]] * + b_storage[b_start + position * b_strides[1]] + ) + out[out_i] = temp + + + +def matrix_multiply(a, b): + """ + Batched tensor matrix multiply :: + + for n: + for i: + for j: + for k: + out[n, i, j] += a[n, i, k] * b[n, k, j] + + Where n indicates an optional broadcasted batched dimension. + + Should work for tensor shapes of 3 dims :: + + assert a.shape[-1] == b.shape[-2] + + Args: + a (:class:`Tensor`): tensor data a + b (:class:`Tensor`): tensor data b + + Returns: + :class:`Tensor` : new tensor data + """ + + # Make these always be a 3 dimensional multiply + both_2d = 0 + if len(a.shape) == 2: + a = a.contiguous().view(1, a.shape[0], a.shape[1]) + both_2d += 1 + if len(b.shape) == 2: + b = b.contiguous().view(1, b.shape[0], b.shape[1]) + both_2d += 1 + both_2d = both_2d == 2 + + ls = list(shape_broadcast(a.shape[:-2], b.shape[:-2])) + ls.append(a.shape[-2]) + ls.append(b.shape[-1]) + assert a.shape[-1] == b.shape[-2] + out = a.zeros(tuple(ls)) + + tensor_matrix_multiply(*out.tuple(), *a.tuple(), *b.tuple()) + + # Undo 3d if we added it. + if both_2d: + out = out.view(out.shape[1], out.shape[2]) + return out + + +class FastOps: + map = map + zip = zip + reduce = reduce + matrix_multiply = matrix_multiply diff --git a/src/module.py b/src/module.py new file mode 100644 index 0000000..4d0adbc --- /dev/null +++ b/src/module.py @@ -0,0 +1,162 @@ +class Module: + """ + Modules form a tree that store parameters and other + submodules. They make up the basis of neural network stacks. + + Attributes: + _modules (dict of name x :class:`Module`): Storage of the child modules + _parameters (dict of name x :class:`Parameter`): Storage of the module's parameters + training (bool): Whether the module is in training mode or evaluation mode + + """ + + def __init__(self): + self._modules = {} + self._parameters = {} + self.training = True + + def modules(self): + "Return the direct child modules of this module." + return self.__dict__["_modules"].values() + + def train(self): + "Set the mode of this module and all descendent modules to `train`." + + def _train(module): + module.training = True + for m in module.modules(): + _train(m) + + _train(self) + + def eval(self): + "Set the mode of this module and all descendent modules to `eval`." 
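        # Recursively flip `training` to False on this module and every
        # descendant module (mirrors `train` above).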
+ + def _eval(module): + module.training = False + for m in module.modules(): + _eval(m) + + _eval(self) + + def named_parameters(self): + """ + Collect all the parameters of this module and its descendents. + + + Returns: + list of pairs: Contains the name and :class:`Parameter` of each ancestor parameter. + """ + + def _named_parameters(module, prefix=""): + for name, param in module._parameters.items(): + yield prefix + name, param + for name, module in module._modules.items(): + yield from _named_parameters(module, prefix + name + ".") + + return list(_named_parameters(self)) + + def parameters(self): + "Enumerate over all the parameters of this module and its descendents." + + # def _parameters(module): + # for param in module._parameters.values(): + # yield param + # for module in module._modules.values(): + # yield from _parameters(module) + + return [param for _, param in self.named_parameters()] + + def add_parameter(self, k, v): + """ + Manually add a parameter. Useful helper for scalar parameters. + + Args: + k (str): Local name of the parameter. + v (value): Value for the parameter. + + Returns: + Parameter: Newly created parameter. + """ + val = Parameter(v, k) + self.__dict__["_parameters"][k] = val + return val + + def __setattr__(self, key, val): + if isinstance(val, Parameter): + self.__dict__["_parameters"][key] = val + elif isinstance(val, Module): + self.__dict__["_modules"][key] = val + else: + super().__setattr__(key, val) + + def __getattr__(self, key): + if key in self.__dict__["_parameters"]: + return self.__dict__["_parameters"][key] + + if key in self.__dict__["_modules"]: + return self.__dict__["_modules"][key] + + def __call__(self, *args, **kwargs): + return self.forward(*args, **kwargs) + + def forward(self): + assert False, "Not Implemented" + + def __repr__(self): + def _addindent(s_, numSpaces): + s = s_.split("\n") + if len(s) == 1: + return s_ + first = s.pop(0) + s = [(numSpaces * " ") + line for line in s] + s = "\n".join(s) + s = first + "\n" + s + return s + + child_lines = [] + + for key, module in self._modules.items(): + mod_str = repr(module) + mod_str = _addindent(mod_str, 2) + child_lines.append("(" + key + "): " + mod_str) + lines = child_lines + + main_str = self.__class__.__name__ + "(" + if lines: + # simple one-liner info, which most builtin Modules will use + main_str += "\n " + "\n ".join(lines) + "\n" + + main_str += ")" + return main_str + + +class Parameter: + """ + A Parameter is a special container stored in a :class:`Module`. + + It is designed to hold a :class:`Variable`, but we allow it to hold + any value for testing. + """ + + def __init__(self, x=None, name=None): + self.value = x + self.name = name + if hasattr(x, "requires_grad_"): + self.value.requires_grad_(True) + if self.name: + self.value.name = self.name + + def update(self, x): + "Update the parameter value." 
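        # Re-wrap the new value and, when it supports autodiff, re-enable
        # gradient tracking and restore the parameter's name (mirrors __init__).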
+ self.value = x + if hasattr(x, "requires_grad_"): + self.value.requires_grad_(True) + if self.name: + self.value.name = self.name + + def __repr__(self): + return repr(self.value) + + def __str__(self): + return str(self.value) diff --git a/src/modules.py b/src/modules.py new file mode 100644 index 0000000..de7519e --- /dev/null +++ b/src/modules.py @@ -0,0 +1,60 @@ +# from .tensor import rand +# from .functions import matmul, conv2d +# from .module import Module, Parameter + + +# class tLinear(Module): +# def __init__(self, in_size, out_size): +# super().__init__() +# self.weights = Parameter(rand((in_size, out_size))) +# self.bias = Parameter(rand((out_size,))) +# self.out_size = out_size + +# def forward(self, x): +# batch, in_size = x.shape +# return ( +# self.weights.value.view(1, in_size, self.out_size) +# * x.view(batch, in_size, 1) +# ).sum(1).view(batch, self.out_size) + self.bias.value.view(1, self.out_size) + + +# class tLinear2(Module): +# def __init__(self, in_size, out_size): +# super().__init__() +# self.weights = Parameter(rand((in_size, out_size))) +# self.bias = Parameter(rand((out_size,))) +# self.out_size = out_size + +# def forward(self, x): +# batch, in_size = x.shape +# return matmul(x, self.weights.value) + self.bias.value.view(1, self.out_size) + + +# class Dropout(Module): +# def __init__(self, rate): +# super().__init__() +# self.rate = rate + +# def forward(self, x): +# return (rand(x.shape) / 2 + 0.5 < self.rate) * x + + +# class Conv2d(Module): +# def __init__(self, in_features, out_features, size): +# super().__init__() +# size1 = [size[0], size[1], in_features, out_features] +# size2 = [size[0], size[1], out_features] +# self.weights = Parameter(rand(size1)) +# self.bias = Parameter(rand(size2)) + +# def forward(self, x): +# return conv2d(x, self.weights.value, self.bias.value) + + +# # class MaxPool2d(Module): +# # def __init__(self, in_features, out_features, size): +# # super().__init__() + + +# # def forward(self, x): +# # return conv2d(x, self.weights.value, self.bias.value) diff --git a/src/nn.py b/src/nn.py new file mode 100644 index 0000000..c49e97c --- /dev/null +++ b/src/nn.py @@ -0,0 +1,175 @@ +from .fast_ops import FastOps +from .tensor_functions import rand, Function +from . import operators + + +def tile(input, kernel): + """ + Reshape an image tensor for 2D pooling + + Args: + input (:class:`Tensor`): batch x channel x height x width + kernel ( pair of ints ): height x width of pooling + + Returns: + (:class:`Tensor`, int, int) : Tensor of size batch x channel x new_height x new_width x (kernel_height * kernel_width) as well as the new_height and new_width value. 
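
    Example (illustrative sketch of the documented shape contract; `rand`
    from tensor_functions.py is assumed here)::

        x = rand((2, 3, 4, 6))
        tiled, new_h, new_w = tile(x, (2, 3))
        # documented result: tiled.shape == (2, 3, 2, 2, 6),
        # new_h == 2 and new_w == 2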
+ """ + + batch, channel, height, width = input.shape + kh, kw = kernel + assert height % kh == 0 + assert width % kw == 0 + + new_height = int(height / kh) + new_width = int(width / kw) + + input = ( + input.contiguous() + .view(batch, channel, height, new_width, kw) + .permute(0, 1, 3, 2, 4) + .contiguous() + .view(batch, channel, new_width, new_height, kh * kw) + ) + + return (input, new_height, new_width) + + +def avgpool2d(input, kernel): + """ + Tiled average pooling 2D + + Args: + input (:class:`Tensor`): batch x channel x height x width + kernel ( pair of ints ): height x width of pooling + + Returns: + :class:`Tensor` : pooled tensor + """ + batch, channel, height, width = input.shape + input = ( + tile(input, kernel).mean(4).view(batch, channel, input.shape[2], input.shape[3]) + ) + return input + + +max_reduce = FastOps.reduce(operators.max, -1e9) + + +def argmax(input, dim): + """ + Compute the argmax as a 1-hot tensor. + + Args: + input (:class:`Tensor`): input tensor + dim (int): dimension to apply argmax + + + Returns: + :class:`Tensor` : tensor with 1 on highest cell in dim, 0 otherwise + + """ + out = max_reduce(input, [dim]) + return out == input + + +class Max(Function): + @staticmethod + def forward(ctx, input, dim): + "Forward of max should be max reduction" + res = max_reduce(input, [dim]) + ctx.save_for_backward(input, dim) + return res + + @staticmethod + def backward(ctx, grad_output): + "Backward of max should be argmax (see above)" + input, dim = ctx.saved_values + res = argmax(input, dim) + return grad_output * res + + +max = Max.apply + + +def softmax(input, dim): + r""" + Compute the softmax as a tensor. + + .. math:: + + z_i = \frac{e^{x_i}}{\sum_i e^{x_i}} + + Args: + input (:class:`Tensor`): input tensor + dim (int): dimension to apply softmax + + Returns: + :class:`Tensor` : softmax tensor + """ + + input = input.exp() + t = input.sum(dim) + input = input / t + return input + + +def logsoftmax(input, dim): + r""" + Compute the log of the softmax as a tensor. + + .. math:: + + z_i = x_i - \log \sum_i e^{x_i} + + See https://en.wikipedia.org/wiki/LogSumExp#log-sum-exp_trick_for_log-domain_calculations + + Args: + input (:class:`Tensor`): input tensor + dim (int): dimension to apply log-softmax + + Returns: + :class:`Tensor` : log of softmax tensor + """ + + t = input.exp() + t = t.sum(dim) + t = t.log() + return input - t + + +def maxpool2d(input, kernel): + """ + Tiled max pooling 2D + + Args: + input (:class:`Tensor`): batch x channel x height x width + kernel ( pair of ints ): height x width of pooling + + Returns: + :class:`Tensor` : pooled tensor + """ + batch, channel, height, width = input.shape + input = tile(input, kernel) + input = max(input, 4) + input = input.view(batch, channel, int(height / kernel[0]), int(width / kernel[1])) + return input + + +def dropout(input, rate, ignore=False): + """ + Dropout positions based on random noise. + + Args: + input (:class:`Tensor`): input tensor + rate (float): probability [0, 1) of dropping out each position + ignore (bool): skip dropout, i.e. do nothing at all + + Returns: + :class:`Tensor` : tensor with random positions dropped out + """ + if not ignore: + rand_tensor = rand(input.shape) + rand_drop = rand_tensor > rate + return input * rand_drop + else: + return input diff --git a/src/operators.py b/src/operators.py new file mode 100644 index 0000000..110c883 --- /dev/null +++ b/src/operators.py @@ -0,0 +1,212 @@ +""" +Collection of the core mathematical operators used throughout the code base. 
+""" + +from glob import glob +import math + +# ## Task 0.1 + +# Implementation of a prelude of elementary functions. + + +def mul(x, y): + ":math:`f(x, y) = x * y`" + return x * y + + +def id(x): + ":math:`f(x) = x`" + return x + + +def add(x, y): + ":math:`f(x, y) = x + y`" + return x + y + + +def neg(x): + ":math:`f(x) = -x`" + return float(-x) + + +def lt(x, y): + ":math:`f(x) =` 1.0 if x is less than y else 0.0" + return 1.0 if x < y else 0.0 + + +def eq(x, y): + ":math:`f(x) =` 1.0 if x is equal to y else 0.0" + return 1.0 if x == y else 0.0 + + +def max(x, y): + ":math:`f(x) =` x if x is greater than y else y" + return x if x > y else y + + +def is_close(x, y): + ":math:`f(x) = |x - y| < 1e-2`" + return abs(x - y) < 1e-2 + + +def sigmoid(x): + r""" + :math:`f(x) = \frac{1.0}{(1.0 + e^{-x})}` + + (See ``_ .) + + Calculate as + + :math:`f(x) = \frac{1.0}{(1.0 + e^{-x})}` if x >=0 else :math:`\frac{e^x}{(1.0 + e^{x})}` + + for stability. + + Args: + x (float): input + + Returns: + float : sigmoid value + """ + return 1.0 / (1.0 + math.exp(-x)) + + +def relu(x): + """ + :math:`f(x) =` x if x is greater than 0, else 0 + + (See ``_ .) + + Args: + x (float): input + + Returns: + float : relu value + """ + return (x > 0) * x + + +EPS = 1e-6 + + +def log(x): + ":math:`f(x) = log(x)`" + return math.log(x + EPS) + + +def exp(x): + ":math:`f(x) = e^{x}`" + return math.exp(x) + + +def log_back(x, d): + r"If :math:`f = log` as above, compute d :math:`d \times f'(x)`" + return d / x + + +def inv(x): + ":math:`f(x) = 1/x`" + return 1 / x + + +def inv_back(x, d): + r"If :math:`f(x) = 1/x` compute d :math:`d \times f'(x)`" + return -d / x ** 2 + + +def relu_back(x, d): + r"If :math:`f = relu` compute d :math:`d \times f'(x)`" + return d if x > 0 else 0.0 + + +def sigmoid_back(x, d): + return d * exp(-x) / ((1 + exp(-x)) ** 2) + + +# ## Task 0.3 + +# Small library of elementary higher-order functions for practice. + + +def map(fn): + """ + Higher-order map. + + .. image:: figs/Ops/maplist.png + + + See ``_ + + Args: + fn (one-arg function): Function from one value to one value. + + Returns: + function : A function that takes a list, applies `fn` to each element, and returns a + new list + """ + return lambda list: [fn(x) for x in list] + + +def negList(ls): + "Use :func:`map` and :func:`neg` to negate each element in `ls`" + return map(neg)(ls) + + +def zipWith(fn): + """ + Higher-order zipwith (or map2). + + .. image:: figs/Ops/ziplist.png + + See ``_ + + Args: + fn (two-arg function): combine two values + + Returns: + function : takes two equally sized lists `ls1` and `ls2`, produce a new list by + applying fn(x, y) on each pair of elements. + + """ + return lambda ls1, ls2: (fn(x, y) for x, y in zip(ls1, ls2)) + + +def addLists(ls1, ls2): + "Add the elements of `ls1` and `ls2` using :func:`zipWith` and :func:`add`" + return zipWith(add)(ls1, ls2) + + +def reduce(fn, start): + r""" + Higher-order reduce. + + .. image:: figs/Ops/reducelist.png + + + Args: + fn (two-arg function): combine two values + start (float): start value :math:`x_0` + + Returns: + function : function that takes a list `ls` of elements + :math:`x_1 \ldots x_n` and computes the reduction :math:`fn(x_3, fn(x_2, + fn(x_1, x_0)))` + """ + + def _reduce(ls, fn, start): + iterator = iter(ls) + for i in iterator: + start = fn(start, i) + return start + + return lambda ls: _reduce(ls, fn, start) + + +def sum(ls): + "Sum up a list using :func:`reduce` and :func:`add`." 
+ return reduce(add, 0)(ls) + + +def prod(ls): + "Product of a list using :func:`reduce` and :func:`mul`." + return reduce(mul, 1)(ls) diff --git a/src/optim.py b/src/optim.py new file mode 100644 index 0000000..fc3a444 --- /dev/null +++ b/src/optim.py @@ -0,0 +1,19 @@ +class Optimizer: + def __init__(self, parameters): + self.parameters = parameters + + +class SGD(Optimizer): + def __init__(self, parameters, lr=1.0): + super().__init__(parameters) + self.lr = lr + + def zero_grad(self): + for p in self.parameters: + if p.value.derivative is not None: + p.value._derivative = None + + def step(self): + for p in self.parameters: + if p.value.derivative is not None: + p.update(p.value - self.lr * p.value.derivative) diff --git a/src/scalar.py b/src/scalar.py new file mode 100644 index 0000000..85313c4 --- /dev/null +++ b/src/scalar.py @@ -0,0 +1,314 @@ +from .autodiff import FunctionBase, Variable, History +from . import operators +import numpy as np + + +# ## Task 1.1 +# Central Difference calculation + + +def central_difference(f, *vals, arg=0, epsilon=1e-6): + r""" + Computes an approximation to the derivative of `f` with respect to one arg. + + See :doc:`derivative` or https://en.wikipedia.org/wiki/Finite_difference for more details. + + Args: + f : arbitrary function from n-scalar args to one value + *vals (list of floats): n-float values :math:`x_0 \ldots x_{n-1}` + arg (int): the number :math:`i` of the arg to compute the derivative + epsilon (float): a small constant + + Returns: + float : An approximation of :math:`f'_i(x_0, \ldots, x_{n-1})` + """ + + def _modify_arg(e): + return [x + e if i == arg else x for i, x in enumerate(vals)] + + return (f(*_modify_arg(epsilon)) - f(*_modify_arg(-epsilon))) / (2 * epsilon) + + +# ## Task 1.2 and 1.4 +# Scalar Forward and Backward + + +class Scalar(Variable): + """ + A reimplementation of scalar values for autodifferentiation + tracking. Scalar Variables behave as close as possible to standard + Python numbers while also tracking the operations that led to the + number's creation. They can only be manipulated by + :class:`ScalarFunction`. + + Attributes: + data (float): The wrapped scalar value. + """ + + def __init__(self, v, back=History(), name=None): + super().__init__(back, name=name) + self.data = float(v) + + def __repr__(self): + return "Scalar(%f)" % self.data + + def __mul__(self, b): + return Mul.apply(self, b) + + def __truediv__(self, b): + return Mul.apply(self, Inv.apply(b)) + + def __rtruediv__(self, b): + return Mul.apply(b, Inv.apply(self)) + + def __add__(self, b): + return Add.apply(self, b) + + def __bool__(self): + return bool(self.data) + + def __lt__(self, b): + return LT.apply(self, b) + + def __gt__(self, b): + return LT.apply(b, self) + + def __eq__(self, b): + return EQ.apply(self, b) + + def __sub__(self, b): + return Add.apply(self, Neg.apply(b)) + + def __neg__(self): + return Neg.apply(self) + + def log(self): + return Log.apply(self) + + def exp(self): + return Exp.apply(self) + + def sigmoid(self): + return Sigmoid.apply(self) + + def relu(self): + return ReLU.apply(self) + + def get_data(self): + "Returns the raw float value" + return self.data + + +class ScalarFunction(FunctionBase): + """ + A wrapper for a mathematical function that processes and produces + Scalar variables. + + This is a static class and is never instantiated. We use `class` + here to group together the `forward` and `backward` code. 
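+
+    For example, with the `Mul` subclass defined below,
+    ``Mul.apply(Scalar(2.0), Scalar(3.0))`` runs `forward` to produce a new
+    ``Scalar(6.0)`` and records both inputs so that `backward` can later
+    return the pair of partials ``(3.0 * d_out, 2.0 * d_out)``.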
+ """ + + @staticmethod + def forward(ctx, *inputs): + r""" + Forward call, compute :math:`f(x_0 \ldots x_{n-1})`. + + Args: + ctx (:class:`Context`): A container object to save + any information that may be needed + for the call to backward. + *inputs (list of floats): n-float values :math:`x_0 \ldots x_{n-1}`. + + Should return float the computation of the function :math:`f`. + """ + pass # pragma: no cover + + @staticmethod + def backward(ctx, d_out): + r""" + Backward call, computes :math:`f'_{x_i}(x_0 \ldots x_{n-1}) \times d_{out}`. + + Args: + ctx (Context): A container object holding any information saved during in the corresponding `forward` call. + d_out (float): :math:`d_out` term in the chain rule. + + Should return the computation of the derivative function + :math:`f'_{x_i}` for each input :math:`x_i` times `d_out`. + + """ + pass # pragma: no cover + + # Checks. + variable = Scalar + data_type = float + + @staticmethod + def data(a): + return a + + +# Examples +class Add(ScalarFunction): + "Addition function :math:`f(x, y) = x + y`" + + @staticmethod + def forward(ctx, a, b): + return a + b + + @staticmethod + def backward(ctx, d_output): + return d_output, d_output + + +class Log(ScalarFunction): + "Log function :math:`f(x) = log(x)`" + + @staticmethod + def forward(ctx, a): + ctx.save_for_backward(a) + return operators.log(a) + + @staticmethod + def backward(ctx, d_output): + a = ctx.saved_values + return operators.log_back(a, d_output) + + +# To implement. + + +class Mul(ScalarFunction): + "Multiplication function" + + @staticmethod + def forward(ctx, a, b): + ctx.save_for_backward(a, b) + return a * b + + @staticmethod + def backward(ctx, d_output): + a, b = ctx.saved_values + return b * d_output, a * d_output + + +class Inv(ScalarFunction): + "Inverse function" + + @staticmethod + def forward(ctx, a): + ctx.save_for_backward(a) + return operators.inv(a) + + @staticmethod + def backward(ctx, d_output): + a = ctx.saved_values + return operators.inv_back(a, d_output) + + +class Neg(ScalarFunction): + "Negation function" + + @staticmethod + def forward(ctx, a): + return operators.neg(a) + + @staticmethod + def backward(ctx, d_output): + return operators.neg(d_output) + + +class Sigmoid(ScalarFunction): + "Sigmoid function" + + @staticmethod + def forward(ctx, a): + ctx.save_for_backward(a) + return operators.sigmoid(a) + + @staticmethod + def backward(ctx, d_output): + a = ctx.saved_values + return operators.sigmoid_back(a, d_output) + + +class ReLU(ScalarFunction): + "ReLU function" + + @staticmethod + def forward(ctx, a): + ctx.save_for_backward(a) + return operators.relu(a) + + @staticmethod + def backward(ctx, d_output): + a = ctx.saved_values + return operators.relu_back(a, d_output) + + +class Exp(ScalarFunction): + "Exp function" + + @staticmethod + def forward(ctx, a): + ctx.save_for_backward(a) + return operators.exp(a) + + @staticmethod + def backward(ctx, d_output): + a = ctx.saved_values + return operators.exp(a) * d_output + + +class LT(ScalarFunction): + "Less-than function :math:`f(x) =` 1.0 if x is less than y else 0.0" + + @staticmethod + def forward(ctx, a, b): + return operators.lt(a, b) + + @staticmethod + def backward(ctx, d_output): + return 0.0, 0.0 + + +class EQ(ScalarFunction): + "Equal function :math:`f(x) =` 1.0 if x is equal to y else 0.0" + + @staticmethod + def forward(ctx, a, b): + return operators.eq(a, b) + + @staticmethod + def backward(ctx, d_output): + return 0.0, 0.0 + + +def derivative_check(f, *scalars): + """ + Checks that autodiff 
works on a python function. + Asserts False if derivative is incorrect. + + Parameters: + f (function) : function from n-scalars to 1-scalar. + *scalars (list of :class:`Scalar`) : n input scalar values. + """ + for x in scalars: + x.requires_grad_(True) + out = f(*scalars) + out.backward() + + vals = [v for v in scalars] + err_msg = """ +Derivative check at arguments f(%s) and received derivative f'=%f for argument %d, +but was expecting derivative f'=%f from central difference.""" + for i, x in enumerate(scalars): + check = central_difference(f, *vals, arg=i) + print(str([x.data for x in scalars]), x.derivative, i, check) + np.testing.assert_allclose( + x.derivative, + check.data, + 1e-2, + 1e-2, + err_msg=err_msg + % (str([x.data for x in scalars]), x.derivative, i, check.data), + ) diff --git a/src/scalar_modules.py b/src/scalar_modules.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tensor.py b/src/tensor.py new file mode 100644 index 0000000..a1513b6 --- /dev/null +++ b/src/tensor.py @@ -0,0 +1,239 @@ +""" +Implementation of the core Tensor object for autodifferentiation. +""" + +from .autodiff import Variable +from .tensor_data import TensorData +from . import operators + + +# This class is very similar to Scalar so we implemented it for you. + + +class Tensor(Variable): + """ + Tensor is a generalization of Scalar in that it is a Variable that + handles multidimensional arrays. + + Attributes: + + _tensor (:class:`TensorData`) : the tensor data storage + backend : backend object used to implement tensor math (see `tensor_functions.py`) + """ + + def __init__(self, v, back=None, name=None, backend=None): + assert isinstance(v, TensorData) + assert backend is not None + super().__init__(back, name=name) + self._tensor = v + self.backend = backend + + def to_numpy(self): + """ + Returns: + narray : converted to numpy array + """ + return self.contiguous()._tensor._storage.reshape(self.shape) + + # Properties + @property + def shape(self): + """ + Returns: + tuple : shape of the tensor + """ + return self._tensor.shape + + @property + def size(self): + """ + Returns: + int : size of the tensor + """ + return self._tensor.size + + @property + def dims(self): + """ + Returns: + int : dimensionality of the tensor + """ + return self._tensor.dims + + def _ensure_tensor(self, b): + "Turns a python number into a tensor with the same backend." 
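+        # Promoting plain Python numbers here is what lets the arithmetic
+        # dunders below accept constants, e.g. `t + 1.0`, by wrapping the
+        # constant as a one-element Tensor on the same backend.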
+ if isinstance(b, (int, float)): + b = Tensor.make([b], (1,), backend=self.backend) + else: + b._type_(self.backend) + return b + + # Functions + def __add__(self, b): + return self.backend.Add.apply(self, self._ensure_tensor(b)) + + def __sub__(self, b): + return self.backend.Add.apply(self, -self._ensure_tensor(b)) + + def __mul__(self, b): + return self.backend.Mul.apply(self, self._ensure_tensor(b)) + + def __truediv__(self, b): + return self.backend.Mul.apply( + self, self.backend.Inv.apply(self._ensure_tensor(b)) + ) + + def __rtruediv__(self, b): + return self.backend.Mul.apply( + self._ensure_tensor(b), self.backend.Inv.apply(self) + ) + + def __matmul__(self, b): + "Not used until Module 3" + return self.backend.MatMul.apply(self, b) + + def __lt__(self, b): + return self.backend.LT.apply(self, self._ensure_tensor(b)) + + def __eq__(self, b): + return self.backend.EQ.apply(self, self._ensure_tensor(b)) + + def __gt__(self, b): + return self.backend.LT.apply(self._ensure_tensor(b), self) + + def __neg__(self): + return self.backend.Neg.apply(self) + + def all(self, dim=None): + return self.backend.All.apply(self, dim) + + def is_close(self, y): + return self.backend.IsClose.apply(self, y) + + def sigmoid(self): + return self.backend.Sigmoid.apply(self) + + def relu(self): + return self.backend.ReLU.apply(self) + + def log(self): + return self.backend.Log.apply(self) + + def item(self): + assert self.size == 1 + return self[0] + + def exp(self): + return self.backend.Exp.apply(self) + + def sum(self, dim=None): + "Compute the sum over dimension `dim`" + return self.backend.Sum.apply(self, dim) + + def mean(self, dim=None): + "Compute the mean over dimension `dim`" + if dim is not None: + return self.sum(dim) / self.shape[dim] + else: + return self.sum() / self.size + + def permute(self, *order): + "Permute tensor dimensions to *order" + return self.backend.Permute.apply(self, order) + + def view(self, *shape): + "Change the shape of the tensor to a new shape with the same size" + return self.backend.View.apply(self, shape) + + def contiguous(self): + "Return a contiguous tensor with the same data" + return self.backend.Copy.apply(self) + + def __repr__(self): + return self._tensor.to_string() + + def __getitem__(self, key): + return self._tensor.get(key) + + def __setitem__(self, key, val): + self._tensor.set(key, val) + + @property + def grad(self): + return self.derivative + + # Internal methods used for autodiff. + def _type_(self, backend): + self.backend = backend + if backend.cuda: # pragma: no cover + self._tensor.to_cuda_() + + def _new(self, tensor_data): + return Tensor(tensor_data, backend=self.backend) + + @staticmethod + def make(storage, shape, strides=None, backend=None): + "Create a new tensor from data" + return Tensor(TensorData(storage, shape, strides), backend=backend) + + def expand(self, other): + """ + Method used to allow for backprop over broadcasting. + This method is called when the output of `backward` + is a different size than the input of `forward`. + + + Parameters: + other (class:`Tensor`): backward tensor (must broadcast with self) + + Returns: + Expanded version of `other` with the right derivatives + + """ + + # Case 1: Both the same shape. + if self.shape == other.shape: + return other + + # Case 2: Backward is a smaller than self. Broadcast up. 
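+        # First map `other` into the fully broadcasted shape; if that already
+        # matches `self` we are done, otherwise Case 3 sums out the extra
+        # dimensions.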
+ true_shape = TensorData.shape_broadcast(self.shape, other.shape) + buf = self.zeros(true_shape) + self.backend._id_map(other, out=buf) + if self.shape == true_shape: + return buf + + # Case 3: Still different, reduce extra dims. + out = buf + orig_shape = [1] * (len(out.shape) - len(self.shape)) + list(self.shape) + for dim, shape in enumerate(out.shape): + if orig_shape[dim] == 1 and shape != 1: + out = self.backend._add_reduce(out, dim) + assert out.size == self.size, f"{out.shape} {self.shape}" + # START CODE CHANGE (2021) + return Tensor.make(out._tensor._storage, self.shape, backend=self.backend) + # END CODE CHANGE (2021) + + def zeros(self, shape=None): + def zero(shape): + return Tensor.make( + [0] * int(operators.prod(shape)), shape, backend=self.backend + ) + + if shape is None: + out = zero(self.shape) + else: + out = zero(shape) + out._type_(self.backend) + return out + + def tuple(self): + return self._tensor.tuple() + + def get_data(self): + return Tensor(self._tensor, backend=self.backend) + + def backward(self, grad_output=None): + if grad_output is None: + assert self.shape == (1,), "Must provide grad_output if non-scalar" + grad_output = Tensor.make([1.0], (1,), backend=self.backend) + super().backward(grad_output) diff --git a/src/tensor_data.py b/src/tensor_data.py new file mode 100644 index 0000000..0fb5c2c --- /dev/null +++ b/src/tensor_data.py @@ -0,0 +1,243 @@ +import random +from .operators import prod +from numpy import array, float64, ndarray +import numba + +MAX_DIMS = 32 + + +class IndexingError(RuntimeError): + "Exception raised for indexing errors." + pass + + +def index_to_position(index, strides): + """ + Converts a multidimensional tensor `index` into a single-dimensional position in + storage based on strides. + + Args: + index (array-like): index tuple of ints + strides (array-like): tensor strides + + Returns: + int : position in storage + """ + + assert len(index) == len(strides) + return sum([i * s for i, s in zip(index, strides)]) + + +def to_index(ordinal, shape, out_index): + """ + Convert an `ordinal` to an index in the `shape`. + Should ensure that enumerating position 0 ... size of a + tensor produces every index exactly once. It + may not be the inverse of `index_to_position`. + + Args: + ordinal (int): ordinal position to convert. + shape (tuple): tensor shape. + out_index (array): the index corresponding to position. + + Returns: + None : Fills in `out_index`. + + """ + + for i, s in enumerate(shape): + product = prod(shape[i:]) + divisor = product / s + index = int(ordinal // divisor) + + ordinal -= index * divisor + out_index[i] = index + + +def broadcast_index(big_index, big_shape, shape, out_index): + """ + Convert a `big_index` into `big_shape` to a smaller `out_index` + into `shape` following broadcasting rules. In this case + it may be larger or with more dimensions than the `shape` + given. Additional dimensions may need to be mapped to 0 or + removed. + + Args: + big_index (array-like): multidimensional index of bigger tensor + big_shape (array-like): tensor shape of bigger tensor + shape (array-like): tensor shape of smaller tensor + out_index (array-like): multidimensional index of smaller tensor + + Returns: + None : Fills in `out_index`. + """ + + for i in range(len(shape)): + out_index[i] = big_index[len(big_shape) - len(shape) + i] if shape[i] > 1 else 0 + + +def shape_broadcast(shape1, shape2): + """ + Broadcast two shapes to create a new union shape. 
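+
+    For example, broadcasting ``(2, 1, 3)`` with ``(4, 3)`` gives
+    ``(2, 4, 3)``: shapes are aligned on their trailing dimensions and
+    size-1 dimensions are stretched to match.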
+ + Args: + shape1 (tuple) : first shape + shape2 (tuple) : second shape + + Returns: + tuple : broadcasted shape + + Raises: + IndexingError : if cannot broadcast + """ + + tuple = () + large, small = (shape1, shape2) if len(shape1) > len(shape2) else (shape2, shape1) + + for i in range(len(large)): + diff = len(large) - len(small) + l = large[i] + s = l if i < diff else small[i - diff] + l, s = (l, s) if l > s else (s, l) + + if l % s != 0: + raise IndexingError() + else: + tuple += (l,) + + return tuple + + +def strides_from_shape(shape): + layout = [1] + offset = 1 + for s in reversed(shape): + layout.append(s * offset) + offset = s * offset + return tuple(reversed(layout[:-1])) + + +class TensorData: + def __init__(self, storage, shape, strides=None): + if isinstance(storage, ndarray): + self._storage = storage + else: + self._storage = array(storage, dtype=float64) + + if strides is None: + strides = strides_from_shape(shape) + + assert isinstance(strides, tuple), "Strides must be tuple" + assert isinstance(shape, tuple), "Shape must be tuple" + if len(strides) != len(shape): + raise IndexingError(f"Len of strides {strides} must match {shape}.") + self._strides = array(strides) + self._shape = array(shape) + self.strides = strides + self.dims = len(strides) + self.size = int(prod(shape)) + self.shape = shape + assert len(self._storage) == self.size + + def to_cuda_(self): # pragma: no cover + if not numba.cuda.is_cuda_array(self._storage): + self._storage = numba.cuda.to_device(self._storage) + + def is_contiguous(self): + """ + Check that the layout is contiguous, i.e. outer dimensions have bigger strides than inner dimensions. + + Returns: + bool : True if contiguous + """ + last = 1e9 + for stride in self._strides: + if stride > last: + return False + last = stride + return True + + @staticmethod + def shape_broadcast(shape_a, shape_b): + return shape_broadcast(shape_a, shape_b) + + def index(self, index): + if isinstance(index, int): + index = array([index]) + if isinstance(index, tuple): + index = array(index) + + # Check for errors + if index.shape[0] != len(self.shape): + raise IndexingError(f"Index {index} must be size of {self.shape}.") + for i, ind in enumerate(index): + if ind >= self.shape[i]: + raise IndexingError(f"Index {index} out of range {self.shape}.") + if ind < 0: + raise IndexingError(f"Negative indexing for {index} not supported.") + + # Call fast indexing. + return index_to_position(array(index), self._strides) + + def indices(self): + lshape = array(self.shape) + out_index = array(self.shape) + for i in range(self.size): + to_index(i, lshape, out_index) + yield tuple(out_index) + + def sample(self): + return tuple((random.randint(0, s - 1) for s in self.shape)) + + def get(self, key): + return self._storage[self.index(key)] + + def set(self, key, val): + self._storage[self.index(key)] = val + + def tuple(self): + return (self._storage, self._shape, self._strides) + + def permute(self, *order): + """ + Permute the dimensions of the tensor. + + Args: + order (list): a permutation of the dimensions + + Returns: + :class:`TensorData`: a new TensorData with the same storage and a new dimension order. + """ + assert list(sorted(order)) == list( + range(len(self.shape)) + ), f"Must give a position to each dimension. 
Shape: {self.shape} Order: {order}" + + return TensorData( + self._storage, + tuple(self.shape[x] for x in order), + tuple(self.strides[x] for x in order), + ) + + def to_string(self): + s = "" + for index in self.indices(): + l = "" + for i in range(len(index) - 1, -1, -1): + if index[i] == 0: + l = "\n%s[" % ("\t" * i) + l + else: + break + s += l + v = self.get(index) + s += f"{v:3.2f}" + l = "" + for i in range(len(index) - 1, -1, -1): + if index[i] == self.shape[i] - 1: + l += "]" + else: + break + if l: + s += l + else: + s += " " + return s diff --git a/src/tensor_functions.py b/src/tensor_functions.py new file mode 100644 index 0000000..8e4a534 --- /dev/null +++ b/src/tensor_functions.py @@ -0,0 +1,410 @@ +""" +Implementation of the autodifferentiation Functions for Tensor. +""" + + +from .autodiff import FunctionBase +from .tensor_ops import TensorOps +import numpy as np +from . import operators +from .tensor import Tensor +import random + + +# Constructors +class Function(FunctionBase): + data_type = Tensor + + @staticmethod + def variable(data, back): + return Tensor(data[0], back, backend=data[1]) + + @staticmethod + def data(a): + return (a._tensor, a.backend) + + +def make_tensor_backend(tensor_ops, is_cuda=False): + """ + Dynamically construct a tensor backend based on a `tensor_ops` object + that implements map, zip, and reduce higher-order functions. + + Args: + tensor_ops (:class:`TensorOps`) : tensor operations object see `tensor_ops.py` + is_cuda (bool) : is the operations object CUDA / GPU based + + Returns : + backend : a collection of tensor functions + + """ + # Maps + neg_map = tensor_ops.map(operators.neg) + sigmoid_map = tensor_ops.map(operators.sigmoid) + relu_map = tensor_ops.map(operators.relu) + log_map = tensor_ops.map(operators.log) + exp_map = tensor_ops.map(operators.exp) + id_map = tensor_ops.map(operators.id) + inv_map = tensor_ops.map(operators.inv) + + # Zips + add_zip = tensor_ops.zip(operators.add) + mul_zip = tensor_ops.zip(operators.mul) + lt_zip = tensor_ops.zip(operators.lt) + eq_zip = tensor_ops.zip(operators.eq) + is_close_zip = tensor_ops.zip(operators.is_close) + relu_back_zip = tensor_ops.zip(operators.relu_back) + log_back_zip = tensor_ops.zip(operators.log_back) + inv_back_zip = tensor_ops.zip(operators.inv_back) + + # Reduce + add_reduce = tensor_ops.reduce(operators.add, 0.0) + mul_reduce = tensor_ops.reduce(operators.mul, 1.0) + + class Backend: + cuda = is_cuda + _id_map = id_map + _add_reduce = add_reduce + + class Neg(Function): + @staticmethod + def forward(ctx, t1): + return neg_map(t1) + + @staticmethod + def backward(ctx, grad_output): + return neg_map(grad_output) + + class Inv(Function): + @staticmethod + def forward(ctx, t1): + ctx.save_for_backward(t1) + return inv_map(t1) + + @staticmethod + def backward(ctx, grad_output): + t1 = ctx.saved_values + return inv_back_zip(t1, grad_output) + + class Add(Function): + @staticmethod + def forward(ctx, t1, t2): + return add_zip(t1, t2) + + @staticmethod + def backward(ctx, grad_output): + return grad_output, grad_output + + class Mul(Function): + @staticmethod + def forward(ctx, a, b): + ctx.save_for_backward(a, b) + return mul_zip(a, b) + + @staticmethod + def backward(ctx, grad_output): + a, b = ctx.saved_values + return b * grad_output, a * grad_output + + class Sigmoid(Function): + @staticmethod + def forward(ctx, a): + ctx.save_for_backward(a) + return sigmoid_map(a) + + @staticmethod + def backward(ctx, grad_output): + a = ctx.saved_values + return add_zip( + 
mul_zip(grad_output, sigmoid_map(a)), + neg_map( + mul_zip(grad_output, mul_zip(sigmoid_map(a), sigmoid_map(a))) + ), + ) + + class ReLU(Function): + @staticmethod + def forward(ctx, a): + ctx.save_for_backward(a) + return relu_map(a) + + @staticmethod + def backward(ctx, grad_output): + a = ctx.saved_values + return relu_back_zip(a, grad_output) + + class Log(Function): + @staticmethod + def forward(ctx, a): + ctx.save_for_backward(a) + return log_map(a) + + @staticmethod + def backward(ctx, grad_output): + a = ctx.saved_values + return log_back_zip(a, grad_output) + + class Exp(Function): + @staticmethod + def forward(ctx, a): + ctx.save_for_backward(a) + return exp_map(a) + + @staticmethod + def backward(ctx, grad_output): + a = ctx.saved_values + return mul_zip(exp_map(a), grad_output) + + class Sum(Function): + @staticmethod + def forward(ctx, a, dim): + ctx.save_for_backward(a.shape, dim) + if dim is not None: + return add_reduce(a, dim) + else: + return add_reduce( + a.contiguous().view(int(operators.prod(a.shape))), 0 + ) + + @staticmethod + def backward(ctx, grad_output): + a_shape, dim = ctx.saved_values + if dim is None: + out = grad_output.zeros(a_shape) + out._tensor._storage[:] = grad_output[0] + return out + else: + return grad_output + + class All(Function): + @staticmethod + def forward(ctx, a, dim): + if dim is not None: + return mul_reduce(a, dim) + else: + return mul_reduce( + a.contiguous().view(int(operators.prod(a.shape))), 0 + ) + + class LT(Function): + @staticmethod + def forward(ctx, a, b): + return lt_zip(a, b) + + @staticmethod + def backward(ctx, grad_output): + return ( + grad_output.zeros(grad_output.shape), + grad_output.zeros(grad_output.shape), + ) + + class EQ(Function): + @staticmethod + def forward(ctx, a, b): + return eq_zip(a, b) + + @staticmethod + def backward(ctx, grad_output): + return ( + grad_output.zeros(grad_output.shape), + grad_output.zeros(grad_output.shape), + ) + + class IsClose(Function): + @staticmethod + def forward(ctx, a, b): + return is_close_zip(a, b) + + class Permute(Function): + @staticmethod + def forward(ctx, a, order): + ctx.save_for_backward(a, order) + tens_store = a._tensor.permute(*order) + return Tensor.make( + tens_store._storage, + tens_store.shape, + tens_store.strides, + backend=a.backend, + ) + + @staticmethod + def backward(ctx, grad_output): + a, order = ctx.saved_values + return Tensor.make( + grad_output._tensor._storage, + a._tensor.shape, + a._tensor.strides, + backend=grad_output.backend, + ) + + class View(Function): + @staticmethod + def forward(ctx, a, shape): + ctx.save_for_backward(a.shape) + assert a._tensor.is_contiguous(), "Must be contiguous to view" + return Tensor.make(a._tensor._storage, shape, backend=a.backend) + + @staticmethod + def backward(ctx, grad_output): + original = ctx.saved_values + return Tensor.make( + grad_output._tensor._storage, original, backend=grad_output.backend + ) + + class Copy(Function): + @staticmethod + def forward(ctx, a): + return id_map(a) + + @staticmethod + def backward(ctx, grad_output): + return grad_output + + class MatMul(Function): + @staticmethod + def forward(ctx, t1, t2): + ctx.save_for_backward(t1, t2) + return tensor_ops.matrix_multiply(t1, t2) + + @staticmethod + def backward(ctx, grad_output): + t1, t2 = ctx.saved_values + + def transpose(a): + order = list(range(a.dims)) + order[-2], order[-1] = order[-1], order[-2] + return a._new(a._tensor.permute(*order)) + + return ( + tensor_ops.matrix_multiply(grad_output, transpose(t2)), + 
tensor_ops.matrix_multiply(transpose(t1), grad_output), + ) + + return Backend + + +TensorFunctions = make_tensor_backend(TensorOps) + + +# Helpers for Constructing tensors +def zeros(shape, backend=TensorFunctions): + """ + Produce a zero tensor of size `shape`. + + Args: + shape (tuple): shape of tensor + backend (:class:`Backend`): tensor backend + + Returns: + :class:`Tensor` : new tensor + """ + return Tensor.make([0] * int(operators.prod(shape)), shape, backend=backend) + + +def rand(shape, backend=TensorFunctions, requires_grad=False): + """ + Produce a random tensor of size `shape`. + + Args: + shape (tuple): shape of tensor + backend (:class:`Backend`): tensor backend + requires_grad (bool): turn on autodifferentiation + + Returns: + :class:`Tensor` : new tensor + """ + vals = [random.random() for _ in range(int(operators.prod(shape)))] + tensor = Tensor.make(vals, shape, backend=backend) + tensor.requires_grad_(requires_grad) + return tensor + + +def _tensor(ls, shape=None, backend=TensorFunctions, requires_grad=False): + """ + Produce a tensor with data ls and shape `shape`. + + Args: + ls (list): data for tensor + shape (tuple): shape of tensor + backend (:class:`Backend`): tensor backend + requires_grad (bool): turn on autodifferentiation + + Returns: + :class:`Tensor` : new tensor + """ + tensor = Tensor.make(ls, shape, backend=backend) + tensor.requires_grad_(requires_grad) + return tensor + + +def tensor(ls, backend=TensorFunctions, requires_grad=False): + """ + Produce a tensor with data and shape from ls + + Args: + ls (list): data for tensor + backend (:class:`Backend`): tensor backend + requires_grad (bool): turn on autodifferentiation + + Returns: + :class:`Tensor` : new tensor + """ + + def shape(ls): + if isinstance(ls, (list, tuple)): + return [len(ls)] + shape(ls[0]) + else: + return [] + + def flatten(ls): + if isinstance(ls, (list, tuple)): + return [y for x in ls for y in flatten(x)] + else: + return [ls] + + cur = flatten(ls) + shape = shape(ls) + return _tensor(cur, tuple(shape), backend=backend, requires_grad=requires_grad) + + +# Gradient check for tensors + + +def grad_central_difference(f, *vals, arg=0, epsilon=1e-6, ind=None): + x = vals[arg] + up = zeros(x.shape) + up[ind] = epsilon + vals1 = [x if j != arg else x + up for j, x in enumerate(vals)] + vals2 = [x if j != arg else x - up for j, x in enumerate(vals)] + delta = f(*vals1).sum() - f(*vals2).sum() + + return delta[0] / (2.0 * epsilon) + + +def grad_check(f, *vals): + for x in vals: + x.requires_grad_(True) + x.zero_grad_() + random.seed(10) + out = f(*vals) + out.sum().backward() + err_msg = """ + +Gradient check error for function %s. + +Input %s + +Received derivative %f for argument %d and index %s, +but was expecting derivative %f from central difference. + +""" + + for i, x in enumerate(vals): + ind = x._tensor.sample() + check = grad_central_difference(f, *vals, arg=i, ind=ind) + np.testing.assert_allclose( + x.grad[ind], + check, + 1e-2, + 1e-2, + err_msg=err_msg % (f, vals, x.grad[ind], i, ind, check), + ) diff --git a/src/tensor_ops.py b/src/tensor_ops.py new file mode 100644 index 0000000..5479269 --- /dev/null +++ b/src/tensor_ops.py @@ -0,0 +1,284 @@ +import numpy as np +from .tensor_data import ( + to_index, + index_to_position, + broadcast_index, + shape_broadcast, + MAX_DIMS, +) + + +def tensor_map(fn): + """ + Low-level implementation of tensor map between + tensors with *possibly different strides*. 
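+
+    The strides are what make this work for permuted or broadcast views:
+    each output position is converted to a multidimensional index, mapped
+    back into the input via `broadcast_index`, and both indices are turned
+    into storage offsets with `index_to_position`.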
+ + Simple version: + + * Fill in the `out` array by applying `fn` to each + value of `in_storage` assuming `out_shape` and `in_shape` + are the same size. + + Broadcasted version: + + * Fill in the `out` array by applying `fn` to each + value of `in_storage` assuming `out_shape` and `in_shape` + broadcast. (`in_shape` must be smaller than `out_shape`). + + Args: + fn: function from float-to-float to apply + out (array): storage for out tensor + out_shape (array): shape for out tensor + out_strides (array): strides for out tensor + in_storage (array): storage for in tensor + in_shape (array): shape for in tensor + in_strides (array): strides for in tensor + + Returns: + None : Fills in `out` + """ + + def _map(out, out_shape, out_strides, in_storage, in_shape, in_strides): + in_index = np.zeros(len(in_shape), dtype=int) + out_index = np.zeros(len(out_shape), dtype=int) + + for i in range(len(out)): + to_index(i, out_shape, out_index) + broadcast_index(out_index, out_shape, in_shape, in_index) + + x = in_storage[index_to_position(in_index, in_strides)] + index = index_to_position(out_index, out_strides) + out[index] = fn(x) + + return _map + + +def map(fn): + """ + Higher-order tensor map function :: + + fn_map = map(fn) + fn_map(a, out) + out + + Simple version:: + + for i: + for j: + out[i, j] = fn(a[i, j]) + + Broadcasted version (`a` might be smaller than `out`) :: + + for i: + for j: + out[i, j] = fn(a[i, 0]) + + Args: + fn: function from float-to-float to apply. + a (:class:`TensorData`): tensor to map over + out (:class:`TensorData`): optional, tensor data to fill in, + should broadcast with `a` + + Returns: + :class:`TensorData` : new tensor data + """ + + f = tensor_map(fn) + + def ret(a, out=None): + if out is None: + out = a.zeros(a.shape) + f(*out.tuple(), *a.tuple()) + return out + + return ret + + +def tensor_zip(fn): + """ + Low-level implementation of tensor zip between + tensors with *possibly different strides*. + + Simple version: + + * Fill in the `out` array by applying `fn` to each + value of `a_storage` and `b_storage` assuming `out_shape` + and `a_shape` are the same size. + + Broadcasted version: + + * Fill in the `out` array by applying `fn` to each + value of `a_storage` and `b_storage` assuming `a_shape` + and `b_shape` broadcast to `out_shape`. 
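+
+    For example, zipping ``operators.add`` over tensors of shape ``(2, 1)``
+    and ``(1, 3)`` fills an output of shape ``(2, 3)``, with each operand
+    read through `broadcast_index`.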
+ + Args: + fn: function mapping two floats to float to apply + out (array): storage for `out` tensor + out_shape (array): shape for `out` tensor + out_strides (array): strides for `out` tensor + a_storage (array): storage for `a` tensor + a_shape (array): shape for `a` tensor + a_strides (array): strides for `a` tensor + b_storage (array): storage for `b` tensor + b_shape (array): shape for `b` tensor + b_strides (array): strides for `b` tensor + + Returns: + None : Fills in `out` + """ + + def _zip( + out, + out_shape, + out_strides, + a_storage, + a_shape, + a_strides, + b_storage, + b_shape, + b_strides, + ): + out_index = np.zeros(len(out_shape), dtype=int) + a_in = np.zeros(len(a_shape), dtype=int) + b_in = np.zeros(len(b_shape), dtype=int) + + for i in range(len(out)): + to_index(i, out_shape, out_index) + index = index_to_position(out_index, out_strides) + + broadcast_index(out_index, out_shape, a_shape, a_in) + a = a_storage[index_to_position(a_in, a_strides)] + + broadcast_index(out_index, out_shape, b_shape, b_in) + b = b_storage[index_to_position(b_in, b_strides)] + + out[index] = fn(a, b) + + return _zip + + +def zip(fn): + """ + Higher-order tensor zip function :: + + fn_zip = zip(fn) + out = fn_zip(a, b) + + Simple version :: + + for i: + for j: + out[i, j] = fn(a[i, j], b[i, j]) + + Broadcasted version (`a` and `b` might be smaller than `out`) :: + + for i: + for j: + out[i, j] = fn(a[i, 0], b[0, j]) + + + Args: + fn: function from two floats-to-float to apply + a (:class:`TensorData`): tensor to zip over + b (:class:`TensorData`): tensor to zip over + + Returns: + :class:`TensorData` : new tensor data + """ + + f = tensor_zip(fn) + + def ret(a, b): + if a.shape != b.shape: + c_shape = shape_broadcast(a.shape, b.shape) + else: + c_shape = a.shape + out = a.zeros(c_shape) + f(*out.tuple(), *a.tuple(), *b.tuple()) + return out + + return ret + + +def tensor_reduce(fn): + """ + Low-level implementation of tensor reduce. + + * `out_shape` will be the same as `a_shape` + except with `reduce_dim` turned to size `1` + + Args: + fn: reduction function mapping two floats to float + out (array): storage for `out` tensor + out_shape (array): shape for `out` tensor + out_strides (array): strides for `out` tensor + a_storage (array): storage for `a` tensor + a_shape (array): shape for `a` tensor + a_strides (array): strides for `a` tensor + reduce_dim (int): dimension to reduce out + + Returns: + None : Fills in `out` + """ + + def _reduce(out, out_shape, out_strides, a_storage, a_shape, a_strides, reduce_dim): + out_index = np.zeros(len(a_shape), dtype=int) + for i in range(len(out)): + to_index(i, out_shape, out_index) + index = index_to_position(out_index, out_strides) + + for j in range(a_shape[reduce_dim]): + a_index = out_index.copy() + a_index[reduce_dim] = j + + out[index] = fn( + a_storage[index_to_position(a_index, a_strides)], out[index] + ) + + return _reduce + + +def reduce(fn, start=0.0): + """ + Higher-order tensor reduce function. :: + + fn_reduce = reduce(fn) + out = fn_reduce(a, dim) + + Simple version :: + + for j: + out[1, j] = start + for i: + out[1, j] = fn(out[1, j], a[i, j]) + + + Args: + fn: function from two floats-to-float to apply + a (:class:`TensorData`): tensor to reduce over + dim (int): int of dim to reduce + + Returns: + :class:`TensorData` : new tensor + """ + + f = tensor_reduce(fn) + + def ret(a, dim): + out_shape = list(a.shape) + out_shape[dim] = 1 + + # Other values when not sum. 
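+        # Pre-fill the output with `start` (e.g. 1.0 for a product
+        # reduction) so the fold in `tensor_reduce` begins from the correct
+        # identity value.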
+ out = a.zeros(tuple(out_shape)) + out._tensor._storage[:] = start + + f(*out.tuple(), *a.tuple(), dim) + return out + + return ret + + +class TensorOps: + map = map + zip = zip + reduce = reduce diff --git a/src/testing.py b/src/testing.py new file mode 100644 index 0000000..8a1545b --- /dev/null +++ b/src/testing.py @@ -0,0 +1,192 @@ +import minitorch.operators as operators + + +class MathTest: + @staticmethod + def neg(a): + "Negate the argument" + return -a + + @staticmethod + def addConstant(a): + "Add contant to the argument" + return 5 + a + + @staticmethod + def square(a): + "Manual square" + return a * a + + @staticmethod + def cube(a): + "Manual cube" + return a * a * a + + @staticmethod + def subConstant(a): + "Subtract a constant from the argument" + return a - 5 + + @staticmethod + def multConstant(a): + "Multiply a constant to the argument" + return 5 * a + + @staticmethod + def div(a): + "Divide by a constant" + return a / 5 + + @staticmethod + def inv(a): + "Invert after adding" + return operators.inv(a + 3.5) + + @staticmethod + def sig(a): + "Apply sigmoid" + return operators.sigmoid(a) + + @staticmethod + def log(a): + "Apply log to a large value" + return operators.log(a + 100000) + + @staticmethod + def relu(a): + "Apply relu" + return operators.relu(a + 5.5) + + @staticmethod + def exp(a): + "Apply exp to a smaller value" + return operators.exp(a - 200) + + @staticmethod + def explog(a): + return operators.log(a + 100000) + operators.exp(a - 200) + + @staticmethod + def add2(a, b): + "Add two arguments" + return a + b + + @staticmethod + def mul2(a, b): + "Mul two arguments" + return a * b + + @staticmethod + def div2(a, b): + "Divide two arguments" + return a / (b + 5.5) + + @staticmethod + def gt2(a, b): + return operators.lt(b, a + 1.2) + + @staticmethod + def lt2(a, b): + return operators.lt(a + 1.2, b) + + @staticmethod + def eq2(a, b): + return operators.eq(a, (b + 5.5)) + + @staticmethod + def sum_red(a): + return operators.sum(a) + + @staticmethod + def mean_red(a): + return operators.sum(a) / float(len(a)) + + @staticmethod + def mean_full_red(a): + return operators.sum(a) / float(len(a)) + + @staticmethod + def complex(a): + return ( + operators.log( + operators.sigmoid( + operators.relu(operators.relu(a * 10 + 7) * 6 + 5) * 10 + ) + ) + / 50 + ) + + @classmethod + def _tests(cls): + """ + Returns a list of all the math tests. 
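+
+        Functions are grouped by naming convention: names ending in ``2``
+        are two-argument tests, names ending in ``red`` are reduction
+        tests, and everything else is treated as a one-argument test.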
+ """ + one_arg = [] + two_arg = [] + red_arg = [] + for k in dir(MathTest): + if callable(getattr(MathTest, k)) and not k.startswith("_"): + base_fn = getattr(MathTest, k) + scalar_fn = getattr(cls, k) + tup = (k, base_fn, scalar_fn) + if k.endswith("2"): + two_arg.append(tup) + elif k.endswith("red"): + red_arg.append(tup) + else: + one_arg.append(tup) + return one_arg, two_arg, red_arg + + +class MathTestVariable(MathTest): + @staticmethod + def inv(a): + return 1.0 / (a + 3.5) + + @staticmethod + def sig(x): + return x.sigmoid() + + @staticmethod + def log(x): + return (x + 100000).log() + + @staticmethod + def relu(x): + return (x + 5.5).relu() + + @staticmethod + def exp(a): + return (a - 200).exp() + + @staticmethod + def explog(a): + return (a + 100000).log() + (a - 200).exp() + + @staticmethod + def sum_red(a): + return a.sum(0) + + @staticmethod + def mean_red(a): + return a.mean(0) + + @staticmethod + def mean_full_red(a): + return a.mean() + + @staticmethod + def eq2(a, b): + return a == (b + 5.5) + + @staticmethod + def gt2(a, b): + return a + 1.2 > b + + @staticmethod + def lt2(a, b): + return a + 1.2 < b + + @staticmethod + def complex(a): + return (((a * 10 + 7).relu() * 6 + 5).relu() * 10).sigmoid().log() / 50