{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "source": [ "from graphviz import Digraph\n", "\n", "def trace(root):\n", " #Builds a set of all nodes and edges in a graph\n", " nodes, edges = set(), set()\n", " def build(v):\n", " if v not in nodes:\n", " nodes.add(v)\n", " for child in v._prev:\n", " edges.add((child, v))\n", " build(child)\n", " build(root)\n", " return nodes, edges\n", "\n", "def draw_dot(root):\n", " dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) #LR == Left to Right\n", "\n", " nodes, edges = trace(root)\n", " for n in nodes:\n", " uid = str(id(n))\n", " #For any value in the graph, create a rectangular ('record') node for it\n", " dot.node(name = uid, label = \"{ %s | data %.4f | grad %.4f }\" % ( n.label, n.data, n.grad), shape='record')\n", " if n._op:\n", " #If this value is a result of some operation, then create an op node for it\n", " dot.node(name = uid + n._op, label=n._op)\n", " #and connect this node to it\n", " dot.edge(uid + n._op, uid)\n", "\n", " for n1, n2 in edges:\n", " #Connect n1 to the node of n2\n", " dot.edge(str(id(n1)), str(id(n2)) + n2._op)\n", "\n", " return dot" ], "metadata": { "id": "T0rN8d146jvF" }, "execution_count": 1, "outputs": [] }, { "cell_type": "code", "source": [ "import math" ], "metadata": { "id": "JlYxBvFK0AjA" }, "execution_count": 2, "outputs": [] }, { "cell_type": "code", "source": [ "class Value:\n", "\n", " def __init__(self, data, _children=(), _op='', label=''):\n", " self.data = data\n", " self.grad = 0.0\n", " self._backward = lambda: None #Its an empty function by default. This is what will do that gradient calculation at each of the operations.\n", " self._prev = set(_children)\n", " self._op = _op\n", " self.label = label\n", "\n", "\n", " def __repr__(self):\n", " return f\"Value(data={self.data})\"\n", "\n", " def __add__(self, other):\n", " other = other if isinstance(other, Value) else Value(other)\n", " out = Value(self.data + other.data, (self, other), '+')\n", "\n", " def backward():\n", " self.grad += 1.0 * out.grad\n", " other.grad += 1.0 * out.grad\n", "\n", " out._backward = backward\n", " return out\n", "\n", " def __radd__(self, other): #here\n", " return self + other\n", "\n", " def __mul__(self, other):\n", " other = other if isinstance(other, Value) else Value(other)\n", " out = Value(self.data * other.data, (self, other), '*')\n", "\n", " def backward():\n", " self.grad += other.data * out.grad\n", " other.grad += self.data * out.grad\n", " out._backward = backward\n", " return out\n", "\n", " def __rmul__(self, other): #other * self\n", " return self * other\n", "\n", " def __truediv__(self, other): #self/other\n", " return self * other**-1\n", "\n", " def __neg__(self):\n", " return self * -1\n", "\n", " def __sub__(self, other): #self - other\n", " return self + (-other)\n", "\n", " def __pow__(self, other):\n", " assert isinstance(other, (int, float)), \"only supporting int/float powers for now\"\n", " out = Value(self.data ** other, (self, ), f\"**{other}\")\n", "\n", " def backward():\n", " self.grad += (other * (self.data ** (other - 1))) * out.grad\n", "\n", " out._backward = backward\n", " return out\n", "\n", " def tanh(self):\n", " x = self.data\n", " t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)\n", " out = Value(t, (self, ), 'tanh')\n", "\n", " def backward():\n", " self.grad += 1 - (t**2) * 
out.grad\n", "\n", " out._backward = backward\n", " return out\n", "\n", " def exp(self):\n", " x = self.data\n", " out = Value(math.exp(x), (self, ), 'exp') #We merged t and out, into just out\n", "\n", " def backward():\n", " self.grad += out.data * out.grad\n", "\n", " out._backward = backward\n", " return out\n", "\n", " def backward(self):\n", "\n", " topo = []\n", " visited = set()\n", " def build_topo(v):\n", " if v not in visited:\n", " visited.add(v)\n", " for child in v._prev:\n", " build_topo(child)\n", " topo.append(v)\n", "\n", " build_topo(self)\n", "\n", " self.grad = 1.0\n", " for node in reversed(topo):\n", " node._backward()" ], "metadata": { "id": "tA0zbyEwFbD5" }, "execution_count": 3, "outputs": [] }, { "cell_type": "markdown", "source": [ "---------------" ], "metadata": { "id": "m9hy05zbxhLP" } }, { "cell_type": "code", "source": [ "import random" ], "metadata": { "id": "gu3tnJu1Wti5" }, "execution_count": 4, "outputs": [] }, { "cell_type": "code", "source": [ "class Neuron:\n", "\tdef __init__(self, nin):\n", "\t\tself.w = [ Value(random.uniform(-1,1)) for _ in range(nin) ]\n", "\t\tself.b = Value(random.uniform(-1,1))\n", "\n", "\tdef __call__(self, x):\n", "\t\t# (w*x)+b\n", "\t\tact = sum( (wi*xi for wi,xi in zip(self.w, x)), self.b )\n", "\t\tout = act.tanh()\n", "\t\treturn out\n", "\n", "class Layer:\n", "\tdef __init__(self, nin, nout):\n", "\t\tself.neurons = [Neuron(nin) for _ in range(nout)]\n", "\n", "\tdef __call__(self, x):\n", "\t\touts = [n(x) for n in self.neurons]\n", "\t\treturn outs[0] if len(outs)==1 else outs #The New added line for making the output better\n", "\n", "class MLP:\n", "\tdef __init__(self, nin, nouts):\n", "\t\tsz = [nin] + nouts\n", "\t\tself.layers = [ Layer(sz[i], sz[i+1]) for i in range(len(nouts)) ]\n", "\n", "\tdef __call__(self, x):\n", "\t\tfor layer in self.layers:\n", "\t\t\tx = layer(x)\n", "\t\treturn x" ], "metadata": { "id": "aCXXYNg_W680" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "x = [2.0, 3.0, -1.0]\n", "n = MLP(3, [4, 4, 1])\n", "n(x)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "aG9pKV_RXsO8", "outputId": "e6f183b9-896b-458f-9322-e91bc79e9da2", "collapsed": true }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Value(data=-0.33393070997191954)" ] }, "metadata": {}, "execution_count": 23 } ] }, { "cell_type": "markdown", "source": [ "-----------" ], "metadata": { "id": "6DemdSsv_abu" } }, { "cell_type": "markdown", "source": [ "Now, we'll be returning the **parameters** from the MLP. 
{ "cell_type": "markdown", "source": [ "-----------" ], "metadata": { "id": "6DemdSsv_abu" } }, { "cell_type": "markdown", "source": [ "Now we'll add a `parameters()` method so the MLP can return all of its **parameters**, collected from Neuron -> Layer -> MLP." ], "metadata": { "id": "rhKQgN2LKBf9" } }, { "cell_type": "code", "source": [ "class Neuron:\n", "    def __init__(self, nin):\n", "        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]\n", "        self.b = Value(random.uniform(-1, 1))\n", "\n", "    def __call__(self, x):\n", "        act = sum((wi * xi for wi, xi in zip(self.w, x)), self.b)\n", "        out = act.tanh()\n", "        return out\n", "\n", "    def parameters(self):\n", "        return self.w + [self.b]\n", "\n", "class Layer:\n", "    def __init__(self, nin, nout):\n", "        self.neurons = [Neuron(nin) for _ in range(nout)]\n", "\n", "    def __call__(self, x):\n", "        outs = [n(x) for n in self.neurons]\n", "        return outs[0] if len(outs) == 1 else outs\n", "\n", "    def parameters(self):\n", "        return [p for n in self.neurons for p in n.parameters()]\n", "\n", "    # Alternative way of writing the above return statement:\n", "    # parameters = []\n", "    # for n in self.neurons:\n", "    #     p = n.parameters()\n", "    #     parameters.extend(p)\n", "    # return parameters\n", "\n", "class MLP:\n", "    def __init__(self, nin, nouts):\n", "        sz = [nin] + nouts\n", "        self.layers = [Layer(sz[i], sz[i + 1]) for i in range(len(nouts))]\n", "\n", "    def __call__(self, x):\n", "        for layer in self.layers:\n", "            x = layer(x)\n", "        return x\n", "\n", "    def parameters(self):\n", "        return [p for l in self.layers for p in l.parameters()]" ], "metadata": { "id": "HmEO8Gi1KN_m" }, "execution_count": 5, "outputs": [] }, { "cell_type": "code", "source": [ "x = [2.0, 3.0, -1.0]\n", "n = MLP(3, [4, 4, 1])\n", "n(x)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "e2VaJPFdMVUs", "outputId": "0a229e8c-2084-4037-e808-cc27cb3fd2ca" }, "execution_count": 6, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Value(data=0.7625252102576119)" ] }, "metadata": {}, "execution_count": 6 } ] }, { "cell_type": "code", "source": [ "n.parameters()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "cfOp08kYNmDX", "outputId": "fe98dfd7-0e2b-4dd7-fb08-6f4cf60161ff" }, "execution_count": 7, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[Value(data=0.31785584973173164),\n", " Value(data=0.2998372553774835),\n", " Value(data=-0.8029008199517247),\n", " Value(data=-0.39340060142531286),\n", " Value(data=0.23322412084873956),\n", " Value(data=0.29891813550514534),\n", " Value(data=-0.5314862907700675),\n", " Value(data=0.19661072911432642),\n", " Value(data=0.9142418954398666),\n", " Value(data=0.041208786424172805),\n", " Value(data=-0.23983634992214187),\n", " Value(data=-0.593538786941121),\n", " Value(data=0.39482399486723296),\n", " Value(data=-0.9880306400643504),\n", " Value(data=-0.8097855189886964),\n", " Value(data=0.4629484174790124),\n", " Value(data=0.31168805444961634),\n", " Value(data=-0.9828138115624934),\n", " Value(data=0.5221437252554255),\n", " Value(data=-0.19703997468926882),\n", " Value(data=-0.5504279057638468),\n", " Value(data=-0.8365261779265616),\n", " Value(data=-0.22783861276612227),\n", " Value(data=0.5666981389300718),\n", " Value(data=-0.06415010714317604),\n", " Value(data=0.845414529622897),\n", " Value(data=0.4793425135418725),\n", " Value(data=-0.38321354069020086),\n", " Value(data=-0.10963021731006206),\n", " Value(data=0.14485994942129898),\n", " Value(data=-0.19028270981146433),\n", " Value(data=0.5148204886483112),\n", " Value(data=-0.8559156650791364),\n", " Value(data=0.3778416962066449),\n", " Value(data=0.09608787032156774),\n", " Value(data=-0.8288362456839788),\n",
" Value(data=0.5641592956285757),\n", " Value(data=0.13764114112689052),\n", " Value(data=-0.19625087652731277),\n", " Value(data=-0.6117936229921406),\n", " Value(data=0.7546009612155813)]" ] }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "markdown", "source": [ "So these are all the parameters of the MLP: the weights and biases of every neuron." ], "metadata": { "id": "W0hGhhMaNozj" } }, { "cell_type": "code", "source": [ "len(n.parameters())" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "itFmD8hFNnph", "outputId": "f43eee99-5831-4708-f203-518ddf7011e5" }, "execution_count": 8, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "41" ] }, "metadata": {}, "execution_count": 8 } ] },
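{ "cell_type": "markdown", "source": [ "A minimal sketch (not from the original notebook) of what `parameters()` enables: one step of gradient descent. The tiny dataset `xs`/`ys` and the learning rate of 0.01 are illustrative assumptions." ], "metadata": {} }, { "cell_type": "code", "source": [ "# Illustrative sketch: one gradient-descent step driven by parameters().\n", "# The made-up dataset (xs, ys) and the learning rate are assumptions for illustration.\n", "xs = [[2.0, 3.0, -1.0], [3.0, -1.0, 0.5]]\n", "ys = [1.0, -1.0] # desired targets\n", "\n", "ypred = [n(x) for x in xs]\n", "loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred)) # squared-error loss\n", "\n", "for p in n.parameters():\n", "    p.grad = 0.0 # zero the gradients before backprop\n", "loss.backward()\n", "\n", "for p in n.parameters():\n", "    p.data += -0.01 * p.grad # nudge each parameter against its gradient\n", "\n", "loss" ], "metadata": {}, "execution_count": null, "outputs": [] } ] }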