{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "source": [ "class Value:\n", "\n", " def __init__(self, data, _children=(), _op='', label=''):\n", " self.data = data\n", " self.grad = 0.0\n", " self._prev = set(_children)\n", " self._op = _op\n", " self.label = label\n", "\n", "\n", " def __repr__(self): # This basically allows us to print nicer looking expressions for the final output\n", " return f\"Value(data={self.data})\"\n", "\n", " def __add__(self, other):\n", " out = Value(self.data + other.data, (self, other), '+')\n", " return out\n", "\n", " def __mul__(self, other):\n", " out = Value(self.data * other.data, (self, other), '*')\n", " return out" ], "metadata": { "id": "jtRAdDVT6jf2" }, "execution_count": 2, "outputs": [] }, { "cell_type": "code", "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AIP2sPDm6Los", "outputId": "685902a5-d209-4a47-90fc-97bf59a10af2" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Value(data=-8.0)" ] }, "metadata": {}, "execution_count": 17 } ], "source": [ "a = Value(2.0, label='a')\n", "b = Value(-3.0, label='b')\n", "c = Value(10.0, label='c')\n", "e = a*b; e.label='e'\n", "d= e + c; d.label='d'\n", "f = Value(-2.0, label='f')\n", "L = d*f; L.label='L'\n", "L" ] }, { "cell_type": "code", "source": [ "from graphviz import Digraph\n", "\n", "def trace(root):\n", " #Builds a set of all nodes and edges in a graph\n", " nodes, edges = set(), set()\n", " def build(v):\n", " if v not in nodes:\n", " nodes.add(v)\n", " for child in v._prev:\n", " edges.add((child, v))\n", " build(child)\n", " build(root)\n", " return nodes, edges\n", "\n", "def draw_dot(root):\n", " dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) #LR == Left to Right\n", "\n", " nodes, edges = trace(root)\n", " for n in nodes:\n", " uid = str(id(n))\n", " #For any value in the graph, create a rectangular ('record') node for it\n", " dot.node(name = uid, label = \"{ %s | data %.4f | grad %.4f }\" % ( n.label, n.data, n.grad), shape='record')\n", " if n._op:\n", " #If this value is a result of some operation, then create an op node for it\n", " dot.node(name = uid + n._op, label=n._op)\n", " #and connect this node to it\n", " dot.edge(uid + n._op, uid)\n", "\n", " for n1, n2 in edges:\n", " #Connect n1 to the node of n2\n", " dot.edge(str(id(n1)), str(id(n2)) + n2._op)\n", "\n", " return dot" ], "metadata": { "id": "T0rN8d146jvF" }, "execution_count": 7, "outputs": [] }, { "cell_type": "code", "source": [ "draw_dot(L)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 247 }, "id": "k7wjwrfo6nUl", "outputId": "d78c4618-6574-49f9-8e80-f2faa8dad69a" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n135449920624224\n\ne\n\ndata -6.0000\n\ngrad 0.0000\n\n\n\n135449920632192+\n\n+\n\n\n\n135449920624224->135449920632192+\n\n\n\n\n\n135449920624224*\n\n*\n\n\n\n135449920624224*->135449920624224\n\n\n\n\n\n135449920621248\n\nL\n\ndata -8.0000\n\ngrad 0.0000\n\n\n\n135449920621248*\n\n*\n\n\n\n135449920621248*->135449920621248\n\n\n\n\n\n135449920632624\n\nb\n\ndata -3.0000\n\ngrad 0.0000\n\n\n\n135449920632624->135449920624224*\n\n\n\n\n\n135449920619856\n\na\n\ndata 2.0000\n\ngrad 0.0000\n\n\n\n135449920619856->135449920624224*\n\n\n\n\n\n135449920632192\n\nd\n\ndata 4.0000\n\ngrad 0.0000\n\n\n\n135449920632192->135449920621248*\n\n\n\n\n\n135449920632192+->135449920632192\n\n\n\n\n\n135449920619424\n\nc\n\ndata 10.0000\n\ngrad 0.0000\n\n\n\n135449920619424->135449920632192+\n\n\n\n\n\n135449920632288\n\nf\n\ndata -2.0000\n\ngrad 0.0000\n\n\n\n135449920632288->135449920621248*\n\n\n\n\n\n", "text/plain": [ "" ] }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "markdown", "source": [ "----------------------" ], "metadata": { "id": "UO6I8Z-_CaNv" } }, { "cell_type": "markdown", "source": [ "### **Now, let's start to fill those grad values**" ], "metadata": { "id": "wB-SONL3CltR" } }, { "cell_type": "markdown", "source": [ "--------------" ], "metadata": { "id": "EhvqPDYqF50Z" } }, { "cell_type": "markdown", "source": [ "**Let's first find the derivative of L w.r.t L**" ], "metadata": { "id": "dF0QlSFJCbsI" } }, { "cell_type": "code", "source": [ "#This is just a staging function to show how the calculation of each of the derivative is taking place\n", "def lol():\n", "\n", " h = 0.001\n", "\n", " #Here we are basically making them as local variables, to not affect the global variables on top\n", " a = Value(2.0, label='a')\n", " b = Value(-3.0, label='b')\n", " c = Value(10.0, label='c')\n", " e = a*b; e.label='e'\n", " d= e + c; d.label='d'\n", " f = Value(-2.0, label='f')\n", " L = d*f; L.label='L'\n", " L1 = L.data #L is basically a node, so we need its data\n", "\n", " a = Value(2.0, label='a')\n", " b = Value(-3.0, label='b')\n", " c = Value(10.0, label='c')\n", " e = a*b; e.label='e'\n", " d= e + c; d.label='d'\n", " f = Value(-2.0, label='f')\n", " L = d*f; L.label='L'\n", " L2 = L.data + h\n", "\n", " print((L2-L1)/h)\n", "\n", "lol()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AWQsdevqCUks", "outputId": "e08b83fd-b101-4fdc-a554-41561c00a08b" }, "execution_count": 6, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "1.000000000000334\n" ] } ] }, { "cell_type": "markdown", "source": [ "This was theoritically obvious as well. The derivitive of L wrt L will be one.\n", "\n", " \n", "\n", "So, lets add that value manually. (Remember to run the global variables for this)" ], "metadata": { "id": "HbfBzVQ4EEHM" } }, { "cell_type": "code", "source": [ "L.grad = 1.0" ], "metadata": { "id": "3TCgz-n6DbzI" }, "execution_count": 10, "outputs": [] }, { "cell_type": "code", "source": [ "draw_dot(L)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 212 }, "id": "RS6YodRTEX43", "outputId": "3b58ed12-b486-4452-b700-8048b1e03e3b" }, "execution_count": 11, "outputs": [ { "output_type": "execute_result", "data": { "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n139719306605568\n\nf\n\ndata -2.0000\n\ngrad 0.0000\n\n\n\n139719306596928*\n\n*\n\n\n\n139719306605568->139719306596928*\n\n\n\n\n\n139719306596928\n\nL\n\ndata -8.0000\n\ngrad 1.0000\n\n\n\n139719306596928*->139719306596928\n\n\n\n\n\n139719306594912\n\ne\n\ndata -6.0000\n\ngrad 0.0000\n\n\n\n139719306603120+\n\n+\n\n\n\n139719306594912->139719306603120+\n\n\n\n\n\n139719306594912*\n\n*\n\n\n\n139719306594912*->139719306594912\n\n\n\n\n\n139719306603120\n\nd\n\ndata 4.0000\n\ngrad 0.0000\n\n\n\n139719306603120->139719306596928*\n\n\n\n\n\n139719306603120+->139719306603120\n\n\n\n\n\n139719306604320\n\nc\n\ndata 10.0000\n\ngrad 0.0000\n\n\n\n139719306604320->139719306603120+\n\n\n\n\n\n139719306603840\n\na\n\ndata 2.0000\n\ngrad 0.0000\n\n\n\n139719306603840->139719306594912*\n\n\n\n\n\n139719306608016\n\nb\n\ndata -3.0000\n\ngrad 0.0000\n\n\n\n139719306608016->139719306594912*\n\n\n\n\n\n", "text/plain": [ "" ] }, "metadata": {}, "execution_count": 11 } ] }, { "cell_type": "markdown", "source": [ "-----------" ], "metadata": { "id": "4x1UEdrOHalT" } }, { "cell_type": "markdown", "source": [ "**Now, we find the derivative of L wrt to f and d**" ], "metadata": { "id": "Hhj8DrcUF7fI" } }, { "cell_type": "markdown", "source": [ "So, mathematically:\n", "\n", "dL/dd = ?\n", "\n", "**L = d * f**\n", "\n", "Therefore, dL/dd = f\n", "\n", "If we do manual calculation to verify, \\\n", "\n", "=> f(x+h) - f(x) / h \\\n", "\n", "(Remember the f(x) is basically L here) \\\n", "=> (d+h)*f - d*f / h \\\n", "=> df + hf - df / h \\\n", "=> hf/h \\\n", "= f\n" ], "metadata": { "id": "SUi_wdLTGCsq" } }, { "cell_type": "markdown", "source": [ "So here if you see,\n", "\n", "The derivative of L wrt f is the value in d \\\n", "& \\\n", "The derivative of L wrt d is the value in f\n", "\n", "So, grad f is 4.0 \\\n", "and grad d is -2.0\n", "\n", " \n", "\n", "Lets check this in code!" ], "metadata": { "id": "8ApC2l-HHfHi" } }, { "cell_type": "code", "source": [ "# STARTING WITH d\n", "\n", "#This is just a staging function to show how the calculation of each of the derivative is taking place\n", "def lol():\n", "\n", " h = 0.001\n", "\n", " a = Value(2.0, label='a')\n", " b = Value(-3.0, label='b')\n", " c = Value(10.0, label='c')\n", " e = a*b; e.label='e'\n", " d= e + c; d.label='d'\n", " f = Value(-2.0, label='f')\n", " L = d*f; L.label='L'\n", " L1 = L.data #L is basically a node, so we need its data\n", "\n", " a = Value(2.0, label='a')\n", " b = Value(-3.0, label='b')\n", " c = Value(10.0, label='c')\n", " e = a*b; e.label='e'\n", " d= e + c; d.label='d'\n", " d.data = d.data + h\n", " f = Value(-2.0, label='f')\n", " L = d*f; L.label='L'\n", " L2 = L.data\n", "\n", " print((L2-L1)/h)\n", "\n", "lol()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2wx02cE6EYOR", "outputId": "f284e1ac-4c6f-490b-c8f3-e94dfbd9923e" }, "execution_count": 12, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "-2.000000000000668\n" ] } ] }, { "cell_type": "code", "source": [ "# NOW WITH f\n", "\n", "#This is just a staging function to show how the calculation of each of the derivative is taking place\n", "def lol():\n", "\n", " h = 0.00001\n", "\n", " a = Value(2.0, label='a')\n", " b = Value(-3.0, label='b')\n", " c = Value(10.0, label='c')\n", " e = a*b; e.label='e'\n", " d= e + c; d.label='d'\n", " f = Value(-2.0, label='f')\n", " L = d*f; L.label='L'\n", " L1 = L.data #L is basically a node, so we need its data\n", "\n", " a = Value(2.0, label='a')\n", " b = Value(-3.0, label='b')\n", " c = Value(10.0, label='c')\n", " e = a*b; e.label='e'\n", " d= e + c; d.label='d'\n", " f = Value(-2.0 + h, label='f')\n", " L = d*f; L.label='L'\n", " L2 = L.data\n", "\n", " print((L2-L1)/h)\n", "\n", "lol()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5haMwvymIRxx", "outputId": "7b372e31-8fa4-42d3-c591-371d3c49c78d" }, "execution_count": 15, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "4.000000000026205\n" ] } ] }, { "cell_type": "markdown", "source": [ "So, now that we have verified that mathematically and on our code. Lets manually add those variables to the graph" ], "metadata": { "id": "EB8w0lF0IofD" } }, { "cell_type": "code", "source": [ "f.grad = 4.0\n", "d.grad = -2.0" ], "metadata": { "id": "pS4NnAZVIML9" }, "execution_count": 19, "outputs": [] }, { "cell_type": "code", "source": [ "draw_dot(L)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 212 }, "id": "ko5oltNPJDtc", "outputId": "d78137fe-0afa-449f-db42-0a079ebfffa4" }, "execution_count": 20, "outputs": [ { "output_type": "execute_result", "data": { "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n139719306599472\n\nc\n\ndata 10.0000\n\ngrad 0.0000\n\n\n\n139719306607920+\n\n+\n\n\n\n139719306599472->139719306607920+\n\n\n\n\n\n139719306595392\n\ne\n\ndata -6.0000\n\ngrad 0.0000\n\n\n\n139719306595392->139719306607920+\n\n\n\n\n\n139719306595392*\n\n*\n\n\n\n139719306595392*->139719306595392\n\n\n\n\n\n139719306594432\n\nb\n\ndata -3.0000\n\ngrad 0.0000\n\n\n\n139719306594432->139719306595392*\n\n\n\n\n\n139719306607920\n\nd\n\ndata 4.0000\n\ngrad -2.0000\n\n\n\n139719306602976*\n\n*\n\n\n\n139719306607920->139719306602976*\n\n\n\n\n\n139719306607920+->139719306607920\n\n\n\n\n\n139719306602352\n\nf\n\ndata -2.0000\n\ngrad 4.0000\n\n\n\n139719306602352->139719306602976*\n\n\n\n\n\n139719306602976\n\nL\n\ndata -8.0000\n\ngrad 0.0000\n\n\n\n139719306602976*->139719306602976\n\n\n\n\n\n139719306601968\n\na\n\ndata 2.0000\n\ngrad 0.0000\n\n\n\n139719306601968->139719306595392*\n\n\n\n\n\n", "text/plain": [ "" ] }, "metadata": {}, "execution_count": 20 } ] } ] }