The goal of this tutorial is to provide a minimal, self-contained implementation of automatic differentiation. If you develop machine learning algorithms or applications, you have almost certainly used PyTorch, TensorFlow, or JAX; these are all examples of automatic differentiation (AD) software. AD software has been the backbone of AI algorithms since the advent of deep learning, because training deep neural networks requires computing gradients over the network via backpropagation. If you want to learn more about the fundamental principles behind the design of such software, this tutorial is for you. It is split into 3 sections:
Acknowledgement: I took inspiration from Mostafa Samir's tutorial on backpropagation and want to thank him for letting me re-use parts of his code.
Note: AD and differentiable programming. AD software is essentially a programming language for so-called differentiable programming. At a high level, a program is an algorithm that converts an input to an output depending on certain parameters $\theta$: $$\text{Input}\to \text{Algorithm}(\theta)\to\text{Output}\to \text{Performance/Loss}$$ For a neural network, for example, $\theta$ are the weights of that network. The key to differentiable programming is that we can compute the derivative of the loss with respect to $\theta$: $$ \frac{d}{d\theta}\text{Loss}=\frac{d}{d\theta}\text{Loss}\big(\text{Algorithm}(\theta)\big)$$ and therefore we can optimize the program via gradient descent: $$\theta_{t+1}=\theta_t-\lambda \frac{d}{d\theta}\text{Loss}$$ The key to applying this paradigm is thus being able to differentiate any program, i.e. to compute gradients. This is what we are going to learn in this tutorial.
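To make the gradient-descent update above concrete, here is a minimal sketch of optimizing a toy "program" where the gradient is still derived by hand. The names `algorithm`, `loss`, and `grad_loss` are purely illustrative and not part of the AD implementation we build later; the whole point of AD is to obtain something like `grad_loss` automatically for arbitrary programs.

```python
def algorithm(theta, x):
    """A trivial differentiable program: predict y = theta * x."""
    return theta * x

def loss(theta, x, y_target):
    """Squared-error performance measure of the program's output."""
    return (algorithm(theta, x) - y_target) ** 2

def grad_loss(theta, x, y_target):
    """Hand-derived d(Loss)/d(theta); this is what AD will give us for free."""
    return 2.0 * (algorithm(theta, x) - y_target) * x

# Gradient descent: theta_{t+1} = theta_t - lambda * d(Loss)/d(theta)
theta, lam = 0.0, 0.1
x, y_target = 2.0, 6.0   # we want theta * 2 ≈ 6, i.e. theta ≈ 3
for t in range(50):
    theta -= lam * grad_loss(theta, x, y_target)

print(theta)  # ≈ 3.0
```

For this toy example the derivative fits on one line, but for a deep neural network with millions of parameters, writing gradients by hand is infeasible; that is exactly the gap AD software fills.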
# Optional dependencies for visualizing computational graphs
#!pip install pydot-ng
#!pip install graphviz