Exploración de los datos
import pandas as pd
import scipy as sc
import numpy as np
import h5py
import matplotlib.pylab as plt
from google.colab import drive
Exploración de los datos
# Open the HDF5 files that hold the training and the test set.
# The file handles stay open because the datasets read from them below are
# still used later in the file.
data_train = "/content/train_catvnoncat.h5"
train_dataset = h5py.File(data_train, "r")
data_test = "/content/test_catvnoncat.h5"
test_dataset = h5py.File(data_test, "r")

# Read the data.
# NOTE(review): the right-hand sides of these two assignments were missing in
# the source. They are reconstructed on the assumption that each file holds
# exactly three datasets whose key order is (classes, images, labels) --
# confirm against the actual files.
xtrain_classes, xtrain, train_label = (
    train_dataset[key] for key in train_dataset.keys()
)
test_classes, xtest, test_label = (
    test_dataset[key] for key in test_dataset.keys()
)

# Flatten every sample into one row and divide by 255. The number of samples
# is taken from the data instead of being hard-coded (209 / 50).
xtrain_ = np.reshape(xtrain, (xtrain.shape[0], -1))/255
xtest_ = np.reshape(xtest, (xtest.shape[0], -1))/255
Construyamos una red neuronal con una capa de entrada, una capa de salida con una neurona y $L-1$ capas ocultas.
Con $m$ datos de entrenamiento
Para $m$ datos de entrenamiento, las expresiones anteriores pueden ser resumidas en las siguientes ecuaciones:
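Escrito de una forma más compacta tenemos que:
$$\vec{\cal{Z}}^{[l]} = \vec{\Theta}^{[l]} \vec{\cal{A}}^{[l-1]} + \vec{b}^{[l]}$$
Aplicando la función de activación:
$$\vec{\cal{A}}^{[l]} = f\left(\vec{\cal{Z}}^{[l]}\right)$$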
Las dimensiones de las expresiones anteriores, pueden ser resumidas en lo siguiente:
$\mathrm{dim(\vec{\cal{Z}}^{[l]})}=n^{[l]}\times m $
$\mathrm{dim(\vec{\Theta}^{[l]})}=n^{[l]}\times n^{[l-1]}$
$\mathrm{dim(\vec{\cal{A}}^{[l]})}=n^{[l]}\times m $
Topología de la red
Topology = [n_x, n_h1, n_h2, n_h3, …,n_y]
# Sizes of the two outer layers of the network.
# Input layer: 64*64*3 values per sample, the same product used when the
# training data is flattened.
n_x = 64 * 64 * 3
# Hidden-layer sizes (n_h) are not fixed here.
# Output layer: a single unit.
n_y = 1
class layer_nn():
    """One fully connected layer: its parameters plus forward/backward caches.

    Attributes:
        W: weight matrix of shape (n_layer_before, n_layer_present).
        B: bias column of shape (n_layer_present, 1).
        act_fun: name of the activation function used by this layer.
        Z, A, Ap: values cached by ``output``; ``None`` until it is called.
        dW, db: gradients cached by ``derivates``; ``None`` until it is called.
    """

    def __init__(self, act_fun, n_layer_present, n_layer_before):
        """Create a layer with parameters drawn uniformly from [-1, 1).

        Args:
            act_fun: name of the activation function of this layer.
            n_layer_present: number of units in this layer.
            n_layer_before: number of units in the previous layer.
        """
        # W is drawn before B; keep this order so seeded runs stay reproducible.
        self.W = 2*np.random.random((n_layer_before, n_layer_present)) - 1
        self.B = 2*np.random.random((n_layer_present, 1)) - 1
        self.act_fun = act_fun
        # Define every cache up front so reading one before the corresponding
        # setter has run yields None instead of a missing attribute.
        self.Z = None
        self.A = None
        self.Ap = None
        self.dW = None
        self.db = None

    def output(self, Z, A, Ap):
        """Store the forward-pass values Z, A and Ap on the layer."""
        self.Z = Z
        self.A = A
        self.Ap = Ap

    def derivates(self, dW, db):
        """Store the gradients dW and db on the layer."""
        self.dW = dW
        self.db = db
def act_function(x, activation):
    """Evaluate an activation function and its derivative at ``x``.

    Args:
        x: NumPy array (or scalar) of pre-activation values.
        activation: ``"sigmoid"`` or ``"tanh"``.

    Returns:
        A tuple ``(f(x), f'(x))`` with the same shape as ``x``.

    Raises:
        ValueError: if ``activation`` is not a supported name.
    """
    if activation == "sigmoid":
        value = 1/(1+np.exp(-x))
        return value, value*(1-value)
    if activation == "tanh":
        # Call np.tanh on x (the original returned the function object) and
        # use d/dx tanh(x) = 1 - tanh(x)^2 for the derivative.
        value = np.tanh(x)
        return value, 1 - value**2
    raise ValueError("Unknown activation function: {!r}".format(activation))
# Scratch instance; kept because creating it also advances NumPy's global
# random state before the real layers are drawn.
layer = layer_nn("hola, ", 1, 1)

# Layer sizes from the input layer to the output layer.
topology = [n_x, 3, 4, 6, 1]

# One sigmoid layer per consecutive (previous size, current size) pair,
# created in order from the first layer to the last.
nn_red = [
    layer_nn("sigmoid", n_present, n_before)
    for n_before, n_present in zip(topology[:-1], topology[1:])
]
l1, l2, l3, l4 = nn_red
# Generalise the network topology
# Forward pass written out layer by layer to check the dimensions.
A0 = xtrain_.T

# Layer 1: affine pre-activation, then the sigmoid and its derivative.
Z1 = np.matmul(l1.W.T, A0) + l1.B
A1, A1p = act_function(Z1, "sigmoid")

# Layer 2
Z2 = np.matmul(l2.W.T, A1) + l2.B
A2, A2p = act_function(Z2, "sigmoid")

# Layer 3
Z3 = np.matmul(l3.W.T, A2) + l3.B
A3, A3p = act_function(Z3, "sigmoid")

# Layer 4 (output layer)
Z4 = np.matmul(l4.W.T, A3) + l4.B
A4, A4p = act_function(Z4, "sigmoid")
# Network input: the transpose of xtrain_.
A0 = xtrain_.T
# NOTE(review): forward_pass is not defined in the visible part of this file.
# Judging from the later uses of A and nn it presumably returns the output
# activations and the list of layers with their caches filled -- confirm.
A, nn = forward_pass(A0, nn_red)
Determinación de la función de coste
$$\mathcal{J} = -\frac{1}{m} \sum\limits_{i = 1}^{m} \left( y^{(i)}\log\left(a^{[L] (i)}\right) + (1-y^{(i)})\log\left(1- a^{[L] (i)}\right) \right) \tag{7}$$
Backward Propagation¶
Para una capa $l$ arbitraria tenemos que:
$$ d\Theta^{[l]} = d{\cal Z}^{(i)[l]} A^{(i)[l-1]} = dA^{(i)[l]} f'({\cal Z}^{(i)[l]} ) A^{(i)[l-1]} \tag{82}$$
$$ db^{[l]} = d{\cal Z}^{(i)[l]} = dA^{(i)[l]} f'({\cal Z}^{(i)[l]} ) \tag{83}$$
Los valores de dA pueden ser escritos como:
$$ dA^{(i)[l-1]} = \Theta^{[l] T} d{\cal Z}^{(i)[l]} \tag{84}$$
$$ dZ^{[l]} = dA^{[l]} * {f^{[l]}}' (Z^{[l]}) $$
$$ d\Theta^{[l]} = \frac{\partial \mathcal{J} }{\partial \Theta^{[l]}} = \frac{1}{m} dZ^{[l]} A^{[l-1] T} \tag{1}$$
$$ db^{[l]} = \frac{\partial \mathcal{J} }{\partial b^{[l]}} = \frac{1}{m} \sum_{i = 1}^{m} dZ^{[l] (i)} \tag{2}$$
$$ dA^{[l-1]} = \frac{\partial \mathcal{J} }{\partial A^{[l-1]}} = \Theta^{[l] T} dZ^{[l]} \tag{3}$$
def backward_propagation(AL, Y, nn, A0=None):
    """Back-propagate the cross-entropy cost and store the gradients per layer.

    Every layer in ``nn`` must already hold the forward-pass caches ``A``
    (activations) and ``Ap`` (activation derivative at Z) as well as its
    weights ``W`` of shape (n_previous, n_current).

    For each layer, walking from the last one to the first:
        dZ = dA * Ap
        dW = dZ @ A_previous.T / m            (shape: n_current x n_previous)
        db = sum of dZ over the samples / m   (shape: n_current x 1)
        dA_previous = W @ dZ

    Args:
        AL: output activations of the last layer, shape (n_y, m).
        Y: labels; anything convertible to an array with as many elements
            as ``AL``.
        nn: list of layer objects, ordered from first to last layer.
        A0: optional network input of shape (n_x, m). It is the "previous
            activation" of the first layer; when omitted, no gradient is
            stored for the first layer.

    Returns:
        The same list ``nn``; ``dW`` and ``db`` are set on every layer whose
        gradient could be computed.
    """
    Y = np.asarray(Y, dtype=float).reshape(AL.shape)
    m = AL.shape[1]  # number of samples, taken from the data

    # Derivative of the summed cross-entropy with respect to AL; the 1/m
    # factor of the cost is applied when dW and db are formed.
    dA = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    for idx in range(len(nn) - 1, -1, -1):
        layer = nn[idx]
        dZ = dA * layer.Ap

        if idx > 0:
            A_prev = nn[idx - 1].A
        elif A0 is not None:
            A_prev = A0
        else:
            # First layer but no network input supplied: nothing to store.
            break

        layer.dW = (dZ @ A_prev.T) / m
        layer.db = np.sum(dZ, axis=1, keepdims=True) / m

        if idx > 0:
            # W is stored as (n_previous, n_current), so W @ dZ is the
            # gradient with respect to the previous layer's activations.
            dA = layer.W @ dZ

    return nn
Aplicación del gradiente descendente
$$ W^{[l]} = W^{[l]} - \alpha \text{ } dW^{[l]} \tag{16}$$ $$ b^{[l]} = b^{[l]} - \alpha \text{ } db^{[l]} \tag{17}$$
def update_params(nn, learning_rate):
    """Apply one gradient-descent step to every layer that holds gradients.

    Args:
        nn: list of layer objects with parameters ``W`` and ``B`` and,
            where available, gradients ``dW`` and ``db``.
        learning_rate: step size multiplying the gradients.

    Returns:
        The same list ``nn`` with updated ``W`` and ``B``.
    """
    # Iterate over the list itself: the original relied on an undefined name
    # ``L`` and started at index 1, so the first layer was never updated.
    for layer in nn:
        dW = getattr(layer, "dW", None)
        db = getattr(layer, "db", None)
        if dW is None or db is None:
            # No gradient stored for this layer; leave its parameters as is.
            continue
        # dW is stored transposed relative to W, hence the .T.
        layer.W = layer.W - learning_rate*dW.T
        layer.B = layer.B - learning_rate*db
    return nn
# One training iteration: backward pass, parameter update, new forward pass
# and evaluation of the cost on the training labels.
# NOTE(review): Y, learning_rate, forward_pass and cost_Function are not
# defined in the visible part of this file -- confirm they exist before this
# point.
nn = backward_propagation(A, Y,nn)
nn = update_params(nn, learning_rate)
A, nn = forward_pass(A0, nn)
# The labels are converted to a NumPy array before being passed to the cost.
J = cost_Function(A, np.array(train_label))