Exploring the data
import pandas as pd
import scipy as sc
import numpy as np
import h5py
import matplotlib.pylab as plt
If the import fails with ModuleNotFoundError: No module named 'h5py', install the package first (for example, pip install h5py) and re-run the cell.
from google.colab import drive
drive.mount('/content/gdrive')
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Exploring the data¶
data_train = "/content/train_catvnoncat.h5"
train_dataset = h5py.File(data_train, "r")
data_test = "/content/test_catvnoncat.h5"
test_dataset = h5py.File(data_test, "r")
train_dataset.keys()
<KeysViewHDF5 ['list_classes', 'train_set_x', 'train_set_y']>
# Read the data
xtrain_classes, xtrain, train_label = \
    train_dataset["list_classes"], train_dataset["train_set_x"], train_dataset["train_set_y"]
test_classes, xtest, test_label = \
    test_dataset["list_classes"], test_dataset["test_set_x"], test_dataset["test_set_y"]
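A quick way to inspect the class names stored in list_classes (assuming they are saved as byte strings, as is typical for HDF5 string data):
# Decode the class names, which the HDF5 file stores as bytes
print([c.decode() if isinstance(c, bytes) else c for c in np.array(xtrain_classes)])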
plt.imshow(xtrain[0])
<matplotlib.image.AxesImage at 0x7f3abe9adcd0>
# Flatten each 64x64x3 image into a row vector and scale pixel values to [0, 1]
xtrain_ = np.reshape(xtrain, (209, 64*64*3))/255
xtest_ = np.reshape(xtest, (50, 64*64*3))/255
np.shape(xtrain_)
(209, 12288)
Let's build a neural network with an input layer, an output layer, and $L-1$ hidden layers, trained on $m$ training examples.¶
For $m$ training examples, the expressions above can be summarized in the following equations. Written in a more compact form we have:
$$\vec{\cal{Z}}^{[l]} = \vec{\Theta}^{[l]} \vec{\cal{A}}^{[l-1]} + \vec{b}^{[l]}$$
Applying the activation function:
$$\vec{\cal{A}}^{[l]} = f\left(\vec{\cal{Z}}^{[l]}\right)$$
The dimensions of the expressions above can be summarized as follows:
$\mathrm{dim(\vec{\cal{Z}}^{[l]})}=n^{[l]}\times m $
$\mathrm{dim(\vec{\Theta}^{[l]})}=n^{[l]}\times n^{[l-1]}$
$\mathrm{dim(\vec{\cal{A}}^{[l-1]})}=n^{[l-1]}\times m $
$\mathrm{dim(\vec{b}^{[l]})}=n^{[l]}\times 1$
Network topology¶
Topology = [n_x, n_h1, n_h2, n_h3, …,n_y]
n_x = 12288  # size of the input layer
#n_h =       # size of the hidden layer
n_y = 1      # size of the output layer
class layer_nn():
    def __init__(self, act_fun, n_layer_present, n_layer_before):
        # Weights and biases initialized uniformly in [-1, 1)
        self.W = 2*np.random.random((n_layer_before, n_layer_present)) - 1
        self.B = 2*np.random.random((n_layer_present, 1)) - 1
        self.act_fun = act_fun
    def output(self, Z, A, Ap):
        # Cache the linear output, the activation and its derivative for backprop
        self.Z = Z
        self.A = A
        self.Ap = Ap
    def derivates(self, dW, db):
        # Store the gradients of the cost with respect to W and b
        self.dW = dW
        self.db = db
def act_function(x, activation):
    # Returns the activation f(x) and its derivative f'(x)
    if activation == "sigmoid":
        f = lambda x: 1/(1 + np.exp(-x))
        fp = f(x)*(1 - f(x))
    elif activation == "tanh":
        f = np.tanh
        fp = 1 - np.tanh(x)**2
    return f(x), fp
layer = layer_nn("hola, ",1,1)
layer.W
array([[0.18083188]])
topology = [n_x, 3, 4, 6, 1]
l1 = layer_nn("sigmoid", 3, n_x)
l2 = layer_nn("sigmoid", 4, 3)
l3 = layer_nn("sigmoid", 6, 4)
l4 = layer_nn("sigmoid", 1, 6)
nn_red = [l1, l2, l3, l4]
# Generalize the network topology
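One way to generalize the construction to an arbitrary topology list (build_network is a helper sketched here for illustration, not one of the cells above):
def build_network(topology, activation="sigmoid"):
    # One layer_nn per consecutive pair of layer sizes in the topology list
    return [layer_nn(activation, topology[i+1], topology[i])
            for i in range(len(topology) - 1)]

# For topology = [n_x, 3, 4, 6, 1] this produces the same structure as [l1, l2, l3, l4]:
# nn_red = build_network(topology)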
# Forward pass and dimension check
A0 = xtrain_.T
np.shape(A0)
(12288, 209)
np.shape(l1.W.T)
(3, 12288)
# Output hyperplanes of layer 1
Z1 = l1.W.T @ A0 + l1.B
# Sigmoid activations of layer 1
A1, A1p = act_function(Z1, "sigmoid")
np.shape(A1)
(3, 209)
np.shape(l2.W.T)
(4, 3)
# Layer 2
Z2 = l2.W.T @ A1 + l2.B
A2, A2p = act_function(Z2, "sigmoid")
# Layer 3
Z3 = l3.W.T @ A2 + l3.B
A3, A3p = act_function(Z3, "sigmoid")
# Layer 4
Z4 = l4.W.T @ A3 + l4.B
A4, A4p = act_function(Z4, "sigmoid")
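The forward_pass function used next does not appear in these cells; a minimal sketch, consistent with the manual layer-by-layer pass above:
def forward_pass(A0, nn):
    # Propagate the input through every layer, caching Z, A and f'(Z) per layer
    A = A0
    for layer in nn:
        Z = layer.W.T @ A + layer.B
        A, Ap = act_function(Z, layer.act_fun)
        layer.output(Z, A, Ap)   # cache for backpropagation
    return A, nn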
A0 = xtrain_.T
A, nn = forward_pass(A0, nn_red)
Determining the cost function¶
$$-\frac{1}{m} \sum\limits_{i = 1}^{m} \left(y^{(i)}\log\left(a^{[L] (i)}\right) + (1-y^{(i)})\log\left(1- a^{[L] (i)}\right)\right) \tag{7}$$
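The cost_Function called at the end of the notebook is not shown in these cells; a minimal sketch implementing equation (7):
def cost_Function(A, Y):
    # Cross-entropy cost, eq. (7): A is the output activation (1, m), Y the labels
    m = A.shape[1]
    Y = np.reshape(Y, (1, m))
    return float(-np.sum(Y*np.log(A) + (1 - Y)*np.log(1 - A)) / m)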
Backward Propagation¶
For an arbitrary layer $l$ we have:
$$d\Theta^{[l]} = d{\cal Z}^{(i)[l]} A^{(i)[l-1]} = dA^{(i)[l]} f'({\cal Z}^{(i)[l]}) \, A^{(i)[l-1]}$$
$$db^{[l]} = d{\cal Z}^{(i)[l]} = dA^{(i)[l]} f'({\cal Z}^{(i)[l]})$$
The values of $dA$ can be written as:
$$dA^{(i)[l-1]} = \Theta^{[l]\,T} d{\cal Z}^{(i)[l]}$$
For the $m$ training examples, in vectorized form:
$$dZ^{[l]} = dA^{[l]} * f'^{[l]}(Z^{[l]})$$
$$d\Theta^{[l]} = \frac{\partial \mathcal{J} }{\partial \Theta^{[l]}} = \frac{1}{m} dZ^{[l]} A^{[l-1]\,T} \tag{1}$$
$$db^{[l]} = \frac{\partial \mathcal{J} }{\partial b^{[l]}} = \frac{1}{m} \sum_{i = 1}^{m} dZ^{[l]} \tag{2}$$
$$dA^{[l-1]} = \frac{\partial \mathcal{L} }{\partial A^{[l-1]}} = \Theta^{[l]\,T} dZ^{[l]} \tag{3}$$
# Backward propagation, layer by layer (eqs. (1)-(3)); generalized into
# backward_propagation further below
Y = np.array(train_label).reshape(1, -1)   # labels as a (1, m) row vector
m_ = 1/209
# Layer L (nn[3])
dAL = -(np.divide(Y, A) - np.divide(1 - Y, 1 - A))
dZ_4 = dAL*nn[3].Ap
nn[3].dW = m_*dZ_4@nn[2].A.T
nn[3].db = m_*np.sum(dZ_4, axis=1, keepdims=True)
# Layer L-1 (nn[2]); dA^{[l-1]} = Theta^{[l]T} dZ^{[l]}, with Theta^{[l]} = W.T
dA_3 = nn[3].W @ dZ_4
dZ_3 = dA_3*nn[2].Ap
nn[2].dW = m_*dZ_3@nn[1].A.T
nn[2].db = m_*np.sum(dZ_3, axis=1, keepdims=True)
# Layer L-2 (nn[1])
dA_2 = nn[2].W @ dZ_3
dZ_2 = dA_2*nn[1].Ap
nn[1].dW = m_*dZ_2@nn[0].A.T
nn[1].db = m_*np.sum(dZ_2, axis=1, keepdims=True)
# Layer L-3 (nn[0], first hidden layer); its input is A0
dA_1 = nn[1].W @ dZ_2
dZ_1 = dA_1*nn[0].Ap
nn[0].dW = m_*dZ_1@A0.T
nn[0].db = m_*np.sum(dZ_1, axis=1, keepdims=True)
print(np.shape(nn[1].dW))
print(np.shape(nn[1].W))
(4, 3)
(3, 4)
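The generalized backward_propagation called later is not defined in these cells; a minimal sketch consistent with the manual pass above (it uses the global input A0 for the first layer):
def backward_propagation(AL, Y, nn):
    # Generalized backward pass, storing dW and db in each layer (eqs. (1)-(3))
    m = AL.shape[1]
    Y = np.reshape(Y, (1, m))
    dA = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    for l in reversed(range(len(nn))):
        dZ = dA*nn[l].Ap
        A_prev = nn[l-1].A if l > 0 else A0   # A0 is the network input
        nn[l].derivates((1/m)*dZ@A_prev.T,
                        (1/m)*np.sum(dZ, axis=1, keepdims=True))
        dA = nn[l].W @ dZ                     # propagate the gradient backwards
    return nn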
Applying gradient descent¶
$$ W^{[l]} = W^{[l]} - \alpha \text{ } dW^{[l]} \tag{16}$$ $$ b^{[l]} = b^{[l]} - \alpha \text{ } db^{[l]} \tag{17}$$
def update_params(nn, learning_rate):
    # Gradient-descent step, eqs. (16)-(17), applied to every layer
    L = len(nn)
    for l in range(L):
        nn[l].W = nn[l].W - learning_rate*nn[l].dW.T
        nn[l].B = nn[l].B - learning_rate*nn[l].db
    return nn
# One full iteration: backward pass, parameter update, forward pass and cost
learning_rate = 0.1
nn = backward_propagation(A, Y, nn)
nn = update_params(nn, learning_rate)
A, nn = forward_pass(A0, nn)
J = cost_Function(A, np.array(train_label))
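To actually train the network these steps would be repeated; a minimal training-loop sketch (the iteration count and logging interval are arbitrary choices):
# Repeat forward pass, backward pass and update; track the cost every 100 steps
Y = np.array(train_label).reshape(1, -1)
costs = []
for it in range(2000):
    A, nn = forward_pass(A0, nn)
    nn = backward_propagation(A, Y, nn)
    nn = update_params(nn, learning_rate)
    if it % 100 == 0:
        costs.append(cost_Function(A, Y))

plt.plot(costs)
plt.xlabel("iteration / 100")
plt.ylabel("cost")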