(continuation of Answer 1)
Python source code for the 2x35x3 dropout network used in the Aula 24 exercise, adapted from Neural Network Dropout Training - James McCaffrey:
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(10)
class NeuralNetwork:
    def __init__(self, numInput, numHidden, numOutput):
        self.numInput = numInput
        self.numHidden = numHidden
        self.numOutput = numOutput
        self.numWeights = (self.numInput * self.numHidden) + (self.numHidden * self.numOutput)
        self.numWeights += (self.numHidden + self.numOutput)
        self.inputs = [0.0 for i in range(numInput)]
        self.ihWeights = [[0.0 for i in range(numHidden)] for j in range(numInput)]
        self.hBiases = [0.0 for i in range(numHidden)]
        self.hOutputs = [0.0 for i in range(numHidden)]
        self.hoWeights = [[0.0 for i in range(numOutput)] for j in range(numHidden)]
        self.oBiases = [0.0 for i in range(numOutput)]
        self.outputs = [0.0 for i in range(numOutput)]
        self.InitializeWeights()
    def InitializeWeights(self):
        initialWeights = []
        lo = -0.01
        hi = 0.01
        for i in range(self.numWeights):
            initialWeights.append((hi - lo) * self.NextDouble() + lo)
        self.SetWeights(initialWeights)
    def SetWeights(self, weights):
        k = 0
        for i in range(self.numInput):
            for j in range(self.numHidden):
                self.ihWeights[i][j] = weights[k]
                k += 1
        for i in range(self.numHidden):
            self.hBiases[i] = weights[k]
            k += 1
        for i in range(self.numHidden):
            for j in range(self.numOutput):
                self.hoWeights[i][j] = weights[k]
                k += 1
        for i in range(self.numOutput):
            self.oBiases[i] = weights[k]
            k += 1
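
    # SetWeights consumes the flat weight vector in four consecutive blocks:
    # input-hidden weights (numInput*numHidden), hidden biases (numHidden),
    # hidden-output weights (numHidden*numOutput) and output biases
    # (numOutput) -- the same order used to size numWeights in __init__.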
    def UpdateWeights(self, tValues, learnRate, dropNodes):
        # update the weights and biases using back-propagation
        hGrads = [0.0 for i in range(self.numHidden)]
        oGrads = [0.0 for i in range(self.numOutput)]
        # 1. compute output gradients (identity output activation, so the
        # derivative is 1 and the gradient is just the signed error)
        for k in range(self.numOutput):
            derivative = 1.0
            oGrads[k] = derivative * (tValues[k] - self.outputs[k])
        # 2. compute hidden gradients (dropped nodes keep a zero gradient)
        for j in range(self.numHidden):
            if self.IsDropNode(j, dropNodes):
                continue
            derivative = (1 - self.hOutputs[j]) * (1 + self.hOutputs[j])  # derivative of tanh = (1 - y) * (1 + y)
            summ = 0.0
            for k in range(self.numOutput):  # each hidden delta is the sum of numOutput terms
                summ += oGrads[k] * self.hoWeights[j][k]
            hGrads[j] = derivative * summ
        # 3. update input-hidden weights and hidden biases
        for j in range(self.numHidden):
            if self.IsDropNode(j, dropNodes):
                continue
            for i in range(self.numInput):
                delta = learnRate * hGrads[j] * self.inputs[i]
                self.ihWeights[i][j] += delta
            biasDelta = learnRate * hGrads[j]
            self.hBiases[j] += biasDelta
        # 4. update hidden-output weights and output biases
        for k in range(self.numOutput):
            for j in range(self.numHidden):
                if self.IsDropNode(j, dropNodes):
                    continue
                delta = learnRate * oGrads[k] * self.hOutputs[j]
                self.hoWeights[j][k] += delta
            biasDelta = learnRate * oGrads[k] * 1.0
            self.oBiases[k] += biasDelta
    def NextDouble(self):
        # uniform in [0, 1); InitializeWeights maps this range onto [lo, hi)
        return np.random.uniform()

    def NextInt(self, lower=0, higher=100):
        return np.random.randint(low=lower, high=higher)

    def NextUniform(self):
        return np.random.uniform()
    def Train(self, trainData, testData, maxEpochs, learnRate, withDropout):
        # training runs in segments: epochs up to maxEpochs[0] use
        # learnRate[0], then up to maxEpochs[1] use learnRate[1], and so on
        if withDropout:
            print("Dropout")
        epoch = 0
        totEpoch = 0
        sequence = list(range(len(trainData)))
        for indEpoch in range(len(maxEpochs)):
            lastCost = np.inf
            if indEpoch == 0:
                theCost = np.empty([maxEpochs[indEpoch], 1])
                theEpoch = np.empty([maxEpochs[indEpoch]])
            else:
                theCost = np.empty([maxEpochs[indEpoch] - maxEpochs[indEpoch - 1], 1])
                theEpoch = np.empty([maxEpochs[indEpoch] - maxEpochs[indEpoch - 1]])
            idxTimes = 0
            while epoch < maxEpochs[indEpoch]:
                self.Shuffle(sequence)  # visit the training data in random order
                if withDropout:
                    dropNodes = self.MakeDropNodes()  # one drop mask per epoch
                else:
                    dropNodes = []
                for i in range(len(trainData)):
                    idx = sequence[i]
                    xValues = trainData[idx][0:self.numInput]
                    tValues = trainData[idx][self.numInput:self.numInput + self.numOutput]
                    self.ComputeOutputs(xValues, dropNodes)
                    self.UpdateWeights(tValues, learnRate[indEpoch], dropNodes)
                # cost tracking: with dropout, evaluate with the hidden-output
                # weights temporarily halved (the inference-time rescaling)
                if withDropout:
                    hoWeightsCopy = [row[:] for row in self.hoWeights]
                    for j in range(self.numHidden):
                        for k in range(self.numOutput):
                            self.hoWeights[j][k] *= 0.5  # /= 2.0
                    y_hat, _ = self.Accuracy(testData)
                    cost = self.calcCost(testData, y_hat)
                    self.hoWeights = hoWeightsCopy  # restore the training weights
                else:
                    y_hat, _ = self.Accuracy(trainData)
                    cost = self.calcCost(trainData, y_hat)
                if (epoch + 1) % 50 == 0 or epoch + 1 == maxEpochs[indEpoch]:
                    print("Epoch:", epoch + 1, " Cost:", cost)
                if cost > lastCost:
                    # break  # early stopping disabled
                    pass
                else:
                    lastCost = cost
                theEpoch[idxTimes] = totEpoch
                theCost[idxTimes, 0] = cost
                idxTimes += 1
                epoch += 1
                totEpoch += 1
            # end while: plot the cost curve for this learning-rate segment
            fig = plt.figure(num=None, figsize=(6, 5), dpi=80, facecolor='w', edgecolor='k')
            ax = plt.subplot(111)
            ax.plot(theEpoch, theCost[:, 0], linewidth=2, label='cost')
            ax.set_ylabel('Error')
            ax.set_xlabel('Epoch (lr=' + str(learnRate[indEpoch]) + ')')
            ax.legend(loc='upper center', fancybox=True, shadow=True)
            strTitle = 'Neural Network ' + str(self.numInput) + 'x' + str(self.numHidden) + 'x' + str(self.numOutput)
            if withDropout:
                strTitle += ' (With Dropout)'
            ax.set_title(strTitle)
            plt.show()
        # halve the hidden-output weights to account for dropout at test time
        if withDropout:
            for j in range(self.numHidden):
                for k in range(self.numOutput):
                    self.hoWeights[j][k] *= 0.5  # /= 2.0
    def calcCost(self, trainData, y_hat):
        cost = 0
        for i in range(len(trainData)):
            for j in range(self.numOutput):
                cost += 0.5 * (trainData[i][self.numInput + j] - y_hat[i][j]) ** 2  # 0.5 makes the derivative nicer
        return cost
    def Shuffle(self, sequence):
        # in-place Fisher-Yates shuffle
        for i in range(len(sequence)):
            r = self.NextInt(i, len(sequence))
            sequence[i], sequence[r] = sequence[r], sequence[i]
    def MakeDropNodes(self):
        # drop each hidden node with probability 0.5, but never drop
        # none of them or all of them
        resultList = []
        for i in range(self.numHidden):
            if self.NextUniform() > 0.50:
                resultList.append(i)
        if len(resultList) == 0:
            resultList.append(self.NextInt(0, self.numHidden))
        elif len(resultList) == self.numHidden:
            del resultList[self.NextInt(0, self.numHidden)]
        return resultList
    def IsDropNode(self, node, dropNodes):
        return node in dropNodes
    def HyperTanFunction(self, x):
        if x < -20.0:
            return -1.0  # approximation is correct to 30 decimals
        elif x > 20.0:
            return 1.0
        else:
            return np.tanh(x)
    def ComputeOutputs(self, xValues, dropNodes=[]):
        hSums = [0.0 for i in range(self.numHidden)]
        oSums = [0.0 for i in range(self.numOutput)]
        self.inputs = xValues[:]
        for j in range(self.numHidden):  # each hidden node
            if self.IsDropNode(j, dropNodes):
                continue
            for i in range(self.numInput):
                hSums[j] += self.inputs[i] * self.ihWeights[i][j]
            hSums[j] += self.hBiases[j]  # add bias
            self.hOutputs[j] = self.HyperTanFunction(hSums[j])  # apply activation
        for k in range(self.numOutput):  # each output node
            for j in range(self.numHidden):
                if self.IsDropNode(j, dropNodes):
                    continue  # dropped nodes do not feed the output layer
                oSums[k] += self.hOutputs[j] * self.hoWeights[j][k]
            oSums[k] += self.oBiases[k]  # add bias
        self.outputs = oSums[:]  # identity (linear) output activation
        return self.outputs[:]
    def Accuracy(self, testData):
        numCorrect = 0
        numWrong = 0
        y_hat = []
        for i in range(len(testData)):
            xValues = testData[i][0:self.numInput]
            tValues = testData[i][self.numInput:self.numInput + self.numOutput]
            yValues = self.ComputeOutputs(xValues)
            y_hat.append(yValues)
            # exact float comparison: on this regression task the hit count is
            # essentially always zero, so callers use y_hat and the cost instead
            if yValues == tValues:
                numCorrect += 1
            else:
                numWrong += 1
        return (y_hat, (numCorrect * 1.0) / (numCorrect + numWrong))
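
# A minimal sanity check (a sketch added here, not part of the adapted source;
# the helper name _checkDropoutScaling is ours): because the output layer is
# linear, averaging ComputeOutputs over many random drop masks should
# approximate the output obtained by keeping every hidden node and halving the
# hidden-output weights -- exactly the rescaling that Train applies.
def _checkDropoutScaling(nn, xValues, numMasks=2000):
    avg = [0.0 for k in range(nn.numOutput)]
    for _ in range(numMasks):
        out = nn.ComputeOutputs(xValues, nn.MakeDropNodes())
        for k in range(nn.numOutput):
            avg[k] += out[k] / numMasks
    hoWeightsCopy = [row[:] for row in nn.hoWeights]
    for j in range(nn.numHidden):
        for k in range(nn.numOutput):
            nn.hoWeights[j][k] *= 0.5
    halved = nn.ComputeOutputs(xValues)
    nn.hoWeights = hoWeightsCopy  # restore the original weights
    print("mask-averaged outputs:", avg)
    print("halved-weight outputs:", halved)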
def getDataExercAula24(n, comRuido):
    # f(x,y) = y * exp[ sin(x)+u, sin(x)*cos(y)+v, sin(x)+cos(y)+w ]
    # u, v, w ~ N(0, 0.09)
    data = [[] for i in range(n)]
    for i in range(n):
        x = np.random.uniform(0, 2 * np.pi)
        y = np.random.uniform(0, 2 * np.pi)
        if comRuido:
            u = np.random.normal(0, 0.09)
            v = np.random.normal(0, 0.09)
            w = np.random.normal(0, 0.09)
        else:
            u, v, w = 0, 0, 0
        data[i] = [x, y, y * np.exp(np.sin(x) + u), y * np.exp(np.sin(x) * np.cos(y) + v), y * np.exp(np.sin(x) + np.cos(y) + w)]
    return data
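
# Quick sanity check of the generator (a sketch, not in the original answer;
# the helper name _checkData is ours): each row holds [x, y, f1, f2, f3] with
# x, y drawn from [0, 2*pi) and, with comRuido=False, f1 = y*exp(sin(x)).
def _checkData(n=5):
    for row in getDataExercAula24(n, comRuido=False):
        assert len(row) == 5
        x, y = row[0], row[1]
        assert 0 <= x <= 2 * np.pi and 0 <= y <= 2 * np.pi
        assert abs(row[2] - y * np.exp(np.sin(x))) < 1e-12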
def graficoSaida(numSaida, rn, trainData, testData):
    fig1 = plt.figure(num=None, figsize=(12, 4), dpi=80, facecolor='w', edgecolor='k')
    # training data: targets in blue, network predictions in red
    y_hat, trainAcc = rn.Accuracy(trainData)
    ax1 = plt.subplot(121)
    ax1.axis([-0.5, 7, -0.5, 16])
    for i in range(len(trainData)):
        ax1.plot(trainData[i][numSaida], trainData[i][numSaida + 2], 'bo', markersize=5, markeredgewidth=0)
        ax1.plot(trainData[i][numSaida], y_hat[i][numSaida], 'ro', markersize=4, markeredgewidth=0)
    ax1.set_xlabel('x')
    if numSaida == 0:
        ax1.set_ylabel('y * exp[sin(x)]')
    elif numSaida == 1:
        ax1.set_ylabel('y * exp[sin(x)*cos(y)]')
    elif numSaida == 2:
        ax1.set_ylabel('y * exp[sin(x)+cos(y)]')
        ax1.axis([-0.5, 10, -0.5, 22])
    ax1.set_title('Training - Output ' + str(numSaida + 1))
    # validation data
    y_hat, testAcc = rn.Accuracy(testData)
    ax2 = plt.subplot(122)
    ax2.axis([-0.5, 7, -0.5, 16])
    for i in range(len(testData)):
        ax2.plot(testData[i][numSaida], testData[i][numSaida + 2], 'bo', markersize=5, markeredgewidth=0)
        ax2.plot(testData[i][numSaida], y_hat[i][numSaida], 'ro', markersize=4, markeredgewidth=0)
    ax2.set_title('Validation')
    if numSaida == 2:
        ax2.axis([-0.5, 10, -0.5, 22])
def redeNeuralExercAula24(trainData, testData):
    numInput, numHidden, numOutput = 2, 35, 3
    nn = NeuralNetwork(numInput, numHidden, numOutput)
    maxEpochs = [3000, 10000, 20000]
    learnRate = [0.005, 0.0005, 0.00005]
    nn.Train(trainData, testData, maxEpochs, learnRate, withDropout=True)
    print("Neural Network", numInput, "x", numHidden, "x", numOutput)
    print("Setting maxEpochs =", maxEpochs, " learnRate =", str(learnRate))
    return nn
if __name__ == "__main__":
    trainData = getDataExercAula24(80, comRuido=True)
    testData = getDataExercAula24(20, comRuido=False)
    rn = redeNeuralExercAula24(trainData, testData)
    graficoSaida(0, rn, trainData, testData)
    graficoSaida(1, rn, trainData, testData)
    graficoSaida(2, rn, trainData, testData)
    plt.show()  # display the three output figures
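
    # Optional comparison (a sketch, not in the original answer; nnNoDrop is a
    # hypothetical name): training a second network on the same data with
    # withDropout=False makes the effect of the regularization visible in the
    # cost curves and the output plots.
    # nnNoDrop = NeuralNetwork(2, 35, 3)
    # nnNoDrop.Train(trainData, testData, [3000, 10000, 20000],
    #                [0.005, 0.0005, 0.00005], withDropout=False)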