r"""
.. versionadded:: 0.3
In this module is stored everything related to Multi-layer perceptron (MLP).
This neural network can be used for classification and regression.
Minimal Working Example
************************
.. code-block:: python
import numpy as np
import padasip as pa
# data creation
x = np.array([
[0,0,0,0], [1,0,0,0], [0,1,0,0], [1,1,0,0],
[0,0,1,0], [1,0,1,0], [0,1,1,0], [1,1,1,0],
[0,0,0,1], [1,0,0,1], [0,1,0,1], [1,1,0,1],
[0,0,1,1], [1,0,1,1], [0,1,1,1], [1,1,1,1]
])
d = np.array([0,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0])
N = len(d)
n = 4
# creation of neural network
nn = pa.ann.NetworkMLP([5,6], n, outputs=1, activation="tanh", mu="auto")
# training
e, mse = nn.train(x, d, epochs=200, shuffle=True)
# get results
y = nn.run(x)
And the result (pairs: target, output) can look like

>>> for i in zip(d, y): print(i)
...
(0, 0.0032477183193071906)
(1, 1.0058082383308447)
(1, 1.0047503447788306)
(0, 0.0046026142618665845)
(0, 0.0003037425037410007)
(1, 1.0017672193832869)
(0, 0.0015817734995124679)
(0, 0.0019115885715706904)
(1, 0.99342117275580499)
(0, 0.00069114178424850147)
(1, 1.0021789943501729)
(0, 0.0021355836851727717)
(1, 0.99809312951378826)
(1, 1.0071488717506856)
(1, 1.0067500768423701)
(0, -0.0045962250501771244)
>>>
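
The remaining error can also be checked with the `test` method, which returns
`d - y` for the provided data. A short sketch (the exact values depend on the
random initialization of the weights):

.. code-block:: python

    e = nn.test(x, d)
    mse = np.mean(e**2)
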
Learning Rate Selection
**************************

If you select the learning rate (:math:`\mu` in equations,
or `mu` in code) manually, the same value is used for all nodes.
Otherwise it is selected automatically :cite:`lecun2012efficient` as follows

.. math::

    \mu_{ij} = m^{-0.5}

where :math:`m` is the number of nodes on the input of the given node.
The automatic selection is the recommended and default option.
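
For example, a node fed by a layer of 4 nodes gets
:math:`\mu = 4^{-0.5} = 0.5`. A numeric sketch of the rule:

.. code-block:: python

    m = 4            # number of nodes on the input of the given node
    mu = m ** -0.5   # 0.5
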
Default Values of Weights
****************************

The distribution from which the initial weights are drawn is chosen
automatically :cite:`lecun2012efficient`. It has zero mean and
a standard deviation estimated as follows

.. math::

    \sigma_{w} = m^{-0.5}

where :math:`m` is the number of nodes on the input of the given node.
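
A short sketch of this draw for one node with `m` inputs (the extra weight
is the bias, as in the code below):

.. code-block:: python

    import numpy as np

    m = 4
    w = np.random.normal(0, m ** -0.5, m + 1)
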
References
***************

.. bibliography:: mlp.bib
    :style: plain

Code Explanation
******************
"""
import numpy as np


class Layer():
    """
    This class represents a single hidden layer of the MLP.

    Args:

    * `n_layer` : size of the layer (int)

    * `n_input` : number of layer inputs (int)

    * `activation_f` : which function should be used as the activation
      function (str)

    * `mu` : learning rate (float or str), it can be directly the float value,
      or the string `auto` for automatic selection of the learning rate
      :cite:`lecun2012efficient`
    """
    def __init__(self, n_layer, n_input, activation_f, mu):
        sigma = n_input**(-0.5)
        if mu == "auto":
            self.mu = sigma
        else:
            self.mu = mu
        self.n_input = n_input
        # one weight row per node; the extra column is the bias weight
        self.w = np.random.normal(0, sigma, (n_layer, n_input+1))
        self.x = np.ones(n_input+1)
        self.y = np.zeros(n_input+1)
        self.f = activation_f
    def activation(self, x, f="sigmoid", der=False):
        """
        This function processes values of layer outputs with the activation
        function.

        **Args:**

        * `x` : array to process (1-dimensional array)

        **Kwargs:**

        * `f` : activation function (str)

        * `der` : if True, return the derivative instead of the normal
          output (bool). Note that the derivative is expressed in terms of
          the already activated output `x`, e.g. :math:`y(1-y)` for the
          sigmoid.

        **Returns:**

        * values processed with the activation function (1-dimensional array)
        """
        if f == "sigmoid":
            if der:
                return x * (1 - x)
            return 1. / (1 + np.exp(-x))
        elif f == "tanh":
            if der:
                return 1 - x**2
            return (2. / (1 + np.exp(-2*x))) - 1
    def predict(self, x):
        """
        This function makes a forward pass through this layer (no update).

        **Args:**

        * `x` : input vector (1-dimensional array)

        **Returns:**

        * `y` : output of the layer (float or 1-dimensional array).
          Size depends on the number of nodes in this layer.
        """
        self.x[1:] = x
        self.y = self.activation(np.sum(self.w*self.x, axis=1), f=self.f)
        return self.y
    def update(self, w, e):
        """
        This function updates this layer according to the error
        backpropagated from the following layer and the last used
        input vector.

        **Args:**

        * `w` : weights of the following layer (1- or 2-dimensional array).

        * `e` : error of the following layer (float or 1-dimensional array).

        **Returns:**

        * `w` : weights of this layer before the update, without the bias
          column (2-dimensional array). Every row represents one node.

        * `e` : error used for the update of this layer
          (1-dimensional array).
        """
        if len(w.shape) == 1:
            e = self.activation(self.y, f=self.f, der=True) * e * w
        else:
            e = self.activation(self.y, f=self.f, der=True) * np.dot(e, w)
        dw = self.mu * np.outer(e, self.x)
        # copy the pre-update weights; a plain slice would be a view
        # and would already reflect the update below
        w = np.copy(self.w[:, 1:])
        self.w += dw
        return w, e
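

def _layer_forward_sketch():
    # Illustrative sketch only, not part of the public API: one hidden
    # layer with 3 nodes and 4 inputs, run on a single random sample.
    layer = Layer(3, 4, "tanh", "auto")
    return layer.predict(np.random.uniform(-1, 1, 4))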
class NetworkMLP():
    """
    This class represents a Multi-layer Perceptron neural network.

    **Args:**

    * `layers` : array describing the hidden layers of the network
      (1-dimensional array of integers). Every number in the array represents
      one hidden layer. For example [3, 6, 2] creates
      a network with three hidden layers. The first layer will have 3 nodes,
      the second layer will have 6 nodes and the last hidden layer
      will have 2 nodes.

    * `n_input` : number of network inputs (int).

    **Kwargs:**

    * `outputs` : number of network outputs (int). Default is 1.

    * `activation` : activation function (str)

      * "sigmoid" - sigmoid

      * "tanh" - hyperbolic tangent

    * `mu` : learning rate (float or str), it can be:

      * float value - the value is used directly as `mu`

      * "auto" - this will trigger automatic selection of the learning rate
        according to :cite:`lecun2012efficient`
    """
    def __init__(self, layers, n_input, outputs=1, activation="sigmoid", mu="auto"):
        sigma = layers[-1]**(-0.5)
        # set learning rate
        if mu == "auto":
            self.mu = sigma
        else:
            try:
                self.mu = float(mu)
            except (TypeError, ValueError):
                raise ValueError(
                    'Parameter mu is not float or similar'
                )
        self.n_input = n_input
        # create output layer
        self.outputs = outputs
        if self.outputs == 1:
            self.w = np.random.normal(0, sigma, layers[-1]+1)
        else:
            self.w = np.random.normal(0, sigma, (outputs, layers[-1]+1))
        self.x = np.ones(layers[-1]+1)
        self.y = 0
        # create hidden layers
        self.n_layers = len(layers)
        self.layers = []
        for n in range(self.n_layers):
            if n == 0:
                l = Layer(layers[n], n_input, activation, mu)
            else:
                l = Layer(layers[n], layers[n-1], activation, mu)
            self.layers.append(l)
    def train(self, x, d, epochs=10, shuffle=False):
        """
        Function for batch training of the MLP.

        **Args:**

        * `x` : input array (2-dimensional array).
          Every row represents one input vector (features).

        * `d` : target array (1- or 2-dimensional array).
          Every row represents the target for one input vector.
          The target can be one or more values (in case of multiple outputs).

        **Kwargs:**

        * `epochs` : number of epochs (int). That means how many times
          the MLP will iterate over the passed set of data (`x`, `d`).

        * `shuffle` : if True, the order of the input-target pairs is
          shuffled once before training (bool).

        **Returns:**

        * `e` : error vector (1- or 2-dimensional array). Every row
          represents the error (or errors) for one input vector in a given
          epoch. The length of this array is the length of the provided
          data times the number of epochs (`N*epochs`).

        * `MSE` : mean squared error (1-dimensional array). Every value
          stands for the MSE of one epoch.
        """
        # convert the data to numpy arrays, then check that the dimensions agree
        try:
            x = np.array(x)
            d = np.array(d)
        except:
            raise ValueError('Impossible to convert x or d to a numpy array')
        N = len(x)
        if not len(d) == N:
            raise ValueError('The length of vector d and matrix x must agree.')
        if not len(x[0]) == self.n_input:
            raise ValueError('The number of network inputs is not correct.')
        if self.outputs == 1:
            if not len(d.shape) == 1:
                raise ValueError('For a one-output MLP, d must have one dimension')
        else:
            if not d.shape[1] == self.outputs:
                raise ValueError('The number of outputs must agree with the number of columns in d')
        # create empty arrays
        if self.outputs == 1:
            e = np.zeros(epochs*N)
        else:
            e = np.zeros((epochs*N, self.outputs))
        MSE = np.zeros(epochs)
        # shuffle data if demanded
        if shuffle:
            randomize = np.arange(N)
            np.random.shuffle(randomize)
            x = x[randomize]
            d = d[randomize]
        # adaptation loop
        for epoch in range(epochs):
            for k in range(N):
                self.predict(x[k])
                e[(epoch*N)+k] = self.update(d[k])
            # the slice must cover the whole epoch, including its last sample
            MSE[epoch] = np.sum(e[epoch*N:(epoch+1)*N]**2) / N
        return e, MSE
    def run(self, x):
        """
        Function for batch usage of an already trained and tested MLP.

        **Args:**

        * `x` : input array (2-dimensional array).
          Every row represents one input vector (features).

        **Returns:**

        * `y` : output vector (1- or 2-dimensional array). Every row
          represents the output (or outputs) for one input vector.
        """
        # convert the data to a numpy array
        try:
            x = np.array(x)
        except:
            raise ValueError('Impossible to convert x to a numpy array')
        N = len(x)
        # create empty arrays
        if self.outputs == 1:
            y = np.zeros(N)
        else:
            y = np.zeros((N, self.outputs))
        # predict data in loop
        for k in range(N):
            y[k] = self.predict(x[k])
        return y
    def test(self, x, d):
        """
        Function for batch testing of an already trained MLP.

        **Args:**

        * `x` : input array (2-dimensional array).
          Every row represents one input vector (features).

        * `d` : target array (1- or 2-dimensional array).
          Every row represents the target for one input vector.
          The target can be one or more values (in case of multiple outputs).

        **Returns:**

        * `e` : error vector (1- or 2-dimensional array). Every row
          represents the error (or errors) for one input-target pair.
        """
        # convert the data to numpy arrays, then check that the dimensions agree
        try:
            x = np.array(x)
            d = np.array(d)
        except:
            raise ValueError('Impossible to convert x or d to a numpy array')
        N = len(x)
        if not len(d) == N:
            raise ValueError('The length of vector d and matrix x must agree.')
        if not len(x[0]) == self.n_input:
            raise ValueError('The number of network inputs is not correct.')
        if self.outputs == 1:
            if not len(d.shape) == 1:
                raise ValueError('For a one-output MLP, d must have one dimension')
        else:
            if not d.shape[1] == self.outputs:
                raise ValueError('The number of outputs must agree with the number of columns in d')
        # create empty arrays
        if self.outputs == 1:
            y = np.zeros(N)
        else:
            y = np.zeros((N, self.outputs))
        # measure in loop
        for k in range(N):
            y[k] = self.predict(x[k])
        return d - y
    def predict(self, x):
        """
        This function makes a forward pass through the MLP (no update).

        **Args:**

        * `x` : input vector (1-dimensional array)

        **Returns:**

        * `y` : output of the MLP (float or 1-dimensional array).
          Size depends on the number of MLP outputs.
        """
        # forward pass through the hidden layers
        for l in self.layers:
            x = l.predict(x)
        self.x[1:] = x
        # forward pass through the output layer
        if self.outputs == 1:
            self.y = np.dot(self.w, self.x)
        else:
            self.y = np.sum(self.w*self.x, axis=1)
        return self.y
    def update(self, d):
        """
        This function makes an update according to the provided target
        and the last used input vector.

        **Args:**

        * `d` : target (float or 1-dimensional array).
          Size depends on the number of MLP outputs.

        **Returns:**

        * `e` : error used for the update (float or 1-dimensional array).
          Size corresponds to the size of the input `d`.
        """
        # update the output layer (it is linear, so no activation derivative)
        e = d - self.y
        error = np.copy(e)
        if self.outputs == 1:
            dw = self.mu * e * self.x
            w = np.copy(self.w)[1:]
        else:
            dw = self.mu * np.outer(e, self.x)
            w = np.copy(self.w)[:,1:]
        self.w += dw
        # backpropagate the error through the hidden layers
        for l in reversed(self.layers):
            w, e = l.update(w, e)
        return error
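

if __name__ == "__main__":
    # Minimal smoke test, kept as an illustrative sketch only: learn the
    # XOR of the first two input bits. The exact MSE values depend on the
    # random initialization of the weights.
    x = np.random.randint(0, 2, (100, 4))
    d = np.logical_xor(x[:, 0], x[:, 1]).astype(float)
    nn = NetworkMLP([5, 6], 4, outputs=1, activation="tanh", mu="auto")
    e, mse = nn.train(x, d, epochs=200, shuffle=True)
    print("MSE of the last epoch:", mse[-1])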