I am implementing stochastic gradient descent for a multiclass classifier with an L2 penalty; below are my one-hot helper and the update step. I think there is plenty of room for improvement.
import numpy as np


def array2onehot(X_shape, array, start=1):
    """
    Convert a column of labels to a matrix with one one-hot row per label.
    Note that the labels default to starting at 1 rather than 0.
    """
    onehot = np.zeros(X_shape)
    # shift the labels to 0-based column indices without mutating the caller's array
    onehot[np.arange(X_shape[0]), array.astype(int) - start] = 1
    return onehot
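For concreteness, a quick check of the helper on made-up labels (the shapes and values here are illustrative only):

labels = np.array([1, 3, 2])
print(array2onehot((3, 3), labels))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]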
def stochastic_gradient_descent(self, batch, Y):
    """
    The batch contains both the X's and the targets.
    Y is the output produced by the forward pass using the X's in the batch.
    """
    # prepend a column of ones so the bias is learned with the other coefficients
    ones = np.ones(batch.shape[0])
    X = np.column_stack((ones, batch[:, :-1]))
    # the one-hot targets need one column per class, not one per feature
    T = array2onehot((X.shape[0], self._class_size), batch[:, -1])
    m = self._batch_size                   # size of the mini-batch
    count = np.zeros(self._class_size)     # per-class sample counts; _class_size is the number of classes
    delta = np.zeros(self._coefficients.shape)
    for i in range(m):
        k = int(batch[i, -1]) - 1          # 0-based class of sample i
        count[k] += 1
        # accumulate this sample's contribution to its class's gradient
        delta[:, k] += X[i] * np.dot(T[i], Y[i])
    # copy before zeroing columns so the model itself is not clobbered
    tmp = self._coefficients.copy()
    for k, c in enumerate(count):
        if c == 0:
            tmp[:, k] = 0                  # no penalty for classes absent from this batch
    # average per class; nan_to_num turns the 0/0 of empty classes into 0
    delta = np.nan_to_num(delta / count) - 2 * self._penalty_rate * tmp
    self._coefficients -= delta
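For context, a minimal sketch of how the method is wired up, assuming a softmax forward pass; the `Model` class and everything in it are illustrative except the attribute names the method reads (`_batch_size`, `_class_size`, `_coefficients`, `_penalty_rate`):

class Model:
    def __init__(self, n_features, n_classes, batch_size, penalty_rate):
        self._class_size = n_classes
        self._batch_size = batch_size
        self._penalty_rate = penalty_rate
        # one row per feature plus the bias, one column per class
        self._coefficients = np.zeros((n_features + 1, n_classes))

    def forward(self, X):
        # row-wise softmax of the linear scores
        scores = X @ self._coefficients
        scores -= scores.max(axis=1, keepdims=True)  # numerical stability
        exp_scores = np.exp(scores)
        return exp_scores / exp_scores.sum(axis=1, keepdims=True)

    stochastic_gradient_descent = stochastic_gradient_descent  # the function above

rng = np.random.default_rng(0)
n, d, c = 8, 4, 3
model = Model(d, c, batch_size=n, penalty_rate=0.01)
batch = np.column_stack((rng.normal(size=(n, d)),          # d features
                         rng.integers(1, c + 1, size=n)))  # labels in {1, 2, 3}
X = np.column_stack((np.ones(n), batch[:, :-1]))
model.stochastic_gradient_descent(batch, model.forward(X))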