I have an optimization issue and I'm not sure if I can improve the overall speed of my function.
The function draw_w
is my actual implementation that gives me the right update for my vector w
. You can see that the code is running for 1000 iterations in 9s.
I had a first implementation (draw_w_fw
) that was quiet good because, well-vectorized, that is an order of magnitude quicker by 0.8s. Unfortunately, this implementation is wrong because the cache[0]
is not updated correctly.
I would like to know if someone knows a way to speed up draw_w
from what I've wrote.
import scipy.sparse as sps
import numpy as np
import timeit
def draw_w(X, w, cache):
X = X.tocsr()
for i in xrange(w.shape[0]):
x_li = X.getrow(i)
Y = w[i] * x_li
Y.data -= np.take(cache[0], Y.indices)
h = x_li.multiply(-Y)
w_mean = h.sum()
w_sigma_sqr = (x_li.multiply(x_li)).sum()
w_sigma_sqr = 1.0 / w_sigma_sqr
w_mean = - w_sigma_sqr * w_mean
# update w:
w_old = np.copy(w[i])
if np.isinf(w[i]):
w[i] = 0
elif np.isnan(w[i]):
w[i] = 0
else:
w[i] = w_mean
# update error:
cache[0] -= (w_old - w[i]) * x_li
#print 'draw_w', w
#True result w [ 2.34125626 2.37726446 4.00792293 3.71059779 4.00792293 0.11100713
# -0.28899287 -0.04393113 0.21429929]
####################
def draw_w_fw(X, w, cache):
x_li = X.tocsr()
nnz_per_row = np.diff(x_li.indptr)
Y = sps.csr_matrix((x_li.data * np.repeat(w, nnz_per_row), x_li.indices, x_li.indptr), shape=x_li.shape)
Y.data -= np.take(cache[0], Y.indices) #not good because cache[0] is updated...
h = x_li.multiply(-Y)
w_mean = np.asarray(h.sum(axis=1).transpose())[0]
w_sigma_sqr = np.asarray(( x_li.multiply(x_li) ).sum(axis=1).transpose())[0]
w_sigma_sqr = 1.0 / w_sigma_sqr
w_mean = - w_sigma_sqr * w_mean
# update w:
w_old = np.copy(w)
w[~np.isnan(w) & ~np.isinf(w)] = w_mean[~np.isnan(w) & ~np.isinf(w)]
w[np.isinf(w)] = 0.0
w[np.isnan(w)] = 0.0
# update error:
cache[0] -= (w_old - w) * x_li
#print 'draw_w_fw', w
##########################
def test():
data = [ 1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.]
cols = [ 0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14]
rows = [0,5,1,5,2,5,3,5,4,5,0,6,1,6,2,6,3,6,4,6,1,7,3,7,0,8,2,8,4,8]
X = sps.coo_matrix((data, (rows, cols)), shape = (9,15))
X = X.tocsr()
w = np.array([ 0.16243454, -0.06117564, -0.05281718, -0.10729686, 0.08654076, -0.23015387, 0.17448118, -0.07612069, 0.03190391])
cache = np.zeros((2, 15))
cache[0] = np.asarray([-5.06771933, -5.29132951, -4.28297104, -1.33745073, -2.14361311, -0.66308429,
-0.88669446, -2.878336, -4.93281569, -4.73897806, -1.13729633, -5.18341755,
-0.80566155, -5.02091327, -4.88155533])
#draw_w(X, w, cache)
draw_w_fw(X, w, cache)
if __name__ == '__main__':
print(timeit.timeit("test()", setup="from __main__ import test", number=1000))
#draw_w: 9.26 for 1000 iterations
#draw_w_fw: 0.80 for 1000 iterations