Revisions to Fastest way to iterate over Numpy array

Correction

Source Link

edited Jan 28, 2014 at 8:54

445
1
3
12

@cython.profile(False)
@cython.boundscheck(False)
@cython.nonecheck(False)
cdef unsigned int countlower(double* v1, double* v2, int n1, int n2):
    ''' Function output corresponds to np.bincount(v1 < v2)[1]'''
    ''' The upper is not correct. It rather corresponds to
    sum([np.bincount(v1[i] < v2)[1] for i in range(len(v1))])'''
    cdef unsigned int trues = 0

    cdef Py_ssize_t i, j
    with nogil, parallel():
        for i in prange(n1):
            for j in prange(n2):
                if v1[i] < v2[j]:
                    trues += 1
    return trues

@cython.profile(False)
@cython.boundscheck(False)
@cython.nonecheck(False)
cdef unsigned int countlower(double* v1, double* v2, int n1, int n2):
    ''' Function output corresponds to np.bincount(v1 < v2)[1]'''
    cdef unsigned int trues = 0

    cdef Py_ssize_t i, j
    with nogil, parallel():
        for i in prange(n1):
            for j in prange(n2):
                if v1[i] < v2[j]:
                    trues += 1
    return trues

@cython.profile(False)
@cython.boundscheck(False)
@cython.nonecheck(False)
cdef unsigned int countlower(double* v1, double* v2, int n1, int n2):
    ''' Function output corresponds to np.bincount(v1 < v2)[1]'''
    ''' The upper is not correct. It rather corresponds to
    sum([np.bincount(v1[i] < v2)[1] for i in range(len(v1))])'''
    cdef unsigned int trues = 0

    cdef Py_ssize_t i, j
    with nogil, parallel():
        for i in prange(n1):
            for j in prange(n2):
                if v1[i] < v2[j]:
                    trues += 1
    return trues

Added new version of function

Source Link

edited Jan 12, 2014 at 11:34

embert

445
1
3
12

Edit1: Passing just the pointers, instead of the arrays to the time-critical function (>99% of time is spent there) made a ~ 10% speed-up. I guess some things just cannot be made faster

@cython.profile(False)
@cython.boundscheck(False)
@cython.nonecheck(False)
cdef unsigned int countlower(double* v1, double* v2, int n1, int n2):
    ''' Function output corresponds to np.bincount(v1 < v2)[1]'''
    cdef unsigned int trues = 0

    cdef Py_ssize_t i, j
    with nogil, parallel():
        for i in prange(n1):
            for j in prange(n2):
                if v1[i] < v2[j]:
                    trues += 1
    return trues

Edit1: Passing just the pointers, instead of the arrays to the time-critical function (>99% of time is spent there) made a ~ 10% speed-up. I guess some things just cannot be made faster

@cython.profile(False)
@cython.boundscheck(False)
@cython.nonecheck(False)
cdef unsigned int countlower(double* v1, double* v2, int n1, int n2):
    ''' Function output corresponds to np.bincount(v1 < v2)[1]'''
    cdef unsigned int trues = 0

    cdef Py_ssize_t i, j
    with nogil, parallel():
        for i in prange(n1):
            for j in prange(n2):
                if v1[i] < v2[j]:
                    trues += 1
    return trues

added 94 characters in body

Source Link

edited Jan 7, 2014 at 18:46

embert

445
1
3
12

# cython: profile=True
import cython

import numpy as np
cimport numpy as np

from scipy.spatial.distance import squareform

DTYPE = np.float
DTYPEint = np.int

ctypedef np.float_t DTYPE_t
ctypedef np.int_t DTYPEint_t

@cython.profile(False)
cdef unsigned int countlower(np.ndarray[DTYPE_t, ndim=1] vec1,
                             np.ndarray[DTYPE_t, ndim=1] vec2,
                             int n1, int n2):
    # Function output corresponds to np.bincount(v1 < v2)[1]

    assert vec1.dtype == DTYPE and vec2.dtype == DTYPE

    cdef unsigned int i, j
    cdef unsigned int trues = 0
    cdef unsigned int* pointer1 = <unsigned int*> vec1.data
    cdef unsigned int* pointer2 = <unsigned int*> vec2.data

    for i in range(n1):
        for j in range(n2):
            if pointer1[i] < pointer2[j]:
                trues += 1

    return trues


def gamma(np.ndarray[DTYPE_t, ndim=2] Y, np.ndarray[DTYPEint_t, ndim=1] part):
    assert Y.dtype == DTYPE and part.dtype == DTYPEint

    if len(Y) != len(part):
        raise ValueError('Distance matrix and partition must have same shape')

    # defined locals
    cdef unsigned int K, c_label, n_, trues
    cdef unsigned int s_plus = 0
    cdef unsigned int s_minus = 0

    # assigned locals
    cdef np.ndarray n_in_ci = np.bincount(part)
    cdef int num_clust = len(n_in_ci) - 1
    cdef np.ndarray s = np.zeros(len(Y), dtype=DTYPE)

    # Partition should have at least two clusters
    K = len(set(part))
    if K < 2:
        return 0
    # Loop through clusters
    for c_label in range(1, K+1):
        dist_within = squareform(Y[part == c_label][:, part == c_label])
        dist_between = np.ravel(Y[part == c_label][:, part != c_label])
        n1 = len(dist_within)
        n2 = len(dist_between)

        trues = countlower(dist_within, dist_between, n1, n2)
        s_plus += trues
        s_minus += n1 * n2 - trues

    n_ =  s_plus + s_minus

    return (<double>s_plus - <double>s_minus) / <double>n_ if n_ != 0 else 0

# cython: profile=True
import cython

import numpy as np
cimport numpy as np

from scipy.spatial.distance import squareform

DTYPE = np.float
DTYPEint = np.int

ctypedef np.float_t DTYPE_t
ctypedef np.int_t DTYPEint_t

@cython.profile(False)
cdef unsigned int countlower(np.ndarray[DTYPE_t, ndim=1] vec1,
                 np.ndarray[DTYPE_t, ndim=1] vec2,
                 int n1, int n2):
    assert vec1.dtype == DTYPE and vec2.dtype == DTYPE

    cdef unsigned int i, j
    cdef unsigned int trues = 0
    cdef unsigned int* pointer1 = <unsigned int*> vec1.data
    cdef unsigned int* pointer2 = <unsigned int*> vec2.data

    for i in range(n1):
        for j in range(n2):
            if pointer1[i] < pointer2[j]:
                trues += 1

    return trues


def gamma(np.ndarray[DTYPE_t, ndim=2] Y, np.ndarray[DTYPEint_t, ndim=1] part):
    assert Y.dtype == DTYPE and part.dtype == DTYPEint

    if len(Y) != len(part):
        raise ValueError('Distance matrix and partition must have same shape')

    # defined locals
    cdef unsigned int K, c_label, n_, trues
    cdef unsigned int s_plus = 0
    cdef unsigned int s_minus = 0

    # assigned locals
    cdef np.ndarray n_in_ci = np.bincount(part)
    cdef int num_clust = len(n_in_ci) - 1
    cdef np.ndarray s = np.zeros(len(Y), dtype=DTYPE)

    # Partition should have at least two clusters
    K = len(set(part))
    if K < 2:
        return 0
    # Loop through clusters
    for c_label in range(1, K+1):
        dist_within = squareform(Y[part == c_label][:, part == c_label])
        dist_between = np.ravel(Y[part == c_label][:, part != c_label])
        n1 = len(dist_within)
        n2 = len(dist_between)

        trues = countlower(dist_within, dist_between, n1, n2)
        s_plus += trues
        s_minus += n1 * n2 - trues

    n_ =  s_plus + s_minus

    return (<double>s_plus - <double>s_minus) / <double>n_ if n_ != 0 else 0

# cython: profile=True
import cython

import numpy as np
cimport numpy as np

from scipy.spatial.distance import squareform

DTYPE = np.float
DTYPEint = np.int

ctypedef np.float_t DTYPE_t
ctypedef np.int_t DTYPEint_t

@cython.profile(False)
cdef unsigned int countlower(np.ndarray[DTYPE_t, ndim=1] vec1,
                             np.ndarray[DTYPE_t, ndim=1] vec2,
                             int n1, int n2):
    # Function output corresponds to np.bincount(v1 < v2)[1]

    assert vec1.dtype == DTYPE and vec2.dtype == DTYPE

    cdef unsigned int i, j
    cdef unsigned int trues = 0
    cdef unsigned int* pointer1 = <unsigned int*> vec1.data
    cdef unsigned int* pointer2 = <unsigned int*> vec2.data

    for i in range(n1):
        for j in range(n2):
            if pointer1[i] < pointer2[j]:
                trues += 1

    return trues


def gamma(np.ndarray[DTYPE_t, ndim=2] Y, np.ndarray[DTYPEint_t, ndim=1] part):
    assert Y.dtype == DTYPE and part.dtype == DTYPEint

    if len(Y) != len(part):
        raise ValueError('Distance matrix and partition must have same shape')

    # defined locals
    cdef unsigned int K, c_label, n_, trues
    cdef unsigned int s_plus = 0
    cdef unsigned int s_minus = 0

    # assigned locals
    cdef np.ndarray n_in_ci = np.bincount(part)
    cdef int num_clust = len(n_in_ci) - 1
    cdef np.ndarray s = np.zeros(len(Y), dtype=DTYPE)

    # Partition should have at least two clusters
    K = len(set(part))
    if K < 2:
        return 0
    # Loop through clusters
    for c_label in range(1, K+1):
        dist_within = squareform(Y[part == c_label][:, part == c_label])
        dist_between = np.ravel(Y[part == c_label][:, part != c_label])
        n1 = len(dist_within)
        n2 = len(dist_between)

        trues = countlower(dist_within, dist_between, n1, n2)
        s_plus += trues
        s_minus += n1 * n2 - trues

    n_ =  s_plus + s_minus

    return (<double>s_plus - <double>s_minus) / <double>n_ if n_ != 0 else 0

Tweeted twitter.com/#!/StackCodeReview/status/419645872792956928

occurred Jan 5, 2014 at 1:45

deleted 36 characters in body

Source Link

edited Jan 4, 2014 at 18:38

embert

445
1
3
12

Loading

edited tags; edited title

Link

edited Jan 4, 2014 at 18:34

Jamal

35.2k
13
134
238

Loading

added 3 characters in body

Source Link

edited Jan 4, 2014 at 18:12

embert

445
1
3
12

Loading

edited tags

Source Link

edited Jan 4, 2014 at 17:41

embert

445
1
3
12

Loading

Source Link

asked Jan 4, 2014 at 17:33

embert

445
1
3
12

Loading

Stack Exchange Network

Return to Question