How to fix exit code while using Sparse_dot_top_n?

0

I'm trying to do matching between 2 datasets. For that, I use the sparse_dot_top_n function from ing (https://github.com/ing-bank/sparse_dot_topn). When I try using it I get an exit code. Do you guys have any idea why ?

from scipy.sparse import csr_matrix
import sparse_dot_topn.sparse_dot_topn as ct
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

a = {'id': [1, 2, 3], 'exp': ['aa ble', 'bb qertt', 'cc qpoer']}
b = {'id': [10, 11, 12, 13], 'exp': ['aa nmr qemr', 'bcb gerrr', 'ccc qrtggd', 'acb glr']}

df1 = pd.DataFrame(data=a)
df2 = pd.DataFrame(data=b) 

vectorizer = tfidfVectorizer()
vec1 = vectorizer.fit_transform(df1.exp)
vec2 = vectorizer.fit_transform(df2.exp)

def awesome_cossim_top(A, B, ntop, lower_bound=0):
    # force A and B as a CSR matrix.
    # If they have already been CSR, there is no overhead
    A = A.tocsr()
    B = B.tocsr()
    M, _ = A.shape
    _, N = B.shape

    idx_dtype = np.int32

    nnz_max = M * ntop

    indptr = np.zeros(M + 1, dtype=idx_dtype)
    indices = np.zeros(nnz_max, dtype=idx_dtype)
    data = np.zeros(nnz_max, dtype=A.dtype)

    ct.sparse_dot_topn(
        M, N, np.asarray(A.indptr, dtype=idx_dtype),
        np.asarray(A.indices, dtype=idx_dtype),
        A.data,
        np.asarray(B.indptr, dtype=idx_dtype),
        np.asarray(B.indices, dtype=idx_dtype),
        B.data,
        ntop,
        lower_bound,
        indptr, indices, data)

    return csr_matrix((data, indices, indptr), shape=(M, N))

matches = awesome_cossim_top(vec1, vec2, 2, 0.4)

I get this Process finished with exit code -1073741819 (0xC0000005)

python
pandas
numpy
scipy
sparse-matrix
asked on Stack Overflow Jun 3, 2019 by Othmane Hassani • edited Jun 4, 2019 by hpaulj

0 Answers

Nobody has answered this question yet.


User contributions licensed under CC BY-SA 3.0