Unhandled exception at 0x00007FFB8367A388 : Microsoft C++ exception: thrust::system::system_error at memory location 0x000000B54DB9EBF0

-2

I have a program with some parts containing C functions and some parts containing kernel that I have used in my C functions ampersands(&) because we want new values to replace the old values in the number of duplicates in the loops. I did not directly use the ampersands(&) in the kernel, but the values that placed to argument C functions using this ampersand (&) copied to the kernel variable also include this link which is related to the program problem

Thrust exception: "thrust::system::system_error at memory location 0x00000000"

but I don't know how to replace new values with old ones without getting this error. could anyone help me?

 void encoder(const vector<double>&x_train, int row_Xtrain, int col_Xtrain,const vector< double> &a_f, int row_af, int col_af, const vector<double>&b_f, vector< double>& H_F, std::string G){
double *x_a = (double*)malloc(row_Xtrain * col_af * sizeof(double));
double *x_a_b = (double*)malloc(row_Xtrain * col_af* sizeof(double));
//
double *d_x_train, *d_a_f, *d_b_f, *d_x_a, *d_x_a_b , *d_H_F;
cudaMalloc((void **)&d_x_train, sizeof(double)* x_train.size());
cudaMalloc((void **)&d_a_f, sizeof(double)* a_f.size());
cudaMalloc((void **)&d_b_f, sizeof(double)*NERUN);
cudaMalloc((void **)&d_x_a, sizeof(double)* row_Xtrain * col_af);
cudaMalloc((void **)&d_x_a_b, sizeof(double)*row_Xtrain * col_af );
cudaMalloc((void **)&d_H_F, sizeof(double)*row_Xtrain * col_af);

// transfer data from host to device
cudaMemcpy(d_x_train, x_train.data(), sizeof(double)* x_train.size(), cudaMemcpyHostToDevice);
cudaMemcpy(d_a_f, a_f.data(), sizeof(double)*  a_f.size(), cudaMemcpyHostToDevice);
cudaMemcpy(d_b_f, b_f.data(), sizeof(double)* NERUN, cudaMemcpyHostToDevice);
//////////////////////////////////////////------------call funcation-----------//////////////////////////////////////////
multi(d_x_train, d_a_f, d_x_a, row_Xtrain, col_Xtrain, row_af, col_af, row_Xtrain, col_af);
//test
cudaMemcpy(x_a, d_x_a, sizeof(double)*NERUN*NERUN, cudaMemcpyDeviceToHost);
SUM(d_x_a, d_b_f, d_x_a_b, row_Xtrain * col_af, row_Xtrain, col_af);
//test
cudaMemcpy(x_a_b, d_x_a_b, sizeof(double)*row_Xtrain * col_af, cudaMemcpyDeviceToHost);
if (G == "sigmoid"){
//sigmoid
Sigmoid(d_x_a_b, d_H_F, row_Xtrain * col_af);
}
if (G == "sinus"){
//sin
sinus(d_x_a_b, d_H_F, row_Xtrain * col_af);
}
/////////////////////////////////////////-----------------end---------------------///////////////////////////////////////
// device 
cudaMemcpy(H_F.data(), d_H_F, sizeof(double)* row_Xtrain * col_af, cudaMemcpyDeviceToHost);
//
cudaFree(d_x_train);
cudaFree(d_a_f);
cudaFree(d_b_f);
cudaFree(d_x_a);
cudaFree(d_x_a_b);
cudaFree(d_H_F);
}

void decoder(const vector<double>&x_train, int row_Xtrain, int col_Xtrain,const  vector<double>&a_f, int row_af, int col_af, const vector<double> &b_f, vector<double> &H_F, vector<double>&HF_sudoinverse, vector<double>&a_n, int row_an, int col_an, vector<double>&b_n, std::string G, int N, int nerun){
int C;
double Max, Min;
//
double h_answer = 0, *d_mean;
cudaMalloc((void**)&d_mean, sizeof(double));
cudaMemcpy(d_mean, &h_answer, sizeof(double), cudaMemcpyHostToDevice);
printf("h_answer: %f \n", h_answer);

//host
double *H_f_T = (double*)malloc(NERUN* row_Data_train * sizeof(double));
double *H_H_T1 = (double*)malloc(NERUN* NERUN * sizeof(double));
double *I = (double*)malloc(NERUN* NERUN * sizeof(double));
double *H_H_T_I = (double*)malloc(NERUN* NERUN * sizeof(double));
//double *HF_sudoinverse = (double*)malloc(NERUN * row_Data_train * sizeof(double));
//vector<double>HF_sudoinverse(NERUN * row_Data_train);
double *x_train_norm = (double*)malloc(row_Xtrain * col_Xtrain * sizeof(double));
double *x_train_n_L = (double*)malloc(row_Xtrain * col_Xtrain* sizeof(double));
//double *a_n = (double*)malloc(NERUN* col_Xtrain * sizeof(double));
double *H_a_n = (double*)malloc(row_Data_train * col_Xtrain * sizeof(double));
double* inv = (double*)malloc(NERUN * NERUN * sizeof(double));

//device
double *d_x_train ,*d_x_train_norm, *d_x_train_n_a, *d_x_train_n_L, *d_H_F, *d_H_f_T,
*d_H_H_T1, *d_H_H_T2, *d_I, *d_H_H_T_I, *d_inv, *d_HF_sudoinverse , *d_a_n , *d_H_a_n;
cudaMalloc((void **)&d_x_train, sizeof(double)* x_train.size());
cudaMalloc((void **)&d_x_train_norm, sizeof(double)* row_Xtrain* col_Xtrain);
cudaMalloc((void **)&d_x_train_n_a, sizeof(double)* row_Xtrain* col_Xtrain);
cudaMalloc((void **)&d_x_train_n_L, sizeof(double)* row_Xtrain* col_Xtrain);
cudaMalloc((void **)&d_H_F, sizeof(double)* H_F.size());
cudaMalloc((void **)&d_H_f_T, sizeof(double)* H_F.size());
cudaMalloc((void **)&d_H_H_T1, sizeof(double)* NERUN* NERUN);
cudaMalloc((void **)&d_H_H_T2, sizeof(double)* row_Data_train * row_Data_train );
cudaMalloc((void **)&d_I, sizeof(double) * NERUN * NERUN);
cudaMalloc((void **)&d_H_H_T_I, sizeof(double) * NERUN * NERUN);
cudaMalloc((void **)&d_inv, sizeof(double) * NERUN * NERUN);
cudaMalloc((void **)&d_HF_sudoinverse, sizeof(double)* NERUN* row_Data_train);
cudaMalloc((void **)&d_a_n, sizeof(double)* NERUN* col_Xtrain);
cudaMalloc((void **)&d_H_a_n, sizeof(double)* row_Data_train* col_Xtrain);

//Transfer Data from Host To Device
//thrust::device_vector<double> ix_train(x_train);
//double* d_x_train = thrust::raw_pointer_cast(&ix_train[0]);
cudaMemcpy(d_x_train, x_train.data(), sizeof(double)* x_train.size(), cudaMemcpyHostToDevice);
//cudaMemcpy(d_x_train, x_train, sizeof(double)* row_Data_train* COL, cudaMemcpyHostToDevice);




//////////////////////////////////////////------------call funcation-----------//////////////////////////////////////////
encoder(x_train, row_Xtrain, col_Xtrain, a_f,row_af, col_af, b_f, H_F, "sigmoid");
// transfer data from host to device
cudaMemcpy(d_H_F, H_F.data(), sizeof(double)* H_F.size(), cudaMemcpyHostToDevice);
Transpose(d_H_f_T, d_H_F, row_Data_train, NERUN);
//test
cudaMemcpy(H_f_T, d_H_f_T, sizeof(double)*NERUN* row_Data_train, cudaMemcpyDeviceToHost);

if (N > nerun){
multi(d_H_f_T, d_H_F, d_H_H_T1, NERUN, row_Data_train, row_Data_train, NERUN, NERUN, NERUN);
//test
cudaMemcpy(H_H_T1, d_H_H_T1, sizeof(double)*NERUN*NERUN, cudaMemcpyDeviceToHost);
unit_matrix_cpu(d_I, NERUN, NERUN);
//test
//cudaMemcpy(I, d_I, sizeof(double)*NERUN*NERUN, cudaMemcpyDeviceToHost);
C = pow(10, 6);
divisional_cpu(C, d_I, NERUN, NERUN);
//test
cudaMemcpy(I, d_I, sizeof(double)*NERUN*NERUN, cudaMemcpyDeviceToHost);
SUM2D_cpu(d_H_H_T1, d_I, d_H_H_T_I, NERUN, NERUN);
//test
cudaMemcpy(H_H_T_I, d_H_H_T_I, sizeof(double)*NERUN*NERUN, cudaMemcpyDeviceToHost);
inv = inverse(H_H_T_I, NERUN);
cudaMemcpy(d_inv, inv, sizeof(double)* NERUN* NERUN, cudaMemcpyHostToDevice);
multi(d_inv, d_H_f_T, d_HF_sudoinverse, NERUN, NERUN, NERUN, row_Data_train, NERUN, row_Data_train);
//test
cudaMemcpy(HF_sudoinverse.data(), d_HF_sudoinverse, sizeof(double)*NERUN * row_Data_train, cudaMemcpyDeviceToHost);
}

if (N < nerun){
multi(d_H_F, d_H_f_T, d_H_H_T2, row_Data_train, NERUN, NERUN, row_Data_train, row_Data_train, row_Data_train );
unit_matrix_cpu(d_I, NERUN, NERUN);
C = pow(10, 6);
divisional_cpu(C, d_I, NERUN, NERUN);
SUM2D_cpu(d_H_H_T2, d_I, d_H_H_T_I, NERUN, NERUN);
inv = inverse(d_H_H_T_I, NERUN);
cudaMemcpy(d_inv, inv, sizeof(double)* NERUN* NERUN, cudaMemcpyHostToDevice);
multi(d_inv, d_H_f_T, d_HF_sudoinverse, NERUN, NERUN, NERUN, row_Data_train, NERUN, row_Data_train);
}

if (G == "sinus"){
MAX_MIN_Matrix(d_x_train, row_Xtrain * col_Xtrain, &Max, &Min);
Norm_alize_cpu(d_x_train, d_x_train_norm, Min, Max, row_Xtrain * col_Xtrain);
arcsinus(d_x_train_norm, d_x_train_n_a, row_Xtrain *col_Xtrain);
multi(d_HF_sudoinverse, d_x_train_n_a, d_a_n, NERUN, row_Data_train, row_Xtrain, col_Xtrain, NERUN, col_Xtrain);
}

if (G == "sigmoid"){
MAX_MIN_Matrix(d_x_train, row_Xtrain * col_Xtrain, &Min, &Max);
//std::cout << "min =" << Min << "\n";
//std::cout << "max =" << Max << "\n";
Norm_alize_cpu(d_x_train, d_x_train_norm, Min, Max, row_Xtrain * col_Xtrain);
//test
cudaMemcpy(x_train_norm, d_x_train_norm, sizeof(double)*row_Xtrain * col_Xtrain, cudaMemcpyDeviceToHost);
negative_log(d_x_train_norm, d_x_train_n_L, row_Xtrain * col_Xtrain);
//test
cudaMemcpy(x_train_n_L, d_x_train_n_L, sizeof(double)*row_Xtrain * col_Xtrain, cudaMemcpyDeviceToHost);
multi(d_HF_sudoinverse, d_x_train_n_L, d_a_n, NERUN, row_Data_train, row_Xtrain, col_Xtrain, NERUN, col_Xtrain);
//test
cudaMemcpy(a_n.data(), d_a_n, sizeof(double)* NERUN * col_Xtrain, cudaMemcpyDeviceToHost);
}
multi(d_H_F, d_a_n, d_H_a_n, row_Data_train, NERUN, NERUN, col_Xtrain, row_Data_train, col_Xtrain);
//test
cudaMemcpy(H_a_n, d_H_a_n, sizeof(double)* row_Data_train * col_Xtrain, cudaMemcpyDeviceToHost);
MSE(d_H_a_n, d_x_train_n_L, row_Data_train * col_Xtrain, d_mean);

cudaMemcpy(&h_answer, d_mean, sizeof(double), cudaMemcpyDeviceToHost);
printf("h_answer: %f \n", h_answer);
double b = sqrt(h_answer);
printf("b: %f \n", b);

for (int i = 0; i < NERUN; i++){
b_n[i] = b;
}
/////////////////////////////////////////-----------------end---------------------///////////////////////////////////////
cudaFree(d_x_train);
cudaFree(d_x_train_norm);
cudaFree(d_x_train_n_a);
cudaFree(d_x_train_n_L);
cudaFree(d_H_F);
cudaFree(d_H_f_T);
cudaFree(d_H_H_T1);
cudaFree(d_H_H_T2);
cudaFree(d_I);
cudaFree(d_H_H_T_I);
cudaFree(d_inv);
cudaFree(d_HF_sudoinverse);
cudaFree(d_a_n);
cudaFree(d_H_a_n);
}

void Autoencoder(int n , int L , vector<double>X_train){
int k = 0 , i=0;

vector<double>a_f(COL* NERUN);
vector<double>b_f(NERUN);
vector<double>H_F(row_Data_train * NERUN);
vector<double>HF_sudoinverse(NERUN * row_Data_train);
vector<double>b_n(NERUN);
vector<double>a_n;
vector<double>a_f_new(NERUN * NERUN );
vector<double>b_f_new(NERUN);


a_f_new = rand_data(NERUN * NERUN);
b_f_new = rand_data(NERUN);

while (k < L){
if (k == 0){
a_f = rand_data(COL_NERUN);
b_f = rand_data(NERUN);
decoder(X_train, row_Data_train, COL, a_f, COL, NERUN, b_f, H_F, HF_sudoinverse, a_n, NERUN, COL, b_n, "sigmoid", 300, 200);
double *d_a_n, *d_a_f;
cudaMalloc((void **)&d_a_n, sizeof(double)* NERUN* COL);
cudaMalloc((void **)&d_a_f, sizeof(double)* NERUN* COL);
cudaMemcpy(d_a_n, a_n.data(), sizeof(double)* NERUN* COL, cudaMemcpyHostToDevice);
Transpose(d_a_f, d_a_n, NERUN, COL);
cudaMemcpy(a_f.data(), d_a_f, sizeof(double)* a_n.size(), cudaMemcpyDeviceToHost);
for (i = 0; i < b_n.size(); i++){
b_f[i] = b_n[i];}
}
else if (k < L){
decoder(X_train, row_Data_train, COL, a_f, COL, NERUN, b_f, H_F, HF_sudoinverse, a_n, NERUN, COL, b_n, "sigmoid", 300, 200);
double *d_a_n, *d_a_f;
cudaMalloc((void **)&d_a_n, sizeof(double)* NERUN* COL);
cudaMalloc((void **)&d_a_f, sizeof(double)*NERUN* COL);
cudaMemcpy(d_a_n, a_n.data(), sizeof(double)* NERUN* COL, cudaMemcpyHostToDevice);
Transpose(d_a_f, d_a_n, NERUN, COL);
cudaMemcpy(a_f.data(), d_a_f, sizeof(double)* NERUN* COL, cudaMemcpyDeviceToHost);
for (i = 0; i < b_n.size(); i++){
b_f[i] = b_n[i];}
}
k++;
}
}

I didn't put the kernel code in this post because the post was taking too long. the problem occurred in this section of the code above,

else if (k < L){
decoder(X_train, row_Data_train, COL, a_f, COL, NERUN, b_f, H_F, HF_sudoinverse, a_n, NERUN, COL, b_n, "sigmoid", 300, 200);

I haven't this problem when (k==0)

if (k == 0){
a_f = rand_data(COL_NERUN);
b_f = rand_data(NERUN);
decoder(X_train, row_Data_train, COL, a_f, COL, NERUN, b_f, H_F, HF_sudoinverse, a_n, NERUN, COL, b_n, "sigmoid", 300, 200);
double *d_a_n, *d_a_f;
cudaMalloc((void **)&d_a_n, sizeof(double)* NERUN* COL);
cudaMalloc((void **)&d_a_f, sizeof(double)* NERUN* COL);
cudaMemcpy(d_a_n, a_n.data(), sizeof(double)* NERUN* COL, cudaMemcpyHostToDevice);
Transpose(d_a_f, d_a_n, NERUN, COL);
cudaMemcpy(a_f.data(), d_a_f, sizeof(double)* a_n.size(), cudaMemcpyDeviceToHost);
for (i = 0; i < b_n.size(); i++){
b_f[i] = b_n[i];}
}
cuda
thrust
asked on Stack Overflow Aug 26, 2019 by arezoo Moradi • edited Aug 26, 2019 by talonmies

0 Answers

Nobody has answered this question yet.


User contributions licensed under CC BY-SA 3.0