I have problems with my MPI code in C. My program sometimes crashes, and I get this error output:
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-2] terminated
---- error analysis -----
[0] on LOLANODE1011
project.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
Sometimes it works, and I don't know why :( The code is for a job on a cluster, but it only works when run from the command line. I think it has something to do with the arrays or with Send and Receive, but I'm not sure...
Main:
int main(int argc, char **argv) {
double *array_distances;
int array_points[2];
int process_count;
int rank;
int city2;
int start_point;
int end_point;
double start_time;
double end_time;
double duration;
//MPI Initiate
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &process_count);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
start_time = MPI_Wtime();
struct local_stack local_work;
struct route best_route;
int num_cities;
double *distance;
char city_names[MAX_CITIES][MAX_CITY_NAME_LENGTH];
int city;
double coord[MAX_CITIES][2];
if (argc == 2) {
num_cities = atoi(argv[1]);
}
else {
num_cities = 0;
}
// Initiate best route in process 0
if (rank == 0) {
init_best_route(&best_route);
}
// Calculate start and endpoint for the current process
if (num_cities==0) {
num_cities = 15;
}
int rest = num_cities % process_count;
if (rest == 0) {
start_point = (num_cities / process_count)* rank;
end_point = (num_cities / process_count)* (rank + 1);
}
else
{
start_point = ((num_cities - rest) / process_count)* rank;
end_point = ((num_cities - rest) / process_count)* (rank + 1);
if (rank == (process_count - 1)) {
end_point += rest;
}
}
// Calculate distances between the cities
populate_distance_matrix(process_count, rank, &distance, &num_cities, city_names, start_point,
end_point);
// current process (not process 0!) sends the calculated distances and the start- and endpoint to
process 0
if (rank > 0) {
int array_size = num_cities * (end_point - start_point);
array_distances = malloc(sizeof(double) * array_size);
//send start- and endpoint
array_points[0] = start_point;
array_points[1] = end_point;
MPI_Request req2;
MPI_Isend(&array_points, 2, MPI_INT, 0, rank, MPI_COMM_WORLD, &req2);
//put distances in array
int i = 0;
for (start_point; start_point < end_point; start_point++) {
for (city2 = 0; city2 < num_cities; city2++) {
array_distances[i] = *(distance + start_point * num_cities + city2);
i++;
}
}
//send distances to process 0
MPI_Request req;
MPI_Isend(array_distances, array_size, MPI_DOUBLE, 0, rank+1, MPI_COMM_WORLD, &req);
MPI_Wait(&req, MPI_STATUS_IGNORE);
}
//process 0 receives all distances and start- and endpoint
if (rank == 0) {
for (int i = 1; i < process_count; i++) {
//receive start- and endpoint
MPI_Recv(&array_points, 2, MPI_INT, i, i, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
start_point = array_points[0];
end_point = array_points[1];
//receive distances
int count = 0;
int size;
MPI_Status status;
// Wait for a message
MPI_Probe(i, i + 1, MPI_COMM_WORLD, &status);
// Find out the number of elements in the message -> size goes to "size"
MPI_Get_count(&status, MPI_DOUBLE, &size);
//receive
MPI_Recv(array_distances, size, MPI_DOUBLE, i, i + 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
// save distances in memory
for (start_point; start_point < end_point; start_point++) {
for (city2 = 0; city2 < num_cities; city2++) {
*(distance + start_point * num_cities + city2) = array_distances[count];
count++;
}
}
}
}
free(array_distances);
// process 0: search for the best route
if (rank == 0) {
init_stack(&local_work);
push_first_work_local(&local_work, num_cities);
expand_top_route(&local_work, &num_cities, &best_route, distance);
//printf("[%d] Let's start the fun!\n", );fflush(stdout);
while (!empty(&local_work)) {
//printf("[%d][w %d] Has work\n", , );fflush(stdout);
expand_top_route(&local_work, &num_cities, &best_route, distance);
/*
if ( % 100 == 0) {
printf("[%d] Finished %dth loop iteration\n", , );
}
*/
}
//printf("[%d][w %d] I am out\n", , - 1);fflush(stdout);
free(distance);
/*printf("==========================\n");
printf("# cities: %d\n", num_cities);
printf("==========================\n");
print_route(best_route, "Best route:\n");
for (city = 0; city < num_cities; city++) {
printf("%2d\tCity %2d/%c\t%s\n", city, best_route.route[city],
best_route.route[city] + 48, city_names[best_route.route[city]]);
}*/
}
// MPI show duration
end_time = MPI_Wtime();
if (rank == 0) {
duration = end_time - start_time;
/*printf("==========================\n");
printf("\\\\ //\n");
printf(" \\\\ //\n");
printf(" \\\\_// Duration: %f\n", duration);*/
fprintf(stderr, "%f,%f", duration, best_route.length);
for (int index = 0; index < num_cities; index++) {
fprintf(stderr, "%2d,", best_route.route[index]);
}
printf("%d, %d, %f\n", process_count, num_cities, duration);
}
MPI_Finalize();
return 0;
}
If someone could help me, that would be nice :)
User contributions licensed under CC BY-SA 3.0