#include #include #include #include "mpi.h" int main (int argc, char *argv[]) { int id, np, namelen; char name[MPI_MAX_PROCESSOR_NAME]; double **local_A, **local_B, **local_C, *local_x, *local_y; int m, local_m, ioffset; int n, local_n, joffset; int q, local_q, koffset; int i, j, k; char filename[100]; FILE *fid; void compute_locals (int *local_m, int *ioffset, int m, int np, int id); double *allocate_vector (int n); void free_vector (double *x, int n); double **allocate_matrix (int m, int n); void free_matrix (double **A, int m, int n); void parallel_Ax (double *local_y, double **local_A, double *local_x, int m, int n, int local_m, int local_n); MPI_Init (&argc, &argv); /* Check processes: */ MPI_Comm_size (MPI_COMM_WORLD, &np); MPI_Comm_rank (MPI_COMM_WORLD, &id); MPI_Get_processor_name (name, &namelen); printf ("Process %d out of %d running on %s\n", id, np, name); fflush (stdout); m = 16; n = 8; q = 8; if ( ((m % np) != 0) || ((n % np) != 0) || ((q % np) != 0) ) { if (id == 0) { printf ("Error: This program requires that the number of processes\n"); printf (" divide the numbers of rows and columns! Aborting.\n"); /* This requirement is the result of the use of Allgather below! */ } MPI_Finalize (); exit (1); } compute_locals (&local_m, &ioffset, m, np, id); compute_locals (&local_n, &joffset, n, np, id); compute_locals (&local_q, &koffset, q, np, id); if (id == 0) { printf ("m = %5d, local_m = %5d, ioffset = %5d\n", m, local_m, ioffset); printf ("n = %5d, local_n = %5d, joffset = %5d\n", n, local_n, joffset); printf ("q = %5d, local_q = %5d, koffset = %5d\n", q, local_q, koffset); } /* memory allocation: */ local_A = allocate_matrix (local_m, n); local_B = allocate_matrix (local_n, q); local_C = allocate_matrix (local_m, q); local_x = allocate_vector (local_n); local_y = allocate_vector (local_m); /* set up example: * in mathematical counting: * i = 1, 2, ..., m, j = 1, 2, ..., n, k = 1, 2, ..., q * A = (A(i,j)) m-by-n matrix, A(i,j) = i, * B = (B(j,k)) n-by-q matrix, B(j,k) = k, * x = (x(j)) n-vector, x(j) = 1, * then results * y = A x = (y(i)) m-vector, y(i) = n * i, * C = A B = (C(i,k)) m-by-q matrix, C(i,k) = n * i * k. */ for (i = 0; i < local_m; i++) { for (j = 0; j < n; j++) { local_A[i][j] = (double) (ioffset + i + 1); } } for (j = 0; j < local_n; j++) { for (k = 0; k < q; k++) { local_B[j][k] = (double) (k + 1); } } for (j = 0; j < local_n; j++) { local_x[j] = 1.0; /* (double) (joffset + j + 1); */ } parallel_Ax (local_y, local_A, local_x, m, n, local_m, local_n); /* free memory: */ free_vector (local_y, local_m); free_vector (local_x, local_n); free_matrix (local_C, local_m, q); free_matrix (local_B, local_n, q); free_matrix (local_A, local_m, n); MPI_Finalize (); return 0; } void parallel_Ax (double *local_y, double **local_A, double *local_x, int m, int n, int local_m, int local_n) { double *global_x; int i, k; double *allocate_vector (int n); void free_vector (double *x, int n); global_x = allocate_vector (n); MPI_Allgather (local_x,local_n,MPI_DOUBLE, global_x,local_n,MPI_DOUBLE, MPI_COMM_WORLD); for (i = 0; i < local_m; i++) { local_y[i] = 0.0; for (k = 0; k < n; k++) { local_y[i] += local_A[i][k] * global_x[k]; } } free_vector (global_x, n); } /* This function assumes that p divides n. */ void compute_locals (int *local_n, int *ioffset, int n, int p, int i) { int m; m = n / p; *local_n = m; *ioffset = i * m; } double *allocate_vector (int n) { double *x; if ((x = (double*) calloc (n, sizeof(double))) == NULL) { printf ("Problem allocating memory for vector\n"); exit (1); } return x; } void free_vector (double *x, int n) { free (x); } double **allocate_matrix (int m, int n) { double **A; int i; if ((A = (double**) calloc (m, sizeof(double*))) == NULL) { printf ("Problem allocating memory for matrix\n"); exit (1); } for (i = 0; i < m; i++) { if ((A[i] = (double*) calloc (n, sizeof(double))) == NULL) { printf ("Problem allocating memory for matrix\n"); exit (1); } } return A; } void free_matrix (double **A, int m, int n) { int i; for (i = 0; i < m; i++) { free (A[i]); } free (A); }