6b22641669
I know it does not make much sense, but one can play around with the performance. Numbers are available at http://www.unixer.de/research/nbcoll/perf/. This is the first step towards collv2. The next step includes the addition of non-blocking functions to the MPI layer and the collv1 interface. It implements all MPI-1 collective algorithms in a non-blocking manner. However, the collv1 interface does not allow non-blocking collectives, so all collectives are used in a blocking fashion by the ompi glue layer. I wanted to add LibNBC as a separate subdirectory, but I could not convince the build system (and did not have the time). So the component looks pretty messy. It would be great if somebody could explain to me how to move all nbc*{c,h} and {hb,dict}*{c,h} files to a separate subdirectory. It is .ompi_ignored because I have not tested it exhaustively yet. This commit was SVN r11401.
105 lines
2.8 KiB
C
105 lines
2.8 KiB
C
#define IB
|
|
#include <stdio.h>
#include <stdlib.h>

#include <mpi.h>

#include "ib.h"
|
|
|
|
int main(int argc, char **argv) {
|
|
int rank, res, size, i, loops, j, tag;
|
|
IB_Request req;
|
|
double t1=0, t2=0, t3=0, t4=0, t5, t6, t7;
|
|
MPI_Request mpireq;
|
|
void *buf2;
|
|
|
|
MPI_Init(&argc, &argv);
|
|
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
|
|
|
size = 1;
|
|
loops= 50;
|
|
|
|
buf2 = malloc(size);
|
|
if(buf2 == NULL) printf("malloc() error\n");
|
|
|
|
if(rank == 0) {
|
|
res = IB_Isend(buf2, size, MPI_BYTE, 1, 1, MPI_COMM_WORLD, &req);
|
|
if(res) printf("Error in IB_Send (%i) \n", res);
|
|
res = IB_Wait(&req);
|
|
res = IB_Irecv(buf2, size, MPI_BYTE, 1, 1, MPI_COMM_WORLD, &req);
|
|
if(res) printf("Error in IB_Recv (%i) \n", res);
|
|
res = IB_Wait(&req);
|
|
} else {
|
|
res = IB_Irecv(buf2, size, MPI_BYTE, 0, 1, MPI_COMM_WORLD, &req);
|
|
if(res) printf("Error in IB_Recv (%i)\n", res);
|
|
res = IB_Wait(&req);
|
|
res = IB_Isend(buf2, size, MPI_BYTE, 0, 1, MPI_COMM_WORLD, &req);
|
|
if(res) printf("Error in IB_Send (%i) \n", res);
|
|
res = IB_Wait(&req);
|
|
}
|
|
|
|
printf("[%i] MEASUREMENT\n", rank);
|
|
t1=t2=t3=t4=t5=t6=0;
|
|
|
|
|
|
|
|
for(j=1; j<loops+1;j++) {
|
|
i = 0;
|
|
tag = j;
|
|
|
|
t1 -= MPI_Wtime();
|
|
if(!rank) {
|
|
t2 -= MPI_Wtime();
|
|
res = IB_Isend(buf2+i, size-i, MPI_BYTE, 1, tag, MPI_COMM_WORLD, &req);
|
|
t2 += MPI_Wtime();
|
|
if(res) printf("Error in IB_Send (%i) \n", res);
|
|
res = IB_Wait(&req);
|
|
|
|
t3 -= MPI_Wtime();
|
|
res = IB_Irecv(buf2+i, size-i, MPI_BYTE, 1, tag, MPI_COMM_WORLD, &req);
|
|
t3 += MPI_Wtime();
|
|
if(res) printf("Error in IB_Recv (%i) \n", res);
|
|
res = IB_Wait(&req);
|
|
} else {
|
|
t3 -= MPI_Wtime();
|
|
res = IB_Irecv(buf2+i, size-i, MPI_BYTE, 0, tag, MPI_COMM_WORLD, &req);
|
|
if(res) printf("Error in IB_Recv (%i)\n", res);
|
|
res = IB_Wait(&req);
|
|
t3 += MPI_Wtime();
|
|
|
|
t2 -= MPI_Wtime();
|
|
res = IB_Isend(buf2+i, size-i, MPI_BYTE, 0, tag, MPI_COMM_WORLD, &req);
|
|
t2 += MPI_Wtime();
|
|
if(res) printf("Error in IB_Send (%i) \n", res);
|
|
res = IB_Wait(&req);
|
|
}
|
|
t1 += MPI_Wtime();
|
|
|
|
t4 -= MPI_Wtime();
|
|
if(!rank) {
|
|
t5 -= MPI_Wtime();
|
|
MPI_Isend(buf2, size, MPI_BYTE, 1, 1, MPI_COMM_WORLD, &mpireq);
|
|
t5 += MPI_Wtime();
|
|
res = MPI_Wait(&mpireq, MPI_STATUS_IGNORE);
|
|
|
|
t6 -= MPI_Wtime();
|
|
MPI_Irecv(buf2, size, MPI_BYTE, 1, 1, MPI_COMM_WORLD, &mpireq);
|
|
t6 += MPI_Wtime();
|
|
res = MPI_Wait(&mpireq, MPI_STATUS_IGNORE);
|
|
} else {
|
|
t6 -= MPI_Wtime();
|
|
MPI_Irecv(buf2, size, MPI_BYTE, 0, 1, MPI_COMM_WORLD, &mpireq);
|
|
t6 += MPI_Wtime();
|
|
res = MPI_Wait(&mpireq, MPI_STATUS_IGNORE);
|
|
|
|
t5 -= MPI_Wtime();
|
|
MPI_Isend(buf2, size, MPI_BYTE, 0, 1, MPI_COMM_WORLD, &mpireq);
|
|
t5 += MPI_Wtime();
|
|
res = MPI_Wait(&mpireq, MPI_STATUS_IGNORE);
|
|
}
|
|
t4 += MPI_Wtime();
|
|
printf("[%i] %lf (%lf %lf) | %lf (%lf %lf) \n", rank, t1*1e6/j, t2*1e6/j, t3*1e6/j, t4*1e6/j, t5*1e6/j,t6*1e6/j);
|
|
}
|
|
free(buf2);
|
|
|
|
MPI_Finalize();
|
|
|
|
return 0;
|
|
}
|