diff --git a/ompi/mca/coll/libnbc/Makefile.am b/ompi/mca/coll/libnbc/Makefile.am index 85d7d4ba78..4d3e90186a 100644 --- a/ompi/mca/coll/libnbc/Makefile.am +++ b/ompi/mca/coll/libnbc/Makefile.am @@ -22,7 +22,6 @@ sources = \ coll_libnbc.h \ coll_libnbc_component.c \ - coll_libnbc_ireduce_scatter_block.c \ nbc.c \ nbc_internal.h \ libdict/dict.h \ @@ -49,6 +48,7 @@ sources = \ nbc_ineighbor_alltoallw.c \ nbc_ireduce.c \ nbc_ireduce_scatter.c \ + nbc_ireduce_scatter_block.c \ nbc_iscan.c \ nbc_iscatter.c \ nbc_iscatterv.c \ diff --git a/ompi/mca/coll/libnbc/coll_libnbc.h b/ompi/mca/coll/libnbc/coll_libnbc.h index abeca50465..0fb472d552 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc.h +++ b/ompi/mca/coll/libnbc/coll_libnbc.h @@ -98,9 +98,16 @@ OBJ_CLASS_DECLARATION(ompi_coll_libnbc_module_t); typedef ompi_coll_libnbc_module_t NBC_Comminfo; -/* a schedule is basically a pointer to some memory location where the - * schedule array resides */ -typedef void* NBC_Schedule; +struct NBC_Schedule { + opal_object_t super; + volatile int size; + volatile int current_round_offset; + char *data; +}; + +typedef struct NBC_Schedule NBC_Schedule; + +OBJ_CLASS_DECLARATION(NBC_Schedule); struct ompi_coll_libnbc_request_t { ompi_request_t super; @@ -110,7 +117,7 @@ struct ompi_coll_libnbc_request_t { volatile int req_count; ompi_request_t **req_array; NBC_Comminfo *comminfo; - volatile NBC_Schedule *schedule; + NBC_Schedule *schedule; void *tmpbuf; /* temporary buffer e.g. 
used for Reduce */ /* TODO: we should make a handle pointer to a state later (that the user * can move request handles) */ @@ -134,9 +141,9 @@ typedef ompi_coll_libnbc_request_t NBC_Handle; #define OMPI_COLL_LIBNBC_REQUEST_RETURN(req) \ do { \ - OMPI_REQUEST_FINI(&request->super); \ + OMPI_REQUEST_FINI(&(req)->super); \ opal_free_list_return (&mca_coll_libnbc_component.requests, \ - (opal_free_list_item_t*) req); \ + (opal_free_list_item_t*) (req)); \ } while (0) int ompi_coll_libnbc_progress(void); diff --git a/ompi/mca/coll/libnbc/coll_libnbc_component.c b/ompi/mca/coll/libnbc/coll_libnbc_component.c index e8b9facb57..18bbb3f8c1 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc_component.c +++ b/ompi/mca/coll/libnbc/coll_libnbc_component.c @@ -239,7 +239,7 @@ ompi_coll_libnbc_progress(void) OPAL_LIST_FOREACH_SAFE(request, next, &mca_coll_libnbc_component.active_requests, ompi_coll_libnbc_request_t) { - if (NBC_OK == NBC_Progress(request)) { + if (OMPI_SUCCESS == NBC_Progress(request)) { /* done, remove and complete */ opal_list_remove_item(&mca_coll_libnbc_component.active_requests, &request->super.super.super); diff --git a/ompi/mca/coll/libnbc/coll_libnbc_ireduce_scatter_block.c b/ompi/mca/coll/libnbc/coll_libnbc_ireduce_scatter_block.c deleted file mode 100644 index c2546bf747..0000000000 --- a/ompi/mca/coll/libnbc/coll_libnbc_ireduce_scatter_block.c +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (c) 2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2006 The Technical University of Chemnitz. All - * rights reserved. - * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * - * Author(s): Torsten Hoefler - * - */ -#include "nbc_internal.h" - -/* an reduce_csttare schedule can not be cached easily because the contents - * ot the recvcount value may change, so a comparison of the address - * would not be sufficient ... we simply do not cache it */ - -/* binomial reduce to rank 0 followed by a linear scatter ... - * - * Algorithm: - * pairwise exchange - * round r: - * grp = rank % 2^r - * if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value - * if grp == 1: send to rank - 2^(r-1) and exit function - * - * do this for R=log_2(p) rounds - * - */ - -int ompi_coll_libnbc_ireduce_scatter_block(void* sendbuf, void* recvbuf, int recvcount, MPI_Datatype datatype, - MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, - struct mca_coll_base_module_2_1_0_t *module) { - int peer, rank, maxr, p, r, res, count, offset, firstred; - MPI_Aint ext; - char *redbuf, *sbuf, inplace; - NBC_Schedule *schedule; - NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; - ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res || 0 == p) { printf("MPI Error in MPI_Comm_size() (%i:%i)\n", res, p); return (MPI_SUCCESS == res) ? MPI_ERR_SIZE : res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res || 0 == ext) { printf("MPI Error in MPI_Type_extent() (%i:%i)\n", res, (int)ext); return (MPI_SUCCESS == res) ? 
MPI_ERR_SIZE : res; } - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - maxr = (int)ceil((log((double)p)/LOG2)); - - count = p * recvcount; - - if (0 < count) { - handle->tmpbuf = malloc(ext*count*2); - if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } - - redbuf = ((char*)handle->tmpbuf)+(ext*count); - - /* copy data to redbuf if we only have a single node */ - if((p==1) && !inplace) { - res = NBC_Copy(sendbuf, count, datatype, redbuf, count, datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - - firstred = 1; - for(r=1; r<=maxr; r++) { - if((rank % (1<tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - if(firstred) { - /* take reduce data from the sendbuf in the first round -> save copy */ - res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, sendbuf, false, 0, true, count, datatype, op, schedule); - firstred = 0; - } else { - /* perform the reduce in my local buffer */ - res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, redbuf-(unsigned long)handle->tmpbuf, true, 0, true, count, datatype, op, schedule); - } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - } - } else { - /* we have to send this round */ - peer = rank - (1<<(r-1)); - if(firstred) { - /* we have to send the 
senbuf */ - res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule); - } else { - /* we send an already reduced value from redbuf */ - res = NBC_Sched_send(redbuf-(unsigned long)handle->tmpbuf, true, count, datatype, peer, schedule); - } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - /* leave the game */ - break; - } - } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - /* rank 0 is root and sends - all others receive */ - if(rank != 0) { - res = NBC_Sched_recv(recvbuf, false, recvcount, datatype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - - if(rank == 0) { - offset = 0; - for(r=1;rtmpbuf, true, recvcount, datatype, r, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - res = NBC_Sched_copy(redbuf-(unsigned long)handle->tmpbuf, true, recvcount, datatype, recvbuf, false, recvcount, datatype, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - } - - /*NBC_PRINT_SCHED(*schedule);*/ - - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } - - /* tmpbuf is freed with the handle */ - return NBC_OK; -} - -int ompi_coll_libnbc_ireduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, - struct ompi_op_t *op, struct ompi_communicator_t *comm, - ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module) { - int peer, rank, res, count, rsize; - MPI_Aint ext; - NBC_Schedule *schedule; - NBC_Handle 
*handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; - ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_remote_size(comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } - res = MPI_Type_extent(dtype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - count = rcount * rsize; - - handle->tmpbuf = malloc(2*ext*count); - if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } - - /* send my data to the remote root */ - res = NBC_Sched_send(sbuf, false, count, dtype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - - if (0 == rank) { - res = NBC_Sched_recv((void *) 0, true, count, dtype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - for (peer = 1 ; peer < rsize ; ++peer) { - res = NBC_Sched_recv((void *)(ext * count), true, count, dtype, peer, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { 
printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - res = NBC_Sched_op((void *) 0, true, (void *)(ext * count), true, (void *) 0, true, count, dtype, op, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - } - - /* exchange data with remote root for scatter phase (we *could* use the local communicator to do the scatter) */ - res = NBC_Sched_recv((void *)(ext * count), true, count, dtype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_send((void *) 0, true, count, dtype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - /* scatter */ - for (peer = 0 ; peer < rsize ; ++peer) { - res = NBC_Sched_send((void *)(ext * (count + peer * rcount)), true, rcount, dtype, peer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - - /* receive my block */ - res = NBC_Sched_recv(rbuf, true, rcount, dtype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - /*NBC_PRINT_SCHED(*schedule);*/ - - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } - - /* tmpbuf is freed with the handle */ - return NBC_OK; -} diff --git a/ompi/mca/coll/libnbc/nbc.c b/ompi/mca/coll/libnbc/nbc.c index eb82505540..943459bb49 100644 --- a/ompi/mca/coll/libnbc/nbc.c +++ 
b/ompi/mca/coll/libnbc/nbc.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -7,6 +8,8 @@ * reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * * Author(s): Torsten Hoefler * @@ -33,275 +36,255 @@ void NBC_Print_times(double div) { } #endif -/* allocates a new schedule array */ -int NBC_Sched_create(NBC_Schedule* schedule) { - int *ptr; +static void nbc_schedule_constructor (NBC_Schedule *schedule) { + /* initial total size of the schedule */ + schedule->size = sizeof (int); + schedule->current_round_offset = 0; + schedule->data = calloc (1, schedule->size); +} - *schedule=malloc(2*sizeof(int)); - if(*schedule == NULL) { return NBC_OOR; } +static void nbc_schedule_destructor (NBC_Schedule *schedule) { + free (schedule->data); + schedule->data = NULL; +} - /* initialize the schedule */ - ptr = (int*) *schedule; - ptr[0] = 2 * sizeof(int); /* initial total size of the schedule */ - ptr[1] = 0; /* initial round-schedule has num=(int)0 and no actions */ - /* The schedule's final end=(char)0 delimiter won't be added until NBC_Sched_commit(). 
*/ +OBJ_CLASS_INSTANCE(NBC_Schedule, opal_object_t, nbc_schedule_constructor, + nbc_schedule_destructor); - return NBC_OK; +static int nbc_schedule_grow (NBC_Schedule *schedule, int additional) { + void *tmp; + int size; + + /* get current size of schedule */ + size = nbc_schedule_get_size (schedule); + + tmp = realloc (schedule->data, size + additional); + if (NULL == tmp) { + NBC_Error ("Could not increase the size of NBC schedule"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + schedule->data = tmp; + return OMPI_SUCCESS; +} + +static int nbc_schedule_round_append (NBC_Schedule *schedule, void *data, int data_size, bool barrier) { + int ret, size = nbc_schedule_get_size (schedule); + + if (barrier) { + ret = nbc_schedule_grow (schedule, data_size + 1 + sizeof (int)); + } else { + ret = nbc_schedule_grow (schedule, data_size); + } + if (OMPI_SUCCESS != ret) { + return ret; + } + + /* append to the round-schedule */ + if (data_size) { + memcpy (schedule->data + size, data, data_size); + + /* increase number of elements in round-schedule */ + nbc_schedule_inc_round (schedule); + + /* increase size of schedule */ + nbc_schedule_inc_size (schedule, data_size); + } + + if (barrier) { + /* add the barrier */ + schedule->data[size + data_size] = 1; + /* set next round counter to 0 */ + memset (schedule->data + size + data_size + 1, 0, sizeof (int)); + + NBC_DEBUG(10, "ended round at byte %i\n", size + data_size + 1); + + schedule->current_round_offset = size + data_size + 1; + + /* increase size of schedule */ + nbc_schedule_inc_size (schedule, sizeof (int) + 1); + } + + return OMPI_SUCCESS; } /* this function puts a send into the schedule */ -int NBC_Sched_send(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule) { - int size; - char* ptr; - NBC_Fn_type type = SEND; +int NBC_Sched_send (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier) { NBC_Args_send send_args; - - /* get size 
of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_send)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } + int ret; /* store the passed arguments */ - send_args.buf=buf; - send_args.tmpbuf=tmpbuf; - send_args.count=count; - send_args.datatype=datatype; - send_args.dest=dest; + send_args.type = SEND; + send_args.buf = buf; + send_args.tmpbuf = tmpbuf; + send_args.count = count; + send_args.datatype = datatype; + send_args.dest = dest; /* append to the round-schedule */ - ptr = (char*)*schedule + size; - NBC_PUT_BYTES(ptr,type); - NBC_PUT_BYTES(ptr,send_args); + ret = nbc_schedule_round_append (schedule, &send_args, sizeof (send_args), barrier); + if (OMPI_SUCCESS != ret) { + return ret; + } - /* increase number of elements in round-schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding send - ends at byte %i\n", (int)(size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_send))); + NBC_DEBUG(10, "added send - ends at byte %i\n", nbc_schedule_get_size (schedule)); - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Fn_type)+sizeof(NBC_Args_send)); - - return NBC_OK; + return OMPI_SUCCESS; } /* this function puts a receive into the schedule */ -int NBC_Sched_recv(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule) { - int size; - char* ptr; - NBC_Fn_type type = RECV; +int NBC_Sched_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier) { NBC_Args_recv recv_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_recv)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } + int ret; /* store the passed arguments 
*/ - recv_args.buf=buf; - recv_args.tmpbuf=tmpbuf; - recv_args.count=count; - recv_args.datatype=datatype; - recv_args.source=source; + recv_args.type = RECV; + recv_args.buf = buf; + recv_args.tmpbuf = tmpbuf; + recv_args.count = count; + recv_args.datatype = datatype; + recv_args.source = source; /* append to the round-schedule */ - ptr = (char*)*schedule + size; - NBC_PUT_BYTES(ptr,type); - NBC_PUT_BYTES(ptr,recv_args); + ret = nbc_schedule_round_append (schedule, &recv_args, sizeof (recv_args), barrier); + if (OMPI_SUCCESS != ret) { + return ret; + } - /* increase number of elements in round-schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding receive - ends at byte %i\n", (int)(size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_recv))); + NBC_DEBUG(10, "added receive - ends at byte %d\n", nbc_schedule_get_size (schedule)); - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Fn_type)+sizeof(NBC_Args_recv)); - - return NBC_OK; + return OMPI_SUCCESS; } /* this function puts an operation into the schedule */ -int NBC_Sched_op(void *buf3, char tmpbuf3, void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule) { - int size; - char* ptr; - NBC_Fn_type type = OP; +int NBC_Sched_op (void *buf3, char tmpbuf3, void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, + MPI_Op op, NBC_Schedule *schedule, bool barrier) { NBC_Args_op op_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_op)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } + int ret; /* store the passed arguments */ - op_args.buf1=buf1; - op_args.buf2=buf2; - op_args.buf3=buf3; - op_args.tmpbuf1=tmpbuf1; - op_args.tmpbuf2=tmpbuf2; - op_args.tmpbuf3=tmpbuf3; - op_args.count=count; - op_args.op=op; - 
op_args.datatype=datatype; + op_args.type = OP; + op_args.buf1 = buf1; + op_args.buf2 = buf2; + op_args.buf3 = buf3; + op_args.tmpbuf1 = tmpbuf1; + op_args.tmpbuf2 = tmpbuf2; + op_args.tmpbuf3 = tmpbuf3; + op_args.count = count; + op_args.op = op; + op_args.datatype = datatype; /* append to the round-schedule */ - ptr = (char*)*schedule + size; - NBC_PUT_BYTES(ptr,type); - NBC_PUT_BYTES(ptr,op_args); + ret = nbc_schedule_round_append (schedule, &op_args, sizeof (op_args), barrier); + if (OMPI_SUCCESS != ret) { + return ret; + } - /* increase number of elements in round-schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding op - ends at byte %i\n", (int)(size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_op))); + NBC_DEBUG(10, "added op - ends at byte %i\n", nbc_schedule_get_size (schedule)); - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Fn_type)+sizeof(NBC_Args_op)); - - return NBC_OK; + return OMPI_SUCCESS; } /* this function puts a copy into the schedule */ -int NBC_Sched_copy(void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, MPI_Datatype tgttype, NBC_Schedule *schedule) { - int size; - char* ptr; - NBC_Fn_type type = COPY; +int NBC_Sched_copy (void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, + MPI_Datatype tgttype, NBC_Schedule *schedule, bool barrier) { NBC_Args_copy copy_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_copy)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } + int ret; /* store the passed arguments */ - copy_args.src=src; - copy_args.tmpsrc=tmpsrc; - copy_args.srccount=srccount; - copy_args.srctype=srctype; - copy_args.tgt=tgt; - copy_args.tmptgt=tmptgt; - copy_args.tgtcount=tgtcount; - copy_args.tgttype=tgttype; + copy_args.type = 
COPY; + copy_args.src = src; + copy_args.tmpsrc = tmpsrc; + copy_args.srccount = srccount; + copy_args.srctype = srctype; + copy_args.tgt = tgt; + copy_args.tmptgt = tmptgt; + copy_args.tgtcount = tgtcount; + copy_args.tgttype = tgttype; /* append to the round-schedule */ - ptr = (char*)*schedule + size; - NBC_PUT_BYTES(ptr,type); - NBC_PUT_BYTES(ptr,copy_args); + ret = nbc_schedule_round_append (schedule, &copy_args, sizeof (copy_args), barrier); + if (OMPI_SUCCESS != ret) { + return ret; + } - /* increase number of elements in round-schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding copy - ends at byte %i\n", (int)(size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_copy))); + NBC_DEBUG(10, "added copy - ends at byte %i\n", nbc_schedule_get_size (schedule)); - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Fn_type)+sizeof(NBC_Args_copy)); - - return NBC_OK; + return OMPI_SUCCESS; } /* this function puts a unpack into the schedule */ -int NBC_Sched_unpack(void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, NBC_Schedule *schedule) { - int size; - char* ptr; - NBC_Fn_type type = UNPACK; +int NBC_Sched_unpack (void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, + NBC_Schedule *schedule, bool barrier) { NBC_Args_unpack unpack_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_unpack)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } + int ret; /* store the passed arguments */ - unpack_args.inbuf=inbuf; - unpack_args.tmpinbuf=tmpinbuf; - unpack_args.count=count; - unpack_args.datatype=datatype; - unpack_args.outbuf=outbuf; - unpack_args.tmpoutbuf=tmpoutbuf; + unpack_args.type = UNPACK; + unpack_args.inbuf = inbuf; + unpack_args.tmpinbuf = tmpinbuf; + unpack_args.count = count; + 
unpack_args.datatype = datatype; + unpack_args.outbuf = outbuf; + unpack_args.tmpoutbuf = tmpoutbuf; /* append to the round-schedule */ - ptr = (char*)*schedule + size; - NBC_PUT_BYTES(ptr,type); - NBC_PUT_BYTES(ptr,unpack_args); + ret = nbc_schedule_round_append (schedule, &unpack_args, sizeof (unpack_args), barrier); + if (OMPI_SUCCESS != ret) { + return ret; + } - /* increase number of elements in round-schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding unpack - ends at byte %i\n", (int)(size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_unpack))); + NBC_DEBUG(10, "added unpack - ends at byte %i\n", nbc_schedule_get_size (schedule)); - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Fn_type)+sizeof(NBC_Args_unpack)); - - return NBC_OK; + return OMPI_SUCCESS; } /* this function ends a round of a schedule */ -int NBC_Sched_barrier(NBC_Schedule *schedule) { - int size, num = 0; - char *ptr; - char delimiter = 1; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("round terminated at %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(char)+sizeof(int)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - - ptr = (char*)*schedule + size; - NBC_PUT_BYTES(ptr,delimiter); /* round-schedule delimiter */ - NBC_PUT_BYTES(ptr,num); /* initialize num=0 for next round-schedule */ - - NBC_DEBUG(10, "ending round at byte %i\n", (int)(size+sizeof(char)+sizeof(int))); - - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(char)+sizeof(int)); - - return NBC_OK; +int NBC_Sched_barrier (NBC_Schedule *schedule) { + return nbc_schedule_round_append (schedule, NULL, 0, true); } /* this function ends a schedule */ int NBC_Sched_commit(NBC_Schedule *schedule) { - int size; + int size = nbc_schedule_get_size (schedule); + char *ptr; + int ret; - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule terminated at %i bytes\n", 
size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(char)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } + ret = nbc_schedule_grow (schedule, 1); + if (OMPI_SUCCESS != ret) { + return ret; + } /* add the barrier char (0) because this is the last round */ - *(char*)((char*)*schedule+size)=0; - NBC_DEBUG(10, "closing schedule %p at byte %i\n", *schedule, (int)(size+sizeof(char))); + ptr = schedule->data + size; + *((char *) ptr) = 0; /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(char)); + nbc_schedule_inc_size (schedule, 1); - return NBC_OK; + NBC_DEBUG(10, "closed schedule %p at byte %i\n", schedule, (int)(size + 1)); + + return OMPI_SUCCESS; } /* finishes a request * * to be called *only* from the progress thread !!! */ -static inline int NBC_Free(NBC_Handle* handle) { +static inline void NBC_Free (NBC_Handle* handle) { -#ifdef NBC_CACHE_SCHEDULE - /* do not free schedule because it is in the cache */ - handle->schedule = NULL; -#else - if(handle->schedule != NULL) { - /* free schedule */ - free((void*)*(handle->schedule)); - free((void*)handle->schedule); + if (NULL != handle->schedule) { + /* release schedule */ + OBJ_RELEASE (handle->schedule); handle->schedule = NULL; } -#endif /* if the nbc_I attached some data */ /* problems with schedule cache here, see comment (TODO) in * nbc_internal.h */ - if(NULL != handle->tmpbuf) { + if (NULL != handle->tmpbuf) { free((void*)handle->tmpbuf); handle->tmpbuf = NULL; } - - return NBC_OK; } /* progresses a request @@ -309,98 +292,103 @@ static inline int NBC_Free(NBC_Handle* handle) { * to be called *only* from the progress thread !!! 
*/ int NBC_Progress(NBC_Handle *handle) { int flag, res, ret=NBC_CONTINUE; - long size; + unsigned long size; char *delim; /* the handle is done if there is no schedule attached */ - if(handle->schedule != NULL) { - - if((handle->req_count > 0) && (handle->req_array != NULL)) { - NBC_DEBUG(50, "NBC_Progress: testing for %i requests\n", handle->req_count); -#ifdef NBC_TIMING - Test_time -= MPI_Wtime(); -#endif - res = ompi_request_test_all(handle->req_count, handle->req_array, &flag, MPI_STATUSES_IGNORE); - if(res != OMPI_SUCCESS) { printf("MPI Error in MPI_Testall() (%i)\n", res); ret=res; goto error; } -#ifdef NBC_TIMING - Test_time += MPI_Wtime(); -#endif - } else { - flag = 1; /* we had no open requests -> proceed to next round */ - } - - /* a round is finished */ - if(flag) { - /* adjust delim to start of current round */ - NBC_DEBUG(5, "NBC_Progress: going in schedule %p to row-offset: %li\n", *handle->schedule, handle->row_offset); - delim = (char*)*handle->schedule + handle->row_offset; - NBC_DEBUG(10, "delim: %p\n", delim); - NBC_GET_ROUND_SIZE(delim, size); - NBC_DEBUG(10, "size: %li\n", size); - /* adjust delim to end of current round -> delimiter */ - delim = delim + size; - - if(handle->req_array != NULL) { - /* free request array */ - free((void*)handle->req_array); - handle->req_array = NULL; - } - handle->req_count = 0; - - if(*delim == 0) { - /* this was the last round - we're done */ - NBC_DEBUG(5, "NBC_Progress last round finished - we're done\n"); - - res = NBC_Free(handle); - if((NBC_OK != res)) { printf("Error in NBC_Free() (%i)\n", res); ret=res; goto error; } - - return NBC_OK; - } else { - NBC_DEBUG(5, "NBC_Progress round finished - goto next round\n"); - /* move delim to start of next round */ - delim = delim+1; - /* initializing handle for new virgin round */ - handle->row_offset = (long)delim - (long)*handle->schedule; - /* kick it off */ - res = NBC_Start_round(handle); - if(NBC_OK != res) { printf("Error in NBC_Start_round() (%i)\n", 
res); ret=res; goto error; } - } - } - } else { - ret= NBC_OK; + if (NULL == handle->schedule) { + return NBC_OK; + } + + if ((handle->req_count > 0) && (handle->req_array != NULL)) { + NBC_DEBUG(50, "NBC_Progress: testing for %i requests\n", handle->req_count); +#ifdef NBC_TIMING + Test_time -= MPI_Wtime(); +#endif + res = ompi_request_test_all(handle->req_count, handle->req_array, &flag, MPI_STATUSES_IGNORE); + if(res != OMPI_SUCCESS) { + NBC_Error ("MPI Error in MPI_Testall() (%i)", res); + return res; + } +#ifdef NBC_TIMING + Test_time += MPI_Wtime(); +#endif + } else { + flag = 1; /* we had no open requests -> proceed to next round */ + } + + /* a round is finished */ + if (flag) { + /* adjust delim to start of current round */ + NBC_DEBUG(5, "NBC_Progress: going in schedule %p to row-offset: %li\n", handle->schedule, handle->row_offset); + delim = handle->schedule->data + handle->row_offset; + NBC_DEBUG(10, "delim: %p\n", delim); + nbc_get_round_size(delim, &size); + NBC_DEBUG(10, "size: %li\n", size); + /* adjust delim to end of current round -> delimiter */ + delim = delim + size; + + if (NULL != handle->req_array) { + /* free request array */ + free (handle->req_array); + handle->req_array = NULL; + } + + handle->req_count = 0; + + if (*delim == 0) { + /* this was the last round - we're done */ + NBC_DEBUG(5, "NBC_Progress last round finished - we're done\n"); + + NBC_Free(handle); + + return NBC_OK; + } + + NBC_DEBUG(5, "NBC_Progress round finished - goto next round\n"); + /* move delim to start of next round */ + /* initializing handle for new virgin round */ + handle->row_offset = (intptr_t) (delim + 1) - (intptr_t) handle->schedule->data; + /* kick it off */ + res = NBC_Start_round(handle); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Error ("Error in NBC_Start_round() (%i)", res); + return res; + } } -error: return ret; } static inline int NBC_Start_round(NBC_Handle *handle) { int num; /* number of operations */ - int i, res, ret=NBC_OK; + int 
res; char* ptr; + MPI_Request *tmp; NBC_Fn_type type; NBC_Args_send sendargs; NBC_Args_recv recvargs; NBC_Args_op opargs; NBC_Args_copy copyargs; NBC_Args_unpack unpackargs; - NBC_Schedule myschedule; void *buf1, *buf2, *buf3; /* get round-schedule address */ - myschedule = (NBC_Schedule*)((char*)*handle->schedule + handle->row_offset); - ptr = (char*) myschedule; + ptr = handle->schedule->data + handle->row_offset; NBC_GET_BYTES(ptr,num); - NBC_DEBUG(10, "start_round round at address %p : posting %i operations\n", myschedule, num); + NBC_DEBUG(10, "start_round round at offset %d : posting %i operations\n", handle->row_offset, num); - for (i=0; ischedule->data); + + memcpy (&type, ptr, sizeof (type)); switch(type) { case SEND: - NBC_DEBUG(5," SEND (offset %li) ", (long)ptr-(long)myschedule); + NBC_DEBUG(5," SEND (offset %li) ", offset); NBC_GET_BYTES(ptr,sendargs); - NBC_DEBUG(5,"*buf: %p, count: %i, type: %lu, dest: %i, tag: %i)\n", sendargs.buf, sendargs.count, (unsigned long)sendargs.datatype, sendargs.dest, handle->tag); + NBC_DEBUG(5,"*buf: %p, count: %i, type: %p, dest: %i, tag: %i)\n", sendargs.buf, + sendargs.count, sendargs.datatype, sendargs.dest, handle->tag); /* get an additional request */ handle->req_count++; /* get buffer */ @@ -412,18 +400,30 @@ static inline int NBC_Start_round(NBC_Handle *handle) { #ifdef NBC_TIMING Isend_time -= MPI_Wtime(); #endif - handle->req_array = (MPI_Request*)realloc((void*)handle->req_array, (handle->req_count)*sizeof(MPI_Request)); - NBC_CHECK_NULL(handle->req_array); - res = MCA_PML_CALL(isend(buf1, sendargs.count, sendargs.datatype, sendargs.dest, handle->tag, MCA_PML_BASE_SEND_STANDARD, handle->comm, handle->req_array+handle->req_count-1)); - if(OMPI_SUCCESS != res) { printf("Error in MPI_Isend(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, sendargs.count, (unsigned long)sendargs.datatype, sendargs.dest, handle->tag, (unsigned long)handle->comm, res); ret=res; goto error; } + tmp = (MPI_Request *) realloc 
((void *) handle->req_array, handle->req_count * sizeof (MPI_Request)); + if (NULL == tmp) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + handle->req_array = tmp; + + res = MCA_PML_CALL(isend(buf1, sendargs.count, sendargs.datatype, sendargs.dest, handle->tag, + MCA_PML_BASE_SEND_STANDARD, handle->comm, + handle->req_array+handle->req_count - 1)); + if (OMPI_SUCCESS != res) { + NBC_Error ("Error in MPI_Isend(%lu, %i, %p, %i, %i, %lu) (%i)", (unsigned long)buf1, sendargs.count, + sendargs.datatype, sendargs.dest, handle->tag, (unsigned long)handle->comm, res); + return res; + } #ifdef NBC_TIMING Isend_time += MPI_Wtime(); #endif break; case RECV: - NBC_DEBUG(5, " RECV (offset %li) ", (long)ptr-(long)myschedule); + NBC_DEBUG(5, " RECV (offset %li) ", offset); NBC_GET_BYTES(ptr,recvargs); - NBC_DEBUG(5, "*buf: %p, count: %i, type: %lu, source: %i, tag: %i)\n", recvargs.buf, recvargs.count, (unsigned long)recvargs.datatype, recvargs.source, handle->tag); + NBC_DEBUG(5, "*buf: %p, count: %i, type: %p, source: %i, tag: %i)\n", recvargs.buf, recvargs.count, + recvargs.datatype, recvargs.source, handle->tag); /* get an additional request - TODO: req_count NOT thread safe */ handle->req_count++; /* get buffer */ @@ -435,18 +435,29 @@ static inline int NBC_Start_round(NBC_Handle *handle) { #ifdef NBC_TIMING Irecv_time -= MPI_Wtime(); #endif - handle->req_array = (MPI_Request*)realloc((void*)handle->req_array, (handle->req_count)*sizeof(MPI_Request)); - NBC_CHECK_NULL(handle->req_array); - res = MCA_PML_CALL(irecv(buf1, recvargs.count, recvargs.datatype, recvargs.source, handle->tag, handle->comm, handle->req_array+handle->req_count-1)); - if(OMPI_SUCCESS != res) { printf("Error in MPI_Irecv(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, recvargs.count, (unsigned long)recvargs.datatype, recvargs.source, handle->tag, (unsigned long)handle->comm, res); ret=res; goto error; } + tmp = (MPI_Request *) realloc ((void *) handle->req_array, handle->req_count * sizeof 
(MPI_Request)); + if (NULL == tmp) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + handle->req_array = tmp; + + res = MCA_PML_CALL(irecv(buf1, recvargs.count, recvargs.datatype, recvargs.source, handle->tag, handle->comm, + handle->req_array+handle->req_count-1)); + if (OMPI_SUCCESS != res) { + NBC_Error("Error in MPI_Irecv(%lu, %i, %p, %i, %i, %lu) (%i)", (unsigned long)buf1, recvargs.count, + recvargs.datatype, recvargs.source, handle->tag, (unsigned long)handle->comm, res); + return res; + } #ifdef NBC_TIMING Irecv_time += MPI_Wtime(); #endif break; case OP: - NBC_DEBUG(5, " OP (offset %li) ", (long)ptr-(long)myschedule); + NBC_DEBUG(5, " OP (offset %li) ", offset); NBC_GET_BYTES(ptr,opargs); - NBC_DEBUG(5, "*buf1: %p, buf2: %p, buf3: %p, count: %i, type: %lu)\n", opargs.buf1, opargs.buf2, opargs.buf3, opargs.count, (unsigned long)opargs.datatype); + NBC_DEBUG(5, "*buf1: %p, buf2: %p, buf3: %p, count: %i, type: %p)\n", opargs.buf1, opargs.buf2, + opargs.buf3, opargs.count, opargs.datatype); /* get buffers */ if(opargs.tmpbuf1) { buf1=(char*)handle->tmpbuf+(long)opargs.buf1; @@ -466,9 +477,11 @@ static inline int NBC_Start_round(NBC_Handle *handle) { ompi_3buff_op_reduce(opargs.op, buf1, buf2, buf3, opargs.count, opargs.datatype); break; case COPY: - NBC_DEBUG(5, " COPY (offset %li) ", (long)ptr-(long)myschedule); + NBC_DEBUG(5, " COPY (offset %li) ", offset); NBC_GET_BYTES(ptr,copyargs); - NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu, tgtcount: %i, tgttype: %lu)\n", (unsigned long)copyargs.src, copyargs.srccount, (unsigned long)copyargs.srctype, (unsigned long)copyargs.tgt, copyargs.tgtcount, (unsigned long)copyargs.tgttype); + NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %p, *tgt: %lu, tgtcount: %i, tgttype: %p)\n", + (unsigned long) copyargs.src, copyargs.srccount, copyargs.srctype, + (unsigned long) copyargs.tgt, copyargs.tgtcount, copyargs.tgttype); /* get buffers */ if(copyargs.tmpsrc) { buf1=(char*)handle->tmpbuf+(long)copyargs.src; @@ 
-480,13 +493,17 @@ static inline int NBC_Start_round(NBC_Handle *handle) { } else { buf2=copyargs.tgt; } - res = NBC_Copy(buf1, copyargs.srccount, copyargs.srctype, buf2, copyargs.tgtcount, copyargs.tgttype, handle->comm); - if(res != NBC_OK) { printf("NBC_Copy() failed (code: %i)\n", res); ret=res; goto error; } + res = NBC_Copy (buf1, copyargs.srccount, copyargs.srctype, buf2, copyargs.tgtcount, copyargs.tgttype, + handle->comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } break; case UNPACK: - NBC_DEBUG(5, " UNPACK (offset %li) ", (long)ptr-(long)myschedule); + NBC_DEBUG(5, " UNPACK (offset %li) ", offset); NBC_GET_BYTES(ptr,unpackargs); - NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu\n", (unsigned long)unpackargs.inbuf, unpackargs.count, (unsigned long)unpackargs.datatype, (unsigned long)unpackargs.outbuf); + NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %p, *tgt: %lu\n", (unsigned long) unpackargs.inbuf, + unpackargs.count, unpackargs.datatype, (unsigned long) unpackargs.outbuf); /* get buffers */ if(unpackargs.tmpinbuf) { buf1=(char*)handle->tmpbuf+(long)unpackargs.inbuf; @@ -498,13 +515,16 @@ static inline int NBC_Start_round(NBC_Handle *handle) { } else { buf2=unpackargs.outbuf; } - res = NBC_Unpack(buf1, unpackargs.count, unpackargs.datatype, buf2, handle->comm); - if(res != NBC_OK) { printf("NBC_Unpack() failed (code: %i)\n", res); ret=res; goto error; } + res = NBC_Unpack (buf1, unpackargs.count, unpackargs.datatype, buf2, handle->comm); + if (OMPI_SUCCESS != res) { + NBC_Error ("NBC_Unpack() failed (code: %i)", res); + return res; + } + break; default: - printf("NBC_Start_round: bad type %li at offset %li\n", (long)type, (long)ptr-(long)myschedule); - ret=NBC_BAD_SCHED; - goto error; + NBC_Error ("NBC_Start_round: bad type %li at offset %li", (long)type, offset); + return OMPI_ERROR; } } @@ -513,13 +533,14 @@ static inline int NBC_Start_round(NBC_Handle *handle) { * * threaded case: calling progress in the first 
round can lead to a * deadlock if NBC_Free is called in this round :-( */ - if(handle->row_offset != sizeof(int)) { + if (handle->row_offset) { res = NBC_Progress(handle); - if((NBC_OK != res) && (NBC_CONTINUE != res)) { printf("Error in NBC_Progress() (%i)\n", res); ret=res; goto error; } + if ((NBC_OK != res) && (NBC_CONTINUE != res)) { + return OMPI_ERROR; + } } -error: - return ret; + return OMPI_SUCCESS; } int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t **request, ompi_coll_libnbc_module_t *comminfo) @@ -537,8 +558,7 @@ int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t handle->req_array = NULL; handle->comm = comm; handle->schedule = NULL; - /* first int is the schedule size */ - handle->row_offset = sizeof(int); + handle->row_offset = 0; /******************** Do the tag and shadow comm administration ... ***************/ @@ -555,7 +575,7 @@ int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t } OPAL_THREAD_UNLOCK(&comminfo->mutex); - handle->tag=comminfo->tag; + handle->tag = tmp_tag; /* register progress */ if (need_register) { @@ -574,7 +594,12 @@ int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t NBC_DEBUG(3, "got tag %i\n", handle->tag); - return NBC_OK; + return OMPI_SUCCESS; +} + +void NBC_Return_handle(ompi_coll_libnbc_request_t *request) { + NBC_Free (request); + OMPI_COLL_LIBNBC_REQUEST_RETURN(request); } int NBC_Init_comm(MPI_Comm comm, NBC_Comminfo *comminfo) { @@ -636,11 +661,12 @@ int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule) { /* kick off first round */ res = NBC_Start_round(handle); - if((NBC_OK != res)) { printf("Error in NBC_Start_round() (%i)\n", res); return res; } - + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } opal_list_append(&mca_coll_libnbc_component.active_requests, &(handle->super.super.super)); - return NBC_OK; + return OMPI_SUCCESS; } #ifdef NBC_CACHE_SCHEDULE @@ -650,13 +676,8 @@ 
void NBC_SchedCache_args_delete_key_dummy(void *k) { } void NBC_SchedCache_args_delete(void *entry) { - struct NBC_dummyarg *tmp; - - tmp = (struct NBC_dummyarg*)entry; - /* free taglistentry */ - free((void*)*(tmp->schedule)); - /* the schedule pointer itself is also malloc'd */ - free((void*)tmp->schedule); - free((void*)tmp); + struct NBC_dummyarg *tmp = (struct NBC_dummyarg*)entry; + OBJ_RELEASE(tmp->schedule); + free(entry); } #endif diff --git a/ompi/mca/coll/libnbc/nbc_iallgather.c b/ompi/mca/coll/libnbc/nbc_iallgather.c index 3680951644..abf783e8c8 100644 --- a/ompi/mca/coll/libnbc/nbc_iallgather.c +++ b/ompi/mca/coll/libnbc/nbc_iallgather.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -6,6 +7,8 @@ * rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -15,19 +18,20 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Allgather_args_compare(NBC_Allgather_args *a, NBC_Allgather_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->sendcount == b->sendcount) && (a->sendtype == b->sendtype) && (a->recvbuf == b->recvbuf) && (a->recvcount == b->recvcount) && (a->recvtype == b->recvtype) ) { - return 0; + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if( a->sendbuf < b->sendbuf ) { return -1; - } - return +1; + } + + return 1; } #endif @@ -39,7 +43,7 @@ int ompi_coll_libnbc_iallgather(void* sendbuf, int sendcount, MPI_Datatype sendt MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, r; + int rank, p, res; MPI_Aint rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; @@ -47,146 +51,181 @@ int ompi_coll_libnbc_iallgather(void* sendbuf, int sendcount, MPI_Datatype sendt NBC_Allgather_args *args, *found, search; #endif NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); - handle->tmpbuf = NULL; + res = 
MPI_Type_extent(recvtype, &rcvext); + if (MPI_SUCCESS != res) { + return res; + } if (inplace) { - sendtype = recvtype; - sendcount = recvcount; + sendtype = recvtype; + sendcount = recvcount; } else { /* copy my data to receive buffer */ - rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext); - res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + rbuf = (char *) recvbuf + rank * recvcount * rcvext; + res = NBC_Copy (sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } #ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.sendcount=sendcount; - search.sendtype=sendtype; - search.recvbuf=recvbuf; - search.recvcount=recvcount; - search.recvtype=recvtype; - found = (NBC_Allgather_args *)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLGATHER], &search); - if(found == NULL) { + search.sendbuf = sendbuf; + search.sendcount = sendcount; + search.sendtype = sendtype; + search.recvbuf = recvbuf; + search.recvcount = recvcount; + search.recvtype = recvtype; + found = (NBC_Allgather_args *) hb_tree_search ((hb_tree*)libnbc_module->NBC_Dict[NBC_ALLGATHER], &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(NBC_OK != res) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; } - - sbuf = ((char *)recvbuf) + (rank*recvcount*rcvext); + sbuf = (char *)recvbuf + rank * recvcount * rcvext; /* do p-1 rounds */ - for(r=0;rsendbuf=sendbuf; - args->sendcount=sendcount; - args->sendtype=sendtype; - args->recvbuf=recvbuf; - args->recvcount=recvcount; - 
args->recvtype=recvtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLGATHER], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); + args = (NBC_Allgather_args *) malloc (sizeof (args)); + args->sendbuf = sendbuf; + args->sendcount = sendcount; + args->sendtype = sendtype; + args->recvbuf = recvbuf; + args->recvcount = recvcount; + args->recvtype = recvtype; + args->schedule = schedule; + + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLGATHER], args, args, 0); + if (res != 0) { + free (args); + } else { + OBJ_RETAIN(schedule); + } + /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_ALLGATHER] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLGATHER], &handle->comminfo->NBC_Dict_size[NBC_ALLGATHER]); + if (++libnbc_module->NBC_Dict_size[NBC_ALLGATHER] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLGATHER], &libnbc_module->NBC_Dict_size[NBC_ALLGATHER]); } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - /*NBC_PRINT_SCHED(*schedule);*/ + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OMPI_COLL_LIBNBC_REQUEST_RETURN(handle); + return res; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } int ompi_coll_libnbc_iallgather_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, 
res, r, rsize; + int res, rsize; MPI_Aint rcvext; NBC_Schedule *schedule; char *rbuf; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_remote_size(comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - handle->tmpbuf = NULL; - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - - res = NBC_Sched_create(schedule); - if(NBC_OK != res) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; } - - /* do rsize - 1 rounds */ - for(r = 0 ; r < rsize ; ++r) { - /* recv from rank r */ - rbuf = ((char *)recvbuf) + r*(recvcount*rcvext); - res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, r, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - /* send to rank r */ - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, r, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); + return res; } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + rsize = ompi_comm_remote_size (comm); - /*NBC_PRINT_SCHED(*schedule);*/ + /* set up schedule */ + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == 
schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + /* do rsize - 1 rounds */ + for (int r = 0 ; r < rsize ; ++r) { + /* recv from rank r */ + rbuf = (char *) recvbuf + r * recvcount * rcvext; + res = NBC_Sched_recv (rbuf, false, recvcount, recvtype, r, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - return NBC_OK; + /* send to rank r */ + res = NBC_Sched_send (sendbuf, false, sendcount, sendtype, r, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OMPI_COLL_LIBNBC_REQUEST_RETURN(handle); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_iallgatherv.c b/ompi/mca/coll/libnbc/nbc_iallgatherv.c index 4eb385ab88..b462118caf 100644 --- a/ompi/mca/coll/libnbc/nbc_iallgatherv.c +++ b/ompi/mca/coll/libnbc/nbc_iallgatherv.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -8,7 +9,7 @@ * Author(s): Torsten Hoefler * * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -32,117 +33,150 @@ int ompi_coll_libnbc_iallgatherv(void* sendbuf, int sendcount, MPI_Datatype send MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, r, speer, rpeer; + int rank, p, res, speer, rpeer; MPI_Aint rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; } + res = MPI_Type_extent (recvtype, &rcvext); + if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { + NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } if (inplace) { sendtype = recvtype; sendcount = recvcounts[rank]; } else { /* copy my data to receive buffer */ - rbuf = ((char *)recvbuf) + (displs[rank]*rcvext); - res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcounts[rank], recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; 
} + rbuf = (char *) recvbuf + displs[rank] * rcvext; + res = NBC_Copy (sendbuf, sendcount, sendtype, rbuf, recvcounts[rank], recvtype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } - sbuf = ((char*) recvbuf) + (displs[rank]*rcvext); + + schedule = OBJ_NEW(NBC_Schedule); + if (NULL == schedule) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + sbuf = (char *) recvbuf + displs[rank] * rcvext; /* do p-1 rounds */ - for(r=1;rtmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (NULL == schedule) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* do rsize rounds */ - for (r = 0 ; r < rsize ; ++r) { - char *rbuf = ((char *)recvbuf) + (displs[r]*rcvext); + for (int r = 0 ; r < rsize ; ++r) { + char *rbuf = (char *) recvbuf + displs[r] * rcvext; if (recvcounts[r]) { - res = NBC_Sched_recv(rbuf, false, recvcounts[r], recvtype, r, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv (rbuf, false, recvcounts[r], recvtype, r, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } if (sendcount) { - for (r = 0 ; r < rsize ; ++r) { - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, r, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + for (int r = 0 ; r < rsize ; ++r) { + res = NBC_Sched_send (sendbuf, false, sendcount, sendtype, r, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in 
NBC_Start() (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - return NBC_OK; + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_iallreduce.c b/ompi/mca/coll/libnbc/nbc_iallreduce.c index 53200f9cbd..7be8075a94 100644 --- a/ompi/mca/coll/libnbc/nbc_iallreduce.c +++ b/ompi/mca/coll/libnbc/nbc_iallreduce.c @@ -1,10 +1,11 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -18,25 +19,31 @@ #include -static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle); -static inline int allred_sched_ring(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle); -static inline int allred_sched_linear(int rank, int p, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle); +static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, + void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle); +static inline int allred_sched_ring(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, + void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, + NBC_Handle *handle); +static inline int allred_sched_linear(int rank, int p, void *sendbuf, void *recvbuf, int count, + MPI_Datatype datatype, MPI_Op op, int ext, int size, + NBC_Schedule *schedule, NBC_Handle *handle); #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Allreduce_args_compare(NBC_Allreduce_args *a, NBC_Allreduce_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->recvbuf == b->recvbuf) && (a->count == b->count) && (a->datatype == b->datatype) && - (a->op == b->op) ) { - return 0; + (a->op == b->op)) { + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if( a->sendbuf < b->sendbuf ) { return -1; - } - return +1; + } + + return 1; } #endif @@ -54,29 +61,43 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat enum { NBC_ARED_BINOMIAL, NBC_ARED_RING } alg; char inplace; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = 
(ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); + res = ompi_datatype_get_extent(datatype, &lb, &ext); - if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (OMPI_SUCCESS != res) { + NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + res = ompi_datatype_type_size (datatype, &size); - if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } + if (OMPI_SUCCESS != res) { + NBC_Error ("MPI Error in MPI_Type_size() (%i)", res); + return res; + } - handle->tmpbuf = malloc(ext*count); - if(handle->tmpbuf == NULL) { printf("Error in malloc() (%i)\n", res); return NBC_OOR; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - if((p == 1) && !inplace) { + handle->tmpbuf = malloc (ext * count); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + if ((p == 1) && !inplace) { /* for a single node - copy data to receivebuf */ res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } } /* algorithm selection */ @@ -88,19 +109,22 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat #ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.recvbuf=recvbuf; - search.count=count; - search.datatype=datatype; - search.op=op; - found = (NBC_Allreduce_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLREDUCE], 
&search); - if(found == NULL) { + search.sendbuf = sendbuf; + search.recvbuf = recvbuf; + search.count = count; + search.datatype = datatype; + search.op = op; + found = (NBC_Allreduce_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLREDUCE], &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (NULL == schedule) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* ensure the schedule is released with the handle on error */ + handle->schedule = schedule; switch(alg) { case NBC_ARED_BINOMIAL: @@ -110,37 +134,59 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat res = allred_sched_ring(rank, p, count, datatype, sendbuf, recvbuf, op, size, ext, schedule, handle); break; } - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } res = NBC_Sched_commit(schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Allreduce_args*)malloc(sizeof(NBC_Allreduce_args)); - args->sendbuf=sendbuf; - args->recvbuf=recvbuf; - args->count=count; - args->datatype=datatype; - args->op=op; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLREDUCE], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_ALLREDUCE] > NBC_SCHED_DICT_UPPER) { - 
NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLREDUCE], &handle->comminfo->NBC_Dict_size[NBC_ALLREDUCE]); + args = (NBC_Allreduce_args *) malloc (sizeof(args)); + if (NULL != args) { + args->sendbuf = sendbuf; + args->recvbuf = recvbuf; + args->count = count; + args->datatype = datatype; + args->op = op; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLREDUCE], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_ALLREDUCE] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLREDUCE], + &libnbc_module->NBC_Dict_size[NBC_ALLREDUCE]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } int ompi_coll_libnbc_iallreduce_inter(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, @@ -151,41 +197,66 @@ int ompi_coll_libnbc_iallreduce_inter(void* sendbuf, void* recvbuf, int count, M MPI_Aint ext; NBC_Schedule *schedule; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) 
{ printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_remote_size(comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + rsize = ompi_comm_remote_size (comm); + res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_size() (%i)", res); + return res; + } - handle->tmpbuf = malloc(ext*count); - if(handle->tmpbuf == NULL) { printf("Error in malloc() (%i)\n", res); return NBC_OOR; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + handle->tmpbuf = malloc (ext * count); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = allred_sched_linear(rank, rsize, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, handle); - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } + /* ensure the schedule is released with the handle on error */ + handle->schedule = schedule; + + res = allred_sched_linear (rank, rsize, sendbuf, recvbuf, count, datatype, op, + ext, size, schedule, handle); + if 
(OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } res = NBC_Sched_commit(schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } res = NBC_Start(handle, schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } @@ -224,50 +295,56 @@ int ompi_coll_libnbc_iallreduce_inter(void* sendbuf, void* recvbuf, int count, M if (vrank == 0) rank = root; \ if (vrank == root) rank = 0; \ } -static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle) { - int root, vrank, r, maxr, firstred, vpeer, peer, res; +static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, + MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle) { + int root, vrank, maxr, vpeer, peer, res; root = 0; /* this makes the code for ireduce and iallreduce nearly identical - could be changed to improve performance */ RANK2VRANK(rank, vrank, root); maxr = (int)ceil((log((double)p)/LOG2)); - firstred = 1; - for(r=1; r<=maxr; r++) { - if((vrank % (1<tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (peer < p) { /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - if(firstred && MPI_IN_PLACE != sendbuf) { + res = NBC_Sched_recv (0, true, count, datatype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return 
res; + } + + /* this cannot be done until handle->tmpbuf is unused :-( so barrier after the op */ + if (firstred && MPI_IN_PLACE != sendbuf) { /* perform the reduce with the senbuf */ - res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule); + res = NBC_Sched_op (recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule, true); firstred = 0; } else { /* perform the reduce in my local buffer */ - res = NBC_Sched_op(recvbuf, false, recvbuf, false, 0, true, count, datatype, op, schedule); + res = NBC_Sched_op (recvbuf, false, recvbuf, false, 0, true, count, datatype, op, schedule, true); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; } - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } } } else { /* we have to send this round */ - vpeer = vrank - (1<<(r-1)); + vpeer = vrank - (1 << (r - 1)); VRANK2RANK(peer, vpeer, root) - if(firstred && MPI_IN_PLACE != sendbuf) { + if (firstred && MPI_IN_PLACE != sendbuf) { /* we have to use the sendbuf in the first round .. 
*/ - res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule); + res = NBC_Sched_send (sendbuf, false, count, datatype, peer, schedule, false); } else { /* and the recvbuf in all remeining rounds */ - res = NBC_Sched_send(recvbuf, false, count, datatype, peer, schedule); + res = NBC_Sched_send (recvbuf, false, count, datatype, peer, schedule, false); } - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + /* leave the game */ break; } @@ -278,61 +355,75 @@ static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype dat RANK2VRANK(rank, vrank, root); /* receive from the right hosts */ - if(vrank != 0) { - for(r=0; r= (1<tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (vrank != 0) { + for (int r = 0; r < maxr ; ++r) { + if ((vrank >= (1 << r)) && (vrank < (1 << (r + 1)))) { + VRANK2RANK(peer, vrank - (1 << r), root); + res = NBC_Sched_recv (recvbuf, false, count, datatype, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } } - res = NBC_Sched_barrier(schedule); - if(NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + + res = NBC_Sched_barrier (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } /* now send to the right hosts */ - for(r=0; rtmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + for (int r = 0; r < maxr; ++r) { + if (((vrank + (1 << r) < p) && (vrank < (1 << r))) || (vrank == 0)) { + VRANK2RANK(peer, vrank + (1 << r), root); + res = NBC_Sched_send (recvbuf, false, count, datatype, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } } - /* end of the bcast */ - return NBC_OK; + /* end of the bcast */ + return OMPI_SUCCESS; } -static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype datatype, 
void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle) { - int i; /* runner */ +static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, + int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle) { int segsize, *segsizes, *segoffsets; /* segment sizes and offsets per segment (number of segments == number of nodes */ int speer, rpeer; /* send and recvpeer */ + int res = OMPI_SUCCESS; - if(count == 0) return NBC_OK; + if (count == 0) { + return OMPI_SUCCESS; + } - { - int mycount; /* temporary */ - segsizes = (int*)malloc(sizeof(int)*p); - segoffsets = (int*)malloc(sizeof(int)*p); - segsize = count/p; /* size of the segments */ - if(count%p != 0) segsize++; - mycount = count; - segoffsets[0] = 0; - for(i = 0; i reduced this round * / -> sum (reduced in a previous step) @@ -432,96 +523,131 @@ static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype dataty * 2p-2 rounds ... 
every node does p-1 reductions and p-1 sends * */ - { - int round = 0; - /* first p-1 rounds are reductions */ - do { - int selement = (r+1-round + 2*p /*2*p avoids negative mod*/)%p; /* the element I am sending */ - int soffset = segoffsets[selement]*ext; - int relement = (r-round + 2*p /*2*p avoids negative mod*/)%p; /* the element that I receive from my neighbor */ - int roffset = segoffsets[relement]*ext; + /* first p-1 rounds are reductions */ + for (int round = 0 ; round < p - 1 ; ++round) { + int selement = (r+1-round + 2*p /*2*p avoids negative mod*/)%p; /* the element I am sending */ + int soffset = segoffsets[selement]*ext; + int relement = (r-round + 2*p /*2*p avoids negative mod*/)%p; /* the element that I receive from my neighbor */ + int roffset = segoffsets[relement]*ext; - /* first message come out of sendbuf */ - if(round == 0) { - NBC_Sched_send((char*)sendbuf+soffset, false, segsizes[selement], datatype, speer, schedule); - } else { - NBC_Sched_send((char*)recvbuf+soffset, false, segsizes[selement], datatype, speer, schedule); - } - NBC_Sched_recv((char*)recvbuf+roffset, false, segsizes[relement], datatype, rpeer, schedule); + /* first message come out of sendbuf */ + if (round == 0) { + res = NBC_Sched_send ((char *) sendbuf + soffset, false, segsizes[selement], datatype, speer, + schedule, false); + } else { + res = NBC_Sched_send ((char *) recvbuf + soffset, false, segsizes[selement], datatype, speer, + schedule, false); + } - NBC_Sched_barrier(schedule); - NBC_Sched_op((char*)recvbuf+roffset, false, (char*)sendbuf+roffset, false, (char*)recvbuf+roffset, false, segsizes[relement], datatype, op, schedule); - NBC_Sched_barrier(schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } - round++; - } while(round < p-1); + res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer, + schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } - do { - int selement = (r+1-round + 2*p /*2*p 
avoids negative mod*/)%p; /* the element I am sending */ - int soffset = segoffsets[selement]*ext; - int relement = (r-round + 2*p /*2*p avoids negative mod*/)%p; /* the element that I receive from my neighbor */ - int roffset = segoffsets[relement]*ext; - - NBC_Sched_send((char*)recvbuf+soffset, false, segsizes[selement], datatype, speer, schedule); - NBC_Sched_recv((char*)recvbuf+roffset, false, segsizes[relement], datatype, rpeer, schedule); - NBC_Sched_barrier(schedule); - round++; - } while (round < 2*p-2); + res = NBC_Sched_op ((char *) recvbuf + roffset, false, (char *) sendbuf + roffset, false, + (char *) recvbuf + roffset, false, segsizes[relement], datatype, op, schedule, + true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } } - return NBC_OK; + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + free (segsizes); + free (segoffsets); + return res; + } + + for (int round = p - 1 ; round < 2 * p - 2 ; ++round) { + int selement = (r+1-round + 2*p /*2*p avoids negative mod*/)%p; /* the element I am sending */ + int soffset = segoffsets[selement]*ext; + int relement = (r-round + 2*p /*2*p avoids negative mod*/)%p; /* the element that I receive from my neighbor */ + int roffset = segoffsets[relement]*ext; + + res = NBC_Sched_send ((char *) recvbuf + soffset, false, segsizes[selement], datatype, speer, + schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + + res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer, + schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + } + + free (segsizes); + free (segoffsets); + + return res; } static inline int allred_sched_linear(int rank, int rsize, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle) { - int res, rpeer; + int res; - if(count == 0) return NBC_OK; + if (0 == count) { + return OMPI_SUCCESS; + } /* send my data to the remote root */ - res = 
NBC_Sched_send (sendbuf, false, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sendbuf, false, count, datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - res = NBC_Sched_recv (recvbuf, false, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv (recvbuf, false, count, datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } if (0 == rank) { /* wait for data from the remote root */ res = NBC_Sched_barrier (schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } /* get data from remote peers and reduce */ - for (rpeer = 1 ; rpeer < rsize ; ++rpeer) { - res = NBC_Sched_recv (0, true, count, datatype, rpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + for (int rpeer = 1 ; rpeer < rsize ; ++rpeer) { + res = NBC_Sched_recv (0, true, count, datatype, rpeer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - res = NBC_Sched_op (recvbuf, false, 0, true, recvbuf, false, count, datatype, op, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Sched_op (recvbuf, false, 0, true, recvbuf, false, count, datatype, op, + schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } /* exchange our result with the remote root (each root will broadcast to the other's peers) */ - res = NBC_Sched_recv (0, true, count, 
datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_send (recvbuf, false, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_recv (0, true, count, datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } /* wait for data from remote root */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Sched_send (recvbuf, false, count, datatype, 0, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } /* broadcast the result to all remote peers */ - for (rpeer = 1 ; rpeer < rsize ; ++rpeer) { - res = NBC_Sched_send (0, true, count, datatype, rpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + for (int rpeer = 1 ; rpeer < rsize ; ++rpeer) { + res = NBC_Sched_send (0, true, count, datatype, rpeer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } } - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ialltoall.c b/ompi/mca/coll/libnbc/nbc_ialltoall.c index ae9fc42929..166d5d426d 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoall.c @@ -1,10 +1,11 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. 
* Copyright (c) 2014 Research Organization for Information Science @@ -15,26 +16,33 @@ */ #include "nbc_internal.h" -static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -static inline int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle); +static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, + void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, + int recvcount, MPI_Datatype recvtype, MPI_Comm comm); +static inline int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, + void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, + int recvcount, MPI_Datatype recvtype, MPI_Comm comm); +static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, + void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, + int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle); #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Alltoall_args_compare(NBC_Alltoall_args *a, NBC_Alltoall_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->sendcount == b->sendcount) && (a->sendtype == b->sendtype) && (a->recvbuf == b->recvbuf) && (a->recvcount == b->recvcount) && - (a->recvtype == b->recvtype) ) { - return 0; + (a->recvtype == 
b->recvtype)) { + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if( a->sendbuf < b->sendbuf ) { return -1; - } - return +1; + } + + return 1; } #endif @@ -52,27 +60,33 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty char *rbuf, *sbuf, inplace; enum {NBC_A2A_LINEAR, NBC_A2A_PAIRWISE, NBC_A2A_DISS} alg; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + res = MPI_Type_size(sendtype, &sndsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_size() (%i)", res); + return res; + } /* algorithm selection */ - a2asize = sndsize*sendcount*p; + a2asize = sndsize * sendcount * p; /* this number is optimized for TCP on odin.cs.indiana.edu */ if((p <= 8) && ((a2asize < 1<<17) || (sndsize*sendcount < 1<<12))) { 
/* just send as fast as we can if we have less than 8 peers, if the @@ -85,73 +99,104 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty } else alg = NBC_A2A_LINEAR; /*NBC_A2A_PAIRWISE;*/ - if(!inplace) { + if (!inplace) { /* copy my data to receive buffer */ - rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext); - sbuf = ((char *)sendbuf) + (rank*sendcount*sndext); - res = NBC_Copy(sbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + rbuf = (char *) recvbuf + rank * recvcount * rcvext; + sbuf = (char *) sendbuf + rank * sendcount * sndext; + res = NBC_Copy (sbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; } /* allocate temp buffer if we need one */ - if(alg == NBC_A2A_DISS) { + if (alg == NBC_A2A_DISS) { /* only A2A_DISS needs buffers */ if(NBC_Type_intrinsic(sendtype)) { - datasize = sndext*sendcount; + datasize = sndext * sendcount; } else { - res = MPI_Pack_size(sendcount, sendtype, comm, &datasize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; } + res = MPI_Pack_size (sendcount, sendtype, comm, &datasize); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Pack_size() (%i)", res); + NBC_Return_handle (handle); + return res; + } } + /* allocate temporary buffers */ - if(p % 2 == 0) { - handle->tmpbuf=malloc(datasize*p*2); + if ((p & 1) == 0) { + handle->tmpbuf = malloc (datasize * p * 2); } else { /* we cannot divide p by two, so alloc more to be safe ... 
*/ - handle->tmpbuf=malloc(datasize*(p/2+1)*2*2); + handle->tmpbuf = malloc (datasize * (p / 2 + 1) * 2 * 2); + } + + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; } /* phase 1 - rotate n data blocks upwards into the tmpbuffer */ #if OPAL_CUDA_SUPPORT - if(NBC_Type_intrinsic(sendtype) && !(opal_cuda_check_bufs((char *)sendbuf, (char *)recvbuf))) { + if (NBC_Type_intrinsic(sendtype) && !(opal_cuda_check_bufs((char *)sendbuf, (char *)recvbuf))) { #else - if(NBC_Type_intrinsic(sendtype)) { + if (NBC_Type_intrinsic(sendtype)) { #endif /* OPAL_CUDA_SUPPORT */ /* contiguous - just copy (1st copy) */ - memcpy(handle->tmpbuf, (char*)sendbuf+datasize*rank, datasize*(p-rank)); - if(rank != 0) memcpy((char*)handle->tmpbuf+datasize*(p-rank), sendbuf, datasize*(rank)); + memcpy (handle->tmpbuf, (char *) sendbuf + datasize * rank, datasize * (p - rank)); + if (rank != 0) { + memcpy ((char *) handle->tmpbuf + datasize * (p - rank), sendbuf, datasize * rank); + } } else { int pos=0; /* non-contiguous - pack */ - res = MPI_Pack((char*)sendbuf+rank*sendcount*sndext, (p-rank)*sendcount, sendtype, handle->tmpbuf, (p-rank)*datasize, &pos, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; } - if(rank != 0) { + res = MPI_Pack ((char *) sendbuf + rank * sendcount * sndext, (p - rank) * sendcount, sendtype, handle->tmpbuf, + (p - rank) * datasize, &pos, comm); + if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { + NBC_Error("MPI Error in MPI_Pack() (%i)", res); + NBC_Return_handle (handle); + return res; + } + + if (rank != 0) { pos = 0; - MPI_Pack(sendbuf, rank*sendcount, sendtype, (char*)handle->tmpbuf+datasize*(p-rank), rank*datasize, &pos, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; } + res = MPI_Pack(sendbuf, rank * sendcount, sendtype, (char *) handle->tmpbuf + datasize * (p - rank), + rank * datasize, &pos, comm); + if 
(OPAL_UNLIKELY(MPI_SUCCESS != res)) { + NBC_Error("MPI Error in MPI_Pack() (%i)", res); + NBC_Return_handle (handle); + return res; + } } } - } else { - handle->tmpbuf=NULL; } #ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.sendcount=sendcount; - search.sendtype=sendtype; - search.recvbuf=recvbuf; - search.recvcount=recvcount; - search.recvtype=recvtype; - found = (NBC_Alltoall_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLTOALL], &search); - if(found == NULL) { + search.sendbuf = sendbuf; + search.sendcount = sendcount; + search.sendtype = sendtype; + search.recvbuf = recvbuf; + search.recvcount = recvcount; + search.recvtype = recvtype; + found = (NBC_Alltoall_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLTOALL], &search); + if (NULL == found) { #endif /* not found - generate new schedule */ - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* ensure the schedule is released with the handle on error */ + handle->schedule = schedule; switch(alg) { case NBC_A2A_LINEAR: @@ -165,205 +210,272 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty break; } - if (NBC_OK != res) { return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = 
(NBC_Alltoall_args*)malloc(sizeof(NBC_Alltoall_args)); - args->sendbuf=sendbuf; - args->sendcount=sendcount; - args->sendtype=sendtype; - args->recvbuf=recvbuf; - args->recvcount=recvcount; - args->recvtype=recvtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLTOALL], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_ALLTOALL] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLTOALL], &handle->comminfo->NBC_Dict_size[NBC_ALLTOALL]); - /*if(!rank) printf("[%i] removing %i elements - new size: %i \n", rank, SCHED_DICT_UPPER-SCHED_DICT_LOWER, handle->comminfo->NBC_Alltoall_size);*/ + args = (NBC_Alltoall_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sendbuf = sendbuf; + args->sendcount = sendcount; + args->sendtype = sendtype; + args->recvbuf = recvbuf; + args->recvcount = recvcount; + args->recvtype = recvtype; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLTOALL], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_ALLTOALL] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLTOALL], + &libnbc_module->NBC_Dict_size[NBC_ALLTOALL]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } - /*if(!rank) printf("[%i] added new schedule to tree - number %i\n", rank, handle->comminfo->NBC_Dict_size[NBC_ALLTOALL]);*/ } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + 
NBC_Return_handle (handle); + return res; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } int ompi_coll_libnbc_ialltoall_inter (void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, i, rsize; + int res, rsize; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); + rsize = ompi_comm_remote_size (comm); - res = MPI_Comm_remote_size (comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - for (i = 0; i < rsize; i++) { - /* post all sends */ - sbuf = ((char *) sendbuf) + (i * sendcount * sndext); - res = NBC_Sched_send(sbuf, false, sendcount, sendtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - /* 
post all receives */ - rbuf = ((char *) recvbuf) + (i * recvcount * rcvext); - res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = MPI_Type_extent (sendtype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; } - /*NBC_PRINT_SCHED(*schedule);*/ + res = MPI_Type_extent (recvtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + for (int i = 0; i < rsize; i++) { + /* post all sends */ + sbuf = (char *) sendbuf + i * sendcount * sndext; + res = NBC_Sched_send (sbuf, false, sendcount, sendtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } - return NBC_OK; -} - -static inline int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) { - int res, r, sndpeer, rcvpeer; - char *rbuf, *sbuf; - - res = NBC_OK; - if(p < 2) return res; - - for(r=1;rtmpbuf+datasize*p; - stmpbuf = (char*)handle->tmpbuf+datasize*(p+p/2); + if ((p & 1) == 0) { + rtmpbuf = (char *) handle->tmpbuf + datasize * p; + stmpbuf = (char *) handle->tmpbuf + datasize * (p + p / 2); } else { /* we cannot divide p by two, so alloc more to be safe ... 
*/ - virtp = (p/2+1)*2; - rtmpbuf = (char*)handle->tmpbuf+datasize*p; - stmpbuf = (char*)handle->tmpbuf+datasize*(p+virtp/2); + virtp = (p / 2 + 1) * 2; + rtmpbuf = (char *) handle->tmpbuf + datasize * p; + stmpbuf = (char *) handle->tmpbuf + datasize * (p + virtp / 2); } /* phase 2 - communicate */ - /*printf("[%i] temp buffer is at %lu of size %i, maxround: %i\n", rank, (unsigned long)handle->tmpbuf, (int)datasize*p*(1<tmpbuf, true, datasize, MPI_BYTE, schedule); + res = NBC_Sched_copy((void *)(intptr_t)(i * datasize), true, datasize, MPI_BYTE, stmpbuf + offset - + (intptr_t) handle->tmpbuf, true, datasize, MPI_BYTE, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } offset += datasize; } } - speer = ( rank + r) % p; + speer = (rank + r) % p; /* add p because modulo does not work with negative values */ - rpeer = ((rank - r)+p) % p; + rpeer = ((rank - r) + p) % p; - /*printf("[%i] receiving %i bytes from host %i into rbuf %lu\n", rank, offset, rpeer, (unsigned long)rtmpbuf);*/ - res = NBC_Sched_recv(rtmpbuf-(unsigned long)handle->tmpbuf, true, offset, MPI_BYTE, rpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv (rtmpbuf - (intptr_t) handle->tmpbuf, true, offset, MPI_BYTE, rpeer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - /*printf("[%i] sending %i bytes to host %i from sbuf %lu\n", rank, offset, speer, (unsigned long)stmpbuf);*/ - res = NBC_Sched_send(stmpbuf-(unsigned long)handle->tmpbuf, true, offset, MPI_BYTE, speer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Sched_send (stmpbuf - (intptr_t) handle->tmpbuf, true, offset, MPI_BYTE, speer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } /* unpack from 
buffer */ offset = 0; - for(i=1; itmpbuf, true, datasize, MPI_BYTE, (void*)(long)(i*datasize), true, datasize, MPI_BYTE, schedule); + res = NBC_Sched_copy (rtmpbuf + offset - (intptr_t) handle->tmpbuf, true, datasize, MPI_BYTE, + (void *)(intptr_t)(i * datasize), true, datasize, MPI_BYTE, schedule, + false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + offset += datasize; } } @@ -371,12 +483,15 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve /* phase 3 - reorder - data is now in wrong order in handle->tmpbuf - * reorder it into recvbuf */ - for(i=0; i * @@ -20,71 +23,95 @@ int ompi_coll_libnbc_ialltoallv(void* sendbuf, int *sendcounts, int *sdispls, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *rdispls, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, - struct mca_coll_base_module_2_1_0_t *module) + struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i; + int rank, p, res; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res= MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return 
res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + res = MPI_Type_extent (sendtype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + res = MPI_Type_extent (recvtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } /* copy data to receivbuffer */ - if((sendcounts[rank] != 0) && !inplace) { - rbuf = ((char *) recvbuf) + (rdispls[rank] * rcvext); - sbuf = ((char *) sendbuf) + (sdispls[rank] * sndext); - res = NBC_Copy(sbuf, sendcounts[rank], sendtype, rbuf, recvcounts[rank], recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + if ((sendcounts[rank] != 0) && !inplace) { + rbuf = (char *) recvbuf + rdispls[rank] * rcvext; + sbuf = (char *) sendbuf + sdispls[rank] * sndext; + res = NBC_Copy (sbuf, sendcounts[rank], sendtype, rbuf, recvcounts[rank], recvtype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } - for (i = 0; i < p; i++) { - if (i == rank) { continue; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (int i = 0 ; i < p ; ++i) { + if (i == rank) { + continue; + } + /* post all sends */ - if(sendcounts[i] != 0) { + if (sendcounts[i] != 0) { sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + 
OBJ_RELEASE(schedule); + return res; + } } + /* post all receives */ - if(recvcounts[i] != 0) { + if (recvcounts[i] != 0) { rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } - /*NBC_PRINT_SCHED(*schedule);*/ + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return res; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } /* simple linear Alltoallv */ @@ -93,56 +120,74 @@ int ompi_coll_libnbc_ialltoallv_inter (void* sendbuf, int *sendcounts, int *sdis MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, i, rsize; + int res, rsize; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, 
&rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } + res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } - MPI_Comm_remote_size (comm, &rsize); + rsize = ompi_comm_remote_size (comm); - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - for (i = 0; i < rsize; i++) { + for (int i = 0; i < rsize; i++) { /* post all sends */ - if(sendcounts[i] != 0) { - sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + if (sendcounts[i] != 0) { + sbuf = (char *) sendbuf + sdispls[i] * sndext; + res = NBC_Sched_send (sbuf, false, sendcounts[i], sendtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } /* post all receives */ - if(recvcounts[i] != 0) { - rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (recvcounts[i] != 0) { + rbuf = (char *) recvbuf + rdispls[i] * rcvext; + res = 
NBC_Sched_recv (rbuf, false, recvcounts[i], recvtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } - /*NBC_PRINT_SCHED(*schedule);*/ - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return res; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallw.c b/ompi/mca/coll/libnbc/nbc_ialltoallw.c index e3fe6ab42e..39381c1619 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallw.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallw.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -6,6 +7,8 @@ * rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -22,64 +25,78 @@ int ompi_coll_libnbc_ialltoallw(void* sendbuf, int *sendcounts, int *sdispls, MPI_Datatype recvtypes[], struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i; + int rank, p, res; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res= MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); /* copy data to receivbuffer */ - if((sendcounts[rank] != 0) && !inplace) { - rbuf = ((char *) recvbuf) + rdispls[rank]; - sbuf = ((char *) sendbuf) + sdispls[rank]; + if ((sendcounts[rank] != 0) && !inplace) { + rbuf = (char *) recvbuf + rdispls[rank]; + sbuf = (char *) sendbuf + sdispls[rank]; res = NBC_Copy(sbuf, sendcounts[rank], sendtypes[rank], rbuf, recvcounts[rank], recvtypes[rank], comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } - for (i = 0; i < p; i++) { - if (i == rank) { continue; } + schedule = OBJ_NEW(NBC_Schedule); + if 
(OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (int i = 0; i < p; i++) { + if (i == rank) { + continue; + } + /* post all sends */ - if(sendcounts[i] != 0) { - sbuf = ((char *) sendbuf) + sdispls[i]; - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtypes[i], i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + if (sendcounts[i] != 0) { + sbuf = (char *) sendbuf + sdispls[i]; + res = NBC_Sched_send (sbuf, false, sendcounts[i], sendtypes[i], i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } /* post all receives */ - if(recvcounts[i] != 0) { - rbuf = ((char *) recvbuf) + rdispls[i]; - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtypes[i], i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (recvcounts[i] != 0) { + rbuf = (char *) recvbuf + rdispls[i]; + res = NBC_Sched_recv (rbuf, false, recvcounts[i], recvtypes[i], i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } - /*NBC_PRINT_SCHED(*schedule);*/ + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } /* simple linear Alltoallw */ @@ -88,51 +105,59 @@ int ompi_coll_libnbc_ialltoallw_inter 
(void* sendbuf, int *sendcounts, int *sdis MPI_Datatype recvtypes[], struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, i, rsize; + int res, rsize; NBC_Schedule *schedule; char *rbuf, *sbuf; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } + rsize = ompi_comm_remote_size (comm); - MPI_Comm_remote_size (comm, &rsize); + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - for (i = 0; i < rsize; i++) { + for (int i = 0 ; i < rsize ; ++i) { /* post all sends */ - if(sendcounts[i] != 0) { - sbuf = ((char *) sendbuf) + sdispls[i]; - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtypes[i], i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + if (sendcounts[i] != 0) { + sbuf = (char *) sendbuf + sdispls[i]; + res = NBC_Sched_send (sbuf, false, sendcounts[i], sendtypes[i], i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } /* post all receives */ - if(recvcounts[i] != 0) { - rbuf = ((char *) recvbuf) + rdispls[i]; - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtypes[i], i, schedule); - if (NBC_OK != res) { 
printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (recvcounts[i] != 0) { + rbuf = (char *) recvbuf + rdispls[i]; + res = NBC_Sched_recv (rbuf, false, recvcounts[i], recvtypes[i], i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } - /*NBC_PRINT_SCHED(*schedule);*/ + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ibarrier.c b/ompi/mca/coll/libnbc/nbc_ibarrier.c index 4016f323bd..ce3a563123 100644 --- a/ompi/mca/coll/libnbc/nbc_ibarrier.c +++ b/ompi/mca/coll/libnbc/nbc_ibarrier.c @@ -1,10 +1,11 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -18,137 +19,186 @@ int ompi_coll_libnbc_ibarrier(struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int round, rank, p, maxround, res, recvpeer, sendpeer; + int rank, p, maxround, res, recvpeer, sendpeer; NBC_Schedule *schedule; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - handle->tmpbuf=(void*)malloc(2*sizeof(char)); + handle->tmpbuf = malloc (2); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } #ifdef NBC_CACHE_SCHEDULE /* there only one argument set per communicator -> hang it directly at * the tree-position, NBC_Dict_size[...] is 0 for not initialized and * 1 for initialized. NBC_Dict[...] 
is a pointer to the schedule in * this case */ - if(handle->comminfo->NBC_Dict_size[NBC_BARRIER] == 0) { + if (libnbc_module->NBC_Dict_size[NBC_BARRIER] == 0) { /* we did not init it yet */ #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - round = -1; - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* ensure the schedule is released with the handle on error */ + handle->schedule = schedule; maxround = (int)ceil((log((double)p)/LOG2)-1); - do { - round++; - sendpeer = (rank + (1<comminfo->NBC_Dict[NBC_BARRIER] = (hb_tree*)schedule; - handle->comminfo->NBC_Dict_size[NBC_BARRIER] = 1; + libnbc_module->NBC_Dict[NBC_BARRIER] = (hb_tree *) schedule; + libnbc_module->NBC_Dict_size[NBC_BARRIER] = 1; } else { /* we found it */ - schedule = (NBC_Schedule*)handle->comminfo->NBC_Dict[NBC_BARRIER]; + handle->schedule = schedule = (NBC_Schedule *) libnbc_module->NBC_Dict[NBC_BARRIER]; } + OBJ_RETAIN(schedule); #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } int ompi_coll_libnbc_ibarrier_inter(struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, rsize, peer; + int rank, res, rsize; NBC_Schedule *schedule; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = 
NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); + rank = ompi_comm_rank (comm); + rsize = ompi_comm_remote_size (comm); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_remote_size(comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - handle->tmpbuf=(void*)malloc(2*sizeof(char)); + handle->tmpbuf = malloc (2); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* ensure the schedule is released with the handle on error */ + handle->schedule = schedule; if (0 == rank) { - for (peer = 1 ; peer < rsize ; ++peer) { - res = NBC_Sched_recv (0, true, 1, MPI_BYTE, peer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + for (int peer = 1 ; peer < rsize ; ++peer) { + res = NBC_Sched_recv (0, true, 1, MPI_BYTE, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } } } /* synchronize with the remote root */ - res = NBC_Sched_recv (0, true, 1, MPI_BYTE, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv (0, true, 1, MPI_BYTE, 0, schedule, 
false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_send (0, true, 1, MPI_BYTE, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (0, true, 1, MPI_BYTE, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } if (0 == rank) { /* wait for the remote root */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Sched_barrier (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } /* inform remote peers that all local peers have entered the barrier */ - for (peer = 0 ; peer < rsize ; ++peer) { - res = NBC_Sched_send (0, true, 1, MPI_BYTE, peer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + for (int peer = 0 ; peer < rsize ; ++peer) { + res = NBC_Sched_send (0, true, 1, MPI_BYTE, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ibcast.c b/ompi/mca/coll/libnbc/nbc_ibcast.c index 
0fbc4fc71a..d82b3461b0 100644 --- a/ompi/mca/coll/libnbc/nbc_ibcast.c +++ b/ompi/mca/coll/libnbc/nbc_ibcast.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -6,36 +7,42 @@ * rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * * Author(s): Torsten Hoefler * */ #include "nbc_internal.h" -static inline int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype); -static inline int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype); -static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype, int fragsize, int size); +static inline int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, + MPI_Datatype datatype); +static inline int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, + MPI_Datatype datatype); +static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, + MPI_Datatype datatype, int fragsize, int size); #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Bcast_args_compare(NBC_Bcast_args *a, NBC_Bcast_args *b, void *param) { - - if( (a->buffer == b->buffer) && + if ((a->buffer == b->buffer) && (a->count == b->count) && (a->datatype == b->datatype) && (a->root == b->root) ) { - return 0; + return 0; } - if( a->buffer < b->buffer ) { + + if( a->buffer < b->buffer ) { return -1; - } - return +1; + } + + return 1; } #endif int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype 
datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, - struct mca_coll_base_module_2_1_0_t *module) + struct mca_coll_base_module_2_1_0_t *module) { int rank, p, res, size, segsize; NBC_Schedule *schedule; @@ -44,48 +51,44 @@ int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int #endif enum { NBC_BCAST_LINEAR, NBC_BCAST_BINOMIAL, NBC_BCAST_CHAIN } alg; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_size() (%i)", res); + return res; + } segsize = 16384; /* algorithm selection */ - if(p <= 4) { + if (p <= 4) { alg = NBC_BCAST_LINEAR; - } else if(size*count < 65536) { + } else if (size * count < 65536) { alg = NBC_BCAST_BINOMIAL; - } else if(size*count < 524288) { + } else if (size * count < 524288) { alg = NBC_BCAST_CHAIN; - segsize = 16384/2; + segsize = 8192; } else { alg = NBC_BCAST_CHAIN; - segsize = 65536/2; + segsize = 32768; } - handle->tmpbuf=NULL; - #ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ - search.buffer=buffer; - search.count=count; - search.datatype=datatype; - search.root=root; - found = (NBC_Bcast_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_BCAST], 
&search); - if(found == NULL) { + search.buffer = buffer; + search.count = count; + search.datatype = datatype; + search.root = root; + found = (NBC_Bcast_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_BCAST], &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } switch(alg) { case NBC_BCAST_LINEAR: @@ -98,34 +101,63 @@ int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int res = bcast_sched_chain(rank, p, root, schedule, buffer, count, datatype, segsize, size); break; } - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Bcast_args*)malloc(sizeof(NBC_Bcast_args)); - args->buffer=buffer; - args->count=count; - args->datatype=datatype; - args->root=root; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_BCAST], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_BCAST] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_BCAST], &handle->comminfo->NBC_Dict_size[NBC_BCAST]); + args = (NBC_Bcast_args *) malloc (sizeof (args)); + if (NULL != args) { + args->buffer = buffer; + args->count = count; + args->datatype = 
datatype; + args->root = root; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_BCAST], args, args, 0); + if (0 == res) { + OBJ_RETAIN (schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_BCAST] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_BCAST], + &libnbc_module->NBC_Dict_size[NBC_BCAST]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - return NBC_OK; + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } /* better binomial bcast @@ -154,62 +186,73 @@ int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int if (vrank == root) rank = 0; \ } static inline int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype) { - int maxr, vrank, peer, r, res; + int maxr, vrank, peer, res; maxr = (int)ceil((log((double)p)/LOG2)); RANK2VRANK(rank, vrank, root); /* receive from the right hosts */ - if(vrank != 0) { - for(r=0; r= (1<= (1 << r)) && (vrank < (1 << (r + 1)))) { + VRANK2RANK(peer, vrank - (1 << r), root); + res = NBC_Sched_recv (buffer, false, count, datatype, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } } - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + + res = 
NBC_Sched_barrier (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } /* now send to the right hosts */ - for(r=0; r * @@ -15,54 +18,66 @@ int ompi_coll_libnbc_ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, size, peer; + int res, size; NBC_Schedule *schedule; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_size() (%i)", res); + return res; + } - handle->tmpbuf=NULL; + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } - - if(root != MPI_PROC_NULL) { + if (root != MPI_PROC_NULL) { /* send to all others */ - if(root == MPI_ROOT) { + if (root == MPI_ROOT) { int remsize; - res = MPI_Comm_remote_size(comm, &remsize); - if(MPI_SUCCESS != res) { printf("MPI_Comm_remote_size() failed\n"); return res; } + remsize = ompi_comm_remote_size (comm); - for (peer=0;peersendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && 
(a->recvbuf == b->recvbuf) && (a->count == b->count) && (a->datatype == b->datatype) && (a->op == b->op) ) { - return 0; + return 0; } + if( a->sendbuf < b->sendbuf ) { return -1; } - return +1; + + return 1; } #endif @@ -51,114 +52,162 @@ int ompi_coll_libnbc_iexscan(void* sendbuf, void* recvbuf, int count, MPI_Dataty #endif char inplace; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if (res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); - handle = (*coll_req); - - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } - if (inplace && rank < p - 1) + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + if (inplace && rank < p - 1) { /* need more buffer space for the inplace case */ handle->tmpbuf = malloc(ext * count * 2); - else + } else { handle->tmpbuf = malloc(ext * count); + } - if (handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } + if (handle->tmpbuf == NULL) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } #ifdef NBC_CACHE_SCHEDULE - fprintf (stderr, "NBC_CACHE_SCHEDULE\n"); /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.recvbuf=recvbuf; - search.count=count; 
- search.datatype=datatype; - search.op=op; - found = (NBC_Scan_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_EXSCAN], &search); - if (found == NULL) { + search.sendbuf = sendbuf; + search.recvbuf = recvbuf; + search.count = count; + search.datatype = datatype; + search.op = op; + found = (NBC_Scan_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN], &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if (res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* make sure the schedule is released with the handle on error */ + handle->schedule = schedule; if (rank != 0) { - if (inplace && rank < p - 1) + if (inplace && rank < p - 1) { /* if sendbuf == recvbuf do not clobber the send buffer until it has been combined * with the incoming data. 
*/ - res = NBC_Sched_recv((void *)(ext * count), true, count, datatype, rank-1, schedule); - else - res = NBC_Sched_recv(recvbuf, false, count, datatype, rank-1, schedule); + res = NBC_Sched_recv ((void *) (ext * count), true, count, datatype, rank-1, schedule, false); + } else { + res = NBC_Sched_recv (recvbuf, false, count, datatype, rank-1, schedule, false); + } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } if (rank < p - 1) { /* we have to wait until we have the data */ res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } /* perform the reduce in my temporary buffer */ - if (inplace) - res = NBC_Sched_op(0, true, sendbuf, false, (void *)(ext * count), true, count, datatype, op, schedule); - else - res = NBC_Sched_op(0, true, sendbuf, false, recvbuf, false, count, datatype, op, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } + /* this cannot be done until handle->tmpbuf is unused :-( so barrier after */ + if (inplace) { + res = NBC_Sched_op (0, true, sendbuf, false, (void *)(ext * count), true, count, + datatype, op, schedule, true); + } else { + res = NBC_Sched_op (0, true, sendbuf, false, recvbuf, false, count, datatype, op, + schedule, true); + } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } /* send reduced data onward */ - res = NBC_Sched_send(0, true, count, datatype, rank + 1, schedule); - if (NBC_OK != 
res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (0, true, count, datatype, rank + 1, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - if (inplace) + if (inplace) { /* copy the received data into the receive buffer */ - NBC_Sched_copy ((void *)(ext * count), true, count, datatype, recvbuf, false, count, datatype, schedule); + res = NBC_Sched_copy ((void *)(ext * count), true, count, datatype, recvbuf, + false, count, datatype, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } } } else if (p > 1) { - res = NBC_Sched_send(sendbuf, false, count, datatype, 1, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sendbuf, false, count, datatype, 1, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } } res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Scan_args*)malloc(sizeof(NBC_Alltoall_args)); - args->sendbuf=sendbuf; - args->recvbuf=recvbuf; - args->count=count; - args->datatype=datatype; - args->op=op; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_EXSCAN], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_EXSCAN] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_EXSCAN], &handle->comminfo->NBC_Dict_size[NBC_EXSCAN]); + args = (NBC_Scan_args *) malloc (sizeof (args)); + if (NULL != 
args) { + args->sendbuf = sendbuf; + args->recvbuf = recvbuf; + args->count = count; + args->datatype = datatype; + args->op = op; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_EXSCAN] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN], + &libnbc_module->NBC_Dict_size[NBC_EXSCAN]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_igather.c b/ompi/mca/coll/libnbc/nbc_igather.c index de63f8fd2f..dcc40b3f09 100644 --- a/ompi/mca/coll/libnbc/nbc_igather.c +++ b/ompi/mca/coll/libnbc/nbc_igather.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -9,6 +10,8 @@ * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -18,180 +21,223 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Gather_args_compare(NBC_Gather_args *a, NBC_Gather_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->sendcount == b->sendcount) && (a->sendtype == b->sendtype) && (a->recvbuf == b->recvbuf) && (a->recvcount == b->recvcount) && (a->recvtype == b->recvtype) && - (a->root == b->root) ) { - return 0; + (a->root == b->root)) { + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if( a->sendbuf < b->sendbuf ) { return -1; - } - return +1; + } + + return 1; } #endif -int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, - MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, +int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, + int recvcount, MPI_Datatype recvtype, int root, + struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i; + int rank, p, res; MPI_Aint rcvext = 0; NBC_Schedule *schedule; char *rbuf, inplace; -#ifdef NBC_CACHE_SCHEDULE - NBC_Gather_args *args, *found, search; -#endif NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = 
ompi_comm_size (comm); + if (rank == root) { - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + res = MPI_Type_extent (recvtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } } - handle->tmpbuf = NULL; if (inplace) { - sendcount = recvcount; - sendtype = recvtype; + sendcount = recvcount; + sendtype = recvtype; } else if (rank == root) { rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext); /* if I am the root - just copy the message (only without MPI_IN_PLACE) */ res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } #ifdef NBC_CACHE_SCHEDULE - /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.sendcount=sendcount; - search.sendtype=sendtype; - search.recvbuf=recvbuf; - search.recvcount=recvcount; - search.recvtype=recvtype; - search.root=root; - found = (NBC_Gather_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_GATHER], &search); - if(found == NULL) { -#endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + NBC_Gather_args *args, *found, search; - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* search schedule in communicator specific tree */ + search.sendbuf = sendbuf; + search.sendcount = sendcount; + search.sendtype = sendtype; + search.recvbuf = recvbuf; + search.recvcount = recvcount; + search.recvtype = recvtype; + search.root = root; + found = (NBC_Gather_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_GATHER], + &search); + if (NULL == found) { +#endif + schedule = OBJ_NEW(NBC_Schedule); + if 
(OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* send to root */ - if(rank != root) { + if (rank != root) { /* send msg to root */ - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else { - for(i=0;isendbuf=sendbuf; - args->sendcount=sendcount; - args->sendtype=sendtype; - args->recvbuf=recvbuf; - args->recvcount=recvcount; - args->recvtype=recvtype; - args->root=root; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_GATHER], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_GATHER] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_GATHER], &handle->comminfo->NBC_Dict_size[NBC_GATHER]); + args = (NBC_Gather_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sendbuf = sendbuf; + args->sendcount = sendcount; + args->sendtype = sendtype; + args->recvbuf = recvbuf; + args->recvcount = recvcount; + args->recvtype = recvtype; + args->root = root; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_GATHER], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_GATHER] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_GATHER], + &libnbc_module->NBC_Dict_size[NBC_GATHER]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = 
NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - return NBC_OK; + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } -int ompi_coll_libnbc_igather_inter (void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, - MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, +int ompi_coll_libnbc_igather_inter (void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, + int recvcount, MPI_Datatype recvtype, int root, + struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i, rsize; + int res, rsize; MPI_Aint rcvext = 0; NBC_Schedule *schedule; char *rbuf; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_remote_size (comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } + rsize = ompi_comm_remote_size (comm); if (root == MPI_ROOT) { res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return 
res; } - } - handle->tmpbuf = NULL; - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - /* send to root */ - if(root != MPI_ROOT && root != MPI_PROC_NULL) { - /* send msg to root */ - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } else if (MPI_ROOT == root) { - for (i = 0 ; i < rsize ; ++i) { - rbuf = ((char *)recvbuf) + (i * recvcount * rcvext); - /* root receives message to the right buffer */ - res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + /* send to root */ + if (root != MPI_ROOT && root != MPI_PROC_NULL) { + /* send msg to root */ + res = NBC_Sched_send (sendbuf, false, sendcount, sendtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + } else if (MPI_ROOT == root) { + for (int i = 0 ; i < rsize ; ++i) { + rbuf = ((char *)recvbuf) + (i * recvcount * rcvext); + /* root receives message to the right buffer */ + res = NBC_Sched_recv (rbuf, false, recvcount, recvtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + } + } - return NBC_OK; + res = 
NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_igatherv.c b/ompi/mca/coll/libnbc/nbc_igatherv.c index e4674e1e53..48c918e958 100644 --- a/ompi/mca/coll/libnbc/nbc_igatherv.c +++ b/ompi/mca/coll/libnbc/nbc_igatherv.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -9,6 +10,8 @@ * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -24,118 +27,151 @@ int ompi_coll_libnbc_igatherv(void* sendbuf, int sendcount, MPI_Datatype sendtyp void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i; + int rank, p, res; MPI_Aint rcvext = 0; NBC_Schedule *schedule; char *rbuf, inplace; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + if (rank == root) { res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } } - handle->tmpbuf = NULL; - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* send to root */ - if(rank != root) { + if (rank != root) { /* send msg to root */ - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule); - if (NBC_OK != res) { printf("Error 
in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sendbuf, false, sendcount, sendtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else { - for(i=0;itmpbuf = NULL; - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* send to root */ if (MPI_ROOT != root && MPI_PROC_NULL != root) { /* send msg to root */ - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sendbuf, false, sendcount, sendtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else if (MPI_ROOT == root) { - for (i = 0 ; i < rsize ; ++i) { - rbuf = ((char *)recvbuf) + (displs[i]*rcvext); + for (int i = 0 ; i < rsize ; ++i) { + rbuf = (char *) recvbuf + displs[i] * rcvext; /* root receives message to the right buffer */ - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv (rbuf, false, recvcounts[i], recvtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); 
return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - return NBC_OK; + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c b/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c index 88825becc9..21e6f46a11 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -6,6 +7,8 @@ * rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -18,7 +21,7 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Ineighbor_allgather_args_compare(NBC_Ineighbor_allgather_args *a, NBC_Ineighbor_allgather_args *b, void *param) { - if( (a->sbuf == b->sbuf) && + if ((a->sbuf == b->sbuf) && (a->scount == b->scount) && (a->stype == b->stype) && (a->rbuf == b->rbuf) && @@ -26,10 +29,12 @@ int NBC_Ineighbor_allgather_args_compare(NBC_Ineighbor_allgather_args *a, NBC_In (a->rtype == b->rtype) ) { return 0; } + if( a->sbuf < b->sbuf ) { return -1; } - return +1; + + return 1; } #endif @@ -37,129 +42,127 @@ int NBC_Ineighbor_allgather_args_compare(NBC_Ineighbor_allgather_args *a, NBC_In int ompi_coll_libnbc_ineighbor_allgather(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, size, res, worldsize; - MPI_Aint sndext, rcvext; + int res, indegree, outdegree, *srcs, *dsts; + MPI_Aint rcvext; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - handle = *coll_req; - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_size(comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_size(MPI_COMM_WORLD, &worldsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - - res = MPI_Type_extent(stype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(rtype, &rcvext); 
- if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - char inplace; NBC_Schedule *schedule; + + res = MPI_Type_extent (rtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + #ifdef NBC_CACHE_SCHEDULE NBC_Ineighbor_allgather_args *args, *found, search; -#endif - NBC_IN_PLACE(sbuf, rbuf, inplace); - - handle->tmpbuf=NULL; - -#ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ - search.sbuf=sbuf; - search.scount=scount; - search.stype=stype; - search.rbuf=rbuf; - search.rcount=rcount; - search.rtype=rtype; - found = (NBC_Ineighbor_allgather_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLGATHER], &search); - if(found == NULL) { + search.sbuf = sbuf; + search.scount = scount; + search.stype = stype; + search.rbuf = rbuf; + search.rcount = rcount; + search.rtype = rtype; + found = (NBC_Ineighbor_allgather_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLGATHER], + &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } + res = NBC_Comm_neighbors (comm, &srcs, &indegree, &dsts, &outdegree); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - { - int indegree, outdegree, weighted, *srcs, *dsts, i; - res = NBC_Comm_neighbors_count(comm, &indegree, &outdegree, &weighted); - if(res != NBC_OK) return res; - - srcs = (int*)malloc(sizeof(int)*indegree); - dsts = (int*)malloc(sizeof(int)*outdegree); - - res = NBC_Comm_neighbors(comm, indegree, srcs, MPI_UNWEIGHTED, outdegree, dsts, MPI_UNWEIGHTED); - if(res != NBC_OK) return res; - - if(inplace) { /* we need an extra buffer 
to be deadlock-free */ - handle->tmpbuf = malloc(indegree*rcvext*rcount); - - for(i = 0; i < indegree; i++) { - if (MPI_PROC_NULL != srcs[i]) { - res = NBC_Sched_recv((char*)0+i*rcount*rcvext, true, rcount, rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if (MPI_PROC_NULL != dsts[i]) { - res = NBC_Sched_send((char*)sbuf, false, scount, stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - /* unpack from buffer */ - for(i = 0; i < indegree; i++) { - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - res = NBC_Sched_copy((char*)0+i*rcount*rcvext, true, rcount, rtype, (char*)rbuf+i*rcount*rcvext, false, rcount, rtype, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - } else { /* non INPLACE case */ - /* simply loop over neighbors and post send/recv operations */ - for(i = 0; i < indegree; i++) { - if (MPI_PROC_NULL != srcs[i]) { - res = NBC_Sched_recv((char*)rbuf+i*rcount*rcvext, false, rcount, rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if (MPI_PROC_NULL != dsts[i]) { - res = NBC_Sched_send((char*)sbuf, false, scount, stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } + for (int i = 0 ; i < indegree ; ++i) { + if (MPI_PROC_NULL != srcs[i]) { + res = NBC_Sched_recv ((char *) rbuf + i * rcount * rcvext, true, rcount, rtype, srcs[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + free (srcs); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + 
OBJ_RELEASE(schedule); + free (dsts); + return res; + } + + for (int i = 0 ; i < outdegree ; ++i) { + if (MPI_PROC_NULL != dsts[i]) { + res = NBC_Sched_send ((char *) sbuf, false, scount, stype, dsts[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + } + } + + free (dsts); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Ineighbor_allgather_args*)malloc(sizeof(NBC_Ineighbor_allgather_args)); - args->sbuf=sbuf; - args->scount=scount; - args->stype=stype; - args->rbuf=rbuf; - args->rcount=rcount; - args->rtype=rtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLGATHER], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHER] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLGATHER], &handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHER]); - } + args = (NBC_Ineighbor_allgather_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sbuf = sbuf; + args->scount = scount; + args->stype = stype; + args->rbuf = rbuf; + args->rcount = rcount; + args->rtype = rtype; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLGATHER], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHER] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLGATHER], + &libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHER]); + } + } else { + NBC_Error("error in dict_insert() (%i)", 
res); + free (args); + } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - return NBC_OK; + res = NBC_Start(handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c b/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c index b44cd893c8..dc579b5f8f 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -6,6 +7,8 @@ * rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -18,18 +21,20 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Ineighbor_allgatherv_args_compare(NBC_Ineighbor_allgatherv_args *a, NBC_Ineighbor_allgatherv_args *b, void *param) { - if( (a->sbuf == b->sbuf) && + if ((a->sbuf == b->sbuf) && (a->scount == b->scount) && (a->stype == b->stype) && (a->rbuf == b->rbuf) && (a->rcount == b->rcount) && (a->rtype == b->rtype) ) { - return 0; + return 0; } + if( a->sbuf < b->sbuf ) { return -1; } - return +1; + + return 1; } #endif @@ -38,138 +43,128 @@ int ompi_coll_libnbc_ineighbor_allgatherv(void *sbuf, int scount, MPI_Datatype s int *rcounts, int *displs, MPI_Datatype rtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, size, res, worldsize; - MPI_Aint sndext, rcvext; + int res, indegree, outdegree, *srcs, *dsts; + MPI_Aint rcvext; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - handle = *coll_req; - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_size(comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_size(MPI_COMM_WORLD, &worldsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - - res = MPI_Type_extent(stype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(rtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - char inplace; NBC_Schedule 
*schedule; + + res = MPI_Type_extent(rtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + #ifdef NBC_CACHE_SCHEDULE NBC_Ineighbor_allgatherv_args *args, *found, search; -#endif - NBC_IN_PLACE(sbuf, rbuf, inplace); - - handle->tmpbuf=NULL; - -#ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ - search.sbuf=sbuf; - search.scount=scount; - search.stype=stype; - search.rbuf=rbuf; - search.rcount=rcount; - search.rtype=rtype; - found = (NBC_Ineighbor_allgatherv_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLGATHERV], &search); - if(found == NULL) { + search.sbuf = sbuf; + search.scount = scount; + search.stype = stype; + search.rbuf = rbuf; + search.rcount = rcount; + search.rtype = rtype; + found = (NBC_Ineighbor_allgatherv_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLGATHERV], + &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } + res = NBC_Comm_neighbors(comm, &srcs, &indegree, &dsts, &outdegree); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - { - int indegree, outdegree, weighted, *srcs, *dsts, i; - res = NBC_Comm_neighbors_count(comm, &indegree, &outdegree, &weighted); - if(res != NBC_OK) return res; - - srcs = (int*)malloc(sizeof(int)*indegree); - dsts = (int*)malloc(sizeof(int)*outdegree); - - res = NBC_Comm_neighbors(comm, indegree, srcs, MPI_UNWEIGHTED, outdegree, dsts, MPI_UNWEIGHTED); - if(res != NBC_OK) return res; - - if(inplace) { /* we need an extra buffer to be deadlock-free */ - int sumrcounts=0; - int offset=0; - for(i=0; itmpbuf = malloc(rcvext*sumrcounts); - - for(i = 0; i < 
indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_recv((char*)0+offset, true, rcounts[i], rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - offset += rcounts[i]*rcvext; - } - for(i = 0; i < outdegree; i++) { - if(dsts[i] != MPI_PROC_NULL) { - res = NBC_Sched_send((char*)sbuf, false, scount, stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - /* unpack from buffer */ - offset=0; - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - res = NBC_Sched_copy((char*)0+offset, true, rcounts[i], rtype, (char*)rbuf+displs[i]*rcvext, false, rcounts[i], rtype, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - offset += rcounts[i]*rcvext; - } - } else { /* non INPLACE case */ - - /* simply loop over neighbors and post send/recv operations */ - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_recv((char*)rbuf+displs[i]*rcvext, false, rcounts[i], rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if(dsts[i] != MPI_PROC_NULL) { - res = NBC_Sched_send((char*)sbuf, false, scount, stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } + /* simply loop over neighbors and post send/recv operations */ + for (int i = 0 ; i < indegree ; ++i) { + if (srcs[i] != MPI_PROC_NULL) { + res = NBC_Sched_recv ((char *) rbuf + displs[i] * rcvext, false, rcounts[i], rtype, srcs[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() 
(%i)\n", res); return res; } + free (srcs); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + free (dsts); + OBJ_RELEASE(schedule); + return res; + } + + for (int i = 0 ; i < outdegree ; ++i) { + if (dsts[i] != MPI_PROC_NULL) { + res = NBC_Sched_send ((char *) sbuf, false, scount, stype, dsts[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + } + } + + free (dsts); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Ineighbor_allgatherv_args*)malloc(sizeof(NBC_Ineighbor_allgatherv_args)); - args->sbuf=sbuf; - args->scount=scount; - args->stype=stype; - args->rbuf=rbuf; - args->rcount=rcount; - args->rtype=rtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLGATHERV], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHERV] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLGATHERV], &handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHERV]); + args = (NBC_Ineighbor_allgatherv_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sbuf = sbuf; + args->scount = scount; + args->stype = stype; + args->rbuf = rbuf; + args->rcount = rcount; + args->rtype = rtype; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLGATHERV], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if(++libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHERV] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLGATHERV], + 
&libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHERV]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - return NBC_OK; + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c index b63283190c..d99e5b7ffd 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -6,6 +7,8 @@ * rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -36,129 +39,133 @@ int NBC_Ineighbor_alltoall_args_compare(NBC_Ineighbor_alltoall_args *a, NBC_Inei int ompi_coll_libnbc_ineighbor_alltoall(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, size, res, worldsize; + int res, indegree, outdegree, *srcs, *dsts; MPI_Aint sndext, rcvext; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - handle = *coll_req; - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_size(comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_size(MPI_COMM_WORLD, &worldsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } + NBC_Schedule *schedule; res = MPI_Type_extent(stype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(rtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + + res = MPI_Type_extent(rtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } - char inplace; - NBC_Schedule *schedule; #ifdef NBC_CACHE_SCHEDULE NBC_Ineighbor_alltoall_args *args, *found, search; -#endif - NBC_IN_PLACE(sbuf, rbuf, inplace); - - handle->tmpbuf=NULL; - -#ifdef NBC_CACHE_SCHEDULE /* 
search schedule in communicator specific tree */ - search.sbuf=sbuf; - search.scount=scount; - search.stype=stype; - search.rbuf=rbuf; - search.rcount=rcount; - search.rtype=rtype; - found = (NBC_Ineighbor_alltoall_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALL], &search); - if(found == NULL) { + search.sbuf = sbuf; + search.scount = scount; + search.stype = stype; + search.rbuf = rbuf; + search.rcount = rcount; + search.rtype = rtype; + found = (NBC_Ineighbor_alltoall_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALL], + &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } + res = NBC_Comm_neighbors(comm, &srcs, &indegree, &dsts, &outdegree); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - { - int indegree, outdegree, weighted, *srcs, *dsts, i; - res = NBC_Comm_neighbors_count(comm, &indegree, &outdegree, &weighted); - if(res != NBC_OK) return res; - - srcs = indegree ? (int*)malloc(sizeof(int)*indegree) : NULL; - dsts = outdegree ? 
(int*)malloc(sizeof(int)*outdegree) : NULL; - - res = NBC_Comm_neighbors(comm, indegree, srcs, MPI_UNWEIGHTED, outdegree, dsts, MPI_UNWEIGHTED); - if(res != NBC_OK) return res; - - if(inplace) { /* we need an extra buffer to be deadlock-free */ - handle->tmpbuf = malloc(indegree*rcvext*rcount); - - for(i = 0; i < indegree; i++) { - if (MPI_PROC_NULL != srcs[i]) { - res = NBC_Sched_recv((char*)0+i*rcount*rcvext, true, rcount, rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if (MPI_PROC_NULL != dsts[i]) { - res = NBC_Sched_send((char*)sbuf+i*scount*sndext, false, scount, stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - /* unpack from buffer */ - for(i = 0; i < indegree; i++) { - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - res = NBC_Sched_copy((char*)0+i*rcount*rcvext, true, rcount, rtype, (char*)rbuf+i*rcount*rcvext, false, rcount, rtype, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - } else { /* non INPLACE case */ - /* simply loop over neighbors and post send/recv operations */ - for(i = 0; i < indegree; i++) { - if (MPI_PROC_NULL != srcs[i]) { - res = NBC_Sched_recv((char*)rbuf+i*rcount*rcvext, false, rcount, rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if (MPI_PROC_NULL != dsts[i]) { - res = NBC_Sched_send((char*)sbuf+i*scount*sndext, false, scount, stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } + for (int i = 0 ; i < indegree ; ++i) { + if (MPI_PROC_NULL != srcs[i]) { + res = NBC_Sched_recv ((char *) rbuf + i * rcount * rcvext, true, rcount, rtype, srcs[i], schedule, 
false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + free (srcs); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + free (dsts); + return res; + } + + for (int i = 0 ; i < outdegree ; ++i) { + if (MPI_PROC_NULL != dsts[i]) { + res = NBC_Sched_send ((char *) sbuf + i * scount * sndext, false, scount, stype, dsts[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + } + } + + free (dsts); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Ineighbor_alltoall_args*)malloc(sizeof(NBC_Ineighbor_alltoall_args)); - args->sbuf=sbuf; - args->scount=scount; - args->stype=stype; - args->rbuf=rbuf; - args->rcount=rcount; - args->rtype=rtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALL], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALL] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALL], &handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALL]); + args = (NBC_Ineighbor_alltoall_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sbuf = sbuf; + args->scount = scount; + args->stype = stype; + args->rbuf = rbuf; + args->rcount = rcount; + args->rtype = rtype; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALL], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if 
(++libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALL] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALL], + &libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALL]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - return NBC_OK; + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c index 8d79d46cbe..806cf03e69 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -6,6 +7,8 @@ * rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -18,18 +21,20 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Ineighbor_alltoallv_args_compare(NBC_Ineighbor_alltoallv_args *a, NBC_Ineighbor_alltoallv_args *b, void *param) { - if( (a->sbuf == b->sbuf) && + if ((a->sbuf == b->sbuf) && (a->scount == b->scount) && (a->stype == b->stype) && (a->rbuf == b->rbuf) && (a->rcount == b->rcount) && (a->rtype == b->rtype) ) { - return 0; + return 0; } - if( a->sbuf < b->sbuf ) { + + if (a->sbuf < b->sbuf) { return -1; } - return +1; + + return 1; } #endif @@ -38,138 +43,134 @@ int ompi_coll_libnbc_ineighbor_alltoallv(void *sbuf, int *scounts, int *sdispls, void *rbuf, int *rcounts, int *rdispls, MPI_Datatype rtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, size, res, worldsize; + int res, indegree, outdegree, *srcs, *dsts; MPI_Aint sndext, rcvext; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - handle = *coll_req; - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_size(comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_size(MPI_COMM_WORLD, &worldsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - - res = MPI_Type_extent(stype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(rtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - char inplace; 
NBC_Schedule *schedule; + + res = MPI_Type_extent (stype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + + res = MPI_Type_extent (rtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + #ifdef NBC_CACHE_SCHEDULE NBC_Ineighbor_alltoallv_args *args, *found, search; -#endif - NBC_IN_PLACE(sbuf, rbuf, inplace); - - handle->tmpbuf=NULL; - -#ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ - search.sbuf=sbuf; - search.scount=scount; - search.stype=stype; - search.rbuf=rbuf; - search.rcount=rcount; - search.rtype=rtype; - found = (NBC_Ineighbor_alltoallv_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALLV], &search); - if(found == NULL) { + search.sbuf = sbuf; + search.scount = scount; + search.stype = stype; + search.rbuf = rbuf; + search.rcount = rcount; + search.rtype = rtype; + found = (NBC_Ineighbor_alltoallv_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALLV], + &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } + res = NBC_Comm_neighbors (comm, &srcs, &indegree, &dsts, &outdegree); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - { - int indegree, outdegree, weighted, *srcs, *dsts, i; - res = NBC_Comm_neighbors_count(comm, &indegree, &outdegree, &weighted); - if(res != NBC_OK) return res; - - srcs = (int*)malloc(sizeof(int)*indegree); - dsts = (int*)malloc(sizeof(int)*outdegree); - - res = NBC_Comm_neighbors(comm, indegree, srcs, MPI_UNWEIGHTED, outdegree, dsts, MPI_UNWEIGHTED); - if(res != NBC_OK) return res; - - 
if(inplace) { /* we need an extra buffer to be deadlock-free */ - int sumrcounts=0; - int offset=0; - for(i=0; itmpbuf = malloc(rcvext*sumrcounts); - - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_recv((char*)0+offset, true, rcounts[i], rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - offset += rcounts[i]*rcvext; - } - - for(i = 0; i < outdegree; i++) { - if(dsts[i] != MPI_PROC_NULL) { - res = NBC_Sched_send((char*)sbuf+sdispls[i]*sndext, false, scounts[i], stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - /* unpack from buffer */ - offset=0; - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - res = NBC_Sched_copy((char*)0+offset, true, rcounts[i], rtype, (char*)rbuf+rdispls[i]*rcvext, false, rcounts[i], rtype, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - offset += rcounts[i]*rcvext; - } - } else { /* non INPLACE case */ - /* simply loop over neighbors and post send/recv operations */ - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_recv((char*)rbuf+rdispls[i]*rcvext, false, rcounts[i], rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if(dsts[i] != MPI_PROC_NULL) { - res = NBC_Sched_send((char*)sbuf+sdispls[i]*sndext, false, scounts[i], stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } + /* simply loop over neighbors and post send/recv operations */ + for (int i = 0 ; i < indegree ; ++i) { + if (srcs[i] != MPI_PROC_NULL) { + res = NBC_Sched_recv ((char *) rbuf + rdispls[i] * rcvext, 
false, rcounts[i], rtype, srcs[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + free (srcs); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + free (dsts); + return res; + } + + for (int i = 0 ; i < outdegree ; ++i) { + if (dsts[i] != MPI_PROC_NULL) { + res = NBC_Sched_send ((char *) sbuf + sdispls[i] * sndext, false, scounts[i], stype, dsts[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + } + } + + free (dsts); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Ineighbor_alltoallv_args*)malloc(sizeof(NBC_Ineighbor_alltoallv_args)); - args->sbuf=sbuf; - args->scount=scount; - args->stype=stype; - args->rbuf=rbuf; - args->rcount=rcount; - args->rtype=rtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALLV], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLV] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALLV], &handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLV]); + args = (NBC_Ineighbor_alltoallv_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sbuf = sbuf; + args->scount = scount; + args->stype = stype; + args->rbuf = rbuf; + args->rcount = rcount; + args->rtype = rtype; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALLV], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase 
number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLV] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALLV], + &libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLV]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - return NBC_OK; + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c index 55b116d3d5..17d3b2cac2 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -6,6 +7,8 @@ * rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -18,18 +21,20 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Ineighbor_alltoallw_args_compare(NBC_Ineighbor_alltoallw_args *a, NBC_Ineighbor_alltoallw_args *b, void *param) { - if( (a->sbuf == b->sbuf) && + if ((a->sbuf == b->sbuf) && (a->scount == b->scount) && (a->stype == b->stype) && (a->rbuf == b->rbuf) && (a->rcount == b->rcount) && - (a->rtype == b->rtype) ) { - return 0; + (a->rtype == b->rtype)) { + return 0; } - if( a->sbuf < b->sbuf ) { + + if (a->sbuf < b->sbuf) { return -1; } - return +1; + + return 1; } #endif @@ -37,137 +42,119 @@ int ompi_coll_libnbc_ineighbor_alltoallw(void *sbuf, int *scounts, MPI_Aint *sdi void *rbuf, int *rcounts, MPI_Aint *rdisps, MPI_Datatype *rtypes, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, size, res, worldsize; - MPI_Aint *sndexts, *rcvexts; + int res, indegree, outdegree, *srcs, *dsts; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - handle = *coll_req; - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_size(comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_size(MPI_COMM_WORLD, &worldsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - - char inplace; NBC_Schedule *schedule; + #ifdef NBC_CACHE_SCHEDULE NBC_Ineighbor_alltoallw_args *args, *found, search; -#endif - NBC_IN_PLACE(sbuf, rbuf, inplace); - - handle->tmpbuf=NULL; - -#ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator 
specific tree */ - search.sbuf=sbuf; - search.scount=scount; - search.stype=stype; - search.rbuf=rbuf; - search.rcount=rcount; - search.rtype=rtype; - found = (NBC_Ineighbor_alltoallw_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALLW], &search); + search.sbuf = sbuf; + search.scount = scount; + search.stype = stype; + search.rbuf = rbuf; + search.rcount = rcount; + search.rtype = rtype; + found = (NBC_Ineighbor_alltoallw_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALLW], + &search); if(found == NULL) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } + res = NBC_Comm_neighbors (comm, &srcs, &indegree, &dsts, &outdegree); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - { - int indegree, outdegree, weighted, *srcs, *dsts, i; - res = NBC_Comm_neighbors_count(comm, &indegree, &outdegree, &weighted); - if(res != NBC_OK) return res; - - srcs = (int*)malloc(sizeof(int)*indegree); - dsts = (int*)malloc(sizeof(int)*outdegree); - - sndexts = (MPI_Aint*)malloc(sizeof(MPI_Aint)*outdegree); - for(i=0; itmpbuf = malloc(sumrbytes); - - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_recv((char*)0+rdisps[i], true, rcounts[i], rtypes[i], srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if(dsts[i] != MPI_PROC_NULL) { - res = NBC_Sched_send((char*)sbuf+sdisps[i], false, scounts[i], stypes[i], dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - /* unpack from buffer */ - for(i = 0; i < indegree; i++) { - res = 
NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - res = NBC_Sched_copy((char*)0+rdisps[i], true, rcounts[i], rtypes[i], (char*)rbuf+rdisps[i], false, rcounts[i], rtypes[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - } else { /* non INPLACE case */ - /* simply loop over neighbors and post send/recv operations */ - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_recv((char*)rbuf+rdisps[i], false, rcounts[i], rtypes[i], srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if(dsts[i] != MPI_PROC_NULL) { - res = NBC_Sched_send((char*)sbuf+sdisps[i], false, scounts[i], stypes[i], dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } + /* simply loop over neighbors and post send/recv operations */ + for (int i = 0 ; i < indegree ; ++i) { + if (srcs[i] != MPI_PROC_NULL) { + res = NBC_Sched_recv ((char *) rbuf + rdisps[i], false, rcounts[i], rtypes[i], srcs[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; } } } + free (srcs); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + for (int i = 0 ; i < outdegree ; ++i) { + if (dsts[i] != MPI_PROC_NULL) { + res = NBC_Sched_send ((char *) sbuf + sdisps[i], false, scounts[i], stypes[i], dsts[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + } + } + + free (dsts); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = 
(NBC_Ineighbor_alltoallw_args*)malloc(sizeof(NBC_Ineighbor_alltoallw_args)); - args->sbuf=sbuf; - args->scount=scount; - args->stype=stype; - args->rbuf=rbuf; - args->rcount=rcount; - args->rtype=rtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALLW], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLW] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALLW], &handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLW]); - } + args = (NBC_Ineighbor_alltoallw_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sbuf = sbuf; + args->scount = scount; + args->stype = stype; + args->rbuf = rbuf; + args->rcount = rcount; + args->rtype = rtype; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALLW], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLW] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALLW], + &libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLW]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - return NBC_OK; + res = NBC_Start(handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) 
handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_internal.h b/ompi/mca/coll/libnbc/nbc_internal.h index e5c2165e91..8efa056871 100644 --- a/ompi/mca/coll/libnbc/nbc_internal.h +++ b/ompi/mca/coll/libnbc/nbc_internal.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * */ #ifndef __NBC_INTERNAL_H__ @@ -30,6 +33,7 @@ #include "ompi/include/ompi/constants.h" #include "ompi/request/request.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/communicator/communicator.h" #include #include @@ -82,66 +86,74 @@ typedef enum { /* the send argument struct */ typedef struct { - void *buf; - char tmpbuf; + NBC_Fn_type type; int count; + void *buf; MPI_Datatype datatype; int dest; + char tmpbuf; } NBC_Args_send; /* the receive argument struct */ typedef struct { - void *buf; - char tmpbuf; + NBC_Fn_type type; int count; + void *buf; MPI_Datatype datatype; + char tmpbuf; int source; } NBC_Args_recv; /* the operation argument struct */ typedef struct { - void *buf1; + NBC_Fn_type type; char tmpbuf1; - void *buf2; char tmpbuf2; - void *buf3; char tmpbuf3; - int count; + void *buf1; + void *buf2; + void *buf3; MPI_Op op; MPI_Datatype datatype; + int count; } NBC_Args_op; /* the copy argument struct */ typedef struct { - void *src; - char tmpsrc; + NBC_Fn_type type; int srccount; - MPI_Datatype srctype; + void *src; void *tgt; - char tmptgt; - int tgtcount; + MPI_Datatype srctype; MPI_Datatype tgttype; + int tgtcount; + char tmpsrc; + char tmptgt; } NBC_Args_copy; /* unpack operation arguments */ typedef struct { - void *inbuf; - char tmpinbuf; 
+ NBC_Fn_type type; int count; - MPI_Datatype datatype; + void *inbuf; void *outbuf; + MPI_Datatype datatype; + char tmpinbuf; char tmpoutbuf; } NBC_Args_unpack; /* internal function prototypes */ -int NBC_Sched_create(NBC_Schedule* schedule); -int NBC_Sched_send(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule); -int NBC_Sched_recv(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule); -int NBC_Sched_op(void* buf3, char tmpbuf3, void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule); -int NBC_Sched_copy(void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, MPI_Datatype tgttype, NBC_Schedule *schedule); -int NBC_Sched_unpack(void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, NBC_Schedule *schedule); -int NBC_Sched_barrier(NBC_Schedule *schedule); -int NBC_Sched_commit(NBC_Schedule *schedule); +int NBC_Sched_send (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier); +int NBC_Sched_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier); +int NBC_Sched_op (void* buf3, char tmpbuf3, void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, + MPI_Op op, NBC_Schedule *schedule, bool barrier); +int NBC_Sched_copy (void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, + MPI_Datatype tgttype, NBC_Schedule *schedule, bool barrier); +int NBC_Sched_unpack (void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, + NBC_Schedule *schedule, bool barrier); + +int NBC_Sched_barrier (NBC_Schedule *schedule); +int NBC_Sched_commit (NBC_Schedule *schedule); #ifdef NBC_CACHE_SCHEDULE /* this is a dummy structure which is used to get the 
schedule out of @@ -247,12 +259,21 @@ void NBC_SchedCache_args_delete_key_dummy(void *k); int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule); int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t **request, ompi_coll_libnbc_module_t *module); +void NBC_Return_handle(ompi_coll_libnbc_request_t *request); static inline int NBC_Type_intrinsic(MPI_Datatype type); -static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm); int NBC_Create_fortran_handle(int *fhandle, NBC_Handle **handle); /* some macros */ +static inline void NBC_Error (char *format, ...) { + va_list args; + + va_start (args, format); + vfprintf (stderr, format, args); + fprintf (stderr, "\n"); + va_end (args); +} + /* a schedule has the following format: * [schedule] ::= [size][round-schedule][delimiter][round-schedule][delimiter]...[end] * [size] ::= size of the schedule (int) @@ -277,83 +298,68 @@ int NBC_Create_fortran_handle(int *fhandle, NBC_Handle **handle); * schedule. A round has the format: * [num]{[type][type-args]} * e.g. 
[(int)2][(NBC_Fn_type)SEND][(NBC_Args_send)SEND-ARGS][(NBC_Fn_type)RECV][(NBC_Args_recv)RECV-ARGS] */ -#define NBC_GET_ROUND_SIZE(schedule, size) \ - { \ - int num; \ - char *p = (char*) schedule; \ - NBC_Fn_type type; \ - int i; \ - \ - NBC_GET_BYTES(p,num); \ - /*NBC_DEBUG(10, "GET_ROUND_SIZE got %i elements\n", num); */\ - for (i=0; isize; } /* increase the size of a schedule by size bytes */ -#define NBC_INC_SIZE(schedule, size) \ -{ \ - *(int*)schedule+=size; \ +static inline void nbc_schedule_inc_size (NBC_Schedule *schedule, int size) { + schedule->size += size; } /* increments the number of operations in the last round */ -#define NBC_INC_NUM_ROUND(schedule) \ -{ \ - int total_size, num_last_round; \ - long round_size; \ - char *ptr, *lastround; \ - \ - NBC_GET_SIZE(schedule, total_size); \ - \ - /* ptr begins at first round (first int is overall size) */ \ - ptr = (char*)schedule+sizeof(int); \ - lastround = ptr; \ - while ((long)ptr-(long)schedule < total_size) { \ - NBC_GET_ROUND_SIZE(ptr, round_size); \ - /*printf("got round_size %i\n", round_size);*/ \ - lastround = ptr; \ - ptr += round_size; \ - ptr += sizeof(char); /* barrier delimiter */ \ - /*printf("(long)ptr-(long)schedule=%li, total_size=%i\n", (long)ptr-(long)schedule, total_size); */\ - } \ - /*printf("lastround count is at offset: %li\n", (long)lastround-(long)schedule);*/ \ - /* increment the count in the last round of the schedule */ \ - memcpy(&num_last_round, lastround, sizeof(int)); \ - num_last_round++; \ - memcpy(lastround, &num_last_round, sizeof(int)); \ +static inline void nbc_schedule_inc_round (NBC_Schedule *schedule) { + int last_round_num; + char *lastround; + + lastround = schedule->data + schedule->current_round_offset; + + /* increment the count in the last round of the schedule (memcpy is used + * to protect against unaligned access) */ + memcpy (&last_round_num, lastround, sizeof (last_round_num)); + ++last_round_num; + memcpy (lastround, &last_round_num, sizeof 
(last_round_num)); } /* NBC_PRINT_ROUND prints a round in a schedule. A round has the format: @@ -430,15 +436,6 @@ int NBC_Create_fortran_handle(int *fhandle, NBC_Handle **handle); } \ } -#define NBC_CHECK_NULL(ptr) \ -{ \ - if(ptr == NULL) { \ - printf("realloc error :-(\n"); \ - } \ -} - - - /* #define NBC_DEBUG(level, ...) {} */ @@ -498,27 +495,48 @@ static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void * /* if we have the same types and they are contiguous (intrinsic * types are contiguous), we can just use a single memcpy */ res = ompi_datatype_get_extent(srctype, &lb, &ext); - if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (OMPI_SUCCESS != res) { + NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + memcpy(tgt, src, srccount*ext); } else { /* we have to pack and unpack */ res = MPI_Pack_size(srccount, srctype, comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i:%i)\n", res, size); return (MPI_SUCCESS == res) ? 
MPI_ERR_SIZE : res; } + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in MPI_Pack_size() (%i:%i)", res, size); + return res; + } + if (0 == size) { - return NBC_OK; + return OMPI_SUCCESS; } packbuf = malloc(size); - if (NULL == packbuf) { printf("Error in malloc()\n"); return res; } + if (NULL == packbuf) { + NBC_Error("Error in malloc()"); + return res; + } + pos=0; res = MPI_Pack(src, srccount, srctype, packbuf, size, &pos, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; } + + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in MPI_Pack() (%i)", res); + free (packbuf); + return res; + } + pos=0; res = MPI_Unpack(packbuf, size, &pos, tgt, tgtcount, tgttype, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Unpack() (%i)\n", res); return res; } free(packbuf); + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in MPI_Unpack() (%i)", res); + return res; + } } - return NBC_OK; + return OMPI_SUCCESS; } static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void *tgt, MPI_Comm comm) { @@ -533,19 +551,29 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void /* if we have the same types and they are contiguous (intrinsic * types are contiguous), we can just use a single memcpy */ res = ompi_datatype_get_extent (srctype, &lb, &ext); - if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (OMPI_SUCCESS != res) { + NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + memcpy(tgt, src, srccount * ext); } else { /* we have to unpack */ res = MPI_Pack_size(srccount, srctype, comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; } - pos=0; + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in MPI_Pack_size() (%i)", res); + return res; + } + pos = 0; res = MPI_Unpack(src, size, &pos, tgt, srccount, srctype, comm); - if (MPI_SUCCESS != res) { 
printf("MPI Error in MPI_Unpack() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in MPI_Unpack() (%i)", res); + return res; + } } - return NBC_OK; + return OMPI_SUCCESS; } /* deletes elements from dict until low watermark is reached */ @@ -576,8 +604,8 @@ static inline void NBC_SchedCache_dictwipe(hb_tree *dict, int *size) { } \ } -int NBC_Comm_neighbors_count(MPI_Comm comm, int *indegree, int *outdegree, int *weighted); -int NBC_Comm_neighbors(MPI_Comm comm, int maxindegree, int sources[], int sourceweights[], int maxoutdegree, int destinations[], int destweights[]); +int NBC_Comm_neighbors_count (ompi_communicator_t *comm, int *indegree, int *outdegree); +int NBC_Comm_neighbors (ompi_communicator_t *comm, int **sources, int *source_count, int **destinations, int *dest_count); #ifdef __cplusplus } diff --git a/ompi/mca/coll/libnbc/nbc_ireduce.c b/ompi/mca/coll/libnbc/nbc_ireduce.c index a4dd0eb423..68285508cb 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce.c @@ -1,10 +1,11 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -14,26 +15,31 @@ */ #include "nbc_internal.h" -static inline int red_sched_binomial(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, void *redbuf, NBC_Schedule *schedule, NBC_Handle *handle); -static inline int red_sched_chain(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize); +static inline int red_sched_binomial (int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, void *redbuf, NBC_Schedule *schedule, NBC_Handle *handle); +static inline int red_sched_chain (int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize); -static inline int red_sched_linear(int rank, int rsize, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle); +static inline int red_sched_linear (int rank, int rsize, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle); #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Reduce_args_compare(NBC_Reduce_args *a, NBC_Reduce_args *b, void *param) { - if( (a->sendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->recvbuf == b->recvbuf) && (a->count == b->count) && (a->datatype == b->datatype) && (a->op == b->op) && - (a->root == b->root) ) { - return 0; + (a->root == b->root)) { + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if (a->sendbuf < b->sendbuf) { return -1; } - return +1; + + return 1; } #endif @@ -45,68 +51,86 @@ int ompi_coll_libnbc_ireduce(void* sendbuf, void* recvbuf, int count, MPI_Dataty MPI_Aint ext; NBC_Schedule *schedule; char *redbuf=NULL, inplace; -#ifdef NBC_CACHE_SCHEDULE - 
NBC_Reduce_args *args, *found, search; -#endif enum { NBC_RED_BINOMIAL, NBC_RED_CHAIN } alg; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_size() (%i)", res); + return res; + } /* only one node -> copy data */ - if((p == 1) && !inplace) { - res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + if ((p == 1) && !inplace) { + res = NBC_Copy (sendbuf, count, datatype, recvbuf, count, datatype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + *request = &ompi_request_empty; + return OMPI_SUCCESS; + } + + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; } /* algorithm selection */ - if(p > 4 || size*count < 65536) { + if (p > 4 || size * count < 65536) { alg = NBC_RED_BINOMIAL; if(rank == root) { /* root reduces 
in receivebuffer */ - handle->tmpbuf = malloc(ext*count); + handle->tmpbuf = malloc (ext * count); } else { /* recvbuf may not be valid on non-root nodes */ - handle->tmpbuf = malloc(ext*count*2); - redbuf = ((char*)handle->tmpbuf)+(ext*count); + handle->tmpbuf = malloc (ext * count * 2); + redbuf = (char*) handle->tmpbuf + ext * count; } } else { - handle->tmpbuf = malloc(ext*count); + handle->tmpbuf = malloc (ext * count); alg = NBC_RED_CHAIN; segsize = 16384/2; } - if (NULL == handle->tmpbuf) { printf("Error in malloc() (%i)\n", res); return res; } + + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } #ifdef NBC_CACHE_SCHEDULE - /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.recvbuf=recvbuf; - search.count=count; - search.datatype=datatype; - search.op=op; - search.root=root; - found = (NBC_Reduce_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_REDUCE], &search); - if(found == NULL) { -#endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + NBC_Reduce_args *args, *found, search; - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* search schedule in communicator specific tree */ + search.sendbuf = sendbuf; + search.recvbuf = recvbuf; + search.count = count; + search.datatype = datatype; + search.op = op; + search.root = root; + found = (NBC_Reduce_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_REDUCE], &search); + if (NULL == found) { +#endif + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* make sure the schedule is released with the handle on error */ + handle->schedule = schedule; switch(alg) { case NBC_RED_BINOMIAL: @@ -116,37 +140,59 @@ int 
ompi_coll_libnbc_ireduce(void* sendbuf, void* recvbuf, int count, MPI_Dataty res = red_sched_chain(rank, p, root, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, handle, segsize); break; } - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Reduce_args*)malloc(sizeof(NBC_Alltoall_args)); - args->sendbuf=sendbuf; - args->recvbuf=recvbuf; - args->count=count; - args->datatype=datatype; - args->op=op; - args->root=root; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_REDUCE], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for Reduce */ - if(++handle->comminfo->NBC_Dict_size[NBC_REDUCE] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_REDUCE], &handle->comminfo->NBC_Dict_size[NBC_REDUCE]); + args = (NBC_Reduce_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sendbuf = sendbuf; + args->recvbuf = recvbuf; + args->count = count; + args->datatype = datatype; + args->op = op; + args->root = root; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_REDUCE], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for Reduce */ + if (++libnbc_module->NBC_Dict_size[NBC_REDUCE] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_REDUCE], + &libnbc_module->NBC_Dict_size[NBC_REDUCE]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + 
} } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } int ompi_coll_libnbc_ireduce_inter(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, @@ -156,40 +202,56 @@ int ompi_coll_libnbc_ireduce_inter(void* sendbuf, void* recvbuf, int count, MPI_ NBC_Schedule *schedule; MPI_Aint ext; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); + rank = ompi_comm_rank (comm); + rsize = ompi_comm_remote_size (comm); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_remote_size(comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + res = MPI_Type_extent (datatype, &ext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } - handle->tmpbuf = malloc(ext*count); - if (NULL == handle->tmpbuf) { printf("Error in malloc() (%i)\n", res); return res; } + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if 
(NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + handle->tmpbuf = malloc (ext * count); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } res = red_sched_linear (rank, rsize, root, sendbuf, recvbuf, count, datatype, op, schedule, handle); - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } @@ -222,156 +284,180 @@ int ompi_coll_libnbc_ireduce_inter(void* sendbuf, void* recvbuf, int count, MPI_ if (vrank == 0) rank = root; \ if (vrank == root) rank = 0; \ } -static inline int red_sched_binomial(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, void *redbuf, NBC_Schedule *schedule, NBC_Handle *handle) { - int firstred, vrank, vpeer, peer, res, maxr, r; +static inline int red_sched_binomial (int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, void 
*redbuf, NBC_Schedule *schedule, NBC_Handle *handle) { + int vrank, vpeer, peer, res, maxr; RANK2VRANK(rank, vrank, root); maxr = (int)ceil((log((double)p)/LOG2)); - firstred = 1; - for(r=1; r<=maxr; r++) { - if((vrank % (1<tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (peer < p) { /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Sched_recv (0, true, count, datatype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + /* perform the reduce in my local buffer */ - if(firstred) { - if(rank == root) { + /* this cannot be done until handle->tmpbuf is unused :-( so barrier after the op */ + if (firstred) { + if (rank == root) { /* root is the only one who reduces in the receivebuffer * take data from sendbuf in first round - save copy */ - res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule); + res = NBC_Sched_op (recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule, true); } else { /* all others may not have a receive buffer * take data from sendbuf in first round - save copy */ - res = NBC_Sched_op((char *)redbuf-(unsigned long)handle->tmpbuf, true, sendbuf, false, 0, true, count, datatype, op, schedule); + res = NBC_Sched_op ((char *) redbuf - (intptr_t) handle->tmpbuf, true, sendbuf, false, 0, true, count, + datatype, op, schedule, true); } firstred = 0; } else { if(rank == root) { /* root is the only one who reduces in the receivebuffer */ - res = NBC_Sched_op(recvbuf, false, recvbuf, false, 0, true, count, datatype, op, schedule); + res = NBC_Sched_op (recvbuf, false, recvbuf, false, 0, true, count, datatype, op, schedule, true); } else { /* all others may not have a receive buffer */ - res = NBC_Sched_op((char *)redbuf-(unsigned long)handle->tmpbuf, true, (char *)redbuf-(unsigned 
long)handle->tmpbuf, true, 0, true, count, datatype, op, schedule); + res = NBC_Sched_op ((char *) redbuf - (intptr_t) handle->tmpbuf, true, (char *) redbuf - (intptr_t) handle->tmpbuf, + true, 0, true, count, datatype, op, schedule, true); } } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } } else { /* we have to send this round */ - vpeer = vrank - (1<<(r-1)); + vpeer = vrank - (1 << (r - 1)); VRANK2RANK(peer, vpeer, root) - if(firstred) { + if (firstred) { /* we did not reduce anything */ - res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule); + res = NBC_Sched_send (sendbuf, false, count, datatype, peer, schedule, false); } else { /* we have to use the redbuf the root (which works in receivebuf) is never sending .. */ - res = NBC_Sched_send((char *)redbuf-(unsigned long)handle->tmpbuf, true, count, datatype, peer, schedule); + res = NBC_Sched_send ((char *) redbuf - (intptr_t) handle->tmpbuf, true, count, datatype, peer, schedule, + false); } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + /* leave the game */ break; } } - return NBC_OK; + return OMPI_SUCCESS; } /* chain send ... 
*/ -static inline int red_sched_chain(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize) { - int res, vrank, rpeer, speer, numfrag, fragnum, fragcount, thiscount; +static inline int red_sched_chain (int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize) { + int res, vrank, rpeer, speer, numfrag, fragcount, thiscount; long offset; RANK2VRANK(rank, vrank, root); VRANK2RANK(rpeer, vrank+1, root); VRANK2RANK(speer, vrank-1, root); - if(count == 0) return NBC_OK; + if (0 == count) { + return OMPI_SUCCESS; + } - numfrag = count*size/fragsize; - if((count*size)%fragsize != 0) numfrag++; - fragcount = count/numfrag; - /*printf("numfrag: %i, count: %i, size: %i, fragcount: %i\n", numfrag, count, size, fragcount);*/ + numfrag = count * size / fragsize; + if ((count * size) % fragsize != 0) { + numfrag++; + } - for(fragnum = 0; fragnum < numfrag; fragnum++) { - offset = fragnum*fragcount*ext; + fragcount = count / numfrag; + + for (int fragnum = 0 ; fragnum < numfrag ; ++fragnum) { + offset = fragnum * fragcount * ext; thiscount = fragcount; - if(fragnum == numfrag-1) { + if(fragnum == numfrag - 1) { /* last fragment may not be full */ - thiscount = count-fragcount*fragnum; + thiscount = count - fragcount * fragnum; } /* last node does not recv */ - if(vrank != p-1) { - res = NBC_Sched_recv((char*)offset, true, thiscount, datatype, rpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - res = NBC_Sched_barrier(schedule); + if (vrank != p-1) { + res = NBC_Sched_recv ((char *) offset, true, thiscount, datatype, rpeer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + /* root reduces into receivebuf */ if(vrank == 0) { - res = 
NBC_Sched_op((char*)recvbuf+offset, false, (char*)sendbuf+offset, false, (char*)offset, true, thiscount, datatype, op, schedule); + res = NBC_Sched_op ((char *) recvbuf + offset, false, (char *) sendbuf + offset, false, (char *) offset, true, + thiscount, datatype, op, schedule, true); } else { - res = NBC_Sched_op((char*)offset, true, (char*)sendbuf+offset, false, (char*)offset, true, thiscount, datatype, op, schedule); + res = NBC_Sched_op ((char *) offset, true, (char *) sendbuf + offset, false, (char *) offset, true, thiscount, + datatype, op, schedule, true); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; } - res = NBC_Sched_barrier(schedule); } /* root does not send */ - if(vrank != 0) { + if (vrank != 0) { /* rank p-1 has to send out of sendbuffer :) */ - if(vrank == p-1) { - res = NBC_Sched_send((char*)sendbuf+offset, false, thiscount, datatype, speer, schedule); + /* the barrier here seems awkward but isn't!!!! */ + if (vrank == p-1) { + res = NBC_Sched_send ((char *) sendbuf + offset, false, thiscount, datatype, speer, schedule, true); } else { - res = NBC_Sched_send((char*)offset, true, thiscount, datatype, speer, schedule); + res = NBC_Sched_send ((char *) offset, true, thiscount, datatype, speer, schedule, true); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; } - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - /* this barrier here seems awkward but isn't!!!! 
*/ - res = NBC_Sched_barrier(schedule); } } - return NBC_OK; + return OMPI_SUCCESS; } /* simple linear algorithm for intercommunicators */ -static inline int red_sched_linear(int rank, int rsize, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle) { - int res, peer; +static inline int red_sched_linear (int rank, int rsize, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle) { + int res; - if(count == 0) return NBC_OK; - - if (MPI_ROOT == root) { - res = NBC_Sched_recv (recvbuf, false, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_barrier (schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - for (peer = 1 ; peer < rsize ; ++peer) { - res = NBC_Sched_recv (0, true, count, datatype, peer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - res = NBC_Sched_op (recvbuf, false, 0, true, recvbuf, false, count, datatype, op, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - } - } else if (MPI_PROC_NULL != root) { - res = NBC_Sched_send (sendbuf, false, count, datatype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + if (0 == count) { + return OMPI_SUCCESS; } - return NBC_OK; + if (MPI_ROOT == root) { + res = NBC_Sched_recv (recvbuf, false, count, datatype, 0, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + for (int peer = 1 ; peer < rsize ; 
++peer) { + res = NBC_Sched_recv (0, true, count, datatype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + res = NBC_Sched_op (recvbuf, false, 0, true, recvbuf, false, count, datatype, op, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + } else if (MPI_PROC_NULL != root) { + res = NBC_Sched_send (sendbuf, false, count, datatype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c index 1fd89a8181..23fe1b599a 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c @@ -1,10 +1,11 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -37,220 +38,285 @@ int ompi_coll_libnbc_ireduce_scatter(void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int peer, rank, maxr, p, r, res, count, offset, firstred; + int peer, rank, maxr, p, res, count; MPI_Aint ext; char *redbuf, *sbuf, inplace; NBC_Schedule *schedule; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); - if(p==1) { - if(!inplace) { - /* single node not in_place: copy data to recvbuf */ - res = NBC_Copy(sendbuf, recvcounts[0], datatype, recvbuf, recvcounts[0], datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - /* manually complete the request */ - (*request)->req_status.MPI_ERROR = OMPI_SUCCESS; - OPAL_THREAD_LOCK(&ompi_request_lock); - ompi_request_complete(*request, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); - return NBC_OK; + res = MPI_Type_extent (datatype, &ext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; } - handle = (*coll_req); - - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in 
malloc()\n"); return NBC_OOR; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - maxr = (int)ceil((log((double)p)/LOG2)); - count = 0; - for(r=0;rtmpbuf = malloc(ext*count*2); - if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } + if (p == 1 || 0 == count) { + if (!inplace) { + /* single node not in_place: copy data to recvbuf */ + res = NBC_Copy(sendbuf, recvcounts[0], datatype, recvbuf, recvcounts[0], datatype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } - redbuf = ((char*)handle->tmpbuf)+(ext*count); + *request = &ompi_request_empty; + return OMPI_SUCCESS; + } - firstred = 1; - for(r=1; r<=maxr; r++) { - if((rank % (1<tmpbuf = malloc (ext * count * 2); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + redbuf = (char *) handle->tmpbuf + ext * count; + + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* make sure the schedule is released with the handle on error */ + handle->schedule = schedule; + + for (int r = 1, firstred = 1 ; r <= maxr ; ++r) { + if ((rank % (1 << r)) == 0) { /* we have to receive this round */ - peer = rank + (1<<(r-1)); - if(peertmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + peer = rank + (1 << (r - 1)); + if (peer < p) { /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - if(firstred) { + res = NBC_Sched_recv(0, true, count, datatype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + /* this cannot be done until handle->tmpbuf is unused :-( so barrier after the op */ + if (firstred) { /* take reduce data 
from the sendbuf in the first round -> save copy */ - res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, sendbuf, false, 0, true, count, datatype, op, schedule); + res = NBC_Sched_op (redbuf - (intptr_t) handle->tmpbuf, true, sendbuf, false, 0, true, count, datatype, + op, schedule, true); firstred = 0; } else { /* perform the reduce in my local buffer */ - res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, redbuf-(unsigned long)handle->tmpbuf, true, 0, true, count, datatype, op, schedule); + res = NBC_Sched_op (redbuf - (intptr_t) handle->tmpbuf, true, redbuf - (intptr_t) handle->tmpbuf, true, + 0, true, count, datatype, op, schedule, true); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } } } else { /* we have to send this round */ - peer = rank - (1<<(r-1)); - if(firstred) { + peer = rank - (1 << (r - 1)); + if (firstred) { /* we have to send the senbuf */ - res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule); + res = NBC_Sched_send (sendbuf, false, count, datatype, peer, schedule, false); } else { /* we send an already reduced value from redbuf */ - res = NBC_Sched_send(redbuf-(unsigned long)handle->tmpbuf, true, count, datatype, peer, schedule); + res = NBC_Sched_send (redbuf - (intptr_t) handle->tmpbuf, true, count, datatype, peer, schedule, false); } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + /* leave the game */ break; } } res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); 
printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } /* rank 0 is root and sends - all others receive */ - if(rank != 0) { - res = NBC_Sched_recv(recvbuf, false, recvcounts[rank], datatype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - - if(rank == 0) { - offset = 0; - for(r=1;rtmpbuf, true, recvcounts[r], datatype, r, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sbuf - (intptr_t) handle->tmpbuf, true, recvcounts[r], datatype, r, schedule, + false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } } - res = NBC_Sched_copy(redbuf-(unsigned long)handle->tmpbuf, true, recvcounts[0], datatype, recvbuf, false, recvcounts[0], datatype, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } + + res = NBC_Sched_copy (redbuf - (intptr_t) handle->tmpbuf, true, recvcounts[0], datatype, recvbuf, false, + recvcounts[0], datatype, schedule, false); + } else { + res = NBC_Sched_recv (recvbuf, false, recvcounts[rank], datatype, 0, schedule, false); } - /*NBC_PRINT_SCHED(*schedule);*/ + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + 
return res; + } + + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } -int ompi_coll_libnbc_ireduce_scatter_inter(void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype, - MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, - struct mca_coll_base_module_2_1_0_t *module) { - int peer, rank, r, res, count, rsize, offset; +int ompi_coll_libnbc_ireduce_scatter_inter (void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype, + MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, + struct mca_coll_base_module_2_1_0_t *module) { + int rank, res, count, rsize; MPI_Aint ext; NBC_Schedule *schedule; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_remote_size(comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + rsize = ompi_comm_remote_size (comm); - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + res = MPI_Type_extent (datatype, &ext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } count = 0; - for (r = 0 ; r < rsize ; 
++r) count += recvcounts[r]; + for (int r = 0 ; r < rsize ; ++r) { + count += recvcounts[r]; + } - handle->tmpbuf = malloc(2 * ext * count); - if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + if (count > 0) { + handle->tmpbuf = malloc (2 * ext * count); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* make sure the schedule is released with the handle on error */ + handle->schedule = schedule; /* send my data to the remote root */ - res = NBC_Sched_send(sendbuf, false, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send(sendbuf, false, count, datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } if (0 == rank) { - res = NBC_Sched_recv((void *) 0, true, count, datatype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv ((void *) 0, true, count, datatype, 0, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - for (peer = 1 ; peer < rsize ; ++peer) { - res = NBC_Sched_recv((void *)(ext * count), true, count, datatype, peer, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", 
res); return res; } - - res = NBC_Sched_op((void *) 0, true, (void *)(ext * count), true, (void *) 0, true, count, datatype, op, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + for (int peer = 1 ; peer < rsize ; ++peer) { + res = NBC_Sched_recv ((void *)(ext * count), true, count, datatype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + res = NBC_Sched_op ((void *) 0, true, (void *)(ext * count), true, (void *) 0, true, count, datatype, + op, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } } /* exchange data with remote root for scatter phase (we *could* use the local communicator to do the scatter) */ - res = NBC_Sched_recv((void *)(ext * count), true, count, datatype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv ((void *)(ext * count), true, count, datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = NBC_Sched_send((void *) 0, true, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Sched_send ((void *) 0, true, count, datatype, 0, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } /* scatter */ - for (peer = 0, offset = ext * count ; peer < rsize ; ++peer) { - res = NBC_Sched_send((void *)(uintptr_t) offset, true, recvcounts[peer], datatype, peer, schedule); - if (NBC_OK != res) { printf("Error in 
NBC_Sched_send() (%i)\n", res); return res; } + for (int peer = 0, offset = ext * count ; peer < rsize ; ++peer) { + res = NBC_Sched_send ((void *)(uintptr_t) offset, true, recvcounts[peer], datatype, peer, schedule, + false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + offset += recvcounts[peer] * ext; } } /* receive my block */ - res = NBC_Sched_recv(recvbuf, false, recvcounts[rank], datatype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv (recvbuf, false, recvcounts[rank], datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - /*NBC_PRINT_SCHED(*schedule);*/ + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c new file mode 100644 index 0000000000..57cee6c61a --- /dev/null +++ b/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c @@ -0,0 +1,318 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2006 The Technical University of Chemnitz. 
All + * rights reserved. + * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * + * Author(s): Torsten Hoefler + * + */ +#include "nbc_internal.h" + +/* an reduce_csttare schedule can not be cached easily because the contents + * ot the recvcount value may change, so a comparison of the address + * would not be sufficient ... we simply do not cache it */ + +/* binomial reduce to rank 0 followed by a linear scatter ... + * + * Algorithm: + * pairwise exchange + * round r: + * grp = rank % 2^r + * if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value + * if grp == 1: send to rank - 2^(r-1) and exit function + * + * do this for R=log_2(p) rounds + * + */ + +int ompi_coll_libnbc_ireduce_scatter_block(void* sendbuf, void* recvbuf, int recvcount, MPI_Datatype datatype, + MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, + struct mca_coll_base_module_2_1_0_t *module) { + int peer, rank, maxr, p, res, count; + MPI_Aint ext; + char *redbuf, *sbuf, inplace; + NBC_Schedule *schedule; + NBC_Handle *handle; + ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; + + NBC_IN_PLACE(sendbuf, recvbuf, inplace); + + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + + res = MPI_Type_extent(datatype, &ext); + if (MPI_SUCCESS != res || 0 == ext) { + NBC_Error ("MPI Error in MPI_Type_extent() (%i:%i)", res, (int) ext); + return (MPI_SUCCESS == res) ? 
MPI_ERR_SIZE : res; + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OMPI_SUCCESS != res) { + return res; + } + + schedule = OBJ_NEW(NBC_Schedule); + if (NULL == schedule) { + OMPI_COLL_LIBNBC_REQUEST_RETURN(handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* make sure the schedule is released with the handle on error */ + handle->schedule = schedule; + + maxr = (int)ceil((log((double)p)/LOG2)); + + count = p * recvcount; + + if (0 < count) { + handle->tmpbuf = malloc (ext*count*2); + if (NULL == handle->tmpbuf) { + OMPI_COLL_LIBNBC_REQUEST_RETURN(handle); + OBJ_RELEASE(schedule); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + redbuf = (char *) handle->tmpbuf + ext * count; + + /* copy data to redbuf if we only have a single node */ + if ((p == 1) && !inplace) { + res = NBC_Copy (sendbuf, count, datatype, redbuf, count, datatype, comm); + if (OMPI_SUCCESS != res) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return res; + } + + *request = &ompi_request_empty; + return OMPI_SUCCESS; + } + + for (int r = 1, firstred = 1 ; r <= maxr; ++r) { + if ((rank % (1 << r)) == 0) { + /* we have to receive this round */ + peer = rank + (1 << (r - 1)); + if (peer < p) { + /* we have to wait until we have the data */ + res = NBC_Sched_recv (0, true, count, datatype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + if (firstred) { + /* take reduce data from the sendbuf in the first round -> save copy */ + res = NBC_Sched_op (redbuf-(unsigned long)handle->tmpbuf, true, sendbuf, false, 0, true, count, + datatype, op, schedule, true); + firstred = 0; + } else { + /* perform the reduce in my local buffer */ + res = NBC_Sched_op (redbuf-(unsigned long)handle->tmpbuf, true, redbuf-(unsigned long)handle->tmpbuf, + true, 0, true, count, datatype, op, schedule, true); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } + } else { + /* we have 
to send this round */ + peer = rank - (1 << (r - 1)); + if(firstred) { + /* we have to send the senbuf */ + res = NBC_Sched_send (sendbuf, false, count, datatype, peer, schedule, true); + } else { + /* we send an already reduced value from redbuf */ + res = NBC_Sched_send (redbuf-(unsigned long)handle->tmpbuf, true, count, datatype, peer, schedule, true); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + /* leave the game */ + break; + } + } + + res = NBC_Sched_barrier(schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + /* rank 0 is root and sends - all others receive */ + if (rank != 0) { + res = NBC_Sched_recv (recvbuf, false, recvcount, datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } else { + for (int r = 1, offset = 0 ; r < p ; ++r) { + offset += recvcount; + sbuf = ((char *)redbuf) + (offset*ext); + /* root sends the right buffer to the right receiver */ + res = NBC_Sched_send (sbuf-(unsigned long)handle->tmpbuf, true, recvcount, datatype, r, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } + + res = NBC_Sched_copy (redbuf-(unsigned long)handle->tmpbuf, true, recvcount, datatype, recvbuf, false, recvcount, + datatype, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + /* tmpbuf is freed with the handle */ + return OMPI_SUCCESS; +} + +int ompi_coll_libnbc_ireduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t 
*dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, + ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module) { + int rank, res, count, rsize; + MPI_Aint ext; + NBC_Schedule *schedule; + NBC_Handle *handle; + ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; + + rank = ompi_comm_rank (comm); + rsize = ompi_comm_remote_size (comm); + + res = MPI_Type_extent (dtype, &ext); + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + count = rcount * rsize; + + if (count > 0) { + handle->tmpbuf = malloc (2 * ext * count); + if (NULL == handle->tmpbuf) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + schedule = OBJ_NEW(NBC_Schedule); + if (NULL == schedule) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* make sure the schedule is released with the handle on error */ + handle->schedule = schedule; + + /* send my data to the remote root */ + res = NBC_Sched_send (sbuf, false, count, dtype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + if (0 == rank) { + res = NBC_Sched_recv ((void *) 0, true, count, dtype, 0, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + for (int peer = 1 ; peer < rsize ; ++peer) { + res = NBC_Sched_recv ((void *)(ext * count), true, count, dtype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + res = NBC_Sched_op ((void *) 0, true, (void *)(ext * count), true, (void *) 0, true, count, dtype, op, + schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } + + /* exchange data with remote root for scatter phase (we 
*could* use the local communicator to do the scatter) */ + res = NBC_Sched_recv ((void *)(ext * count), true, count, dtype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + res = NBC_Sched_send ((void *) 0, true, count, dtype, 0, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + /* scatter */ + for (int peer = 0 ; peer < rsize ; ++peer) { + res = NBC_Sched_send ((void *)(ext * (count + peer * rcount)), true, rcount, dtype, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } + } + + /* receive my block: rbuf is the caller's receive buffer, not an offset into tmpbuf */ + res = NBC_Sched_recv(rbuf, false, rcount, dtype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + /*NBC_PRINT_SCHED(*schedule);*/ + + res = NBC_Sched_commit(schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + res = NBC_Start(handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + /* tmpbuf is freed with the handle */ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/coll/libnbc/nbc_iscan.c b/ompi/mca/coll/libnbc/nbc_iscan.c index df79448a9d..f320273899 100644 --- a/ompi/mca/coll/libnbc/nbc_iscan.c +++ b/ompi/mca/coll/libnbc/nbc_iscan.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -6,6 +7,8 @@ * rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved.
* * Author(s): Torsten Hoefler * @@ -15,18 +18,19 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->recvbuf == b->recvbuf) && (a->count == b->count) && (a->datatype == b->datatype) && (a->op == b->op) ) { - return 0; + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if (a->sendbuf < b->sendbuf) { return -1; - } - return +1; + } + + return 1; } #endif @@ -43,96 +47,132 @@ int ompi_coll_libnbc_iscan(void* sendbuf, void* recvbuf, int count, MPI_Datatype int rank, p, res; MPI_Aint ext; NBC_Schedule *schedule; -#ifdef NBC_CACHE_SCHEDULE - NBC_Scan_args *args, *found, search; -#endif char inplace; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); - handle->tmpbuf = malloc(ext*count); - if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } + res = MPI_Type_extent (datatype, &ext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } - if((rank == 0) && !inplace) { + if ((rank == 0) && !inplace) { /* copy data to receivebuf */ - res = 
NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + res = NBC_Copy (sendbuf, count, datatype, recvbuf, count, datatype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + handle->tmpbuf = malloc (ext * count); + if (NULL == handle->tmpbuf) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; } #ifdef NBC_CACHE_SCHEDULE - /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.recvbuf=recvbuf; - search.count=count; - search.datatype=datatype; - search.op=op; - found = (NBC_Scan_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCAN], &search); - if(found == NULL) { -#endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + NBC_Scan_args *args, *found, search; - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* search schedule in communicator specific tree */ + search.sendbuf = sendbuf; + search.recvbuf = recvbuf; + search.count = count; + search.datatype = datatype; + search.op = op; + found = (NBC_Scan_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCAN], &search); + if (NULL == found) { +#endif + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* ensure the schedule is released with the handle */ + handle->schedule = schedule; if(rank != 0) { - res = NBC_Sched_recv(0, true, count, datatype, rank-1, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if 
(NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Sched_recv (0, true, count, datatype, rank-1, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + /* perform the reduce in my local buffer */ - res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - } - if(rank != p-1) { - res = NBC_Sched_send(recvbuf, false, count, datatype, rank+1, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + /* this cannot be done until handle->tmpbuf is unused :-( so barrier after the op */ + res = NBC_Sched_op (recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule, + true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (rank != p-1) { + res = NBC_Sched_send (recvbuf, false, count, datatype, rank+1, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Scan_args*)malloc(sizeof(NBC_Alltoall_args)); - args->sendbuf=sendbuf; - args->recvbuf=recvbuf; - args->count=count; - args->datatype=datatype; - args->op=op; - args->schedule=schedule; - res = hb_tree_insert 
((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCAN], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_SCAN] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCAN], &handle->comminfo->NBC_Dict_size[NBC_SCAN]); + args = (NBC_Scan_args *) malloc (sizeof (*args)); + if (NULL != args) { + args->sendbuf = sendbuf; + args->recvbuf = recvbuf; + args->count = count; + args->datatype = datatype; + args->op = op; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCAN], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of cached SCAN schedules */ + if (++libnbc_module->NBC_Dict_size[NBC_SCAN] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCAN], + &libnbc_module->NBC_Dict_size[NBC_SCAN]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_iscatter.c b/ompi/mca/coll/libnbc/nbc_iscatter.c index c72dc8684f..f8c3d7df4c 100644 --- a/ompi/mca/coll/libnbc/nbc_iscatter.c +++ b/ompi/mca/coll/libnbc/nbc_iscatter.c @@ -1,10 +1,11 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights @@ -20,62 +21,61 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Scatter_args_compare(NBC_Scatter_args *a, NBC_Scatter_args *b, void *param) { - if( (a->sendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->sendcount == b->sendcount) && (a->sendtype == b->sendtype) && (a->recvbuf == b->recvbuf) && (a->recvcount == b->recvcount) && (a->recvtype == b->recvtype) && - (a->root == b->root) ) { - return 0; + (a->root == b->root)) { + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if (a->sendbuf < b->sendbuf) { return -1; } - return +1; + + return 1; } #endif /* simple linear MPI_Iscatter */ -int ompi_coll_libnbc_iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtype, - void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, - struct ompi_communicator_t *comm, ompi_request_t ** request, - struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i; +int ompi_coll_libnbc_iscatter (void* sendbuf, int sendcount, MPI_Datatype sendtype, + void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, + struct ompi_communicator_t *comm, ompi_request_t ** request, + struct mca_coll_base_module_2_1_0_t *module) { + int rank, p, res; MPI_Aint sndext = 0; NBC_Schedule *schedule; char *sbuf, inplace; -#ifdef NBC_CACHE_SCHEDULE - NBC_Scatter_args *args, *found, search; -#endif NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != 
NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + if (rank == root) { - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + res = MPI_Type_extent (sendtype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } } - handle->tmpbuf=NULL; - - if((rank == root) && (!inplace)) { - sbuf = ((char *)sendbuf) + (rank*sendcount*sndext); + if ((rank == root) && (!inplace)) { + sbuf = (char *) sendbuf + rank * sendcount * sndext; /* if I am the root - just copy the message (not for MPI_IN_PLACE) */ - res = NBC_Copy(sbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + res = NBC_Copy (sbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } #ifdef NBC_CACHE_SCHEDULE + NBC_Scatter_args *args, *found, search; + /* search schedule in communicator specific tree */ search.sendbuf=sendbuf; search.sendcount=sendcount; @@ -84,114 +84,156 @@ int ompi_coll_libnbc_iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtyp search.recvcount=recvcount; search.recvtype=recvtype; search.root=root; - found = (NBC_Scatter_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCATTER], &search); - if(found == NULL) { + found = (NBC_Scatter_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCATTER], &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == 
schedule) { printf("Error in malloc()\n"); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* receive from root */ - if(rank != root) { + if (rank != root) { /* recv msg from root */ - res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv (recvbuf, false, recvcount, recvtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else { - for(i=0;isendbuf=sendbuf; - args->sendcount=sendcount; - args->sendtype=sendtype; - args->recvbuf=recvbuf; - args->recvcount=recvcount; - args->recvtype=recvtype; - args->root=root; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCATTER], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_SCATTER] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCATTER], &handle->comminfo->NBC_Dict_size[NBC_SCATTER]); + args = (NBC_Scatter_args *) malloc (sizeof (*args)); + if (NULL != args) { + args->sendbuf = sendbuf; + args->sendcount = sendcount; + args->sendtype = sendtype; + args->recvbuf = recvbuf; + args->recvcount = recvcount; + args->recvtype = recvtype; + args->root = root; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCATTER], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of cached SCATTER schedules */ + if (++libnbc_module->NBC_Dict_size[NBC_SCATTER] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCATTER], +
&libnbc_module->NBC_Dict_size[NBC_SCATTER]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } -int ompi_coll_libnbc_iscatter_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, - void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, - struct ompi_communicator_t *comm, ompi_request_t ** request, - struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, i, rsize; +int ompi_coll_libnbc_iscatter_inter (void* sendbuf, int sendcount, MPI_Datatype sendtype, + void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, + struct ompi_communicator_t *comm, ompi_request_t ** request, + struct mca_coll_base_module_2_1_0_t *module) { + int res, rsize; MPI_Aint sndext; NBC_Schedule *schedule; char *sbuf; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } + rsize = ompi_comm_remote_size (comm); + if (MPI_ROOT == root) { res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + 
if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } } - res = MPI_Comm_remote_size (comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } - handle->tmpbuf = NULL; - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* receive from root */ if (MPI_ROOT != root && MPI_PROC_NULL != root) { /* recv msg from remote root */ - res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else if (MPI_ROOT == root) { - for (i = 0 ; i < rsize ; ++i) { + for (int i = 0 ; i < rsize ; ++i) { sbuf = ((char *)sendbuf) + (i * sendcount * sndext); /* root sends the right buffer to the right receiver */ - res = NBC_Sched_send(sbuf, false, sendcount, sendtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send(sbuf, false, sendcount, sendtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } res = NBC_Start(handle, 
schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_iscatterv.c b/ompi/mca/coll/libnbc/nbc_iscatterv.c index d5791b9aff..73d3e3b764 100644 --- a/ompi/mca/coll/libnbc/nbc_iscatterv.c +++ b/ompi/mca/coll/libnbc/nbc_iscatterv.c @@ -1,10 +1,11 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights @@ -26,117 +27,147 @@ int ompi_coll_libnbc_iscatterv(void* sendbuf, int *sendcounts, int *displs, MPI_ void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i; + int rank, p, res; MPI_Aint sndext; NBC_Schedule *schedule; char *sbuf, inplace; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - if (rank == root) { - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; } - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } /* receive from root */ - if(rank != root) { - /* recv msg from root */ - res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } else { - for(i=0;itmpbuf = NULL; - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == 
schedule) { printf("Error in malloc()\n"); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } /* receive from root */ if (MPI_ROOT != root && MPI_PROC_NULL != root) { /* recv msg from root */ - res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else if (MPI_ROOT == root) { - for (i = 0 ; i < rsize ; ++i) { - sbuf = ((char *)sendbuf) + (displs[i] * sndext); + res = MPI_Type_extent(sendtype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in MPI_Type_extent() (%i)", res); + OBJ_RELEASE(schedule); + return res; + } + + for (int i = 0 ; i < rsize ; ++i) { + sbuf = (char *)sendbuf + displs[i] * sndext; /* root sends the right buffer to the right receiver */ - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sbuf, false, sendcounts[i], sendtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + 
return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_neighbor_helpers.c b/ompi/mca/coll/libnbc/nbc_neighbor_helpers.c index edff3fcf86..924e852d58 100644 --- a/ompi/mca/coll/libnbc/nbc_neighbor_helpers.c +++ b/ompi/mca/coll/libnbc/nbc_neighbor_helpers.c @@ -1,106 +1,103 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * * Author(s): Torsten Hoefler * */ + #include "nbc_internal.h" +#include "ompi/mca/topo/base/base.h" -int NBC_Comm_neighbors_count(MPI_Comm comm, int *indegree, int *outdegree, int *weighted) { - int topo, res; +int NBC_Comm_neighbors_count (ompi_communicator_t *comm, int *indegree, int *outdegree) { + if (OMPI_COMM_IS_CART(comm)) { + /* cartesian */ + /* outdegree is always 2*ndims because we need to iterate over empty buffers for MPI_PROC_NULL */ + *outdegree = *indegree = 2 * comm->c_topo->mtc.cart->ndims; + } else if (OMPI_COMM_IS_GRAPH(comm)) { + /* graph */ + int rank, nneighbors; - res = MPI_Topo_test(comm, &topo); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Topo_test() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + mca_topo_base_graph_neighbors_count (comm, rank, &nneighbors); - switch(topo) { - case MPI_CART: /* cartesian */ - { - int ndims; - res = MPI_Cartdim_get(comm, &ndims) ; - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Cartdim_get() (%i)\n", res); return res; } - /* outdegree is always 2*ndims because we need to iterate over empty buffers for MPI_PROC_NULL */ - *outdegree = *indegree = 2*ndims; - *weighted = 0; - } - break; - case MPI_GRAPH: /* graph */ - { - int rank, nneighbors; - MPI_Comm_rank(comm, &rank); - res = MPI_Graph_neighbors_count(comm, rank, &nneighbors); - if 
(MPI_SUCCESS != res) { printf("MPI Error in MPI_Graph_neighbors_count() (%i)\n", res); return res; } - *outdegree = *indegree = nneighbors; - *weighted = 0; - } - break; - case MPI_DIST_GRAPH: /* graph */ - { - res = MPI_Dist_graph_neighbors_count(comm, indegree, outdegree, weighted); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Dist_graph_neighbors_count() (%i)\n", res); return res; } - } - break; - case MPI_UNDEFINED: - return NBC_INVALID_TOPOLOGY_COMM; - break; - default: - return NBC_INVALID_PARAM; - break; - } - return NBC_OK; -} - -int NBC_Comm_neighbors(MPI_Comm comm, int maxindegree, int sources[], int sourceweights[], int maxoutdegree, int destinations[], int destweights[]) { - int topo, res; - int index = 0; - - int indeg, outdeg, wgtd; - res = NBC_Comm_neighbors_count(comm, &indeg, &outdeg, &wgtd); - if(indeg > maxindegree && outdeg > maxoutdegree) return NBC_INVALID_PARAM; /* we want to return *all* neighbors */ - - res = MPI_Topo_test(comm, &topo); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Topo_test() (%i)\n", res); return res; } - - switch(topo) { - case MPI_CART: /* cartesian */ - { - int ndims, i, rpeer, speer; - res = MPI_Cartdim_get(comm, &ndims); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Cartdim_get() (%i)\n", res); return res; } - - for(i = 0; ic_topo->mtc.dist_graph->indegree; + *outdegree = comm->c_topo->mtc.dist_graph->outdegree; + } else { + return OMPI_ERR_BAD_PARAM; } - return NBC_OK; + return OMPI_SUCCESS; +} + +int NBC_Comm_neighbors (ompi_communicator_t *comm, int **sources, int *source_count, int **destinations, int *dest_count) { + int res, indeg, outdeg; + + *sources = *destinations = NULL; + + res = NBC_Comm_neighbors_count(comm, &indeg, &outdeg); + if (OMPI_SUCCESS != res) { + return res; + } + + *source_count = indeg; + *dest_count = outdeg; + + if (indeg) { + *sources = malloc (sizeof (int) * indeg); + if (OPAL_UNLIKELY(NULL == *sources)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + } else { + *sources 
= NULL; + } + + if (outdeg) { + *destinations = malloc (sizeof (int) * outdeg); + if (OPAL_UNLIKELY(NULL == *destinations)) { + free (*sources); + *sources = NULL; + return OMPI_ERR_OUT_OF_RESOURCE; + } + } else { + *destinations = NULL; + } + + /* silence clang static analyzer warning about NULL-dereference */ + if (0 == indeg && 0 == outdeg) { + return OMPI_SUCCESS; + } + + if (OMPI_COMM_IS_CART(comm)) { + /* cartesian */ + int rpeer, speer; + + /* silence clang static analyzer warning */ + assert (indeg == outdeg); + + for (int dim = 0, i = 0 ; dim < comm->c_topo->mtc.cart->ndims ; ++dim) { + mca_topo_base_cart_shift (comm, dim, 1, &rpeer, &speer); + sources[0][i] = destinations[0][i] = rpeer; i++; + sources[0][i] = destinations[0][i] = speer; i++; + } + } else if (OMPI_COMM_IS_GRAPH(comm)) { + /* graph */ + mca_topo_base_graph_neighbors (comm, ompi_comm_rank (comm), indeg, sources[0]); + memcpy (destinations[0], sources[0], indeg * sizeof (int)); + } else if (OMPI_COMM_IS_DIST_GRAPH(comm)) { + /* dist graph */ + mca_topo_base_dist_graph_neighbors (comm, indeg, sources[0], MPI_UNWEIGHTED, outdeg, destinations[0], + MPI_UNWEIGHTED); + } + + return OMPI_SUCCESS; }