1
1
openmpi/ompi/mca/coll/cuda/coll_cuda_scan.c
George Bosilca 366d64b7e5 Move the collective structure outside the communicator.
As we changed the ABI (forcing a major release), we can limit
the size of the predefined communicators by moving the collective
structure outside the communicator. This might have a minimal,
but unnoticeable, impact on performance. This approach has been
discussed during the January 2017 devel meeting.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Signed-off-by: Joshua Hursey <jhursey@us.ibm.com>
2017-02-27 11:54:17 -06:00

74 строки
2.1 KiB
C

/*
* Copyright (c) 2014-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_cuda.h"
#include <stdio.h>
#include "ompi/op/op.h"
#include "opal/datatype/opal_convertor.h"
#include "opal/datatype/opal_datatype_cuda.h"
/*
* scan
*
* Function: - scan
* Accepts: - same arguments as MPI_Scan()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module;
ptrdiff_t gap;
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
size_t bufsize;
int rc;
bufsize = opal_datatype_span(&dtype->super, count, &gap);
if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) {
sbuf1 = (char*)malloc(bufsize);
if (NULL == sbuf1) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize);
sbuf = sbuf1 - gap;
}
if (opal_cuda_check_bufs(rbuf, NULL)) {
rbuf1 = (char*)malloc(bufsize);
if (NULL == rbuf1) {
if (NULL != sbuf1) free(sbuf1);
return OMPI_ERR_OUT_OF_RESOURCE;
}
opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize);
rbuf2 = rbuf; /* save away original buffer */
rbuf = rbuf1 - gap;
}
rc = s->c_coll.coll_scan(sbuf, rbuf, count, dtype, op, comm,
s->c_coll.coll_scan_module);
if (NULL != sbuf1) {
free(sbuf1);
}
if (NULL != rbuf1) {
rbuf = rbuf2;
opal_cuda_memcpy_sync(rbuf, rbuf1, bufsize);
free(rbuf1);
}
return rc;
}