ff384daab4
This commit was SVN r28048.
241 строка
7.9 KiB
C
241 строка
7.9 KiB
C
/*
|
|
* Copyright (c) 2012 Mellanox Technologies, Inc.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
#include "oshmem_config.h"
|
|
#include "oshmem/constants.h"
|
|
#include "scoll_fca.h"
|
|
#include <stdio.h>
|
|
#include "oshmem/proc/proc.h"
|
|
#include "oshmem/op/op.h"
|
|
int mca_scoll_fca_barrier(struct oshmem_group_t *group, long *pSync, int alg)
|
|
{
|
|
mca_scoll_fca_module_t *fca_module = ( mca_scoll_fca_module_t *)group->g_scoll.scoll_barrier_module;
|
|
int ret;
|
|
|
|
FCA_VERBOSE(5,"Using FCA Barrier");
|
|
ret = fca_do_barrier(fca_module->fca_comm);
|
|
if (ret < 0) {
|
|
if (ret == -EUSESHMEM) {
|
|
FCA_VERBOSE(5,"FCA Barrier failed, using original barrier");
|
|
goto orig_barrier;
|
|
}
|
|
FCA_ERROR("Barrier failed: %s", fca_strerror(ret));
|
|
return OSHMEM_ERROR;
|
|
}
|
|
return OSHMEM_SUCCESS;
|
|
orig_barrier:
|
|
return fca_module->previous_barrier(group, pSync, SCOLL_DEFAULT_ALG);
|
|
}
|
|
|
|
int mca_scoll_fca_broadcast(struct oshmem_group_t *group, int PE_root, void *target, const void *source, size_t nlong, long *pSync, int alg)
|
|
{
|
|
mca_scoll_fca_module_t *fca_module = ( mca_scoll_fca_module_t *)group->g_scoll.scoll_broadcast_module;
|
|
fca_bcast_spec_t spec;
|
|
int ret;
|
|
|
|
FCA_VERBOSE(5,"rank %i, DOING FCA BCAST\n", group->my_pe);
|
|
spec.root = oshmem_proc_group_find_id(group,PE_root);
|
|
if (group->my_pe == PE_root)
|
|
spec.buf = (void *)source;
|
|
else
|
|
spec.buf = target;
|
|
spec.size = nlong;
|
|
if (spec.size > fca_module->fca_comm_caps.max_payload) {
|
|
FCA_VERBOSE(5, "Unsupported bcast operation size %d, using fallback",
|
|
spec.size);
|
|
goto orig_bcast;
|
|
}
|
|
ret = fca_do_bcast(fca_module->fca_comm, &spec);
|
|
if (ret < 0) {
|
|
if (ret == -EUSESHMEM) {
|
|
FCA_VERBOSE(5,"FCA Broadcast failed, using original Broadcast");
|
|
goto orig_bcast;
|
|
}
|
|
FCA_ERROR("Bcast failed: %s", fca_strerror(ret));
|
|
return OSHMEM_ERROR;
|
|
}
|
|
return OSHMEM_SUCCESS;
|
|
orig_bcast:
|
|
return fca_module->previous_broadcast(group, PE_root, target, source, nlong, pSync, SCOLL_DEFAULT_ALG);
|
|
}
|
|
|
|
int mca_scoll_fca_collect(struct oshmem_group_t *group, void *target, const void *source, size_t nlong, long *pSync, bool nlong_type, int alg)
|
|
{
|
|
mca_scoll_fca_module_t *fca_module = ( mca_scoll_fca_module_t *)group->g_scoll.scoll_collect_module;
|
|
|
|
FCA_VERBOSE(5,"rank %i, DOING FCA_COLLECT, nlong_type = %i\n",group->my_pe,(int)nlong_type);
|
|
#if OSHMEM_FCA_ALLGATHER
|
|
if (nlong_type == true){
|
|
fca_gather_spec_t spec = {0,};
|
|
int ret;
|
|
spec.size = (int)nlong;
|
|
spec.sbuf = (void *)source;
|
|
spec.rbuf = target;
|
|
ret = fca_do_allgather(fca_module->fca_comm, &spec);
|
|
if (ret < 0) {
|
|
if (ret == -EUSESHMEM) {
|
|
FCA_VERBOSE(5,"FCA Fcollect(allgather) failed, using original Fcollect");
|
|
goto orig_collect;
|
|
}
|
|
FCA_ERROR("Fcollect(allgather) failed: %s", fca_strerror(ret));
|
|
return OSHMEM_ERROR;
|
|
}
|
|
return OSHMEM_SUCCESS;
|
|
}
|
|
else
|
|
{
|
|
int i, ret;
|
|
size_t *sendcounts = (size_t *)malloc(group->proc_count*sizeof(size_t));
|
|
mca_scoll_fca_collect(group,sendcounts,(void *)&nlong,sizeof(size_t),pSync,true,SCOLL_DEFAULT_ALG);
|
|
fca_gatherv_spec_t spec;
|
|
spec.sendsize = (int)nlong;
|
|
spec.sbuf = (void *)source;
|
|
spec.rbuf = target;
|
|
spec.recvsizes = alloca(sizeof(*spec.recvsizes) * group->proc_count);
|
|
spec.displs = alloca(sizeof(*spec.displs) * group->proc_count);
|
|
for (i=0; i<group->proc_count; i++){
|
|
spec.recvsizes[i] = (int)sendcounts[i];
|
|
}
|
|
spec.displs[0] = 0;
|
|
for (i=1; i<group->proc_count; i++){
|
|
spec.displs[i] = spec.displs[i-1]+spec.recvsizes[i-1];
|
|
}
|
|
ret = fca_do_allgatherv(fca_module->fca_comm, &spec);
|
|
if (ret < 0){
|
|
if (ret == -EUSESHMEM) {
|
|
FCA_VERBOSE(5,"FCA Collect(allgatherv) failed, using original Collect");
|
|
goto orig_collect;
|
|
}
|
|
FCA_ERROR("Collect(allgatherv) failed: %s", fca_strerror(ret));
|
|
return OSHMEM_ERROR;
|
|
}
|
|
free(sendcounts);
|
|
return OSHMEM_SUCCESS;
|
|
}
|
|
orig_collect:
|
|
#endif
|
|
return fca_module->previous_collect(group, target, source, nlong, pSync, nlong_type, SCOLL_DEFAULT_ALG);
|
|
}
|
|
|
|
/*
 * Numeric data-type codes returned by shmem_dtype_to_fca_dtype() and cast to
 * enum fca_reduce_dtype_t before being handed to FCA.
 * NOTE(review): the values presumably mirror the FCA library's own
 * enumeration -- confirm against the FCA API header before changing them.
 */
#define FCA_DTYPE_8_SIGNED  1
#define FCA_DTYPE_16_SIGNED 2
#define FCA_DTYPE_32_SIGNED 3
#define FCA_DTYPE_64_SIGNED 4
#define FCA_DTYPE_32_FLOAT  9
#define FCA_DTYPE_64_FLOAT  10

/*
 * Sentinel returned by the SHMEM-to-FCA mapping helpers when no FCA
 * equivalent exists; callers test for a negative value.  Parenthesised so
 * the expansion is safe inside any surrounding expression.
 */
#define UNSUPPORTED_OP (-1)
|
|
|
|
/*
 * Tell whether the operation's data type is one of the real floating-point
 * kinds: float, double or long double.
 */
static bool if_floating_type(oshmem_op_t *op)
{
    return (op->dt == OSHMEM_OP_TYPE_FLOAT)  ||
           (op->dt == OSHMEM_OP_TYPE_DOUBLE) ||
           (op->dt == OSHMEM_OP_TYPE_LDOUBLE);
}
|
|
/*
 * Map the data type of a SHMEM operation to an FCA_DTYPE_* code.
 *
 * Complex types are never supported.  Otherwise the choice is driven by the
 * element width in bits (dt_size * 8) and by whether the type is floating
 * point: 64- and 32-bit widths have both a float and a signed code, while
 * 16- and 8-bit widths only have a signed code.
 *
 * Returns the FCA_DTYPE_* code, or UNSUPPORTED_OP when there is no match.
 */
static int shmem_dtype_to_fca_dtype(oshmem_op_t *op)
{
    if ((op->dt == OSHMEM_OP_TYPE_FCOMPLEX) ||
        (op->dt == OSHMEM_OP_TYPE_DCOMPLEX)) {
        return UNSUPPORTED_OP;
    }

    switch (op->dt_size*8) {
    case 64:
        return if_floating_type(op) ? FCA_DTYPE_64_FLOAT
                                    : FCA_DTYPE_64_SIGNED;
    case 32:
        return if_floating_type(op) ? FCA_DTYPE_32_FLOAT
                                    : FCA_DTYPE_32_SIGNED;
    case 16:
        /* No 16-bit floating-point code is defined. */
        return OPAL_UNLIKELY(if_floating_type(op)) ? UNSUPPORTED_OP
                                                   : FCA_DTYPE_16_SIGNED;
    case 8:
        /* No 8-bit floating-point code is defined. */
        return OPAL_UNLIKELY(if_floating_type(op)) ? UNSUPPORTED_OP
                                                   : FCA_DTYPE_8_SIGNED;
    default:
        return UNSUPPORTED_OP;
    }
}
|
|
|
|
/*
 * Map the operation kind of a SHMEM reduction (and, or, xor, max, min, sum,
 * prod) to the matching FCA_OP_* value.
 *
 * Returns UNSUPPORTED_OP for any other operation kind.
 */
static int shmem_op_to_fca_op(oshmem_op_t *op)
{
    int fca_op = UNSUPPORTED_OP;

    switch (op->op) {
    case OSHMEM_OP_AND:
        fca_op = FCA_OP_BAND;
        break;
    case OSHMEM_OP_OR:
        fca_op = FCA_OP_BOR;
        break;
    case OSHMEM_OP_XOR:
        fca_op = FCA_OP_BXOR;
        break;
    case OSHMEM_OP_MAX:
        fca_op = FCA_OP_MAX;
        break;
    case OSHMEM_OP_MIN:
        fca_op = FCA_OP_MIN;
        break;
    case OSHMEM_OP_SUM:
        fca_op = FCA_OP_SUM;
        break;
    case OSHMEM_OP_PROD:
        fca_op = FCA_OP_PROD;
        break;
    default:
        /* Leave the UNSUPPORTED_OP sentinel in place. */
        break;
    }

    return fca_op;
}
|
|
int mca_scoll_fca_reduce(struct oshmem_group_t *group, struct oshmem_op_t *op, void *target, const void *source, size_t nlong, long *pSync, void *pWrk, int alg)
|
|
{
|
|
mca_scoll_fca_module_t *fca_module = ( mca_scoll_fca_module_t *)group->g_scoll.scoll_reduce_module;
|
|
int fca_dtype;
|
|
int fca_op;
|
|
int ret;
|
|
fca_reduce_spec_t spec;
|
|
|
|
FCA_VERBOSE(5,"rank %i, DOING FCA_REDUCE\n",group->my_pe);
|
|
if ( (fca_dtype = shmem_dtype_to_fca_dtype(op)) < 0){
|
|
FCA_VERBOSE(5,"SHMEM_DATA_TYPE = %i is unsupported in the current version of FCA library; using original reduce",op->dt);
|
|
goto orig_reduce;
|
|
}
|
|
if ( (fca_op = shmem_op_to_fca_op(op)) < 0){
|
|
FCA_VERBOSE(5,"SHMEM_OPERATION_TYPE = %i is unsupported; using original reduce",op->op);
|
|
goto orig_reduce;
|
|
}
|
|
spec.sbuf = (void *)source;
|
|
spec.rbuf = target;
|
|
spec.dtype = (enum fca_reduce_dtype_t)fca_dtype;
|
|
spec.op = (enum fca_reduce_op_t)fca_op;
|
|
spec.length = (int)(nlong/op->dt_size);
|
|
ret = fca_do_all_reduce(fca_module->fca_comm, &spec);
|
|
if (ret < 0) {
|
|
if (ret == -EUSESHMEM) {
|
|
FCA_VERBOSE(5,"FCA Reduce(allreduce) failed, using original Reduce");
|
|
goto orig_reduce;
|
|
}
|
|
FCA_ERROR("Reduce (allreduce) failed: %s", fca_strerror(ret));
|
|
return OSHMEM_ERROR;
|
|
}
|
|
return OSHMEM_SUCCESS;
|
|
orig_reduce:
|
|
return fca_module->previous_reduce(group, op, target, source, nlong, pSync, pWrk, SCOLL_DEFAULT_ALG);
|
|
}
|