1
1
openmpi/oshmem/mca/scoll/fca/scoll_fca_ops.c

289 строки
9.5 KiB
C

/*
* Copyright (c) 2013 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include "oshmem/constants.h"
#include "scoll_fca.h"
#include <stdio.h>
#include "oshmem/proc/proc.h"
#include "oshmem/op/op.h"
/*
 * Barrier over the group's FCA communicator.
 *
 * group  - group whose barrier module holds the FCA communicator
 * pSync  - synchronization work array, only forwarded to the fallback
 * alg    - ignored; the fallback is always invoked with SCOLL_DEFAULT_ALG
 *
 * Returns OSHMEM_SUCCESS when FCA completes the barrier, OSHMEM_ERROR when
 * FCA reports a failure other than -EUSESHMEM, and otherwise whatever the
 * previously installed barrier implementation returns.
 */
int mca_scoll_fca_barrier(struct oshmem_group_t *group, long *pSync, int alg)
{
    mca_scoll_fca_module_t *module =
        (mca_scoll_fca_module_t *) group->g_scoll.scoll_barrier_module;
    int rc;

    FCA_VERBOSE(5, "Using FCA Barrier");

    rc = fca_do_barrier(module->fca_comm);
    if (rc >= 0) {
        return OSHMEM_SUCCESS;
    }

    if (rc != -EUSESHMEM) {
        FCA_ERROR("Barrier failed: %s", fca_strerror(rc));
        return OSHMEM_ERROR;
    }

    /* FCA asked us to use the regular SHMEM path for this call. */
    FCA_VERBOSE(5, "FCA Barrier failed, using original barrier");
    return module->previous_barrier(group, pSync, SCOLL_DEFAULT_ALG);
}
/*
 * Broadcast nlong bytes from PE_root to the group through FCA.
 *
 * The root PE sends from `source`; every other PE receives into `target`.
 * Messages larger than the communicator's max_payload, and calls for which
 * FCA returns -EUSESHMEM, are handed to the previously installed broadcast
 * with SCOLL_DEFAULT_ALG (the caller's `alg` is not used).
 *
 * Returns OSHMEM_SUCCESS, OSHMEM_ERROR on any other FCA failure, or the
 * fallback implementation's return value.
 */
int mca_scoll_fca_broadcast(struct oshmem_group_t *group,
                            int PE_root,
                            void *target,
                            const void *source,
                            size_t nlong,
                            long *pSync,
                            int alg)
{
    mca_scoll_fca_module_t *module =
        (mca_scoll_fca_module_t *) group->g_scoll.scoll_broadcast_module;
    fca_bcast_spec_t request;
    int rc;

    FCA_VERBOSE(5, "rank %i, DOING FCA BCAST\n", group->my_pe);

    /* FCA wants the root expressed as an index inside the group. */
    request.root = oshmem_proc_group_find_id(group, PE_root);
    request.buf = (group->my_pe == PE_root) ? (void *) source : target;
    request.size = nlong;

    if (request.size <= module->fca_comm_caps.max_payload) {
        rc = fca_do_bcast(module->fca_comm, &request);
        if (rc >= 0) {
            return OSHMEM_SUCCESS;
        }
        if (rc != -EUSESHMEM) {
            FCA_ERROR("Bcast failed: %s", fca_strerror(rc));
            return OSHMEM_ERROR;
        }
        FCA_VERBOSE(5, "FCA Broadcast failed, using original Broadcast");
    } else {
        FCA_VERBOSE(5,
                    "Unsupported bcast operation size %d, using fallback",
                    request.size);
    }

    return module->previous_broadcast(group,
                                      PE_root,
                                      target,
                                      source,
                                      nlong,
                                      pSync,
                                      SCOLL_DEFAULT_ALG);
}
/*
 * Collect / fcollect through FCA.
 *
 * group      - group whose collect module holds the FCA communicator
 * target     - receive buffer for the concatenated contributions
 * source     - this PE's contribution
 * nlong      - size of this PE's contribution (passed to FCA as an int)
 * pSync      - synchronization work array, forwarded to the fallback
 * nlong_type - true: every PE contributes the same size (allgather);
 *              false: sizes differ per PE (allgatherv)
 * alg        - ignored; the fallback is always invoked with SCOLL_DEFAULT_ALG
 *
 * When OSHMEM_FCA_ALLGATHER is not enabled the call goes straight to the
 * previously installed collect. Otherwise returns OSHMEM_SUCCESS when FCA
 * completes the operation, OSHMEM_ERROR on an FCA failure other than
 * -EUSESHMEM or on allocation failure, the error of the internal size
 * exchange if that fails, and the fallback's result when FCA returns
 * -EUSESHMEM.
 */
int mca_scoll_fca_collect(struct oshmem_group_t *group,
                          void *target,
                          const void *source,
                          size_t nlong,
                          long *pSync,
                          bool nlong_type,
                          int alg)
{
    mca_scoll_fca_module_t *fca_module =
        (mca_scoll_fca_module_t *) group->g_scoll.scoll_collect_module;

    FCA_VERBOSE(5,
                "rank %i, DOING FCA_COLLECT, nlong_type = %i\n",
                group->my_pe, (int)nlong_type);
#if OSHMEM_FCA_ALLGATHER
    if (nlong_type == true) {
        fca_gather_spec_t spec = {0,};
        int ret;

        spec.size = (int)nlong;
        spec.sbuf = (void *)source;
        spec.rbuf = target;
        ret = fca_do_allgather(fca_module->fca_comm, &spec);
        if (ret < 0) {
            if (ret == -EUSESHMEM) {
                FCA_VERBOSE(5,"FCA Fcollect(allgather) failed, using original Fcollect");
                goto orig_collect;
            }
            FCA_ERROR("Fcollect(allgather) failed: %s", fca_strerror(ret));
            return OSHMEM_ERROR;
        }
        return OSHMEM_SUCCESS;
    }
    else
    {
        /* Zero the spec like the allgather branch does, so fields that are
         * not set below never carry stack garbage into FCA. */
        fca_gatherv_spec_t spec = {0,};
        size_t *sendcounts = NULL;
        bool use_fallback = false;
        int rc = OSHMEM_ERROR;
        int i, ret;

        /* All scratch arrays scale with the number of PEs, so they live on
         * the heap instead of the stack and are released on every exit. */
        sendcounts = malloc(group->proc_count * sizeof(*sendcounts));
        spec.recvsizes = malloc(group->proc_count * sizeof(*spec.recvsizes));
        spec.displs = malloc(group->proc_count * sizeof(*spec.displs));
        if (NULL == sendcounts || NULL == spec.recvsizes || NULL == spec.displs) {
            FCA_ERROR("Collect(allgatherv) failed: %s", "out of memory");
            goto gatherv_done;
        }

        /* Exchange every PE's contribution size with a fixed-size collect.
         * NOTE(review): &nlong is a stack address; if this inner call falls
         * back to previous_collect that implementation may expect a
         * symmetric source buffer -- confirm. */
        rc = mca_scoll_fca_collect(group,
                                   sendcounts,
                                   (void *)&nlong,
                                   sizeof(size_t),
                                   pSync,
                                   true,
                                   SCOLL_DEFAULT_ALG);
        if (OSHMEM_SUCCESS != rc) {
            /* sendcounts is not trustworthy; do not feed it to allgatherv. */
            goto gatherv_done;
        }

        spec.sendsize = (int)nlong;
        spec.sbuf = (void *)source;
        spec.rbuf = target;
        for (i = 0; i < group->proc_count; i++) {
            spec.recvsizes[i] = (int)sendcounts[i];
        }
        /* Displacements are the running prefix sum of the receive sizes. */
        spec.displs[0] = 0;
        for (i = 1; i < group->proc_count; i++) {
            spec.displs[i] = spec.displs[i-1] + spec.recvsizes[i-1];
        }

        ret = fca_do_allgatherv(fca_module->fca_comm, &spec);
        if (ret >= 0) {
            rc = OSHMEM_SUCCESS;
        } else if (ret == -EUSESHMEM) {
            FCA_VERBOSE(5,"FCA Collect(allgatherv) failed, using original Collect");
            use_fallback = true;
        } else {
            FCA_ERROR("Collect(allgatherv) failed: %s", fca_strerror(ret));
            rc = OSHMEM_ERROR;
        }

gatherv_done:
        free(spec.displs);
        free(spec.recvsizes);
        free(sendcounts);
        if (use_fallback) {
            goto orig_collect;
        }
        return rc;
    }
orig_collect:
#endif
    return fca_module->previous_collect(group,
                                        target,
                                        source,
                                        nlong,
                                        pSync,
                                        nlong_type,
                                        SCOLL_DEFAULT_ALG);
}
/*
 * Numeric element-type codes produced by shmem_dtype_to_fca_dtype().
 * mca_scoll_fca_reduce() casts the chosen code to enum fca_reduce_dtype_t,
 * so these values presumably mirror that enum in the FCA headers --
 * TODO confirm against the FCA version in use.
 */
#define FCA_DTYPE_8_SIGNED 1
#define FCA_DTYPE_16_SIGNED 2
#define FCA_DTYPE_32_SIGNED 3
#define FCA_DTYPE_64_SIGNED 4
#define FCA_DTYPE_32_FLOAT 9
#define FCA_DTYPE_64_FLOAT 10
/* Negative sentinel returned by the mapping helpers when there is no FCA
 * equivalent; callers detect it with a "< 0" test. */
#define UNSUPPORTED_OP -1
/* Tell whether the operation's data type is float, double or long double. */
static bool if_floating_type(oshmem_op_t *op)
{
    return (OSHMEM_OP_TYPE_FLOAT == op->dt)
        || (OSHMEM_OP_TYPE_DOUBLE == op->dt)
        || (OSHMEM_OP_TYPE_LDOUBLE == op->dt);
}
/*
 * Map the operation's element type onto an FCA_DTYPE_* code.
 *
 * The choice is driven by the element width in bits (dt_size * 8) and by
 * whether the type is floating point. Complex types, floating-point types
 * narrower than 32 bits, and any width other than 8/16/32/64 yield
 * UNSUPPORTED_OP.
 */
static int shmem_dtype_to_fca_dtype(oshmem_op_t *op)
{
    bool is_float;

    if ((OSHMEM_OP_TYPE_FCOMPLEX == op->dt)
        || (OSHMEM_OP_TYPE_DCOMPLEX == op->dt)) {
        return UNSUPPORTED_OP;
    }

    is_float = if_floating_type(op);

    switch (op->dt_size * 8) {
    case 8:
        return is_float ? UNSUPPORTED_OP : FCA_DTYPE_8_SIGNED;
    case 16:
        return is_float ? UNSUPPORTED_OP : FCA_DTYPE_16_SIGNED;
    case 32:
        return is_float ? FCA_DTYPE_32_FLOAT : FCA_DTYPE_32_SIGNED;
    case 64:
        return is_float ? FCA_DTYPE_64_FLOAT : FCA_DTYPE_64_SIGNED;
    default:
        return UNSUPPORTED_OP;
    }
}
/*
 * Translate an OSHMEM reduction operator into the matching FCA_OP_* value.
 * Operators without a counterpart here yield UNSUPPORTED_OP.
 */
static int shmem_op_to_fca_op(oshmem_op_t *op)
{
    int fca_code = UNSUPPORTED_OP;

    switch (op->op) {
    case OSHMEM_OP_AND:
        fca_code = FCA_OP_BAND;
        break;
    case OSHMEM_OP_OR:
        fca_code = FCA_OP_BOR;
        break;
    case OSHMEM_OP_XOR:
        fca_code = FCA_OP_BXOR;
        break;
    case OSHMEM_OP_MAX:
        fca_code = FCA_OP_MAX;
        break;
    case OSHMEM_OP_MIN:
        fca_code = FCA_OP_MIN;
        break;
    case OSHMEM_OP_SUM:
        fca_code = FCA_OP_SUM;
        break;
    case OSHMEM_OP_PROD:
        fca_code = FCA_OP_PROD;
        break;
    default:
        break;
    }

    return fca_code;
}
/*
 * Reduction across the group, carried out as an FCA all-reduce.
 *
 * group  - group whose reduce module holds the FCA communicator
 * op     - reduction descriptor (operator, element type, element size)
 * target - receive buffer
 * source - send buffer
 * nlong  - payload size; divided by op->dt_size to get the element count
 * pSync, pWrk - work arrays, only forwarded to the fallback
 * alg    - ignored; the fallback is always invoked with SCOLL_DEFAULT_ALG
 *
 * If the data type or operator has no FCA mapping, or FCA returns
 * -EUSESHMEM, the previously installed reduce is used and its result is
 * returned. Otherwise returns OSHMEM_SUCCESS, or OSHMEM_ERROR on any other
 * FCA failure.
 */
int mca_scoll_fca_reduce(struct oshmem_group_t *group,
                         struct oshmem_op_t *op,
                         void *target,
                         const void *source,
                         size_t nlong,
                         long *pSync,
                         void *pWrk,
                         int alg)
{
    mca_scoll_fca_module_t *module =
        (mca_scoll_fca_module_t *) group->g_scoll.scoll_reduce_module;
    fca_reduce_spec_t request;
    int dtype_code;
    int op_code;
    int rc;

    FCA_VERBOSE(5, "rank %i, DOING FCA_REDUCE\n", group->my_pe);

    dtype_code = shmem_dtype_to_fca_dtype(op);
    if (dtype_code < 0) {
        FCA_VERBOSE(5,
                    "SHMEM_DATA_TYPE = %i is unsupported in the current version of FCA library; using original reduce",
                    op->dt);
        goto fallback;
    }

    op_code = shmem_op_to_fca_op(op);
    if (op_code < 0) {
        FCA_VERBOSE(5,
                    "SHMEM_OPERATION_TYPE = %i is unsupported; using original reduce",
                    op->op);
        goto fallback;
    }

    request.sbuf = (void *) source;
    request.rbuf = target;
    request.dtype = (enum fca_reduce_dtype_t) dtype_code;
    request.op = (enum fca_reduce_op_t) op_code;
    request.length = (int) (nlong / op->dt_size);

    rc = fca_do_all_reduce(module->fca_comm, &request);
    if (rc >= 0) {
        return OSHMEM_SUCCESS;
    }
    if (rc != -EUSESHMEM) {
        FCA_ERROR("Reduce (allreduce) failed: %s", fca_strerror(rc));
        return OSHMEM_ERROR;
    }
    FCA_VERBOSE(5,
                "FCA Reduce(allreduce) failed, using original Reduce");

fallback:
    return module->previous_reduce(group,
                                   op,
                                   target,
                                   source,
                                   nlong,
                                   pSync,
                                   pWrk,
                                   SCOLL_DEFAULT_ALG);
}