btl/ugni: add support for additional atomic operations
This commit adds support for the additional atomic operations available on Cray Aries, including 32-bit and floating-point operands. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
23fe19a956
Коммит
c19426ac1b
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -11,18 +11,66 @@
|
||||
|
||||
#include "btl_ugni_rdma.h"
|
||||
|
||||
static gni_fma_cmd_type_t famo_cmds[] = {
|
||||
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_FADD,
|
||||
[MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_FAND,
|
||||
[MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_FOR,
|
||||
[MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_FXOR,
|
||||
};
|
||||
|
||||
static gni_fma_cmd_type_t amo_cmds[] = {
|
||||
static gni_fma_cmd_type_t amo_cmds[][MCA_BTL_ATOMIC_LAST] = {
|
||||
[OPAL_INT32] = {
|
||||
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_IADD_S,
|
||||
[MCA_BTL_ATOMIC_LAND] = GNI_FMA_ATOMIC2_AND_S,
|
||||
[MCA_BTL_ATOMIC_LOR] = GNI_FMA_ATOMIC2_OR_S,
|
||||
[MCA_BTL_ATOMIC_LXOR] = GNI_FMA_ATOMIC2_XOR_S,
|
||||
[MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_SWAP_S,
|
||||
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_IMIN_S,
|
||||
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_IMAX_S,
|
||||
},
|
||||
[OPAL_INT64] = {
|
||||
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_ADD,
|
||||
[MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_AND,
|
||||
[MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_OR,
|
||||
[MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_XOR,
|
||||
[MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_SWAP,
|
||||
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_IMIN,
|
||||
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_IMAX,
|
||||
},
|
||||
[OPAL_FLOAT] = {
|
||||
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FPADD_S,
|
||||
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FPMIN_S,
|
||||
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FPMAX_S,
|
||||
},
|
||||
[OPAL_DOUBLE] = {
|
||||
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FPADD,
|
||||
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FPMIN,
|
||||
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FPMAX,
|
||||
},
|
||||
};
|
||||
|
||||
static gni_fma_cmd_type_t famo_cmds[][MCA_BTL_ATOMIC_LAST] = {
|
||||
[OPAL_INT32] = {
|
||||
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FIADD_S,
|
||||
[MCA_BTL_ATOMIC_LAND] = GNI_FMA_ATOMIC2_FAND_S,
|
||||
[MCA_BTL_ATOMIC_LOR] = GNI_FMA_ATOMIC2_FOR_S,
|
||||
[MCA_BTL_ATOMIC_LXOR] = GNI_FMA_ATOMIC2_FXOR_S,
|
||||
[MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_FSWAP_S,
|
||||
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FIMIN_S,
|
||||
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FIMAX_S,
|
||||
},
|
||||
[OPAL_INT64] = {
|
||||
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_FADD,
|
||||
[MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_FAND,
|
||||
[MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_FOR,
|
||||
[MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_FXOR,
|
||||
[MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_FSWAP,
|
||||
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FIMIN,
|
||||
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FIMAX,
|
||||
},
|
||||
[OPAL_FLOAT] = {
|
||||
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FFPADD_S,
|
||||
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FFPMIN_S,
|
||||
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FFPMAX_S,
|
||||
},
|
||||
[OPAL_DOUBLE] = {
|
||||
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FFPADD,
|
||||
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FFPMIN,
|
||||
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FFPMAX,
|
||||
},
|
||||
};
|
||||
|
||||
int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
@ -32,7 +80,20 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
|
||||
{
|
||||
gni_mem_handle_t dummy = {0, 0};
|
||||
mca_btl_ugni_post_descriptor_t *post_desc;
|
||||
int rc;
|
||||
int gni_op, rc, type;
|
||||
size_t size;
|
||||
|
||||
size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8;
|
||||
if (MCA_BTL_ATOMIC_FLAG_FLOAT & flags) {
|
||||
type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_FLOAT : OPAL_DOUBLE;
|
||||
} else {
|
||||
type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_INT32 : OPAL_INT64;
|
||||
}
|
||||
|
||||
gni_op = amo_cmds[type][op];
|
||||
if (0 == gni_op) {
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
@ -45,8 +106,8 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
|
||||
}
|
||||
|
||||
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, 0, dummy, remote_address,
|
||||
remote_handle->gni_handle, 8, 0);
|
||||
post_desc->desc.base.amo_cmd = amo_cmds[op];
|
||||
remote_handle->gni_handle, size, 0);
|
||||
post_desc->desc.base.amo_cmd = gni_op;
|
||||
|
||||
post_desc->desc.base.first_operand = operand;
|
||||
|
||||
@ -54,6 +115,10 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
|
||||
rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
|
||||
if (GNI_RC_ILLEGAL_OP == rc) {
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -67,7 +132,20 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
|
||||
void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_ugni_post_descriptor_t *post_desc;
|
||||
int rc;
|
||||
int gni_op, rc, type;
|
||||
size_t size;
|
||||
|
||||
size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8;
|
||||
if (MCA_BTL_ATOMIC_FLAG_FLOAT & flags) {
|
||||
type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_FLOAT : OPAL_DOUBLE;
|
||||
} else {
|
||||
type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_INT32 : OPAL_INT64;
|
||||
}
|
||||
|
||||
gni_op = famo_cmds[type][op];
|
||||
if (0 == gni_op) {
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
@ -81,8 +159,8 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
|
||||
|
||||
|
||||
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
|
||||
remote_address, remote_handle->gni_handle, 8, 0);
|
||||
post_desc->desc.base.amo_cmd = famo_cmds[op];
|
||||
remote_address, remote_handle->gni_handle, size, 0);
|
||||
post_desc->desc.base.amo_cmd = gni_op;
|
||||
|
||||
post_desc->desc.base.first_operand = operand;
|
||||
|
||||
@ -91,6 +169,9 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
|
||||
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
|
||||
if (GNI_RC_ILLEGAL_OP == rc) {
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -103,7 +184,11 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_
|
||||
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_ugni_post_descriptor_t *post_desc;
|
||||
int rc;
|
||||
int gni_op, rc;
|
||||
size_t size;
|
||||
|
||||
gni_op = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? GNI_FMA_ATOMIC2_CSWAP_S : GNI_FMA_ATOMIC_CSWAP;
|
||||
size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8;
|
||||
|
||||
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
@ -117,8 +202,8 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_
|
||||
|
||||
|
||||
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
|
||||
remote_address, remote_handle->gni_handle, 8, 0);
|
||||
post_desc->desc.base.amo_cmd = GNI_FMA_ATOMIC_CSWAP;
|
||||
remote_address, remote_handle->gni_handle, size, 0);
|
||||
post_desc->desc.base.amo_cmd = gni_op;
|
||||
|
||||
post_desc->desc.base.first_operand = compare;
|
||||
post_desc->desc.base.second_operand = value;
|
||||
|
@ -291,6 +291,13 @@ btl_ugni_component_register(void)
|
||||
MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR |
|
||||
MCA_BTL_ATOMIC_SUPPORTS_CSWAP;
|
||||
|
||||
if (GNI_DEVICE_ARIES == device_type) {
|
||||
/* aries supports additional atomic operations */
|
||||
mca_btl_ugni_module.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MIN | MCA_BTL_ATOMIC_SUPPORTS_MAX |
|
||||
MCA_BTL_ATOMIC_SUPPORTS_LAND | MCA_BTL_ATOMIC_SUPPORTS_LOR | MCA_BTL_ATOMIC_SUPPORTS_LXOR |
|
||||
MCA_BTL_ATOMIC_SUPPORTS_32BIT | MCA_BTL_ATOMIC_SUPPORTS_FLOAT;
|
||||
}
|
||||
|
||||
mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
|
||||
|
||||
mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user