diff --git a/opal/mca/btl/ugni/Makefile.am b/opal/mca/btl/ugni/Makefile.am index 7304f1baeb..cff4f734a4 100644 --- a/opal/mca/btl/ugni/Makefile.am +++ b/opal/mca/btl/ugni/Makefile.am @@ -39,7 +39,8 @@ ugni_SOURCES = \ btl_ugni_smsg.h \ btl_ugni_smsg.c \ btl_ugni_progress_thread.c \ - btl_ugni_prepare.h + btl_ugni_prepare.h \ + btl_ugni_atomic.c mcacomponentdir = $(opallibdir) mcacomponent_LTLIBRARIES = $(component_install) diff --git a/opal/mca/btl/ugni/btl_ugni.h b/opal/mca/btl/ugni/btl_ugni.h index 0847993e66..3fbd467292 100644 --- a/opal/mca/btl/ugni/btl_ugni.h +++ b/opal/mca/btl/ugni/btl_ugni.h @@ -281,6 +281,22 @@ int mca_btl_ugni_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); +int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle, + mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order, + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); /* mca_btl_ugni_aop: posts the atomic operation selected by op, with operand, on remote_address; it takes no local_address/local_handle */ + +int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, + mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op, + uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, + void *cbcontext, void *cbdata); /* mca_btl_ugni_afop: like mca_btl_ugni_aop, but also takes local_address/local_handle, which the implementation passes as the local side of the post (it selects from the fetching command table) */ + +int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, + mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, + int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); /* mca_btl_ugni_acswap: compare-and-swap on remote_address; compare and value become the first and second operands of the post */ + int 
mca_btl_ugni_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint); mca_btl_base_descriptor_t * diff --git a/opal/mca/btl/ugni/btl_ugni_add_procs.c b/opal/mca/btl/ugni/btl_ugni_add_procs.c index 99e1652d48..bc5a184dc6 100644 --- a/opal/mca/btl/ugni/btl_ugni_add_procs.c +++ b/opal/mca/btl/ugni/btl_ugni_add_procs.c @@ -34,7 +34,6 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t **peers, opal_bitmap_t *reachable) { mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl; - opal_proc_t *my_proc = opal_proc_local_get(); size_t i; int rc; void *mmap_start_addr; @@ -67,11 +66,8 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl, if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) { ugni_module->nlocal_procs++; - /* Do not use uGNI to communicate with local procs unless we are adding more ranks. - * Change this when sm and vader are updated to handle additional add procs. */ - if (!ugni_module->initialized || my_proc == ompi_proc) { - continue; - } + /* on-node peers are no longer skipped: uGNI endpoints are created for local processes too, + * so that network atomic operations can be supported for them */ } /* Create and Init endpoints */ diff --git a/opal/mca/btl/ugni/btl_ugni_atomic.c b/opal/mca/btl/ugni/btl_ugni_atomic.c new file mode 100644 index 0000000000..981bc759ee --- /dev/null +++ b/opal/mca/btl/ugni/btl_ugni_atomic.c @@ -0,0 +1,135 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "btl_ugni_rdma.h" + +static gni_fma_cmd_type_t famo_cmds[] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_FADD, + [MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_FAND, + [MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_FOR, + [MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_FXOR, +}; + +static gni_fma_cmd_type_t amo_cmds[] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_ADD, + [MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_AND, + [MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_OR, + [MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_XOR, +}; + +int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle, + mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order, + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) +{ /* Posts a non-fetching atomic: amo_cmds[op] applied with operand at remote_address. No local buffer is involved (local address 0 and a zeroed dummy handle are passed); btl and flags are not used. Returns the endpoint-setup error, OPAL_ERR_OUT_OF_RESOURCE when no post descriptor is available or GNI_PostFma fails, else OPAL_SUCCESS. NOTE(review): op indexes the table without a range check; confirm callers only pass ADD/AND/OR/XOR. */ + gni_mem_handle_t dummy = {0, 0}; + mca_btl_ugni_post_descriptor_t *post_desc; + int rc; + + rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + return rc; + } + + mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata, &post_desc); + if (OPAL_UNLIKELY(NULL == post_desc)) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, 0, dummy, remote_address, + remote_handle->gni_handle, 8, 0); + post_desc->desc.base.amo_cmd = amo_cmds[op]; + + post_desc->desc.base.first_operand = operand; + + OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock); + rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base); + OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock); + if (GNI_RC_SUCCESS != rc) { + mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc); /* hand the descriptor back as afop/acswap do, otherwise it is leaked when the post fails */ + return OPAL_ERR_OUT_OF_RESOURCE; + } + + return OPAL_SUCCESS; +} + +int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, + 
mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op, + uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, + void *cbcontext, void *cbdata) +{ /* Fetching variant of mca_btl_ugni_aop: uses famo_cmds[op] and passes local_address/local_handle as the local side of the post; same return codes. */ + mca_btl_ugni_post_descriptor_t *post_desc; + int rc; + + rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + return rc; + } + + mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc); + if (OPAL_UNLIKELY(NULL == post_desc)) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle, + remote_address, remote_handle->gni_handle, 8, 0); + post_desc->desc.base.amo_cmd = famo_cmds[op]; + + post_desc->desc.base.first_operand = operand; + + OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock); + rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base); + OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock); + if (GNI_RC_SUCCESS != rc) { + mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + return OPAL_SUCCESS; +} + +int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, + mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags, + int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) +{ /* Posts GNI_FMA_ATOMIC_CSWAP at remote_address with compare as the first operand and value as the second; local_address/local_handle are passed as the local side of the post. Same return codes as mca_btl_ugni_afop. */ + mca_btl_ugni_post_descriptor_t *post_desc; + int rc; + + rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + return rc; + } + + mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc); + if (OPAL_UNLIKELY(NULL == post_desc)) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + + init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) 
local_address, local_handle->gni_handle, + remote_address, remote_handle->gni_handle, 8, 0); + post_desc->desc.base.amo_cmd = GNI_FMA_ATOMIC_CSWAP; + + post_desc->desc.base.first_operand = compare; + post_desc->desc.base.second_operand = value; + + OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock); + rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base); + OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock); + if (GNI_RC_SUCCESS != rc) { + mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + return OPAL_SUCCESS; +} diff --git a/opal/mca/btl/ugni/btl_ugni_component.c b/opal/mca/btl/ugni/btl_ugni_component.c index 9ecd8918b8..bc3110635c 100644 --- a/opal/mca/btl/ugni/btl_ugni_component.c +++ b/opal/mca/btl/ugni/btl_ugni_component.c @@ -229,7 +229,11 @@ btl_ugni_component_register(void) mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 8 * 1024; mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND | - MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE; + MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_ATOMIC_OPS | + MCA_BTL_FLAGS_ATOMIC_FOPS; + mca_btl_ugni_module.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | + MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR | + MCA_BTL_ATOMIC_SUPPORTS_CSWAP; mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); diff --git a/opal/mca/btl/ugni/btl_ugni_endpoint.c b/opal/mca/btl/ugni/btl_ugni_endpoint.c index 5526d40139..df2a81bf84 100644 --- a/opal/mca/btl/ugni/btl_ugni_endpoint.c +++ b/opal/mca/btl/ugni/btl_ugni_endpoint.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011-2013 UT-Battelle, LLC. All rights reserved. 
* $COPYRIGHT$ @@ -10,8 +10,6 @@ * $HEADER$ */ -#include "btl_ugni.h" - #include "btl_ugni_endpoint.h" #include "btl_ugni_smsg.h" @@ -90,10 +88,8 @@ int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnec static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) { int rc; - /* get the modex info for this endpoint and setup a ugni endpoint */ - rc = opal_common_ugni_endpoint_for_proc (ep->btl->device, ep->peer_proc, &ep->common); - if (OPAL_SUCCESS != rc) { - assert (0); + rc = mca_btl_ugni_ep_connect_rdma (ep); /* the common-endpoint lookup and rdma_ep_handle creation that used to be done inline in this function now go through this helper */ + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { return rc; } @@ -107,11 +103,6 @@ static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) { return rc; } - rc = opal_common_ugni_ep_create (ep->common, ep->btl->rdma_local_cq, &ep->rdma_ep_handle); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - return rc; - } - /* build connection data */ rc = mca_btl_ugni_ep_smsg_get_mbox (ep); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { diff --git a/opal/mca/btl/ugni/btl_ugni_endpoint.h b/opal/mca/btl/ugni/btl_ugni_endpoint.h index c83f81ea53..79908471f9 100644 --- a/opal/mca/btl/ugni/btl_ugni_endpoint.h +++ b/opal/mca/btl/ugni/btl_ugni_endpoint.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. 
* $COPYRIGHT$ @@ -17,6 +17,7 @@ enum mca_btl_ugni_endpoint_state_t { MCA_BTL_UGNI_EP_STATE_INIT = 0, + MCA_BTL_UGNI_EP_STATE_RDMA, /* set by mca_btl_ugni_ep_connect_rdma after rdma_ep_handle has been created */ MCA_BTL_UGNI_EP_STATE_CONNECTING, MCA_BTL_UGNI_EP_STATE_CONNECTED }; @@ -114,6 +115,7 @@ static inline int mca_btl_ugni_check_endpoint_state (mca_btl_ugni_endpoint_t *ep switch (ep->state) { case MCA_BTL_UGNI_EP_STATE_INIT: + case MCA_BTL_UGNI_EP_STATE_RDMA: /* handled exactly like INIT here: shares the mca_btl_ugni_ep_connect_progress path */ rc = mca_btl_ugni_ep_connect_progress (ep); if (OPAL_SUCCESS != rc) { break; @@ -130,6 +132,43 @@ static inline int mca_btl_ugni_check_endpoint_state (mca_btl_ugni_endpoint_t *ep return rc; } +static inline int mca_btl_ugni_ep_connect_rdma (mca_btl_base_endpoint_t *ep) { /* Prepares ep for RDMA: looks up the common endpoint for peer_proc, creates rdma_ep_handle on the module's rdma_local_cq, then sets state to MCA_BTL_UGNI_EP_STATE_RDMA. Returns OPAL_SUCCESS immediately when state is already RDMA or later, otherwise the first failing call's error. Takes no lock itself. */ + int rc; + + if (ep->state >= MCA_BTL_UGNI_EP_STATE_RDMA) { + return OPAL_SUCCESS; + } + + /* get the modex info for this endpoint and setup a ugni endpoint */ + rc = opal_common_ugni_endpoint_for_proc (ep->btl->device, ep->peer_proc, &ep->common); + if (OPAL_SUCCESS != rc) { + assert (0); + return rc; + } + + /* bind endpoint to remote address */ + rc = opal_common_ugni_ep_create (ep->common, ep->btl->rdma_local_cq, &ep->rdma_ep_handle); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + return rc; + } + + ep->state = MCA_BTL_UGNI_EP_STATE_RDMA; + + return OPAL_SUCCESS; +} + +static inline int mca_btl_ugni_check_endpoint_state_rdma (mca_btl_base_endpoint_t *ep) { /* Makes sure ep has at least reached MCA_BTL_UGNI_EP_STATE_RDMA, running mca_btl_ugni_ep_connect_rdma under ep->lock when it is still in INIT; returns that call's result. */ + int rc; + if (OPAL_LIKELY(MCA_BTL_UGNI_EP_STATE_INIT < ep->state)) { /* checked without the lock; mca_btl_ugni_ep_connect_rdma tests the state again once the lock is held */ + return OPAL_SUCCESS; + } + + opal_mutex_lock (&ep->lock); + rc = mca_btl_ugni_ep_connect_rdma (ep); + opal_mutex_unlock (&ep->lock); + return rc; +} + static inline int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module) { gni_return_t rc; diff --git a/opal/mca/btl/ugni/btl_ugni_get.c b/opal/mca/btl/ugni/btl_ugni_get.c index 2f324028d3..f244035a56 100644 --- a/opal/mca/btl/ugni/btl_ugni_get.c +++ b/opal/mca/btl/ugni/btl_ugni_get.c @@ -34,7 +34,7 @@ int mca_btl_ugni_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t local_address, remote_address)); /* cause 
endpoint to bind if it isn't already (bind is sufficient for rdma) */ - (void) mca_btl_ugni_check_endpoint_state(endpoint); + (void) mca_btl_ugni_check_endpoint_state_rdma (endpoint); /* NOTE(review): the result is discarded, so a failed RDMA setup is not reported before mca_btl_ugni_post runs; confirm this is intended */ return mca_btl_ugni_post (endpoint, true, size, local_address, remote_address, local_handle, remote_handle, order, cbfunc, cbcontext, cbdata); diff --git a/opal/mca/btl/ugni/btl_ugni_module.c b/opal/mca/btl/ugni/btl_ugni_module.c index 42b5096d15..9161d653f0 100644 --- a/opal/mca/btl/ugni/btl_ugni_module.c +++ b/opal/mca/btl/ugni/btl_ugni_module.c @@ -55,6 +55,9 @@ mca_btl_ugni_module_t mca_btl_ugni_module = { .btl_get = mca_btl_ugni_get, .btl_register_mem = mca_btl_ugni_register_mem, .btl_deregister_mem = mca_btl_ugni_deregister_mem, + .btl_atomic_op = mca_btl_ugni_aop, /* atomic entry points; all three are implemented in btl_ugni_atomic.c */ + .btl_atomic_fop = mca_btl_ugni_afop, + .btl_atomic_cswap = mca_btl_ugni_acswap, } }; diff --git a/opal/mca/btl/ugni/btl_ugni_put.c b/opal/mca/btl/ugni/btl_ugni_put.c index e076607a31..2729314e37 100644 --- a/opal/mca/btl/ugni/btl_ugni_put.c +++ b/opal/mca/btl/ugni/btl_ugni_put.c @@ -23,7 +23,7 @@ int mca_btl_ugni_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t local_address, remote_address)); /* cause endpoint to bind if it isn't already (bind is sufficient for rdma) */ - (void) mca_btl_ugni_check_endpoint_state(endpoint); + (void) mca_btl_ugni_check_endpoint_state_rdma (endpoint); /* NOTE(review): the result is discarded, so a failed RDMA setup is not reported before mca_btl_ugni_post runs; confirm this is intended */ return mca_btl_ugni_post (endpoint, false, size, local_address, remote_address, local_handle, remote_handle, order, cbfunc, cbcontext, cbdata);