1
1

Initial thread safety for the uGNI BTL

This commit adds initial uGNI thread safety support.
With this commit, the Sun thread tests (except MPI-2 RMA)
pass with various process counts and threads per process.
osu_latency_mt also passes.
Этот коммит содержится в:
Howard Pritchard 2014-10-08 10:10:19 -06:00
родитель 81917412a8
Коммит 9947758d98
16 изменённых файлов: 201 добавлений и 61 удалений

Просмотреть файл

@ -50,6 +50,7 @@ typedef struct mca_btl_ugni_endpoint_attr_t {
uint64_t proc_id;
uint32_t index;
gni_smsg_attr_t smsg_attr;
gni_mem_handle_t rmt_irq_mem_hndl;
} mca_btl_ugni_endpoint_attr_t;
enum {
@ -68,17 +69,23 @@ typedef struct mca_btl_ugni_module_t {
opal_pointer_array_t endpoints;
opal_hash_table_t id_to_endpoint;
/* lock for this list */
opal_mutex_t failed_frags_lock;
opal_list_t failed_frags;
mca_mpool_base_module_t *smsg_mpool;
ompi_free_list_t smsg_mboxes;
gni_ep_handle_t wildcard_ep;
gni_ep_handle_t local_ep;
struct mca_btl_ugni_endpoint_attr_t wc_remote_attr, wc_local_attr;
gni_cq_handle_t rdma_local_cq;
gni_cq_handle_t smsg_remote_cq;
gni_cq_handle_t smsg_local_cq;
gni_cq_handle_t smsg_remote_irq_cq;
gni_cq_handle_t rdma_local_irq_cq;
/* eager fragment list (registered) */
ompi_free_list_t eager_frags_send;
@ -91,6 +98,9 @@ typedef struct mca_btl_ugni_module_t {
ompi_free_list_t rdma_frags;
ompi_free_list_t rdma_int_frags;
/* lock for this list */
opal_mutex_t ep_wait_list_lock;
/* endpoints waiting on credits */
opal_list_t ep_wait_list;
@ -98,13 +108,13 @@ typedef struct mca_btl_ugni_module_t {
opal_pointer_array_t pending_smsg_frags_bb;
uint32_t reg_max;
uint32_t reg_count;
volatile int reg_count;
/* used to calculate the fraction of registered memory resources
* this rank should be limited too */
int nlocal_procs;
int active_send_count;
volatile int active_send_count;
} mca_btl_ugni_module_t;
typedef struct mca_btl_ugni_component_t {
@ -294,4 +304,5 @@ static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
return ((uint64_t) (opal_process_name_jobid(name) & 0x7fffffff) << 32 | opal_process_name_vpid(name));
}
#endif

Просмотреть файл

@ -92,22 +92,28 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs);
if (false == ugni_module->initialized) {
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->rdma_local_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local BTE/FMA CQ"));
return opal_common_rc_ugni_to_opal (rc);
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->smsg_local_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local SMSG CQ"));
return opal_common_rc_ugni_to_opal (rc);
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.remote_cq_size,
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->smsg_remote_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating remote SMSG CQ"));
return opal_common_rc_ugni_to_opal (rc);
@ -179,14 +185,17 @@ static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size,
return OPAL_ERR_OUT_OF_RESOURCE;
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
size, NULL, GNI_MEM_READWRITE | GNI_MEM_RELAXED_PI_ORDERING,
-1, &(ugni_reg->memory_hdl));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
ugni_module->reg_count++;
opal_atomic_add_32(&ugni_module->reg_count,1);
return OPAL_SUCCESS;
}
@ -199,9 +208,11 @@ static int ugni_reg_smsg_mem (void *reg_data, void *base, size_t size,
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg;
gni_return_t rc;
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
size, ugni_module->smsg_remote_cq, GNI_MEM_READWRITE, -1,
&(ugni_reg->memory_hdl));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return opal_common_rc_ugni_to_opal (rc);
}
@ -212,12 +223,14 @@ ugni_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg)
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *)reg;
gni_return_t rc;
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemDeregister (ugni_module->device->dev_handle, &ugni_reg->memory_hdl);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
return OPAL_ERROR;
}
ugni_module->reg_count--;
opal_atomic_add_32(&ugni_module->reg_count,-1);
return OPAL_SUCCESS;
}

Просмотреть файл

@ -55,7 +55,7 @@ btl_ugni_component_register(void)
int rc;
(void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version,
"Gemini byte transport layer");
"uGNI byte transport layer");
mca_btl_ugni_component.ugni_free_list_num = 8;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
@ -63,7 +63,7 @@ btl_ugni_component_register(void)
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.ugni_free_list_num);
mca_btl_ugni_component.ugni_free_list_max = 16384;
mca_btl_ugni_component.ugni_free_list_max = 4096;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"free_list_max", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
@ -285,13 +285,6 @@ mca_btl_ugni_component_init (int *num_btl_modules,
unsigned int i;
int rc;
/* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */
if (opal_using_threads() && !mca_btl_base_thread_multiple_override) {
opal_output_verbose(5, opal_btl_base_framework.framework_output,
"btl:ugni: MPI_THREAD_MULTIPLE not supported; skipping this component");
return NULL;
}
if (16384 < mca_btl_ugni_component.ugni_smsg_limit) {
mca_btl_ugni_component.ugni_smsg_limit = 16384;
}
@ -365,7 +358,9 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
int count = 0, rc;
/* check for datagram completion */
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); /* TODO: may not need lock for this function */
grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
return 0;
}
@ -382,8 +377,10 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
}
/* wait for the incoming datagram to complete (in case it isn't) */
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); /* TODO: may not need lock for this function */
grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
&remote_addr, &remote_id);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != grc) {
BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
return opal_common_rc_ugni_to_opal (grc);
@ -429,28 +426,34 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
}
static inline int
mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, int which_cq)
{
opal_common_ugni_post_desc_t *desc;
mca_btl_ugni_base_frag_t *frag;
gni_cq_entry_t event_data = 0;
uint32_t recoverable = 1;
gni_return_t rc;
gni_cq_handle_t the_cq;
rc = GNI_CqGetEvent (ugni_module->rdma_local_cq, &event_data);
the_cq = (which_cq == 0) ? ugni_module->rdma_local_cq : ugni_module->rdma_local_irq_cq;
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqGetEvent (the_cq, &event_data);
if (GNI_RC_NOT_DONE == rc) {
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return 0;
}
if (OPAL_UNLIKELY((GNI_RC_SUCCESS != rc && !event_data) || GNI_CQ_OVERRUN(event_data))) {
/* TODO -- need to handle overrun -- how do we do this without an event?
will the event eventually come back? Ask Cray */
BTL_ERROR(("unhandled post error! ugni rc = %d", rc));
assert (0);
BTL_ERROR(("unhandled post error! ugni rc = %d %s", rc,gni_err_str[rc]));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return opal_common_rc_ugni_to_opal (rc);
}
rc = GNI_GetCompleted (ugni_module->rdma_local_cq, event_data, (gni_post_descriptor_t **) &desc);
rc = GNI_GetCompleted (the_cq, event_data, (gni_post_descriptor_t **) &desc);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc && GNI_RC_TRANSACTION_ERROR != rc)) {
BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[rc]));
return opal_common_rc_ugni_to_opal (rc);
@ -459,13 +462,18 @@ mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module)
frag = MCA_BTL_UGNI_DESC_TO_FRAG(desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc || !GNI_CQ_STATUS_OK(event_data))) {
char buffer[1024];
(void) GNI_CqErrorRecoverable (event_data, &recoverable);
GNI_CqErrorStr(event_data,buffer,sizeof(buffer));
if (OPAL_UNLIKELY(++frag->post_desc.tries >= mca_btl_ugni_component.rdma_max_retries ||
!recoverable)) {
/* give up */
BTL_ERROR(("giving up on frag %p", (void *) frag));
frag->cbfunc (frag, OPAL_ERROR);
BTL_ERROR(("giving up on frag %p type %d CQE error %s", (void *) frag, frag->post_desc.base.type, buffer));
if (frag->cbfunc) {
frag->cbfunc (frag, OPAL_ERROR);
}
return OPAL_ERROR;
}
@ -478,7 +486,9 @@ mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module)
BTL_VERBOSE(("RDMA/FMA complete for frag %p", (void *) frag));
frag->cbfunc (frag, opal_common_rc_ugni_to_opal (rc));
if (frag->cbfunc) {
frag->cbfunc (frag, opal_common_rc_ugni_to_opal (rc));
}
return 1;
}
@ -490,11 +500,15 @@ mca_btl_ugni_retry_failed (mca_btl_ugni_module_t *ugni_module)
int i;
for (i = 0 ; i < count ; ++i) {
OPAL_THREAD_LOCK(&ugni_module->failed_frags_lock);
mca_btl_ugni_base_frag_t *frag =
(mca_btl_ugni_base_frag_t *) opal_list_remove_first (&ugni_module->failed_frags);
OPAL_THREAD_UNLOCK(&ugni_module->failed_frags_lock);
assert (NULL != frag);
frag->cbfunc (frag, OPAL_SUCCESS);
if (frag->cbfunc) {
frag->cbfunc (frag, OPAL_SUCCESS);
}
}
return count;
@ -503,24 +517,39 @@ mca_btl_ugni_retry_failed (mca_btl_ugni_module_t *ugni_module)
static inline int
mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module)
{
int count = opal_list_get_size (&ugni_module->ep_wait_list);
int rc, i;
int rc = OPAL_SUCCESS;
mca_btl_base_endpoint_t *endpoint = NULL;
int count;
for (i = 0 ; i < count ; ++i) {
mca_btl_base_endpoint_t *endpoint =
(mca_btl_base_endpoint_t *) opal_list_remove_first (&ugni_module->ep_wait_list);
assert (NULL != endpoint);
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
count = opal_list_get_size(&ugni_module->ep_wait_list);
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
endpoint->wait_listed = false;
do {
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
endpoint = (mca_btl_base_endpoint_t *) opal_list_remove_first (&ugni_module->ep_wait_list);
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
if (endpoint != NULL) {
rc = mca_btl_ugni_progress_send_wait_list (endpoint);
if (OPAL_SUCCESS != rc && false == endpoint->wait_listed) {
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
endpoint->wait_listed = true;
endpoint->wait_listed = false;
rc = mca_btl_ugni_progress_send_wait_list (endpoint);
if (OPAL_SUCCESS != rc && false == endpoint->wait_listed) {
endpoint->wait_listed = true;
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
}
}
}
return count;
--count;
if (count == 0) break;
} while (endpoint != NULL) ;
return rc;
}
static int mca_btl_ugni_component_progress (void)
@ -538,7 +567,8 @@ static int mca_btl_ugni_component_progress (void)
count += mca_btl_ugni_progress_datagram (ugni_module);
count += mca_btl_ugni_progress_local_smsg (ugni_module);
count += mca_btl_ugni_progress_remote_smsg (ugni_module);
count += mca_btl_ugni_progress_rdma (ugni_module);
count += mca_btl_ugni_progress_rdma (ugni_module, 0);
}
return count;

Просмотреть файл

@ -58,17 +58,24 @@ int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnec
}
if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
rc = GNI_SmsgSendWTag (ep->smsg_ep_handle, NULL, 0, NULL, 0, -1,
MCA_BTL_UGNI_TAG_DISCONNECT);
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("btl/ugni could not send close message"));
}
/* we might want to wait for local completion here (do we even care) */
/* we might want to wait for local completion here (do we even care), yes we do */
/* TODO: FIX FIX FIX */
}
/* TODO: FIX GROSS */
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
(void) opal_common_ugni_ep_destroy (&ep->smsg_ep_handle);
(void) opal_common_ugni_ep_destroy (&ep->rdma_ep_handle);
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
OMPI_FREE_LIST_RETURN_MT(&ep->btl->smsg_mboxes, ((ompi_free_list_item_t *) ep->mailbox));
ep->mailbox = NULL;
@ -89,7 +96,7 @@ static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
}
BTL_VERBOSE(("initiaiting connection to remote peer with address: %u id: %u proc: %p",
ep->common->ep_rem_addr, ep->common->ep_rem_id, ep->peer_proc));
ep->common->ep_rem_addr, ep->common->ep_rem_id, (void *)ep->peer_proc));
/* bind endpoint to remote address */
/* we bind two endpoints to seperate out local smsg completion and local fma completion */
@ -150,6 +157,7 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
GNI_EpSetEventData (ep->rdma_ep_handle, ep->index, ep->remote_attr.index);
GNI_EpSetEventData (ep->smsg_ep_handle, ep->index, ep->remote_attr.index);
ep->rmt_irq_mem_hndl = ep->remote_attr.rmt_irq_mem_hndl;
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
/* send all pending messages */
@ -158,7 +166,9 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
rc = mca_btl_ugni_progress_send_wait_list (ep);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
ep->wait_listed = true;
OPAL_THREAD_LOCK(&ep->btl->ep_wait_list_lock);
opal_list_append (&ep->btl->ep_wait_list, &ep->super);
OPAL_THREAD_UNLOCK(&ep->btl->ep_wait_list_lock);
}
return OPAL_SUCCESS;
@ -167,7 +177,8 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
static inline int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep) {
gni_return_t rc;
BTL_VERBOSE(("posting directed datagram to remote id: %d for endpoint %p", ep->common->ep_rem_id, ep));
BTL_VERBOSE(("posting directed datagram to remote id: %d for endpoint %p", ep->common->ep_rem_id, (void *)ep));
ep->mailbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
rc = GNI_EpPostDataWId (ep->smsg_ep_handle, &ep->mailbox->attr, sizeof (ep->mailbox->attr),
&ep->remote_attr, sizeof (ep->remote_attr),
@ -179,7 +190,7 @@ static inline int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep) {
int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
int rc;
BTL_VERBOSE(("progressing connection for endpoint %p with state %d", ep, ep->state));
BTL_VERBOSE(("progressing connection for endpoint %p with state %d", (void *)ep, ep->state));
if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
return OPAL_SUCCESS;

Просмотреть файл

@ -39,9 +39,11 @@ typedef struct mca_btl_base_endpoint_t {
gni_ep_handle_t smsg_ep_handle;
gni_ep_handle_t rdma_ep_handle;
mca_btl_ugni_endpoint_attr_t remote_attr;
mca_btl_ugni_endpoint_attr_t remote_attr; /* TODO: UGH, remove this */
struct mca_btl_ugni_smsg_mbox_t *mailbox;
gni_mem_handle_t rmt_irq_mem_hndl;
opal_list_t frag_wait_list;
bool wait_listed;

Просмотреть файл

@ -74,12 +74,14 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
ugni_module->active_send_count = 0;
OBJ_CONSTRUCT(&ugni_module->failed_frags, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->failed_frags_lock,opal_mutex_t);
OBJ_CONSTRUCT(&ugni_module->eager_frags_send, ompi_free_list_t);
OBJ_CONSTRUCT(&ugni_module->eager_frags_recv, ompi_free_list_t);
OBJ_CONSTRUCT(&ugni_module->smsg_frags, ompi_free_list_t);
OBJ_CONSTRUCT(&ugni_module->rdma_frags, ompi_free_list_t);
OBJ_CONSTRUCT(&ugni_module->rdma_int_frags, ompi_free_list_t);
OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t);
OBJ_CONSTRUCT(&ugni_module->ep_wait_list_lock,opal_mutex_t);
OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);
@ -90,8 +92,10 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
/* create wildcard endpoint to listen for connections.
* there is no need to bind this endpoint. */
OPAL_THREAD_LOCK(&dev->dev_lock);
rc = GNI_EpCreate (ugni_module->device->dev_handle, NULL,
&ugni_module->wildcard_ep);
OPAL_THREAD_UNLOCK(&dev->dev_lock);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating wildcard ugni endpoint"));
return opal_common_rc_ugni_to_opal (rc);
@ -136,6 +140,7 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
}
/* destroy all cqs */
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqDestroy (ugni_module->rdma_local_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down local BTE/FMA CQ"));
@ -164,6 +169,7 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("btl/ugni error destroying endpoint"));
}
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
}
OBJ_DESTRUCT(&ugni_module->eager_frags_send);

Просмотреть файл

@ -36,7 +36,7 @@ static inline void init_gni_post_desc (mca_btl_ugni_base_frag_t *frag,
frag->post_desc.base.remote_addr = (uint64_t) rem_addr;
frag->post_desc.base.remote_mem_hndl = rem_mdh;
frag->post_desc.base.length = bufsize;
frag->post_desc.base.rdma_mode = 0;
frag->post_desc.base.rdma_mode = GNI_RDMAMODE_FENCE;
frag->post_desc.base.src_cq_hndl = cq_hndl;
frag->post_desc.tries = 0;
}
@ -50,7 +50,9 @@ static inline int mca_btl_ugni_post_fma (mca_btl_ugni_base_frag_t *frag, gni_pos
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len, 0);
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
rc = GNI_PostFma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("GNI_PostFma failed with gni rc: %d", rc));
return OPAL_ERR_OUT_OF_RESOURCE;
@ -62,17 +64,19 @@ static inline int mca_btl_ugni_post_fma (mca_btl_ugni_base_frag_t *frag, gni_pos
static inline int mca_btl_ugni_post_bte (mca_btl_ugni_base_frag_t *frag, gni_post_type_t op_type,
mca_btl_ugni_segment_t *lcl_seg, mca_btl_ugni_segment_t *rem_seg)
{
gni_return_t rc;
gni_return_t status;
/* Post descriptor */
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len,
frag->endpoint->btl->rdma_local_cq);
rc = GNI_PostRdma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("GNI_PostRdma failed with gni rc: %d", rc));
return OPAL_ERR_OUT_OF_RESOURCE;
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
status = GNI_PostRdma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
if (GNI_RC_SUCCESS != status) {
BTL_VERBOSE(("GNI_PostRdma failed with gni rc: %d", status));
return opal_common_rc_ugni_to_opal(status);
}
return OPAL_SUCCESS;
@ -99,16 +103,20 @@ static inline void mca_btl_ugni_repost (mca_btl_ugni_base_frag_t *frag, int rc)
frag->cbfunc = mca_btl_ugni_frag_complete;
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
if (GNI_POST_RDMA_PUT == frag->post_desc.base.type ||
GNI_POST_RDMA_GET == frag->post_desc.base.type) {
grc = GNI_PostRdma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
} else {
grc = GNI_PostFma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
}
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
frag->cbfunc = mca_btl_ugni_repost;
OPAL_THREAD_LOCK(&frag->endpoint->btl->failed_frags_lock);
opal_list_append (&frag->endpoint->btl->failed_frags, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&frag->endpoint->btl->failed_frags_lock);
}
}

Просмотреть файл

@ -32,7 +32,9 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
rc = mca_btl_ugni_check_endpoint_state (endpoint);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
OPAL_THREAD_LOCK(&endpoint->lock);
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&endpoint->lock);
return OPAL_SUCCESS;
}
@ -71,11 +73,15 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) {
/* queue up request */
if (false == endpoint->wait_listed) {
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
endpoint->wait_listed = true;
}
OPAL_THREAD_LOCK(&endpoint->lock);
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&endpoint->lock);
rc = OPAL_SUCCESS;
}
@ -133,22 +139,28 @@ mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
int mca_btl_ugni_progress_send_wait_list (mca_btl_base_endpoint_t *endpoint)
{
mca_btl_ugni_base_frag_t *frag;
mca_btl_ugni_base_frag_t *frag=NULL;
int rc;
while (NULL !=
(frag = (mca_btl_ugni_base_frag_t *) opal_list_remove_first (&endpoint->frag_wait_list))) {
do {
OPAL_THREAD_LOCK(&endpoint->lock);
frag = (mca_btl_ugni_base_frag_t *) opal_list_remove_first (&endpoint->frag_wait_list);
OPAL_THREAD_UNLOCK(&endpoint->lock);
if (NULL == frag) {
break;
}
rc = mca_btl_ugni_send_frag (endpoint, frag);
if (OPAL_UNLIKELY(OPAL_SUCCESS > rc)) {
if (OPAL_LIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) {
OPAL_THREAD_LOCK(&endpoint->lock);
opal_list_prepend (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&endpoint->lock);
} else {
mca_btl_ugni_frag_complete (frag, rc);
}
return rc;
}
}
} while(1);
return OPAL_SUCCESS;
}

Просмотреть файл

@ -27,8 +27,17 @@ static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
mbox->attr.smsg_attr.msg_buffer = base_reg->base;
mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size;
mbox->attr.smsg_attr.mem_hndl = ugni_reg->memory_hdl;
#if 0
fprintf(stderr,"ugni_reg->memory_hdl 0x%lx 0x%lx\n",
ugni_reg->memory_hdl.qword1,ugni_reg->memory_hdl.qword2);
#endif
mbox->attr.proc_id = mca_btl_ugni_proc_name_to_id (OPAL_PROC_MY_NAME);
mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
#if 0
fprintf(stderr,"Invoked mca_btl_ugni_smsg_mbox_construct with mbox->attr.rmt_irq_mem_hndl = 0x%lx 0x%lx\n",
mbox->attr.rmt_irq_mem_hndl.qword1,mbox->attr.rmt_irq_mem_hndl.qword2);
#endif
}
OBJ_CLASS_INSTANCE(mca_btl_ugni_smsg_mbox_t, ompi_free_list_item_t,
@ -42,7 +51,7 @@ int mca_btl_ugni_smsg_init (mca_btl_ugni_module_t *ugni_module)
rc = GNI_SmsgSetMaxRetrans (ugni_module->device->dev_handle,
mca_btl_ugni_component.smsg_max_retries);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error setting maximum SMSG retries"));
BTL_ERROR(("error setting maximum SMSG retries %s",gni_err_str[rc]));
return opal_common_rc_ugni_to_opal (rc);
}
@ -70,9 +79,11 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
do {
uint8_t tag = GNI_SMSG_ANY_TAG;
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
rc = GNI_SmsgGetNextWTag (ep->smsg_ep_handle, (void **) &data_ptr, &tag);
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
if (GNI_RC_NOT_DONE == rc) {
BTL_VERBOSE(("no smsg message waiting. rc = %d", rc));
BTL_VERBOSE(("no smsg message waiting. rc = %s", gni_err_str[rc]));
ep->smsg_progressing = 0;
@ -80,7 +91,7 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
}
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
fprintf (stderr, "Unhandled Smsg error: %d\n", rc);
fprintf (stderr, "Unhandled Smsg error: %s\n", gni_err_str[rc]);
assert (0);
return OPAL_ERROR;
}
@ -140,7 +151,9 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
break;
}
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
rc = GNI_SmsgRelease (ep->smsg_ep_handle);
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
BTL_ERROR(("Smsg release failed! rc = %d", rc));
return OPAL_ERROR;
@ -175,7 +188,9 @@ mca_btl_ugni_handle_remote_smsg_overrun (mca_btl_ugni_module_t *btl)
/* clear out remote cq */
do {
OPAL_THREAD_LOCK(&btl->device->dev_lock);
rc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
OPAL_THREAD_UNLOCK(&btl->device->dev_lock);
} while (GNI_RC_NOT_DONE != rc);
endpoint_count = opal_pointer_array_get_size (&btl->endpoints);
@ -207,7 +222,9 @@ int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl)
gni_return_t grc;
uint64_t inst_id;
OPAL_THREAD_LOCK(&btl->device->dev_lock);
grc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
OPAL_THREAD_UNLOCK(&btl->device->dev_lock);
if (GNI_RC_NOT_DONE == grc) {
return 0;
}

Просмотреть файл

@ -47,7 +47,9 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
return OPAL_SUCCESS;
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
grc = GNI_CqGetEvent (ugni_module->smsg_local_cq, &event_data);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_NOT_DONE == grc) {
return OPAL_SUCCESS;
}
@ -69,7 +71,7 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
return OPAL_ERROR;
}
ugni_module->active_send_count--;
opal_atomic_add_32(&ugni_module->active_send_count,-1);
frag->flags |= MCA_BTL_UGNI_FRAG_SMSG_COMPLETE;
@ -83,15 +85,18 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
static inline int opal_mca_btl_ugni_smsg_send (mca_btl_ugni_base_frag_t *frag,
void *hdr, size_t hdr_len,
void *payload, size_t payload_len,
mca_btl_ugni_smsg_tag_t tag) {
mca_btl_ugni_smsg_tag_t tag)
{
gni_return_t grc;
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
grc = GNI_SmsgSendWTag (frag->endpoint->smsg_ep_handle, hdr, hdr_len,
payload, payload_len, frag->msg_id, tag);
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
if (OPAL_LIKELY(GNI_RC_SUCCESS == grc)) {
/* increment the active send counter */
frag->endpoint->btl->active_send_count++;
opal_atomic_add_32(&frag->endpoint->btl->active_send_count,1);
(void) mca_btl_ugni_progress_local_smsg ((mca_btl_ugni_module_t *) frag->endpoint->btl);
return OPAL_SUCCESS;

Просмотреть файл

@ -136,6 +136,8 @@ static int opal_common_ugni_device_init (opal_common_ugni_device_t *device,
OPAL_OUTPUT((-1, "Got NIC Addr: 0x%08x, CPU ID: %d", device->dev_addr, device->dev_id));
OBJ_CONSTRUCT(&device->dev_lock,opal_mutex_t);
/* Attach device to the communication domain */
rc = GNI_CdmAttach (opal_common_ugni_module.cd_handle, device->dev_id,
&device->dev_pe_addr, &device->dev_handle);
@ -267,8 +269,9 @@ int opal_common_ugni_init (void)
mca_btl_ugni_component.rdma_max_retries;
/* Create a communication domain */
/* TODO - bte single should be removed when the IRQ problem is figured out */
modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED |
GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL;
GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL | GNI_CDM_MODE_BTE_SINGLE_CHANNEL;
/* collect uGNI information */
rc = get_ptag(&opal_common_ugni_module.ptag);

Просмотреть файл

@ -33,6 +33,7 @@
struct opal_common_ugni_modex_t {
uint32_t addr;
int id;
gni_mem_handle_t irq_memhndl;
};
typedef struct opal_common_ugni_modex_t opal_common_ugni_modex_t;
@ -47,8 +48,10 @@ struct opal_common_ugni_device_t {
uint32_t dev_addr;
uint32_t dev_cpu_id;
size_t dev_ep_count;
void *btl_ctx;
size_t dev_ep_count;
opal_mutex_t dev_lock;
gni_mem_handle_t smsg_irq_mhndl;
void *btl_ctx;
};
typedef struct opal_common_ugni_device_t opal_common_ugni_device_t;

Просмотреть файл

@ -42,6 +42,7 @@ int opal_common_ugni_endpoint_for_proc (opal_common_ugni_device_t *dev, opal_pro
endpoint->ep_rem_addr = modex->addr;
endpoint->ep_rem_id = modex->id;
endpoint->ep_rem_irq_memhndl = modex->irq_memhndl;
endpoint->dev = dev;
@ -70,14 +71,21 @@ int opal_common_ugni_ep_create (opal_common_ugni_endpoint_t *cep, gni_cq_handle_
}
/* create a uGNI endpoint handle and bind it to the remote peer */
OPAL_THREAD_LOCK(&cep->dev->dev_lock);
grc = GNI_EpCreate (cep->dev->dev_handle, cq, ep_handle);
OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
return opal_common_rc_ugni_to_opal (grc);
}
OPAL_THREAD_LOCK(&cep->dev->dev_lock);
grc = GNI_EpBind (*ep_handle, cep->ep_rem_addr, cep->ep_rem_id);
OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
if (GNI_RC_SUCCESS != grc) {
OPAL_THREAD_LOCK(&cep->dev->dev_lock);
GNI_EpDestroy (*ep_handle);
OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
return opal_common_rc_ugni_to_opal (grc);
}
@ -92,6 +100,7 @@ int opal_common_ugni_ep_destroy (gni_ep_handle_t *ep)
return OPAL_SUCCESS;
}
/* TODO: need to fix, may be outstanding tx's, etc. */
rc = GNI_EpUnbind (*ep);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
/* should warn */

Просмотреть файл

@ -18,6 +18,7 @@ struct opal_common_ugni_device_t;
struct opal_common_ugni_endpoint_t {
opal_object_t super;
uint32_t ep_rem_addr, ep_rem_id; /**< remote information */
gni_mem_handle_t ep_rem_irq_memhndl;
struct opal_common_ugni_device_t *dev; /**< device this endpoint is using */
};
typedef struct opal_common_ugni_endpoint_t opal_common_ugni_endpoint_t;

Просмотреть файл

@ -96,6 +96,7 @@ struct mca_mpool_udreg_module_t {
struct mca_mpool_base_resources_t resources;
ompi_free_list_t reg_list;
mca_mpool_udreg_hugepage_t *huge_page;
opal_mutex_t lock;
void *udreg_handle;
};
typedef struct mca_mpool_udreg_module_t mca_mpool_udreg_module_t;

Просмотреть файл

@ -156,6 +156,8 @@ int mca_mpool_udreg_module_init(mca_mpool_udreg_module_t* mpool)
cache_attr.modes |= UDREG_CC_MODE_USE_LAZY_DEREG;
}
OBJ_CONSTRUCT(&mpool->lock,opal_mutex_t);
strncpy (cache_attr.cache_name, mpool->resources.pool_name, UDREG_MAX_CACHENAME_LEN);
cache_attr.max_entries = mpool->resources.max_entries;
cache_attr.debug_mode = 0;
@ -363,14 +365,17 @@ int mca_mpool_udreg_register(mca_mpool_base_module_t *mpool, void *addr,
if (false == bypass_cache) {
/* Get a udreg entry for this region */
OPAL_THREAD_LOCK(&mpool_udreg->lock);
while (UDREG_RC_SUCCESS !=
(urc = UDREG_Register (mpool_udreg->udreg_handle, addr, size, &udreg_entry))) {
/* try to remove one unused reg and retry */
if (!mca_mpool_udreg_evict (mpool)) {
*reg = NULL;
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
}
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
udreg_reg = (mca_mpool_base_registration_t *) udreg_entry->device_data;
udreg_reg->mpool_context = udreg_entry;
@ -444,7 +449,9 @@ int mca_mpool_udreg_deregister(struct mca_mpool_base_module_t *mpool,
if (0 == reg->ref_count && reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) {
mca_mpool_udreg_dereg_func (reg, mpool);
} else if (!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
OPAL_THREAD_LOCK(&mpool_udreg->lock);
UDREG_DecrRefcount (mpool_udreg->udreg_handle, reg->mpool_context);
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
}
return OPAL_SUCCESS;
@ -473,6 +480,7 @@ void mca_mpool_udreg_finalize(struct mca_mpool_base_module_t *mpool)
UDREG_CacheRelease (mpool_udreg->udreg_handle);
OBJ_DESTRUCT(&mpool_udreg->reg_list);
OBJ_DESTRUCT(&mpool_udreg->lock);
}
int mca_mpool_udreg_ft_event(int state) {