1
1

btl/ugni: switch to using tx cq irqs for rdma

Verified through unit tests and other testing that
BTE TX descriptors using CQs configured to
generate IRQs work correctly on Cray XC.  Disable
sending a message back to self and just use the IRQs generated
by completion of TX descriptors posted to the BTE.
Этот коммит содержится в:
Howard Pritchard 2014-12-23 12:38:52 -07:00
родитель acd07d98da
Коммит 2d14c2a204

Просмотреть файл

@ -36,9 +36,7 @@ static inline void init_gni_post_desc (mca_btl_ugni_base_frag_t *frag,
frag->post_desc.base.remote_addr = (uint64_t) rem_addr;
frag->post_desc.base.remote_mem_hndl = rem_mdh;
frag->post_desc.base.length = bufsize;
#if 0
frag->post_desc.base.rdma_mode = GNI_RDMAMODE_FENCE;
#endif
frag->post_desc.base.rdma_mode = 0;
frag->post_desc.base.rdma_mode = 0;
frag->post_desc.base.src_cq_hndl = cq_hndl;
frag->post_desc.tries = 0;
@ -64,40 +62,21 @@ static inline int mca_btl_ugni_post_fma (mca_btl_ugni_base_frag_t *frag, gni_pos
return OPAL_SUCCESS;
}
/*
 * Callback installed as the cbfunc of the fragment used for the
 * write-to-self RDMA put.
 *
 * Sets MCA_BTL_UGNI_FRAG_COMPLETE on the fragment, emits a verbose trace
 * message, and then passes the fragment to mca_btl_ugni_frag_return().
 *
 * frag  fragment whose post descriptor has completed
 * rc    status handed to the callback; not examined here
 */
static void mca_btl_ugni_write_to_self_complete(struct mca_btl_ugni_base_frag_t *frag, int rc)
{
    (void) rc;  /* intentionally unused */

    frag->flags = frag->flags | MCA_BTL_UGNI_FRAG_COMPLETE;

    BTL_VERBOSE(("cqwrite frag complete"));

    /* The flag is set and the trace emitted before the fragment is handed back. */
    mca_btl_ugni_frag_return (frag);
}
static inline int mca_btl_ugni_post_bte (mca_btl_ugni_base_frag_t *frag, gni_post_type_t op_type,
mca_btl_ugni_segment_t *lcl_seg, mca_btl_ugni_segment_t *rem_seg)
{
int rc;
gni_return_t status;
mca_btl_ugni_base_frag_t *cq_frag = NULL;
extern void *howards_start_addr;
/* Post descriptor */
#if 0
if (howards_progress_var && (getenv("GENERATE_RDMA_IRQS") != NULL)) {
fprintf(stderr,"Calling GNI_PostRdma with to trigger interrupt on rdma_local_irq_cq %p\n",frag->endpoint->btl->rdma_local_irq_cq);
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len,
frag->endpoint->btl->rdma_local_irq_cq);
} else {
#endif
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len,
frag->endpoint->btl->rdma_local_cq);
#if 0
}
#endif
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
status = GNI_PostRdma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
@ -107,41 +86,6 @@ static inline int mca_btl_ugni_post_bte (mca_btl_ugni_base_frag_t *frag, gni_pos
return opal_common_rc_ugni_to_opal(status);
}
if (howards_progress_var && (getenv("GENERATE_RDMA_IRQS") != NULL)) {
rc = mca_btl_ugni_frag_alloc(frag->endpoint,
&frag->endpoint->btl->rdma_frags,
&cq_frag);
if (rc == OPAL_SUCCESS) {
cq_frag->registration = NULL;
cq_frag->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
cq_frag->post_desc.base.type = GNI_POST_RDMA_PUT;
cq_frag->post_desc.base.length = 4;
cq_frag->post_desc.base.remote_addr = (uint64_t)howards_start_addr;
cq_frag->post_desc.base.remote_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
cq_frag->post_desc.base.local_addr = (uint64_t)howards_start_addr;
cq_frag->post_desc.base.cq_mode = GNI_CQMODE_REMOTE_EVENT | GNI_CQMODE_GLOBAL_EVENT;
cq_frag->post_desc.base.dlvr_mode = GNI_DLVMODE_IN_ORDER;
cq_frag->post_desc.base.src_cq_hndl = mca_btl_ugni_component.modules[0].rdma_local_cq;
cq_frag->post_desc.base.rdma_mode = 0;
cq_frag->post_desc.base.local_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
cq_frag->post_desc.base.post_id = 0xFFFF;
cq_frag->post_desc.tries = 0;
cq_frag->cbfunc = mca_btl_ugni_write_to_self_complete;
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
status = GNI_PostRdma(mca_btl_ugni_component.modules[0].local_ep,&cq_frag->post_desc.base);
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
if (status == GNI_RC_ERROR_RESOURCE) { /* errors for PostCqWrite treated as non-fatal */
fprintf(stderr,"GNI_PostRdma returned %s\n",gni_err_str[status]);
mca_btl_ugni_frag_return (cq_frag);
} else {
if (status != GNI_RC_SUCCESS) {
fprintf(stderr,"GNI_PostRdma returned %s\n",gni_err_str[status]);
}
}
}
}
return OPAL_SUCCESS;
}