btl/ugni: switch to using tx cq irqs for rdma
Verified through unit tests and other testing that BTE TX descriptors using CQs configured to generate IRQs do work correctly on Cray XC. Disable sending a message back to self and just use the IRQs generated by completion of TX descriptors posted to the BTE.
Этот коммит содержится в:
родитель
acd07d98da
Коммит
2d14c2a204
@ -36,9 +36,7 @@ static inline void init_gni_post_desc (mca_btl_ugni_base_frag_t *frag,
|
||||
frag->post_desc.base.remote_addr = (uint64_t) rem_addr;
|
||||
frag->post_desc.base.remote_mem_hndl = rem_mdh;
|
||||
frag->post_desc.base.length = bufsize;
|
||||
#if 0
|
||||
frag->post_desc.base.rdma_mode = GNI_RDMAMODE_FENCE;
|
||||
#endif
|
||||
frag->post_desc.base.rdma_mode = 0;
|
||||
frag->post_desc.base.rdma_mode = 0;
|
||||
frag->post_desc.base.src_cq_hndl = cq_hndl;
|
||||
frag->post_desc.tries = 0;
|
||||
@ -64,40 +62,21 @@ static inline int mca_btl_ugni_post_fma (mca_btl_ugni_base_frag_t *frag, gni_pos
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static void mca_btl_ugni_write_to_self_complete(struct mca_btl_ugni_base_frag_t *frag, int rc)
|
||||
{
|
||||
frag->flags |= MCA_BTL_UGNI_FRAG_COMPLETE;
|
||||
|
||||
BTL_VERBOSE(("cqwrite frag complete"));
|
||||
#if 0
|
||||
fprintf(stderr,"returning cq_frag %p\n",frag);
|
||||
#endif
|
||||
mca_btl_ugni_frag_return (frag);
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_post_bte (mca_btl_ugni_base_frag_t *frag, gni_post_type_t op_type,
|
||||
mca_btl_ugni_segment_t *lcl_seg, mca_btl_ugni_segment_t *rem_seg)
|
||||
{
|
||||
int rc;
|
||||
gni_return_t status;
|
||||
mca_btl_ugni_base_frag_t *cq_frag = NULL;
|
||||
extern void *howards_start_addr;
|
||||
|
||||
/* Post descriptor */
|
||||
#if 0
|
||||
if (howards_progress_var && (getenv("GENERATE_RDMA_IRQS") != NULL)) {
|
||||
fprintf(stderr,"Calling GNI_PostRdma with to trigger interrupt on rdma_local_irq_cq %p\n",frag->endpoint->btl->rdma_local_irq_cq);
|
||||
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
|
||||
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len,
|
||||
frag->endpoint->btl->rdma_local_irq_cq);
|
||||
} else {
|
||||
#endif
|
||||
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
|
||||
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len,
|
||||
frag->endpoint->btl->rdma_local_cq);
|
||||
#if 0
|
||||
}
|
||||
#endif
|
||||
|
||||
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
|
||||
status = GNI_PostRdma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
|
||||
@ -107,41 +86,6 @@ static inline int mca_btl_ugni_post_bte (mca_btl_ugni_base_frag_t *frag, gni_pos
|
||||
return opal_common_rc_ugni_to_opal(status);
|
||||
}
|
||||
|
||||
if (howards_progress_var && (getenv("GENERATE_RDMA_IRQS") != NULL)) {
|
||||
|
||||
rc = mca_btl_ugni_frag_alloc(frag->endpoint,
|
||||
&frag->endpoint->btl->rdma_frags,
|
||||
&cq_frag);
|
||||
if (rc == OPAL_SUCCESS) {
|
||||
cq_frag->registration = NULL;
|
||||
cq_frag->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
|
||||
cq_frag->post_desc.base.type = GNI_POST_RDMA_PUT;
|
||||
cq_frag->post_desc.base.length = 4;
|
||||
cq_frag->post_desc.base.remote_addr = (uint64_t)howards_start_addr;
|
||||
cq_frag->post_desc.base.remote_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
|
||||
cq_frag->post_desc.base.local_addr = (uint64_t)howards_start_addr;
|
||||
cq_frag->post_desc.base.cq_mode = GNI_CQMODE_REMOTE_EVENT | GNI_CQMODE_GLOBAL_EVENT;
|
||||
cq_frag->post_desc.base.dlvr_mode = GNI_DLVMODE_IN_ORDER;
|
||||
cq_frag->post_desc.base.src_cq_hndl = mca_btl_ugni_component.modules[0].rdma_local_cq;
|
||||
cq_frag->post_desc.base.rdma_mode = 0;
|
||||
cq_frag->post_desc.base.local_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
|
||||
cq_frag->post_desc.base.post_id = 0xFFFF;
|
||||
cq_frag->post_desc.tries = 0;
|
||||
cq_frag->cbfunc = mca_btl_ugni_write_to_self_complete;
|
||||
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
|
||||
status = GNI_PostRdma(mca_btl_ugni_component.modules[0].local_ep,&cq_frag->post_desc.base);
|
||||
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
|
||||
if (status == GNI_RC_ERROR_RESOURCE) { /* errors for PostCqWrite treated as non-fatal */
|
||||
fprintf(stderr,"GNI_PostRdma returned %s\n",gni_err_str[status]);
|
||||
mca_btl_ugni_frag_return (cq_frag);
|
||||
} else {
|
||||
if (status != GNI_RC_SUCCESS) {
|
||||
fprintf(stderr,"GNI_PostRdma returned %s\n",gni_err_str[status]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user