Adding progress thread support to OpenIB BTL.
Reviewed by Gleb. This commit was SVN r12411.
Этот коммит содержится в:
родитель
f4e81176fe
Коммит
566667ac61
@ -144,10 +144,6 @@ AC_DEFUN([OMPI_CHECK_OPENIB],[
|
|||||||
LIBS="$ompi_check_openib_$1_save_LIBS"],
|
LIBS="$ompi_check_openib_$1_save_LIBS"],
|
||||||
[ompi_check_openib_happy="no"])
|
[ompi_check_openib_happy="no"])
|
||||||
|
|
||||||
AS_IF([test "$ompi_check_openib_happy" = "yes" -a "$enable_progress_threads" = "yes"],
|
|
||||||
[AC_MSG_WARN([OpenIB driver does not currently support progress threads. Disabling BTL.])
|
|
||||||
ompi_check_openib_happy="no"])
|
|
||||||
|
|
||||||
AS_IF([test "$ompi_check_openib_happy" = "yes"],
|
AS_IF([test "$ompi_check_openib_happy" = "yes"],
|
||||||
[$2],
|
[$2],
|
||||||
[AS_IF([test ! -z "$with_openib" -a "$with_openib" != "no"],
|
[AS_IF([test ! -z "$with_openib" -a "$with_openib" != "no"],
|
||||||
|
@ -618,6 +618,14 @@ int mca_btl_openib_finalize(struct mca_btl_base_module_t* btl)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||||
|
if(openib_btl->hca->progress) {
|
||||||
|
openib_btl->hca->progress = false;
|
||||||
|
if (pthread_cancel(openib_btl->hca->thread.t_handle))
|
||||||
|
BTL_ERROR(("Failed to cancel OpenIB progress thread"));
|
||||||
|
opal_thread_join(&openib_btl->hca->thread, NULL);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -763,6 +771,8 @@ int mca_btl_openib_get( mca_btl_base_module_t* btl,
|
|||||||
*/
|
*/
|
||||||
int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
|
int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
|
||||||
{
|
{
|
||||||
|
int rc;
|
||||||
|
|
||||||
/* Allocate Protection Domain */
|
/* Allocate Protection Domain */
|
||||||
openib_btl->poll_cq = false;
|
openib_btl->poll_cq = false;
|
||||||
|
|
||||||
@ -796,6 +806,17 @@ int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Create the low and high priority queue pairs */
|
/* Create the low and high priority queue pairs */
|
||||||
|
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||||
|
#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
|
||||||
|
openib_btl->ib_cq[BTL_OPENIB_LP_QP] =
|
||||||
|
ibv_create_cq(openib_btl->hca->ib_dev_context,
|
||||||
|
mca_btl_openib_component.ib_cq_size, openib_btl->hca->ib_channel);
|
||||||
|
#else
|
||||||
|
openib_btl->ib_cq[BTL_OPENIB_LP_QP] =
|
||||||
|
ibv_create_cq(openib_btl->hca->ib_dev_context,
|
||||||
|
mca_btl_openib_component.ib_cq_size, openib_btl, openib_btl->hca->ib_channel, 0);
|
||||||
|
#endif
|
||||||
|
#else /* OMPI_ENABLE_PROGRESS_THREADS DISABLED */
|
||||||
#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
|
#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
|
||||||
openib_btl->ib_cq[BTL_OPENIB_LP_QP] =
|
openib_btl->ib_cq[BTL_OPENIB_LP_QP] =
|
||||||
ibv_create_cq(openib_btl->hca->ib_dev_context,
|
ibv_create_cq(openib_btl->hca->ib_dev_context,
|
||||||
@ -805,6 +826,7 @@ int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
|
|||||||
ibv_create_cq(openib_btl->hca->ib_dev_context,
|
ibv_create_cq(openib_btl->hca->ib_dev_context,
|
||||||
mca_btl_openib_component.ib_cq_size, NULL, NULL, 0);
|
mca_btl_openib_component.ib_cq_size, NULL, NULL, 0);
|
||||||
#endif
|
#endif
|
||||||
|
#endif /* OMPI_ENABLE_PROGRESS_THREADS */
|
||||||
|
|
||||||
if(NULL == openib_btl->ib_cq[BTL_OPENIB_LP_QP]) {
|
if(NULL == openib_btl->ib_cq[BTL_OPENIB_LP_QP]) {
|
||||||
BTL_ERROR(("error creating low priority cq for %s errno says %s\n",
|
BTL_ERROR(("error creating low priority cq for %s errno says %s\n",
|
||||||
@ -813,6 +835,25 @@ int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
|
|||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||||
|
if(ibv_req_notify_cq(openib_btl->ib_cq[BTL_OPENIB_LP_QP], 0)) {
|
||||||
|
BTL_ERROR(("error requesting low priority cq notification for %s"
|
||||||
|
" errno says %s\n",
|
||||||
|
ibv_get_device_name(openib_btl->hca->ib_dev),
|
||||||
|
strerror(errno)));
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
|
||||||
|
openib_btl->ib_cq[BTL_OPENIB_HP_QP] =
|
||||||
|
ibv_create_cq(openib_btl->hca->ib_dev_context,
|
||||||
|
mca_btl_openib_component.ib_cq_size, openib_btl->hca->ib_channel);
|
||||||
|
#else
|
||||||
|
openib_btl->ib_cq[BTL_OPENIB_HP_QP] =
|
||||||
|
ibv_create_cq(openib_btl->hca->ib_dev_context,
|
||||||
|
mca_btl_openib_component.ib_cq_size, openib_btl, openib_btl->hca->ib_channel, 0);
|
||||||
|
#endif
|
||||||
|
#else /* OMPI_ENABLE_PROGRESS_THREADS DISABLED */
|
||||||
#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
|
#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
|
||||||
openib_btl->ib_cq[BTL_OPENIB_HP_QP] =
|
openib_btl->ib_cq[BTL_OPENIB_HP_QP] =
|
||||||
ibv_create_cq(openib_btl->hca->ib_dev_context,
|
ibv_create_cq(openib_btl->hca->ib_dev_context,
|
||||||
@ -822,6 +863,7 @@ int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
|
|||||||
ibv_create_cq(openib_btl->hca->ib_dev_context,
|
ibv_create_cq(openib_btl->hca->ib_dev_context,
|
||||||
mca_btl_openib_component.ib_cq_size, NULL, NULL, 0);
|
mca_btl_openib_component.ib_cq_size, NULL, NULL, 0);
|
||||||
#endif
|
#endif
|
||||||
|
#endif /* OMPI_ENABLE_PROGRESS_THREADS */
|
||||||
|
|
||||||
if(NULL == openib_btl->ib_cq[BTL_OPENIB_HP_QP]) {
|
if(NULL == openib_btl->ib_cq[BTL_OPENIB_HP_QP]) {
|
||||||
BTL_ERROR(("error creating high priority cq for %s errno says %s\n",
|
BTL_ERROR(("error creating high priority cq for %s errno says %s\n",
|
||||||
@ -829,7 +871,25 @@ int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
|
|||||||
strerror(errno)));
|
strerror(errno)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||||
|
if(ibv_req_notify_cq(openib_btl->ib_cq[BTL_OPENIB_HP_QP], 0)) {
|
||||||
|
BTL_ERROR(("error requesting high priority cq notification for %s"
|
||||||
|
" errno says %s\n",
|
||||||
|
ibv_get_device_name(openib_btl->hca->ib_dev),
|
||||||
|
strerror(errno)));
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
OPAL_THREAD_LOCK(&openib_btl->hca->hca_lock);
|
||||||
|
if (!openib_btl->hca->progress){
|
||||||
|
openib_btl->hca->progress = true;
|
||||||
|
if(OPAL_SUCCESS != (rc = opal_thread_start(&openib_btl->hca->thread))) {
|
||||||
|
BTL_ERROR(("Unable to create progress thread, retval=%d", rc));
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
OPAL_THREAD_UNLOCK(&openib_btl->hca->hca_lock);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -151,6 +151,12 @@ typedef struct mca_btl_openib_port_info_t mca_btl_openib_port_info_t;
|
|||||||
|
|
||||||
struct mca_btl_openib_hca_t {
|
struct mca_btl_openib_hca_t {
|
||||||
struct ibv_device *ib_dev; /* the ib device */
|
struct ibv_device *ib_dev; /* the ib device */
|
||||||
|
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||||
|
struct ibv_comp_channel *ib_channel; /* Channel event for the HCA */
|
||||||
|
opal_thread_t thread; /* Progress thread */
|
||||||
|
volatile bool progress; /* Progress status */
|
||||||
|
#endif
|
||||||
|
opal_mutex_t hca_lock; /* hca level lock */
|
||||||
struct ibv_context *ib_dev_context;
|
struct ibv_context *ib_dev_context;
|
||||||
struct ibv_device_attr ib_dev_attr;
|
struct ibv_device_attr ib_dev_attr;
|
||||||
struct ibv_pd *ib_pd;
|
struct ibv_pd *ib_pd;
|
||||||
@ -212,6 +218,9 @@ struct mca_btl_openib_module_t {
|
|||||||
|
|
||||||
extern mca_btl_openib_module_t mca_btl_openib_module;
|
extern mca_btl_openib_module_t mca_btl_openib_module;
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||||
|
extern void* mca_btl_openib_progress_thread(opal_object_t*);
|
||||||
|
#endif
|
||||||
/**
|
/**
|
||||||
* Register a callback function that is called on receipt
|
* Register a callback function that is called on receipt
|
||||||
* of a fragment.
|
* of a fragment.
|
||||||
|
@ -77,6 +77,7 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
|
|||||||
size_t byte_len, const int prio);
|
size_t byte_len, const int prio);
|
||||||
static char* btl_openib_component_status_to_string(enum ibv_wc_status status);
|
static char* btl_openib_component_status_to_string(enum ibv_wc_status status);
|
||||||
static int btl_openib_component_progress(void);
|
static int btl_openib_component_progress(void);
|
||||||
|
static int btl_openib_module_progress(mca_btl_openib_module_t *openib_btl);
|
||||||
static void btl_openib_frag_progress_pending(
|
static void btl_openib_frag_progress_pending(
|
||||||
mca_btl_openib_module_t* openib_btl, mca_btl_base_endpoint_t *endpoint,
|
mca_btl_openib_module_t* openib_btl, mca_btl_base_endpoint_t *endpoint,
|
||||||
const int prio);
|
const int prio);
|
||||||
@ -314,6 +315,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
|||||||
hca->ib_dev = ib_dev;
|
hca->ib_dev = ib_dev;
|
||||||
hca->ib_dev_context = ibv_open_device(ib_dev);
|
hca->ib_dev_context = ibv_open_device(ib_dev);
|
||||||
hca->btls = 0;
|
hca->btls = 0;
|
||||||
|
OBJ_CONSTRUCT(&hca->hca_lock, opal_mutex_t);
|
||||||
if(NULL == hca->ib_dev_context){
|
if(NULL == hca->ib_dev_context){
|
||||||
BTL_ERROR(("error obtaining device context for %s errno says %s\n",
|
BTL_ERROR(("error obtaining device context for %s errno says %s\n",
|
||||||
ibv_get_device_name(ib_dev), strerror(errno)));
|
ibv_get_device_name(ib_dev), strerror(errno)));
|
||||||
@ -402,7 +404,18 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
|||||||
goto dealloc_pd;
|
goto dealloc_pd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||||
|
hca->ib_channel = ibv_create_comp_channel(hca->ib_dev_context);
|
||||||
|
if (NULL == hca->ib_channel) {
|
||||||
|
BTL_ERROR(("error creating channel for %s errno says %s\n",
|
||||||
|
ibv_get_device_name(hca->ib_dev),
|
||||||
|
strerror(errno)));
|
||||||
|
goto mpool_destroy;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
ret = OMPI_SUCCESS;
|
ret = OMPI_SUCCESS;
|
||||||
|
|
||||||
/* Note ports are 1 based hence j = 1 */
|
/* Note ports are 1 based hence j = 1 */
|
||||||
for(i = 1; i <= hca->ib_dev_attr.phys_port_cnt; i++){
|
for(i = 1; i <= hca->ib_dev_attr.phys_port_cnt; i++){
|
||||||
struct ibv_port_attr ib_port_attr;
|
struct ibv_port_attr ib_port_attr;
|
||||||
@ -427,9 +440,21 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hca->btls != 0)
|
if (hca->btls != 0){
|
||||||
|
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||||
|
/* Prepare data for thread, but not starting it */
|
||||||
|
OBJ_CONSTRUCT(&hca->thread, opal_thread_t);
|
||||||
|
hca->thread.t_run = mca_btl_openib_progress_thread;
|
||||||
|
hca->thread.t_arg = hca;
|
||||||
|
hca->progress = false;
|
||||||
|
#endif
|
||||||
return ret;
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||||
|
ibv_destroy_comp_channel(hca->ib_channel);
|
||||||
|
#endif
|
||||||
|
mpool_destroy:
|
||||||
mca_mpool_base_module_destroy(hca->mpool);
|
mca_mpool_base_module_destroy(hca->mpool);
|
||||||
dealloc_pd:
|
dealloc_pd:
|
||||||
ibv_dealloc_pd(hca->ib_pd);
|
ibv_dealloc_pd(hca->ib_pd);
|
||||||
@ -470,15 +495,6 @@ btl_openib_component_init(int *num_btl_modules,
|
|||||||
*num_btl_modules = 0;
|
*num_btl_modules = 0;
|
||||||
num_devs = 0;
|
num_devs = 0;
|
||||||
|
|
||||||
/* openib BTL does not currently support progress threads, so
|
|
||||||
disable the component if they were requested */
|
|
||||||
if (enable_progress_threads) {
|
|
||||||
mca_btl_base_error_no_nics("OpenIB", "HCA");
|
|
||||||
mca_btl_openib_component.ib_num_btls = 0;
|
|
||||||
btl_openib_modex_send();
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
seedv[0] = orte_process_info.my_name->vpid;
|
seedv[0] = orte_process_info.my_name->vpid;
|
||||||
seedv[1] = opal_sys_timer_get_cycles();
|
seedv[1] = opal_sys_timer_get_cycles();
|
||||||
seedv[2] = opal_sys_timer_get_cycles();
|
seedv[2] = opal_sys_timer_get_cycles();
|
||||||
@ -974,6 +990,49 @@ static void btl_openib_frag_progress_pending(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||||
|
void* mca_btl_openib_progress_thread(opal_object_t* arg)
|
||||||
|
{
|
||||||
|
mca_btl_openib_module_t* openib_btl;
|
||||||
|
opal_thread_t* thread = (opal_thread_t*)arg;
|
||||||
|
mca_btl_openib_hca_t* hca = thread->t_arg;
|
||||||
|
unsigned int ev_lp, ev_hp;
|
||||||
|
struct ibv_cq *ev_cq;
|
||||||
|
void *ev_ctx;
|
||||||
|
int qp;
|
||||||
|
|
||||||
|
/* This thread enter in a cancel enabled state */
|
||||||
|
pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, NULL );
|
||||||
|
pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, NULL );
|
||||||
|
|
||||||
|
while (hca->progress) {
|
||||||
|
while(opal_progress_threads()) {
|
||||||
|
while(opal_progress_threads())
|
||||||
|
sched_yield();
|
||||||
|
usleep(100); /* give app a chance to re-enter library */
|
||||||
|
}
|
||||||
|
|
||||||
|
if(ibv_get_cq_event(hca->ib_channel, &ev_cq, &ev_ctx))
|
||||||
|
BTL_ERROR(("Failed to get CQ event with error %s",
|
||||||
|
strerror(errno)));
|
||||||
|
if(ibv_req_notify_cq(ev_cq, 0)) {
|
||||||
|
BTL_ERROR(("Couldn't request CQ notification with error %s",
|
||||||
|
strerror(errno)));
|
||||||
|
}
|
||||||
|
openib_btl=(mca_btl_openib_module_t*)ev_ctx;
|
||||||
|
|
||||||
|
if (ev_cq == openib_btl->ib_cq[BTL_OPENIB_LP_QP])
|
||||||
|
ibv_ack_cq_events (openib_btl->ib_cq[BTL_OPENIB_LP_QP], 1);
|
||||||
|
else
|
||||||
|
ibv_ack_cq_events (openib_btl->ib_cq[BTL_OPENIB_HP_QP], 1);
|
||||||
|
|
||||||
|
while(btl_openib_module_progress(openib_btl));
|
||||||
|
}
|
||||||
|
|
||||||
|
return PTHREAD_CANCELED;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* IB component progress.
|
* IB component progress.
|
||||||
*/
|
*/
|
||||||
@ -1042,23 +1101,32 @@ static int btl_openib_component_progress(void)
|
|||||||
if(count) return count;
|
if(count) return count;
|
||||||
|
|
||||||
for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
|
for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
|
||||||
openib_btl = &mca_btl_openib_component.openib_btls[i];
|
return btl_openib_module_progress(&mca_btl_openib_component.openib_btls[i]);
|
||||||
|
}
|
||||||
/* We have two completion queues, one for "high" priority and one for
|
}
|
||||||
* "low". Check high priority before low priority */
|
|
||||||
for(qp = 0; qp < 2; qp++) {
|
|
||||||
ne = ibv_poll_cq(openib_btl->ib_cq[qp], 1, &wc);
|
|
||||||
|
|
||||||
if(0 == ne)
|
static int btl_openib_module_progress(mca_btl_openib_module_t* openib_btl)
|
||||||
continue;
|
{
|
||||||
|
static char *qp_name[] = {"HP", "LP"};
|
||||||
|
int i, j, c, qp;
|
||||||
|
int count = 0,ne = 0, ret;
|
||||||
|
mca_btl_openib_frag_t* frag;
|
||||||
|
mca_btl_openib_endpoint_t* endpoint;
|
||||||
|
struct ibv_wc wc;
|
||||||
|
|
||||||
if(ne < 0 || wc.status != IBV_WC_SUCCESS)
|
for(qp = 0; qp < 2; qp++) {
|
||||||
goto error;
|
ne = ibv_poll_cq(openib_btl->ib_cq[qp], 1, &wc);
|
||||||
|
|
||||||
frag = (mca_btl_openib_frag_t*) (unsigned long) wc.wr_id;
|
if(0 == ne)
|
||||||
endpoint = frag->endpoint;
|
continue;
|
||||||
/* Handle work completions */
|
|
||||||
switch(wc.opcode) {
|
if(ne < 0 || wc.status != IBV_WC_SUCCESS)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
frag = (mca_btl_openib_frag_t*) (unsigned long) wc.wr_id;
|
||||||
|
endpoint = frag->endpoint;
|
||||||
|
/* Handle work completions */
|
||||||
|
switch(wc.opcode) {
|
||||||
case IBV_WC_RDMA_READ:
|
case IBV_WC_RDMA_READ:
|
||||||
assert(BTL_OPENIB_LP_QP == qp);
|
assert(BTL_OPENIB_LP_QP == qp);
|
||||||
OPAL_THREAD_ADD32(&endpoint->get_tokens, 1);
|
OPAL_THREAD_ADD32(&endpoint->get_tokens, 1);
|
||||||
@ -1124,7 +1192,6 @@ static int btl_openib_component_progress(void)
|
|||||||
openib_btl->error_cb(&openib_btl->super,
|
openib_btl->error_cb(&openib_btl->super,
|
||||||
MCA_BTL_ERROR_FLAGS_FATAL);
|
MCA_BTL_ERROR_FLAGS_FATAL);
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return count;
|
return count;
|
||||||
@ -1139,8 +1206,8 @@ error:
|
|||||||
if(frag) {
|
if(frag) {
|
||||||
endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint;
|
endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint;
|
||||||
if(endpoint &&
|
if(endpoint &&
|
||||||
endpoint->endpoint_proc &&
|
endpoint->endpoint_proc &&
|
||||||
endpoint->endpoint_proc->proc_ompi) {
|
endpoint->endpoint_proc->proc_ompi) {
|
||||||
remote_proc = endpoint->endpoint_proc->proc_ompi;
|
remote_proc = endpoint->endpoint_proc->proc_ompi;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1149,7 +1216,7 @@ error:
|
|||||||
"status number %d for wr_id %llu opcode %d",
|
"status number %d for wr_id %llu opcode %d",
|
||||||
qp_name[qp],
|
qp_name[qp],
|
||||||
btl_openib_component_status_to_string(wc.status),
|
btl_openib_component_status_to_string(wc.status),
|
||||||
wc.status, wc.wr_id, wc.opcode));
|
wc.status, wc.wr_id, wc.opcode));
|
||||||
if(wc.status == IBV_WC_RETRY_EXC_ERR) {
|
if(wc.status == IBV_WC_RETRY_EXC_ERR) {
|
||||||
opal_show_help("help-mpi-btl-openib.txt",
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
"btl_openib:retry-exceeded", true);
|
"btl_openib:retry-exceeded", true);
|
||||||
|
@ -297,6 +297,10 @@ int btl_openib_register_mca_params(void)
|
|||||||
CHECK(reg_int("use_eager_rdma", "Use RDMA for eager messages ",
|
CHECK(reg_int("use_eager_rdma", "Use RDMA for eager messages ",
|
||||||
1, &ival, 0));
|
1, &ival, 0));
|
||||||
mca_btl_openib_component.use_eager_rdma = (uint32_t) (ival != 0);
|
mca_btl_openib_component.use_eager_rdma = (uint32_t) (ival != 0);
|
||||||
|
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||||
|
/* Fast rdma path isn't supported by PROGRESS_THREAD */
|
||||||
|
mca_btl_openib_component.use_eager_rdma = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
CHECK(reg_int("eager_rdma_threshold",
|
CHECK(reg_int("eager_rdma_threshold",
|
||||||
"Use RDMA for short messages after this number of "
|
"Use RDMA for short messages after this number of "
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user