From 400af6c52dadacf16a685c70fc56d21649cc4541 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 23 Dec 2015 22:53:01 +0600 Subject: [PATCH] openib addproc improvements: 1. finer grained locks; 2. separate srq creation from cq adjustments. --- opal/mca/btl/openib/btl_openib.c | 105 ++++++++++++++++++++----------- 1 file changed, 68 insertions(+), 37 deletions(-) diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index ac20a4a5ad..26297f3fb4 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -354,8 +354,10 @@ static int create_srq(mca_btl_openib_module_t *openib_btl) } else #endif { + opal_mutex_lock(&openib_btl->device->device_lock); openib_btl->qps[qp].u.srq_qp.srq = ibv_create_srq(openib_btl->device->ib_pd, &attr); + opal_mutex_unlock(&openib_btl->device->device_lock); } if (NULL == openib_btl->qps[qp].u.srq_qp.srq) { mca_btl_openib_show_init_error(__FILE__, __LINE__, @@ -404,12 +406,32 @@ static int create_srq(mca_btl_openib_module_t *openib_btl) return OPAL_SUCCESS; } -static int mca_btl_openib_size_queues_nolock(struct mca_btl_openib_module_t* openib_btl, size_t nprocs) +static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl) +{ + int rc = OPAL_SUCCESS; + opal_mutex_lock(&openib_btl->ib_lock); + if (0 == openib_btl->num_peers && + (mca_btl_openib_component.num_srq_qps > 0 || + mca_btl_openib_component.num_xrc_qps > 0)) { + rc = create_srq(openib_btl); + } + opal_mutex_unlock(&openib_btl->ib_lock); + return rc; +} + + +static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl, size_t nprocs) { uint32_t send_cqes, recv_cqes; int rc = OPAL_SUCCESS, qp; mca_btl_openib_device_t *device = openib_btl->device; + if( 0 == nprocs){ + /* nothing to do */ + return OPAL_SUCCESS; + } + + opal_mutex_lock(&openib_btl->ib_lock); /* figure out reasonable sizes for completion queues */ for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { if(BTL_OPENIB_QP_TYPE_SRQ(qp)) { @@ -420,8 +442,11 @@ static int mca_btl_openib_size_queues_nolock(struct mca_btl_openib_module_t* ope mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * nprocs; recv_cqes = send_cqes; } + + opal_mutex_lock(&openib_btl->device->device_lock); openib_btl->device->cq_size[qp_cq_prio(qp)] += recv_cqes; openib_btl->device->cq_size[BTL_OPENIB_LP_CQ] += send_cqes; + opal_mutex_unlock(&openib_btl->device->device_lock); } rc = adjust_cq(device, BTL_OPENIB_HP_CQ); @@ -434,14 +459,9 @@ static int mca_btl_openib_size_queues_nolock(struct mca_btl_openib_module_t* ope goto out; } - if (0 == openib_btl->num_peers && - (mca_btl_openib_component.num_srq_qps > 0 || - mca_btl_openib_component.num_xrc_qps > 0)) { - rc = create_srq(openib_btl); - } - openib_btl->num_peers += nprocs; out: + opal_mutex_unlock(&openib_btl->ib_lock); return rc; } @@ -601,13 +621,15 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl, return OPAL_SUCCESS; } -static int prepare_device_for_use_nolock (mca_btl_openib_device_t *device) +static int prepare_device_for_use (mca_btl_openib_device_t *device) { mca_btl_openib_frag_init_data_t *init_data; - int rc, length; + int rc = OPAL_SUCCESS, length; + + opal_mutex_lock(&device->device_lock); if (device->ready_for_use) { - return OPAL_SUCCESS; + goto exit; } /* For each btl module that we made - find every @@ -628,7 +650,8 @@ static int prepare_device_for_use_nolock (mca_btl_openib_device_t *device) sizeof(mca_btl_openib_device_qp_t)); if (NULL == device->qps) { BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__)); - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto exit; } for (int qp_index = 0 ; qp_index < mca_btl_openib_component.num_qps ; qp_index++) { @@ -660,13 +683,15 @@ static int prepare_device_for_use_nolock (mca_btl_openib_device_t *device) mca_btl_openib_component.num_xrc_qps, ibv_get_device_name(device->ib_dev), opal_process_info.nodename); - return OPAL_ERROR; + rc = OPAL_ERROR; + goto exit; } if (MCA_BTL_XRC_ENABLED) { if (OPAL_SUCCESS != mca_btl_openib_open_xrc_domain(device)) { BTL_ERROR(("XRC Internal error. Failed to open xrc domain")); - return OPAL_ERROR; + rc = OPAL_ERROR; + goto exit; } } #endif @@ -681,7 +706,8 @@ static int prepare_device_for_use_nolock (mca_btl_openib_device_t *device) sizeof(mca_btl_openib_endpoint_t*)); if(NULL == device->eager_rdma_buffers) { BTL_ERROR(("Memory allocation fails")); - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto exit; } } @@ -694,7 +720,8 @@ static int prepare_device_for_use_nolock (mca_btl_openib_device_t *device) device->eager_rdma_buffers = NULL; } BTL_ERROR(("Memory allocation fails")); - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto exit; } length = sizeof(mca_btl_openib_header_t) + @@ -722,7 +749,7 @@ static int prepare_device_for_use_nolock (mca_btl_openib_device_t *device) "opal_free_list_init", ibv_get_device_name(device->ib_dev)); } - return rc; + goto exit; } /* setup all the qps */ @@ -730,7 +757,8 @@ static int prepare_device_for_use_nolock (mca_btl_openib_device_t *device) init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t)); if (NULL == init_data) { BTL_ERROR(("Memory allocation fails")); - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto exit; } /* Initialize pool of send fragments */ @@ -763,7 +791,7 @@ static int prepare_device_for_use_nolock (mca_btl_openib_device_t *device) "opal_free_list_init", ibv_get_device_name(device->ib_dev)); } - return OPAL_ERROR; + goto exit; } init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t)); @@ -785,13 +813,16 @@ static int prepare_device_for_use_nolock (mca_btl_openib_device_t *device) mca_btl_openib_component.ib_free_list_inc, device->mpool, 0, NULL, mca_btl_openib_frag_init, init_data)) { - return OPAL_ERROR; + rc = OPAL_ERROR; + goto exit; } } device->ready_for_use = true; - return OPAL_SUCCESS; +exit: + opal_mutex_unlock(&device->device_lock); + return rc; } static int init_ib_proc_nolock(mca_btl_openib_module_t* openib_btl, mca_btl_openib_proc_t* ib_proc, @@ -994,22 +1025,23 @@ int mca_btl_openib_add_procs( } #endif - /* protect the device */ - opal_mutex_lock(&openib_btl->device->device_lock); - rc = prepare_device_for_use_nolock (openib_btl->device); + rc = prepare_device_for_use (openib_btl->device); if (OPAL_SUCCESS != rc) { BTL_ERROR(("could not prepare openib device for use")); - opal_mutex_unlock(&openib_btl->device->device_lock); return rc; } - rc = mca_btl_openib_size_queues_nolock(openib_btl, nprocs); + rc = openib_btl_prepare(openib_btl); if (OPAL_SUCCESS != rc) { - BTL_ERROR(("error creating cqs")); - opal_mutex_unlock(&openib_btl->device->device_lock); + BTL_ERROR(("could not prepare openib btl structure for usel")); + return rc; + } + + rc = openib_btl_size_queues(openib_btl, nprocs); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("error creating cqs")); return rc; } - opal_mutex_unlock(&openib_btl->device->device_lock); for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) { struct opal_proc_t* proc = procs[i]; @@ -1089,24 +1121,23 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul int local_port_cnt = 0, btl_rank; bool is_new; - // TODO: shift to the separate function - /* protect the device */ - opal_mutex_lock(&openib_btl->device->device_lock); - rc = prepare_device_for_use_nolock (openib_btl->device); + rc = prepare_device_for_use (openib_btl->device); if (OPAL_SUCCESS != rc) { BTL_ERROR(("could not prepare openib device for use")); - opal_mutex_unlock(&openib_btl->device->device_lock); return NULL; } - rc = mca_btl_openib_size_queues_nolock(openib_btl, 1); + rc = openib_btl_prepare(openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("could not prepare openib btl structure for use")); + return NULL; + } + + rc = openib_btl_size_queues(openib_btl, 1); if (OPAL_SUCCESS != rc) { BTL_ERROR(("error creating cqs")); - opal_mutex_unlock(&openib_btl->device->device_lock); return NULL; } - opal_mutex_unlock(&openib_btl->device->device_lock); - if (NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc, &is_new))) { /* if we don't have connection info for this process, it's