diff --git a/ompi/mca/pml/ob1/pml_ob1.h b/ompi/mca/pml/ob1/pml_ob1.h
index 10162916c6..4826587564 100644
--- a/ompi/mca/pml/ob1/pml_ob1.h
+++ b/ompi/mca/pml/ob1/pml_ob1.h
@@ -55,8 +55,8 @@ struct mca_pml_ob1_t {
     int free_list_num;      /* initial size of free list */
     int free_list_max;      /* maximum size of free list */
     int free_list_inc;      /* number of elements to grow free list */
-    size_t send_pipeline_depth;
-    size_t recv_pipeline_depth;
+    int32_t send_pipeline_depth;
+    int32_t recv_pipeline_depth;
     size_t rdma_retries_limit;
     int max_rdma_per_request;
     int max_send_per_range;
diff --git a/ompi/mca/pml/ob1/pml_ob1_component.c b/ompi/mca/pml/ob1/pml_ob1_component.c
index 6557bc2037..1bb14e179e 100644
--- a/ompi/mca/pml/ob1/pml_ob1_component.c
+++ b/ompi/mca/pml/ob1/pml_ob1_component.c
@@ -184,8 +184,8 @@ static int mca_pml_ob1_component_register(void)
     mca_pml_ob1_param_register_int("free_list_max", -1, &mca_pml_ob1.free_list_max);
     mca_pml_ob1_param_register_int("free_list_inc", 64, &mca_pml_ob1.free_list_inc);
     mca_pml_ob1_param_register_int("priority", 20, &mca_pml_ob1.priority);
-    mca_pml_ob1_param_register_sizet("send_pipeline_depth", 3, &mca_pml_ob1.send_pipeline_depth);
-    mca_pml_ob1_param_register_sizet("recv_pipeline_depth", 4, &mca_pml_ob1.recv_pipeline_depth);
+    mca_pml_ob1_param_register_int("send_pipeline_depth", 3, &mca_pml_ob1.send_pipeline_depth);
+    mca_pml_ob1_param_register_int("recv_pipeline_depth", 4, &mca_pml_ob1.recv_pipeline_depth);
 
     /* NTH: we can get into a live-lock situation in the RDMA failure path so disable
        RDMA retries for now. Falling back to send may suck but it is better than
diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c
index 5b45410874..3cd4c0d8fb 100644
--- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c
+++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c
@@ -190,7 +190,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r
     mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
     mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
 
-    OPAL_THREAD_SUB_SIZE_T(&recvreq->req_pipeline_depth, 1);
+    OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, -1);
 
     MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
 
@@ -198,7 +198,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r
     assert ((uint64_t) rdma_size == frag->rdma_length);
 
     /* check completion status */
-    OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, (size_t) rdma_size);
+    OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, rdma_size);
     if (recv_request_pml_complete_check(recvreq) == false &&
         recvreq->req_rdma_offset < recvreq->req_send_offset) {
         /* schedule additional rdma operations */
@@ -951,7 +951,7 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
     }
 
     while(bytes_remaining > 0 &&
-         recvreq->req_pipeline_depth < mca_pml_ob1.recv_pipeline_depth) {
+          recvreq->req_pipeline_depth < mca_pml_ob1.recv_pipeline_depth) {
         mca_pml_ob1_rdma_frag_t *frag = NULL;
         mca_btl_base_module_t *btl;
         int rc, rdma_idx;
@@ -1028,7 +1028,7 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
         if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
             /* update request state */
             recvreq->req_rdma_offset += size;
-            OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1);
+            OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, 1);
             recvreq->req_rdma[rdma_idx].length -= size;
             bytes_remaining -= size;
         } else {
diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.h b/ompi/mca/pml/ob1/pml_ob1_recvreq.h
index 6d57569323..82c4767d83 100644
--- a/ompi/mca/pml/ob1/pml_ob1_recvreq.h
+++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.h
@@ -41,12 +41,12 @@ BEGIN_C_DECLS
 struct mca_pml_ob1_recv_request_t {
     mca_pml_base_recv_request_t req_recv;
     opal_ptr_t remote_req_send;
-    int32_t req_lock;
-    size_t  req_pipeline_depth;
-    size_t  req_bytes_received;  /**< amount of data transferred into the user buffer */
-    size_t  req_bytes_expected;  /**< local size of the data as suggested by the user */
-    size_t  req_rdma_offset;
-    size_t  req_send_offset;
+    int32_t req_lock;
+    int32_t req_pipeline_depth;
+    size_t  req_bytes_received;  /**< amount of data transferred into the user buffer */
+    size_t  req_bytes_expected;  /**< local size of the data as suggested by the user */
+    size_t  req_rdma_offset;
+    size_t  req_send_offset;
     uint32_t req_rdma_cnt;
     uint32_t req_rdma_idx;
     bool req_pending;
diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c
index f0a227f5dc..f358d733da 100644
--- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c
+++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c
@@ -313,7 +313,7 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl,
                              des->des_segment_count,
                              sizeof(mca_pml_ob1_frag_hdr_t));
 
-    OPAL_THREAD_SUB_SIZE_T(&sendreq->req_pipeline_depth, 1);
+    OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, -1);
     OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
 
     if(send_request_pml_complete_check(sendreq) == false) {
@@ -913,13 +913,13 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq)
 
     /* check pipeline_depth here before attempting to get any locks */
     if(true == sendreq->req_throttle_sends &&
-      sendreq->req_pipeline_depth >= mca_pml_ob1.send_pipeline_depth)
+       sendreq->req_pipeline_depth >= mca_pml_ob1.send_pipeline_depth)
         return OMPI_SUCCESS;
 
     range = get_send_range(sendreq);
 
     while(range && (false == sendreq->req_throttle_sends ||
-          sendreq->req_pipeline_depth < mca_pml_ob1.send_pipeline_depth)) {
+           sendreq->req_pipeline_depth < mca_pml_ob1.send_pipeline_depth)) {
         mca_pml_ob1_frag_hdr_t* hdr;
         mca_btl_base_descriptor_t* des;
         int rc, btl_idx;
@@ -1044,7 +1044,7 @@ cannot_pack:
             range->range_btls[btl_idx].length -= size;
             range->range_send_length -= size;
             range->range_send_offset += size;
-            OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, 1);
+            OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1);
             if(range->range_send_length == 0) {
                 range = get_next_send_range(sendreq, range);
                 prev_bytes_remaining = 0;
@@ -1060,7 +1060,7 @@ cannot_pack:
             range->range_btls[btl_idx].length -= size;
             range->range_send_length -= size;
             range->range_send_offset += size;
-            OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, 1);
+            OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1);
             if(range->range_send_length == 0) {
                 range = get_next_send_range(sendreq, range);
                 prev_bytes_remaining = 0;
diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h
index 80acc93f4e..5cb21f6aba 100644
--- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h
+++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h
@@ -45,11 +45,11 @@ struct mca_pml_ob1_send_request_t {
     mca_pml_base_send_request_t req_send;
     mca_bml_base_endpoint_t* req_endpoint;
     opal_ptr_t req_recv;
-    int32_t req_state;
-    int32_t req_lock;
-    bool req_throttle_sends;
-    size_t req_pipeline_depth;
-    size_t req_bytes_delivered;
+    int32_t req_state;
+    int32_t req_lock;
+    bool req_throttle_sends;
+    int32_t req_pipeline_depth;
+    size_t req_bytes_delivered;
     uint32_t req_rdma_cnt;
     mca_pml_ob1_send_pending_t req_pending;
     opal_mutex_t req_send_range_lock;
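
Net effect of the patch: the per-request pipeline-depth counters and the two global
depth limits move from size_t to int32_t, the MCA parameters switch from the size_t
register helper to the int one, and the unsigned OPAL_THREAD_ADD_SIZE_T /
OPAL_THREAD_SUB_SIZE_T pair on those counters collapses into the single signed macro
OPAL_THREAD_ADD32(addr, delta), with delta = -1 for the decrement. A pipeline depth
only ever counts a handful of in-flight fragments, so 32 bits are ample, and with a
signed counter an unbalanced decrement shows up as a negative value instead of
wrapping to a huge unsigned one. Below is a minimal standalone sketch of that idiom
using C11 atomics in place of OPAL's atomic layer; thread_add32 is an illustrative
stand-in, not OPAL's implementation (the real macro also falls back to a non-atomic
add when the process is running single-threaded).

    #include <assert.h>
    #include <stdint.h>
    #include <stdatomic.h>

    /* Stand-in for OPAL_THREAD_ADD32(addr, delta): atomically apply a
     * signed delta to a 32-bit counter and return the new value. */
    static inline int32_t thread_add32(_Atomic int32_t *addr, int32_t delta)
    {
        return atomic_fetch_add(addr, delta) + delta;
    }

    int main(void)
    {
        _Atomic int32_t req_pipeline_depth = 0;

        thread_add32(&req_pipeline_depth, 1);    /* fragment scheduled */
        thread_add32(&req_pipeline_depth, -1);   /* fragment completed */

        /* With a signed type an unbalanced decrement is visibly negative;
         * a size_t counter would have wrapped to SIZE_MAX here. */
        assert(thread_add32(&req_pipeline_depth, -1) == -1);
        return 0;
    }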