Make the pipeline depths int32_t instead of size_t.
While the pipeline depths are supposed to be unsigned, casting them to a signed value for every atomic operation is as error-prone as handling them as signed entities.
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
родитель
c340da2586
Коммит
050bd3b6d7
@ -55,8 +55,8 @@ struct mca_pml_ob1_t {
|
||||
int free_list_num; /* initial size of free list */
|
||||
int free_list_max; /* maximum size of free list */
|
||||
int free_list_inc; /* number of elements to grow free list */
|
||||
size_t send_pipeline_depth;
|
||||
size_t recv_pipeline_depth;
|
||||
int32_t send_pipeline_depth;
|
||||
int32_t recv_pipeline_depth;
|
||||
size_t rdma_retries_limit;
|
||||
int max_rdma_per_request;
|
||||
int max_send_per_range;
|
||||
|
@ -184,8 +184,8 @@ static int mca_pml_ob1_component_register(void)
|
||||
mca_pml_ob1_param_register_int("free_list_max", -1, &mca_pml_ob1.free_list_max);
|
||||
mca_pml_ob1_param_register_int("free_list_inc", 64, &mca_pml_ob1.free_list_inc);
|
||||
mca_pml_ob1_param_register_int("priority", 20, &mca_pml_ob1.priority);
|
||||
mca_pml_ob1_param_register_sizet("send_pipeline_depth", 3, &mca_pml_ob1.send_pipeline_depth);
|
||||
mca_pml_ob1_param_register_sizet("recv_pipeline_depth", 4, &mca_pml_ob1.recv_pipeline_depth);
|
||||
mca_pml_ob1_param_register_int("send_pipeline_depth", 3, &mca_pml_ob1.send_pipeline_depth);
|
||||
mca_pml_ob1_param_register_int("recv_pipeline_depth", 4, &mca_pml_ob1.recv_pipeline_depth);
|
||||
|
||||
/* NTH: we can get into a live-lock situation in the RDMA failure path so disable
|
||||
RDMA retries for now. Falling back to send may suck but it is better than
|
||||
|
@ -190,7 +190,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
|
||||
OPAL_THREAD_SUB_SIZE_T(&recvreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, -1);
|
||||
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
|
||||
@ -198,7 +198,7 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r
|
||||
assert ((uint64_t) rdma_size == frag->rdma_length);
|
||||
|
||||
/* check completion status */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, (size_t) rdma_size);
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, rdma_size);
|
||||
if (recv_request_pml_complete_check(recvreq) == false &&
|
||||
recvreq->req_rdma_offset < recvreq->req_send_offset) {
|
||||
/* schedule additional rdma operations */
|
||||
@ -951,7 +951,7 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
|
||||
}
|
||||
|
||||
while(bytes_remaining > 0 &&
|
||||
recvreq->req_pipeline_depth < mca_pml_ob1.recv_pipeline_depth) {
|
||||
recvreq->req_pipeline_depth < mca_pml_ob1.recv_pipeline_depth) {
|
||||
mca_pml_ob1_rdma_frag_t *frag = NULL;
|
||||
mca_btl_base_module_t *btl;
|
||||
int rc, rdma_idx;
|
||||
@ -1028,7 +1028,7 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
|
||||
if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
|
||||
/* update request state */
|
||||
recvreq->req_rdma_offset += size;
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_ADD32(&recvreq->req_pipeline_depth, 1);
|
||||
recvreq->req_rdma[rdma_idx].length -= size;
|
||||
bytes_remaining -= size;
|
||||
} else {
|
||||
|
@ -41,12 +41,12 @@ BEGIN_C_DECLS
|
||||
struct mca_pml_ob1_recv_request_t {
|
||||
mca_pml_base_recv_request_t req_recv;
|
||||
opal_ptr_t remote_req_send;
|
||||
int32_t req_lock;
|
||||
size_t req_pipeline_depth;
|
||||
size_t req_bytes_received; /**< amount of data transferred into the user buffer */
|
||||
size_t req_bytes_expected; /**< local size of the data as suggested by the user */
|
||||
size_t req_rdma_offset;
|
||||
size_t req_send_offset;
|
||||
int32_t req_lock;
|
||||
int32_t req_pipeline_depth;
|
||||
size_t req_bytes_received; /**< amount of data transferred into the user buffer */
|
||||
size_t req_bytes_expected; /**< local size of the data as suggested by the user */
|
||||
size_t req_rdma_offset;
|
||||
size_t req_send_offset;
|
||||
uint32_t req_rdma_cnt;
|
||||
uint32_t req_rdma_idx;
|
||||
bool req_pending;
|
||||
|
@ -313,7 +313,7 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl,
|
||||
des->des_segment_count,
|
||||
sizeof(mca_pml_ob1_frag_hdr_t));
|
||||
|
||||
OPAL_THREAD_SUB_SIZE_T(&sendreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, -1);
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
|
||||
if(send_request_pml_complete_check(sendreq) == false) {
|
||||
@ -913,13 +913,13 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq)
|
||||
|
||||
/* check pipeline_depth here before attempting to get any locks */
|
||||
if(true == sendreq->req_throttle_sends &&
|
||||
sendreq->req_pipeline_depth >= mca_pml_ob1.send_pipeline_depth)
|
||||
sendreq->req_pipeline_depth >= mca_pml_ob1.send_pipeline_depth)
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
range = get_send_range(sendreq);
|
||||
|
||||
while(range && (false == sendreq->req_throttle_sends ||
|
||||
sendreq->req_pipeline_depth < mca_pml_ob1.send_pipeline_depth)) {
|
||||
sendreq->req_pipeline_depth < mca_pml_ob1.send_pipeline_depth)) {
|
||||
mca_pml_ob1_frag_hdr_t* hdr;
|
||||
mca_btl_base_descriptor_t* des;
|
||||
int rc, btl_idx;
|
||||
@ -1044,7 +1044,7 @@ cannot_pack:
|
||||
range->range_btls[btl_idx].length -= size;
|
||||
range->range_send_length -= size;
|
||||
range->range_send_offset += size;
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1);
|
||||
if(range->range_send_length == 0) {
|
||||
range = get_next_send_range(sendreq, range);
|
||||
prev_bytes_remaining = 0;
|
||||
@ -1060,7 +1060,7 @@ cannot_pack:
|
||||
range->range_btls[btl_idx].length -= size;
|
||||
range->range_send_length -= size;
|
||||
range->range_send_offset += size;
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, 1);
|
||||
OPAL_THREAD_ADD32(&sendreq->req_pipeline_depth, 1);
|
||||
if(range->range_send_length == 0) {
|
||||
range = get_next_send_range(sendreq, range);
|
||||
prev_bytes_remaining = 0;
|
||||
|
@ -45,11 +45,11 @@ struct mca_pml_ob1_send_request_t {
|
||||
mca_pml_base_send_request_t req_send;
|
||||
mca_bml_base_endpoint_t* req_endpoint;
|
||||
opal_ptr_t req_recv;
|
||||
int32_t req_state;
|
||||
int32_t req_lock;
|
||||
bool req_throttle_sends;
|
||||
size_t req_pipeline_depth;
|
||||
size_t req_bytes_delivered;
|
||||
int32_t req_state;
|
||||
int32_t req_lock;
|
||||
bool req_throttle_sends;
|
||||
int32_t req_pipeline_depth;
|
||||
size_t req_bytes_delivered;
|
||||
uint32_t req_rdma_cnt;
|
||||
mca_pml_ob1_send_pending_t req_pending;
|
||||
opal_mutex_t req_send_range_lock;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user