1
1

Fix deadlock in OB1 protocol by by sending memory by copying if registration

fails.

This commit was SVN r14842.
Этот коммит содержится в:
Gleb Natapov 2007-06-03 08:31:58 +00:00
родитель a25e1e7b15
Коммит 10266fb467
8 изменённых файлов: 47 добавлений и 19 удалений

Просмотреть файл

@ -253,7 +253,8 @@ int mca_pml_ob1_send_fin(
ompi_proc_t* proc,
mca_bml_base_btl_t* bml_btl,
void *hdr_des,
uint8_t order
uint8_t order,
uint32_t status
)
{
mca_btl_base_descriptor_t* fin;
@ -262,7 +263,7 @@ int mca_pml_ob1_send_fin(
MCA_PML_OB1_DES_ALLOC(bml_btl, fin, order, sizeof(mca_pml_ob1_fin_hdr_t));
if(NULL == fin) {
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order);
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);
return OMPI_ERR_OUT_OF_RESOURCE;
}
fin->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
@ -274,6 +275,7 @@ int mca_pml_ob1_send_fin(
hdr->hdr_common.hdr_flags = 0;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN;
hdr->hdr_des.pval = hdr_des;
hdr->hdr_fail = status;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
#ifdef WORDS_BIGENDIAN
@ -296,12 +298,11 @@ int mca_pml_ob1_send_fin(
MCA_BTL_TAG_PML
);
if(OMPI_SUCCESS != rc) {
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order);
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, fin);
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);
return OMPI_ERR_OUT_OF_RESOURCE;
}
return OMPI_SUCCESS;
}
@ -352,7 +353,8 @@ void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl)
case MCA_PML_OB1_HDR_TYPE_FIN:
rc = mca_pml_ob1_send_fin(pckt->proc, send_dst,
pckt->hdr.hdr_fin.hdr_des.pval,
pckt->order);
pckt->order,
pckt->hdr.hdr_fin.hdr_fail);
MCA_PML_OB1_PCKT_PENDING_RETURN(pckt);
if(OMPI_ERR_OUT_OF_RESOURCE == rc)
return;
@ -378,6 +380,7 @@ void mca_pml_ob1_process_pending_rdma(void)
if(NULL == frag)
break;
if(frag->rdma_state == MCA_PML_OB1_RDMA_PUT) {
frag->retries++;
rc = mca_pml_ob1_send_request_put_frag(frag);
} else {
rc = mca_pml_ob1_recv_request_get_frag(frag);

Просмотреть файл

@ -54,6 +54,7 @@ struct mca_pml_ob1_t {
size_t eager_limit; /* maximum eager limit size - overrides btl setting */
size_t send_pipeline_depth;
size_t recv_pipeline_depth;
size_t rdma_put_retries_limit;
bool leave_pinned;
int leave_pinned_pipeline;
@ -266,7 +267,7 @@ do { \
(ompi_free_list_item_t*)pckt); \
} while(0)
#define MCA_PML_OB1_ADD_FIN_TO_PENDING(P, D, B, O) \
#define MCA_PML_OB1_ADD_FIN_TO_PENDING(P, D, B, O, S) \
do { \
mca_pml_ob1_pckt_pending_t *_pckt; \
int _rc; \
@ -274,6 +275,7 @@ do { \
MCA_PML_OB1_PCKT_PENDING_ALLOC(_pckt,_rc); \
_pckt->hdr.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN; \
_pckt->hdr.hdr_fin.hdr_des.pval = (D); \
_pckt->hdr.hdr_fin.hdr_fail = (S); \
_pckt->proc = (P); \
_pckt->bml_btl = (B); \
_pckt->order = (O); \
@ -285,7 +287,7 @@ do { \
int mca_pml_ob1_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
void *hdr_des, uint8_t order);
void *hdr_des, uint8_t order, uint32_t status);
/* This function tries to resend FIN/ACK packets from pckt_pending queue.
* Packets are added to the queue when sending of FIN or ACK is failed due to

Просмотреть файл

@ -111,7 +111,9 @@ int mca_pml_ob1_component_open(void)
mca_pml_ob1_param_register_int("send_pipeline_depth", 3);
mca_pml_ob1.recv_pipeline_depth =
mca_pml_ob1_param_register_int("recv_pipeline_depth", 4);
mca_pml_ob1.rdma_put_retries_limit =
mca_pml_ob1_param_register_int("rdma_put_retries_limit", 5);
mca_pml_ob1.unexpected_limit =
mca_pml_ob1_param_register_int("unexpected_limit", 128);

Просмотреть файл

@ -219,6 +219,7 @@ struct mca_pml_ob1_fin_hdr_t {
uint8_t hdr_padding[6];
#endif
ompi_ptr_t hdr_des; /**< completed descriptor */
uint32_t hdr_fail; /**< RDMA operation failed */
};
typedef struct mca_pml_ob1_fin_hdr_t mca_pml_ob1_fin_hdr_t;

Просмотреть файл

@ -45,6 +45,7 @@ struct mca_pml_ob1_rdma_frag_t {
struct mca_bml_base_endpoint_t* rdma_ep;
ompi_convertor_t convertor;
mca_mpool_base_registration_t* reg;
uint32_t retries;
};
typedef struct mca_pml_ob1_rdma_frag_t mca_pml_ob1_rdma_frag_t;

Просмотреть файл

@ -182,7 +182,8 @@ void mca_pml_ob1_recv_frag_callback(
}
#endif
rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval;
rdma->des_cbfunc(btl, NULL, rdma, OMPI_SUCCESS);
rdma->des_cbfunc(btl, NULL, rdma,
hdr->hdr_fin.hdr_fail ? OMPI_ERROR : OMPI_SUCCESS);
break;
}
default:

Просмотреть файл

@ -157,8 +157,10 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)des->des_cbdata;
size_t bytes_received = 0;
MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( des->des_dst, des->des_dst_cnt,
0, bytes_received );
if(status == OMPI_SUCCESS) {
MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( des->des_dst, des->des_dst_cnt,
0, bytes_received );
}
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
mca_bml_base_free(bml_btl, des);
@ -308,7 +310,7 @@ static void mca_pml_ob1_rget_completion(
mca_pml_ob1_send_fin(recvreq->req_recv.req_base.req_proc,
bml_btl,
frag->rdma_hdr.hdr_rget.hdr_des.pval,
des->order);
des->order, 0);
/* is receive request complete */
if( OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length)

Просмотреть файл

@ -1074,7 +1074,7 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
mca_pml_ob1_send_fin(sendreq->req_send.req_base.req_proc,
bml_btl,
frag->rdma_hdr.hdr_rdma.hdr_des.pval,
des->order);
des->order, 0);
/* check for request completion */
if( OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length)
@ -1115,12 +1115,27 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag )
&des );
if(NULL == des) {
size_t offset = (size_t)frag->rdma_hdr.hdr_rdma.hdr_rdma_offset;
frag->rdma_length = save_size;
ompi_convertor_set_position(&frag->convertor, &offset);
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
if(frag->retries < mca_pml_ob1.rdma_put_retries_limit) {
size_t offset = (size_t)frag->rdma_hdr.hdr_rdma.hdr_rdma_offset;
frag->rdma_length = save_size;
ompi_convertor_set_position(&frag->convertor, &offset);
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
} else {
mca_pml_ob1_send_request_t *sendreq =
(mca_pml_ob1_send_request_t*)frag->rdma_req;
/* tell receiver to unregister memory */
mca_pml_ob1_send_fin(sendreq->req_send.req_base.req_proc,
frag->rdma_hdr.hdr_rdma.hdr_des.pval, bml_btl,
MCA_BTL_NO_ORDER, 1);
/* send fragment by copy in/out */
mca_pml_ob1_send_requst_copy_in_out(sendreq,
frag->rdma_hdr.hdr_rdma.hdr_rdma_offset, frag->rdma_length);
mca_pml_ob1_send_request_schedule(sendreq);
}
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -1194,6 +1209,7 @@ void mca_pml_ob1_send_request_put(
frag->rdma_length = size;
frag->rdma_state = MCA_PML_OB1_RDMA_PUT;
frag->reg = NULL;
frag->retries = 0;
/* lookup the corresponding registration */
for(i=0; i<sendreq->req_rdma_cnt; i++) {