
Corrections to pml_dr; it now passes the Intel test suite (p2p_c).

Note that checksums are not enabled currently; they are set to zero because the
convertor is not yet ready for checksums.
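
For illustration, a minimal sketch of what "not ready for checksums" means on the receive path. The helper name frag_checksum_ok is hypothetical, not OMPI API; the real change in the diff simply short-circuits the comparison with if(0):

    #include <stdint.h>

    /* Hypothetical helper: fragment checksum verification, currently bypassed.
     * The convertor cannot yet compute checksums, so the computed value is
     * always 0 and comparing it against the wire checksum would reject every
     * fragment; every fragment is therefore accepted for now. */
    static int frag_checksum_ok(uint32_t wire_csum, uint32_t computed_csum)
    {
        (void)wire_csum;
        (void)computed_csum;
        return 1;   /* later: return computed_csum == wire_csum; */
    }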

Also, we can't call pack/unpack on the convertor with 0 bytes; otherwise it
crashes. The affected call sites are now guarded by a size check (a sketch of the pattern follows).
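
A minimal sketch of that guard, using a generic function pointer in place of ompi_convertor_pack/ompi_convertor_unpack (pack_fn_t and pack_guarded are illustrative names, not OMPI API):

    #include <stddef.h>
    #include <sys/uio.h>

    /* Stand-in for ompi_convertor_pack / ompi_convertor_unpack. */
    typedef int (*pack_fn_t)(void *convertor, struct iovec *iov,
                             unsigned int *iov_count, size_t *max_data,
                             int *free_after);

    /* Only invoke the convertor when there is actually data to move;
     * calling it with 0 bytes crashes. */
    static int pack_guarded(pack_fn_t pack, void *convertor, struct iovec *iov,
                            unsigned int *iov_count, size_t *max_data,
                            int *free_after)
    {
        if (*max_data == 0) {
            return 0;   /* nothing to pack/unpack: skip the convertor */
        }
        return pack(convertor, iov, iov_count, max_data, free_after);
    }

The diff below applies the same size check directly at the call sites in mca_pml_dr_send_request_start_copy and MPI_Unpack.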

This commit was SVN r9062.
This commit is contained in:
Galen Shipman 2006-02-16 16:15:16 +00:00
parent 670cefa1d0
commit 0bc3cbf0db
7 changed files: 140 additions and 113 deletions

View file

@@ -720,6 +720,7 @@ void mca_pml_dr_recv_frag_ack(mca_pml_dr_recv_frag_t* frag)
ack->hdr_vmask = 1;
ack->hdr_vid = frag->hdr.hdr_match.hdr_vid;
ack->hdr_src_req = frag->hdr.hdr_match.hdr_src_req;
assert(ack->hdr_src_req.pval);
ack->hdr_dst_req.pval = NULL;
/* initialize descriptor */

View file

@@ -166,11 +166,12 @@ static void mca_pml_dr_recv_request_matched(
/* fill out header */
ack = (mca_pml_dr_ack_hdr_t*)des->des_src->seg_addr.pval;
ack->hdr_common.hdr_type = type;
ack->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_ACK;
ack->hdr_common.hdr_flags = MCA_PML_DR_HDR_FLAGS_MATCH;
ack->hdr_vid = hdr->hdr_match.hdr_vid;
ack->hdr_vmask = 0x1;
ack->hdr_src_req = hdr->hdr_match.hdr_src_req;
assert(ack->hdr_src_req.pval);
ack->hdr_dst_req.pval = recvreq;
/* initialize descriptor */
@@ -224,6 +225,7 @@ static void mca_pml_dr_recv_request_nack(
nack->hdr_vid = hdr->hdr_vid;
nack->hdr_vmask = 1 << hdr->hdr_frag_idx;
nack->hdr_src_req = hdr->hdr_src_req;
assert(nack->hdr_src_req.pval);
nack->hdr_dst_req.pval = recvreq;
/* initialize descriptor */
@@ -263,6 +265,7 @@ static void mca_pml_dr_recv_request_vfrag_ack(
/* fill out header */
ack = (mca_pml_dr_ack_hdr_t*)des->des_src->seg_addr.pval;
ack->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_ACK;
ack->hdr_common.hdr_flags = 0;
ack->hdr_vid = vfrag->vf_id;
@@ -336,7 +339,9 @@ void mca_pml_dr_recv_request_progress(
bytes_delivered,
csum);
mca_pml_dr_recv_request_matched(recvreq, &hdr->hdr_rndv,
(csum == hdr->hdr_match.hdr_csum) ? MCA_PML_DR_HDR_TYPE_ACK : MCA_PML_DR_HDR_TYPE_NACK);
MCA_PML_DR_HDR_TYPE_ACK);
/* mca_pml_dr_recv_request_matched(recvreq, &hdr->hdr_rndv, */
/* (csum == hdr->hdr_match.hdr_csum) ? MCA_PML_DR_HDR_TYPE_ACK : MCA_PML_DR_HDR_TYPE_NACK); */
break;
case MCA_PML_DR_HDR_TYPE_FRAG:
@@ -354,7 +359,8 @@ void mca_pml_dr_recv_request_progress(
csum);
/* if checksum fails - immediately nack this fragment */
if(csum != hdr->hdr_frag.hdr_frag_csum) {
/* if(csum != hdr->hdr_frag.hdr_frag_csum) { */
if(0) {
bytes_received = bytes_delivered = 0;
mca_pml_dr_recv_request_nack(recvreq, &hdr->hdr_frag);
} else {

View file

@@ -248,6 +248,7 @@ do {
} else { \
bytes_delivered = 0; \
} \
csum = request->req_recv.req_convertor.checksum; \
} while (0)

View file

@@ -42,7 +42,7 @@ static int mca_pml_dr_send_request_fini(struct ompi_request_t** request)
MCA_PML_DR_FREE(request);
} else {
sendreq->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_INACTIVE;
/* rewind convertor */
/* rewind convertor */
if(sendreq->req_send.req_bytes_packed) {
size_t offset = 0;
ompi_convertor_set_position(&sendreq->req_send.req_convertor, &offset);
@@ -73,6 +73,12 @@ static int mca_pml_dr_send_request_cancel(struct ompi_request_t* request, int co
static void mca_pml_dr_send_request_construct(mca_pml_dr_send_request_t* req)
{
OBJ_CONSTRUCT(&req->req_vfrag0, mca_pml_dr_vfrag_t);
OBJ_CONSTRUCT(&req->req_pending, opal_list_t);
OBJ_CONSTRUCT(&req->req_retrans, opal_list_t);
OBJ_CONSTRUCT(&req->req_mutex, opal_mutex_t);
req->req_vfrag0.vf_len = 1;
req->req_vfrag0.vf_idx = 1;
req->req_vfrag0.vf_mask = 1;
@@ -81,10 +87,6 @@ static void mca_pml_dr_send_request_construct(mca_pml_dr_send_request_t* req)
req->req_send.req_base.req_ompi.req_free = mca_pml_dr_send_request_free;
req->req_send.req_base.req_ompi.req_cancel = mca_pml_dr_send_request_cancel;
OBJ_CONSTRUCT(&req->req_vfrag0, mca_pml_dr_vfrag_t);
OBJ_CONSTRUCT(&req->req_pending, opal_list_t);
OBJ_CONSTRUCT(&req->req_retrans, opal_list_t);
OBJ_CONSTRUCT(&req->req_mutex, opal_mutex_t);
}
static void mca_pml_dr_send_request_destruct(mca_pml_dr_send_request_t* req)
@@ -385,16 +387,18 @@ int mca_pml_dr_send_request_start_copy(
iov.iov_len = size;
iov_count = 1;
max_data = size;
if((rc = ompi_convertor_pack(
&sendreq->req_send.req_convertor,
&iov,
&iov_count,
&max_data,
&free_after)) < 0) {
if(size > 0) {
if((rc = ompi_convertor_pack(
&sendreq->req_send.req_convertor,
&iov,
&iov_count,
&max_data,
&free_after)) < 0) {
mca_bml_base_free(bml_btl, descriptor);
return rc;
}
}
/* build match header */
hdr = (mca_pml_dr_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = 0;
@@ -405,6 +409,7 @@ int mca_pml_dr_send_request_start_copy(
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
hdr->hdr_match.hdr_csum = sendreq->req_send.req_convertor.checksum;
hdr->hdr_match.hdr_src_req.pval = sendreq;
hdr->hdr_common.hdr_csum = opal_csum(hdr, sizeof(mca_pml_dr_match_hdr_t));
/* update lengths */
@@ -468,6 +473,7 @@ int mca_pml_dr_send_request_start_prepare(
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
hdr->hdr_match.hdr_csum = sendreq->req_send.req_convertor.checksum;
hdr->hdr_match.hdr_src_req.pval = sendreq;
hdr->hdr_common.hdr_csum = opal_csum(hdr, sizeof(mca_pml_dr_match_hdr_t));
/* short message */
@@ -580,10 +586,12 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_send);
mca_pml_dr_vfrag_t* vfrag = sendreq->req_vfrag;
size_t size = bytes_remaining;
/* offset tells us how much of the vfrag has been scheduled */
size_t offset = sendreq->req_send_offset - vfrag->vf_offset;
int rc;
/* do we need to allocate a new vfrag */
/* do we need to allocate a new vfrag
(we scheduled all the vfrag already) */
if(vfrag->vf_size == offset) {
MCA_PML_DR_VFRAG_ALLOC(vfrag,rc);
if(NULL == vfrag) {
@@ -632,7 +640,7 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
hdr->hdr_vid = vfrag->vf_id;
hdr->hdr_vlen = vfrag->vf_len;
hdr->hdr_frag_idx = vfrag->vf_idx;
hdr->hdr_frag_csum = sendreq->req_send.req_convertor.checksum;
hdr->hdr_frag_csum = sendreq->req_send.req_convertor.checksum;
hdr->hdr_frag_offset = sendreq->req_send_offset;
hdr->hdr_src_req.pval = sendreq;
hdr->hdr_dst_req = sendreq->req_vfrag0.vf_recv;
@@ -716,7 +724,7 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
hdr->hdr_vid = vfrag->vf_id;
hdr->hdr_vlen = vfrag->vf_len;
hdr->hdr_frag_idx = vfrag->vf_idx;
hdr->hdr_frag_csum = sendreq->req_send.req_convertor.checksum;
hdr->hdr_frag_csum = sendreq->req_send.req_convertor.checksum;
hdr->hdr_frag_offset = sendreq->req_send_offset;
hdr->hdr_src_req.pval = sendreq;
hdr->hdr_dst_req = sendreq->req_vfrag0.vf_recv;
@@ -769,6 +777,7 @@ void mca_pml_dr_send_request_acked(
mca_pml_dr_send_request_t* sendreq,
mca_pml_dr_ack_hdr_t* ack)
{
assert(sendreq);
if(ack->hdr_common.hdr_flags & MCA_PML_DR_HDR_FLAGS_MATCH) {
sendreq->req_vfrag0.vf_recv = ack->hdr_dst_req;
MCA_PML_DR_SEND_REQUEST_ADVANCE(sendreq);

View file

@@ -40,7 +40,7 @@ extern "C" {
struct mca_pml_dr_send_request_t {
mca_pml_base_send_request_t req_send;
ompi_proc_t* req_proc;
/* ompi_proc_t* req_proc; */
mca_bml_base_endpoint_t* req_endpoint;
#if OMPI_HAVE_THREAD_SUPPORT
volatile int32_t req_state;
@@ -83,7 +83,7 @@ OBJ_CLASS_DECLARATION(mca_pml_dr_send_request_t);
rc = OMPI_SUCCESS; \
OMPI_FREE_LIST_WAIT(&mca_pml_dr.send_requests, item, rc); \
sendreq = (mca_pml_dr_send_request_t*)item; \
sendreq->req_proc = proc; \
sendreq->req_send.req_base.req_proc = proc; \
} \
}
@@ -119,10 +119,11 @@ OBJ_CLASS_DECLARATION(mca_pml_dr_send_request_t);
do { \
mca_pml_dr_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \
mca_pml_dr_comm_proc_t* proc = comm->procs + sendreq->req_send.req_base.req_peer; \
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)sendreq->req_proc->proc_pml; \
mca_bml_base_endpoint_t* endpoint = \
(mca_bml_base_endpoint_t*)sendreq->req_send.req_base.req_proc->proc_pml; \
mca_bml_base_btl_t* bml_btl; \
size_t size = sendreq->req_send.req_bytes_packed; \
\
size_t eager_limit; \
if(endpoint == NULL) { \
rc = OMPI_ERR_UNREACH; \
break; \
@@ -146,73 +147,69 @@ do {
\
/* select a btl */ \
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); \
\
/* shortcut for zero byte */ \
if(size == 0 && sendreq->req_send.req_send_mode != MCA_PML_BASE_SEND_SYNCHRONOUS) { \
mca_btl_base_descriptor_t* descriptor; \
mca_btl_base_segment_t* segment; \
mca_pml_dr_hdr_t* hdr; \
\
/* allocate a descriptor */ \
MCA_PML_DR_DES_ALLOC(bml_btl, descriptor, sizeof(mca_pml_dr_match_hdr_t)); \
if(NULL == descriptor) { \
return OMPI_ERR_OUT_OF_RESOURCE; \
} \
segment = descriptor->des_src; \
\
/* build hdr */ \
hdr = (mca_pml_dr_hdr_t*)segment->seg_addr.pval; \
hdr->hdr_common.hdr_flags = 0; \
hdr->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_MATCH; \
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; \
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; \
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; \
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence; \
\
/* short message */ \
descriptor->des_cbfunc = mca_pml_dr_match_completion_cache; \
descriptor->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY; \
descriptor->des_cbdata = sendreq; \
\
/* request is complete at mpi level */ \
OPAL_THREAD_LOCK(&ompi_request_lock); \
MCA_PML_DR_SEND_REQUEST_MPI_COMPLETE(sendreq); \
OPAL_THREAD_UNLOCK(&ompi_request_lock); \
\
/* send */ \
rc = mca_bml_base_send(bml_btl, descriptor, MCA_BTL_TAG_PML); \
if(OMPI_SUCCESS != rc) { \
mca_bml_base_free(bml_btl, descriptor ); \
} \
\
} else { \
size_t eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_dr_hdr_t); \
if(size <= eager_limit) { \
switch(sendreq->req_send.req_send_mode) { \
case MCA_PML_BASE_SEND_SYNCHRONOUS: \
rc = mca_pml_dr_send_request_start_rndv(sendreq, bml_btl, size, 0); \
break; \
case MCA_PML_BASE_SEND_BUFFERED: \
rc = mca_pml_dr_send_request_start_copy(sendreq, bml_btl, size); \
break; \
default: \
if (bml_btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) { \
rc = mca_pml_dr_send_request_start_prepare(sendreq, bml_btl, size); \
} else { \
rc = mca_pml_dr_send_request_start_copy(sendreq, bml_btl, size); \
} \
break; \
} \
} else { \
size = eager_limit; \
if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \
rc = mca_pml_dr_send_request_start_buffered(sendreq, bml_btl, size); \
} else { \
rc = mca_pml_dr_send_request_start_rndv(sendreq, bml_btl, size, 0); \
} \
} \
} \
} while (0)
eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_dr_hdr_t); \
if(size <= eager_limit) { \
switch(sendreq->req_send.req_send_mode) { \
case MCA_PML_BASE_SEND_SYNCHRONOUS: \
rc = mca_pml_dr_send_request_start_rndv(sendreq, bml_btl, size, 0); \
break; \
case MCA_PML_BASE_SEND_BUFFERED: \
rc = mca_pml_dr_send_request_start_copy(sendreq, bml_btl, size); \
break; \
default: \
if(size == 0) { \
mca_btl_base_descriptor_t* descriptor; \
mca_btl_base_segment_t* segment; \
mca_pml_dr_hdr_t* hdr; \
/* allocate a descriptor */ \
MCA_PML_DR_DES_ALLOC(bml_btl, descriptor, sizeof(mca_pml_dr_match_hdr_t)); \
if(NULL == descriptor) { \
return OMPI_ERR_OUT_OF_RESOURCE; \
} \
segment = descriptor->des_src; \
\
/* build hdr */ \
hdr = (mca_pml_dr_hdr_t*)segment->seg_addr.pval; \
hdr->hdr_common.hdr_flags = 0; \
hdr->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_MATCH; \
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; \
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; \
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; \
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence; \
hdr->hdr_match.hdr_src_req.pval = sendreq; \
\
/* short message */ \
descriptor->des_cbfunc = mca_pml_dr_match_completion_cache; \
descriptor->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY; \
descriptor->des_cbdata = sendreq; \
\
/* request is complete at mpi level */ \
OPAL_THREAD_LOCK(&ompi_request_lock); \
MCA_PML_DR_SEND_REQUEST_MPI_COMPLETE(sendreq); \
OPAL_THREAD_UNLOCK(&ompi_request_lock); \
\
/* send */ \
rc = mca_bml_base_send(bml_btl, descriptor, MCA_BTL_TAG_PML); \
if(OMPI_SUCCESS != rc) { \
mca_bml_base_free(bml_btl, descriptor ); \
} \
\
} else if (bml_btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) { \
rc = mca_pml_dr_send_request_start_prepare(sendreq, bml_btl, size); \
} else { \
rc = mca_pml_dr_send_request_start_copy(sendreq, bml_btl, size); \
} \
break; \
} \
} else { \
size = eager_limit; \
if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \
rc = mca_pml_dr_send_request_start_buffered(sendreq, bml_btl, size); \
} else { \
rc = mca_pml_dr_send_request_start_rndv(sendreq, bml_btl, size, 0); \
} \
} \
} while (0)
/*

View file

@@ -22,6 +22,16 @@
static void mca_pml_dr_vfrag_construct(mca_pml_dr_vfrag_t* vfrag)
{
vfrag->vf_send.pval = NULL;
vfrag->vf_recv.pval = NULL;
vfrag->vf_id = 0;
vfrag->vf_idx = 0;
vfrag->vf_len = 0;
vfrag->vf_offset = 0;
vfrag->vf_size = 0;
vfrag->vf_max_send_size = 0;
vfrag->vf_ack = 0;
vfrag->vf_mask = 0;
memset(&vfrag->vf_event, 0, sizeof(vfrag->vf_event));
}

View file

@@ -37,7 +37,7 @@ int MPI_Unpack(void *inbuf, int insize, int *position,
void *outbuf, int outcount, MPI_Datatype datatype,
MPI_Comm comm)
{
int rc, freeAfter;
int rc = 1, freeAfter;
ompi_convertor_t local_convertor;
struct iovec outvec;
unsigned int iov_count;
@@ -62,31 +62,34 @@ int MPI_Unpack(void *inbuf, int insize, int *position,
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TYPE, FUNC_NAME);
}
}
OBJ_CONSTRUCT( &local_convertor, ompi_convertor_t );
/* the resulting convertor will be set the the position ZERO */
ompi_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor, datatype, outcount, outbuf, &local_convertor );
/* Check for truncation */
ompi_convertor_get_packed_size( &local_convertor, &size );
if( (*position + size) > (unsigned int)insize ) {
if( insize > 0 ) {
OBJ_CONSTRUCT( &local_convertor, ompi_convertor_t );
/* the resulting convertor will be set the the position ZERO */
ompi_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor, datatype, outcount, outbuf, &local_convertor );
/* Check for truncation */
ompi_convertor_get_packed_size( &local_convertor, &size );
if( (*position + size) > (unsigned int)insize ) {
OBJ_DESTRUCT( &local_convertor );
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TRUNCATE, FUNC_NAME);
}
/* Prepare the iovec with all informations */
outvec.iov_base = (char*) inbuf + (*position);
outvec.iov_len = insize - (*position);
/* Do the actual unpacking */
iov_count = 1;
rc = ompi_convertor_unpack( &local_convertor, &outvec, &iov_count,
&size, &freeAfter );
*position += size;
OBJ_DESTRUCT( &local_convertor );
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TRUNCATE, FUNC_NAME);
/* All done. Note that the convertor returns 1 upon success, not
OMPI_SUCCESS. */
}
/* Prepare the iovec with all informations */
outvec.iov_base = (char*) inbuf + (*position);
outvec.iov_len = insize - (*position);
/* Do the actual unpacking */
iov_count = 1;
rc = ompi_convertor_unpack( &local_convertor, &outvec, &iov_count,
&size, &freeAfter );
*position += size;
OBJ_DESTRUCT( &local_convertor );
/* All done. Note that the convertor returns 1 upon success, not
OMPI_SUCCESS. */
OMPI_ERRHANDLER_RETURN((rc == 1) ? OMPI_SUCCESS : OMPI_ERROR,
comm, MPI_ERR_UNKNOWN, FUNC_NAME);
}