Implement the MPI_Iprobe and MPI_Probe wrappers.
Remove some old, unused code. This commit was SVN r16178.
Этот коммит содержится в:
родитель
8bdd14ba40
Коммит
3e51d7bb25
@ -185,6 +185,9 @@
|
|||||||
/************************************
|
/************************************
|
||||||
* Locally Global vars
|
* Locally Global vars
|
||||||
************************************/
|
************************************/
|
||||||
|
#define PROBE_ANY_SIZE ((size_t) 0)
|
||||||
|
#define PROBE_ANY_COUNT ((size_t) 0)
|
||||||
|
|
||||||
/* Pointers to the 'real' PML */
|
/* Pointers to the 'real' PML */
|
||||||
static mca_pml_base_component_t *wrapped_pml_component = NULL;
|
static mca_pml_base_component_t *wrapped_pml_component = NULL;
|
||||||
static mca_pml_base_module_t *wrapped_pml_module = NULL;
|
static mca_pml_base_module_t *wrapped_pml_module = NULL;
|
||||||
@ -872,9 +875,66 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_iprobe(
|
|||||||
ompi_status_public_t* status,
|
ompi_status_public_t* status,
|
||||||
ompi_crcp_base_pml_state_t* pml_state )
|
ompi_crcp_base_pml_state_t* pml_state )
|
||||||
{
|
{
|
||||||
|
ompi_crcp_coord_pml_message_ref_t *drain_msg_ref = NULL;
|
||||||
|
int exit_status = OMPI_SUCCESS;
|
||||||
|
int ret;
|
||||||
|
|
||||||
opal_output_verbose(20, mca_crcp_coord_component.super.output_handle,
|
opal_output_verbose(20, mca_crcp_coord_component.super.output_handle,
|
||||||
"crcp:coord: pml_iprobe()");
|
"crcp:coord: pml_iprobe(%d, %d)", dst, tag);
|
||||||
pml_state->error_code = OMPI_SUCCESS;
|
|
||||||
|
/*
|
||||||
|
* Before PML Call
|
||||||
|
* - Determine if this can be satisfied from the drained list
|
||||||
|
* - Otherwise let the PML handle it
|
||||||
|
*/
|
||||||
|
if( OMPI_CRCP_PML_PRE == pml_state->state) {
|
||||||
|
/*
|
||||||
|
* Check to see if this message is in the drained message list
|
||||||
|
*/
|
||||||
|
if( OMPI_SUCCESS != (ret = find_drained_msg(PROBE_ANY_SIZE, PROBE_ANY_COUNT,
|
||||||
|
tag, dst,
|
||||||
|
&drain_msg_ref) ) ) {
|
||||||
|
opal_output(mca_crcp_coord_component.super.output_handle,
|
||||||
|
"crcp:coord: pml_iprobe(): Failed trying to find a drained message."
|
||||||
|
" This should never happen. (%d)",
|
||||||
|
ret);
|
||||||
|
exit_status = ret;
|
||||||
|
goto DONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the message is a drained message
|
||||||
|
* - Copy of the status structure to pass back to the user
|
||||||
|
* - Mark the 'matched' flag as true
|
||||||
|
*/
|
||||||
|
if( NULL != drain_msg_ref ) {
|
||||||
|
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
|
||||||
|
"crcp:coord: pml_iprobe(): Matched a drained message...");
|
||||||
|
|
||||||
|
/* Copy the status information */
|
||||||
|
if( MPI_STATUS_IGNORE != status ) {
|
||||||
|
memcpy(status, &drain_msg_ref->status, sizeof(ompi_status_public_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Mark as complete */
|
||||||
|
*matched = 1;
|
||||||
|
|
||||||
|
/* This will identify to the wrapper that this message is complete */
|
||||||
|
pml_state->state = OMPI_CRCP_PML_DONE;
|
||||||
|
pml_state->error_code = OMPI_SUCCESS;
|
||||||
|
return pml_state;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Otherwise the message is not drained (common case), so let the PML deal with it
|
||||||
|
*/
|
||||||
|
else {
|
||||||
|
/* Mark as not complete */
|
||||||
|
*matched = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DONE:
|
||||||
|
pml_state->error_code = exit_status;
|
||||||
return pml_state;
|
return pml_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -884,9 +944,55 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_probe(
|
|||||||
ompi_status_public_t* status,
|
ompi_status_public_t* status,
|
||||||
ompi_crcp_base_pml_state_t* pml_state )
|
ompi_crcp_base_pml_state_t* pml_state )
|
||||||
{
|
{
|
||||||
|
ompi_crcp_coord_pml_message_ref_t *drain_msg_ref = NULL;
|
||||||
|
int exit_status = OMPI_SUCCESS;
|
||||||
|
int ret;
|
||||||
|
|
||||||
opal_output_verbose(20, mca_crcp_coord_component.super.output_handle,
|
opal_output_verbose(20, mca_crcp_coord_component.super.output_handle,
|
||||||
"crcp:coord: pml_probe()");
|
"crcp:coord: pml_probe(%d, %d)", dst, tag);
|
||||||
pml_state->error_code = OMPI_SUCCESS;
|
|
||||||
|
/*
|
||||||
|
* Before PML Call
|
||||||
|
* - Determine if this can be satisfied from the drained list
|
||||||
|
* - Otherwise let the PML handle it
|
||||||
|
*/
|
||||||
|
if( OMPI_CRCP_PML_PRE == pml_state->state) {
|
||||||
|
/*
|
||||||
|
* Check to see if this message is in the drained message list
|
||||||
|
*/
|
||||||
|
if( OMPI_SUCCESS != (ret = find_drained_msg(PROBE_ANY_SIZE, PROBE_ANY_COUNT,
|
||||||
|
tag, dst,
|
||||||
|
&drain_msg_ref) ) ) {
|
||||||
|
opal_output(mca_crcp_coord_component.super.output_handle,
|
||||||
|
"crcp:coord: pml_probe(): Failed trying to find a drained message."
|
||||||
|
" This should never happen. (%d)",
|
||||||
|
ret);
|
||||||
|
exit_status = ret;
|
||||||
|
goto DONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the message is a drained message
|
||||||
|
* - Copy of the status structure to pass back to the user
|
||||||
|
*/
|
||||||
|
if( NULL != drain_msg_ref ) {
|
||||||
|
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
|
||||||
|
"crcp:coord: pml_iprobe(): Matched a drained message...");
|
||||||
|
|
||||||
|
/* Copy the status information */
|
||||||
|
if( MPI_STATUS_IGNORE != status ) {
|
||||||
|
memcpy(status, &drain_msg_ref->status, sizeof(ompi_status_public_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This will identify to the wrapper that this message is complete */
|
||||||
|
pml_state->state = OMPI_CRCP_PML_DONE;
|
||||||
|
pml_state->error_code = OMPI_SUCCESS;
|
||||||
|
return pml_state;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DONE:
|
||||||
|
pml_state->error_code = exit_status;
|
||||||
return pml_state;
|
return pml_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1592,224 +1698,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_irecv(
|
|||||||
return pml_state;
|
return pml_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* JJH Alternative - RECV -> IRECV/WAIT -- Testing... */
|
|
||||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv(
|
|
||||||
void *buf, size_t count,
|
|
||||||
ompi_datatype_t *datatype,
|
|
||||||
int src, int tag,
|
|
||||||
struct ompi_communicator_t* comm,
|
|
||||||
ompi_status_public_t* status,
|
|
||||||
ompi_crcp_base_pml_state_t* pml_state)
|
|
||||||
{
|
|
||||||
ompi_crcp_coord_pml_peer_ref_t *peer_ref = NULL;
|
|
||||||
ompi_crcp_coord_pml_message_ref_t *msg_ref = NULL;
|
|
||||||
ompi_crcp_coord_pml_message_ref_t *drain_msg_ref = NULL;
|
|
||||||
int exit_status = OMPI_SUCCESS;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
opal_output_verbose(20, mca_crcp_coord_component.super.output_handle,
|
|
||||||
"crcp:coord: pml_recv()");
|
|
||||||
|
|
||||||
if( OMPI_CRCP_PML_PRE != pml_state->state) {
|
|
||||||
pml_state->error_code = OMPI_SUCCESS;
|
|
||||||
return pml_state;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* - Determine if this can be satisfied from the drained list
|
|
||||||
* - Otherwise create a new reference to it so we can track it
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
* Check to see if this message is in the drained message list
|
|
||||||
*/
|
|
||||||
if( OMPI_SUCCESS != (ret = find_drained_msg(datatype->size,
|
|
||||||
count, tag, src,
|
|
||||||
&drain_msg_ref) ) ) {
|
|
||||||
opal_output(mca_crcp_coord_component.super.output_handle,
|
|
||||||
"crcp:coord: pml_recv(): Failed trying to find a drained message."
|
|
||||||
" This should never happen. (%d)",
|
|
||||||
ret);
|
|
||||||
exit_status = ret;
|
|
||||||
goto DONE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If the message is a drained message
|
|
||||||
* - Complete it right now
|
|
||||||
* - We do not need to increment any counters here since we already have
|
|
||||||
* when we originally drained the message.
|
|
||||||
*/
|
|
||||||
if( NULL != drain_msg_ref ) {
|
|
||||||
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
|
|
||||||
"crcp:coord: pml_recv(): Matched a drained message...");
|
|
||||||
|
|
||||||
/* Copy the drained message */
|
|
||||||
src = drain_msg_ref->rank;
|
|
||||||
tag = drain_msg_ref->tag;
|
|
||||||
if( 0 != ompi_ddt_copy_content_same_ddt(datatype, count,
|
|
||||||
buf, drain_msg_ref->buffer) ) {
|
|
||||||
opal_output( mca_crcp_coord_component.super.output_handle,
|
|
||||||
"crcp:coord: pml_recv(): Datatype copy failed (%d)",
|
|
||||||
ret);
|
|
||||||
}
|
|
||||||
if( MPI_STATUS_IGNORE != status ) {
|
|
||||||
memcpy(status, &drain_msg_ref->status, sizeof(ompi_status_public_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Remove the message from the list */
|
|
||||||
opal_list_remove_item(&drained_msg_list, &(drain_msg_ref->super));
|
|
||||||
drain_msg_ref->request = NULL;
|
|
||||||
OBJ_RELEASE(drain_msg_ref);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Find the peer for this source
|
|
||||||
*/
|
|
||||||
if( NULL == peer_ref ) {
|
|
||||||
if( OMPI_SUCCESS != (ret = find_peer_in_comm(comm, src, &peer_ref) ) ){
|
|
||||||
opal_output(mca_crcp_coord_component.super.output_handle,
|
|
||||||
"crcp:coord: recv: Failed to find peer_ref\n");
|
|
||||||
exit_status = ret;
|
|
||||||
goto DONE;
|
|
||||||
}
|
|
||||||
if( NULL == peer_ref ) {
|
|
||||||
opal_output(mca_crcp_coord_component.super.output_handle,
|
|
||||||
"crcp:coord: recv: Failed to find peer_ref - peer_ref is NULL\n");
|
|
||||||
exit_status = ret;
|
|
||||||
goto DONE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
peer_ref->total_drained_msgs -= 1;
|
|
||||||
/* Do *not* increment:
|
|
||||||
* peer_ref->total_recv_msgs += 1;
|
|
||||||
* Because we accounted for this message during the last checkpoint.
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* Otherwise the message is not drained (common case)
|
|
||||||
* Magically convert RECV to IRECV+WAIT
|
|
||||||
*/
|
|
||||||
else {
|
|
||||||
ompi_request_t *request;
|
|
||||||
|
|
||||||
request = (ompi_request_t *)malloc(sizeof(ompi_request_t));
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Post the IRECV directly to the PML
|
|
||||||
*/
|
|
||||||
if( OMPI_SUCCESS != (ret = wrapped_pml_module->pml_irecv(buf, count,
|
|
||||||
datatype,
|
|
||||||
src, tag,
|
|
||||||
comm,
|
|
||||||
&request))) {
|
|
||||||
opal_output(mca_crcp_coord_component.super.output_handle,
|
|
||||||
"crcp:coord: recv: Failed to post PML Irecv\n");
|
|
||||||
exit_status = ret;
|
|
||||||
goto DONE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Create a new Message Object
|
|
||||||
*/
|
|
||||||
CREATE_NEW_MSG(msg_ref, COORD_MSG_TYPE_B_RECV,
|
|
||||||
buf,
|
|
||||||
count, datatype, tag, src,
|
|
||||||
comm, request,
|
|
||||||
ORTE_JOBID_INVALID,
|
|
||||||
ORTE_VPID_INVALID);
|
|
||||||
|
|
||||||
msg_ref->matched = false;
|
|
||||||
msg_ref->done = false;
|
|
||||||
msg_ref->active = true;
|
|
||||||
msg_ref->already_posted = true;
|
|
||||||
|
|
||||||
current_msg_id = msg_ref->msg_id;
|
|
||||||
current_msg_type = COORD_MSG_TYPE_B_RECV;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Find the Peer
|
|
||||||
*/
|
|
||||||
if( MPI_ANY_SOURCE == src || src < 0) {
|
|
||||||
opal_list_append(&(unknown_recv_from_list), &(msg_ref->super));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if( OMPI_SUCCESS != (ret = find_peer_in_comm(comm, src, &peer_ref) ) ){
|
|
||||||
opal_output(mca_crcp_coord_component.super.output_handle,
|
|
||||||
"crcp:coord: recv: Failed to find peer_ref\n");
|
|
||||||
exit_status = ret;
|
|
||||||
goto DONE;
|
|
||||||
}
|
|
||||||
if( NULL == peer_ref ) {
|
|
||||||
opal_output(mca_crcp_coord_component.super.output_handle,
|
|
||||||
"crcp:coord: recv: Failed to find peer_ref - peer_ref is NULL\n");
|
|
||||||
exit_status = ret;
|
|
||||||
goto DONE;
|
|
||||||
}
|
|
||||||
|
|
||||||
msg_ref->proc_name.jobid = peer_ref->proc_name.jobid;
|
|
||||||
msg_ref->proc_name.vpid = peer_ref->proc_name.vpid;
|
|
||||||
|
|
||||||
opal_list_append(&(peer_ref->recv_list), &(msg_ref->super));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Wait for the irecv to complete
|
|
||||||
*/
|
|
||||||
coord_request_wait(request, status);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If MPI_ANY_SOUCE, then move the message from the unknown list
|
|
||||||
* to the list associated with the resolved process.
|
|
||||||
*/
|
|
||||||
if( NULL == peer_ref ) {
|
|
||||||
src = status->MPI_SOURCE;
|
|
||||||
|
|
||||||
if( OMPI_SUCCESS != (ret = find_peer_in_comm(comm, src, &peer_ref) ) ){
|
|
||||||
opal_output(mca_crcp_coord_component.super.output_handle,
|
|
||||||
"crcp:coord: recv: Failed to resolve peer_ref (rank %d)\n",
|
|
||||||
src);
|
|
||||||
exit_status = ret;
|
|
||||||
goto DONE;
|
|
||||||
}
|
|
||||||
if( NULL == peer_ref ) {
|
|
||||||
opal_output(mca_crcp_coord_component.super.output_handle,
|
|
||||||
"crcp:coord: recv: Failed to resolve peer_ref (rank %d) - peer_ref is NULL\n",
|
|
||||||
src);
|
|
||||||
exit_status = ret;
|
|
||||||
goto DONE;
|
|
||||||
}
|
|
||||||
|
|
||||||
msg_ref->proc_name.jobid = peer_ref->proc_name.jobid;
|
|
||||||
msg_ref->proc_name.vpid = peer_ref->proc_name.vpid;
|
|
||||||
|
|
||||||
opal_list_remove_item(&(unknown_recv_from_list), &(msg_ref->super));
|
|
||||||
opal_list_append(&(peer_ref->recv_list), &(msg_ref->super));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Do the update
|
|
||||||
* - some Booleans already set...
|
|
||||||
* msg_ref->matched = false;
|
|
||||||
* msg_ref->already_posted = true;
|
|
||||||
*/
|
|
||||||
msg_ref->done = true;
|
|
||||||
msg_ref->active = false;
|
|
||||||
|
|
||||||
peer_ref->total_recv_msgs += 1;
|
|
||||||
current_msg_id = 0;
|
|
||||||
current_msg_type = COORD_MSG_TYPE_UNKNOWN;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This will identify to the wrapper that this message is complete */
|
|
||||||
pml_state->state = OMPI_CRCP_PML_DONE;
|
|
||||||
|
|
||||||
DONE:
|
|
||||||
pml_state->error_code = exit_status;
|
|
||||||
return pml_state;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
/* JJH Alternative - RECV -> IRECV/WAIT */
|
|
||||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv(
|
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv(
|
||||||
void *buf, size_t count,
|
void *buf, size_t count,
|
||||||
ompi_datatype_t *datatype,
|
ompi_datatype_t *datatype,
|
||||||
@ -2027,8 +1915,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv(
|
|||||||
pml_state->error_code = exit_status;
|
pml_state->error_code = exit_status;
|
||||||
return pml_state;
|
return pml_state;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
/* JJH End Alternative - RECV -> IRECV/WAIT */
|
|
||||||
|
|
||||||
|
|
||||||
/**************** Start *****************/
|
/**************** Start *****************/
|
||||||
@ -3009,10 +2895,14 @@ static int find_drained_msg(size_t ddt_size,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check the datatype size and count to make sure it matches */
|
/* Check the datatype size, if specified for a match */
|
||||||
if((drain_msg->count ) != count ||
|
if( ddt_size != PROBE_ANY_SIZE &&
|
||||||
(drain_msg->ddt_size) != ddt_size) {
|
count != PROBE_ANY_COUNT) {
|
||||||
continue;
|
/* Check the datatype size and count to make sure it matches */
|
||||||
|
if((drain_msg->count ) != count ||
|
||||||
|
(drain_msg->ddt_size) != ddt_size) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If we get here then the message matches */
|
/* If we get here then the message matches */
|
||||||
|
@ -278,6 +278,10 @@ int mca_pml_crcpw_iprobe(int dst, int tag, struct ompi_communicator_t* comm, int
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( OMPI_CRCP_PML_DONE == pml_state->state) {
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
|
||||||
if( OMPI_CRCP_PML_SKIP != pml_state->state) {
|
if( OMPI_CRCP_PML_SKIP != pml_state->state) {
|
||||||
if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_iprobe(dst, tag, comm, matched, status) ) ) {
|
if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_iprobe(dst, tag, comm, matched, status) ) ) {
|
||||||
PML_CRCP_STATE_RETURN(pml_state);
|
PML_CRCP_STATE_RETURN(pml_state);
|
||||||
@ -293,6 +297,7 @@ int mca_pml_crcpw_iprobe(int dst, int tag, struct ompi_communicator_t* comm, int
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CLEANUP:
|
||||||
PML_CRCP_STATE_RETURN(pml_state);
|
PML_CRCP_STATE_RETURN(pml_state);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -316,6 +321,10 @@ int mca_pml_crcpw_probe( int dst, int tag, struct ompi_communicator_t* comm, omp
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( OMPI_CRCP_PML_DONE == pml_state->state) {
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
|
||||||
if( OMPI_CRCP_PML_SKIP != pml_state->state) {
|
if( OMPI_CRCP_PML_SKIP != pml_state->state) {
|
||||||
if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_probe(dst, tag, comm, status) ) ) {
|
if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_probe(dst, tag, comm, status) ) ) {
|
||||||
PML_CRCP_STATE_RETURN(pml_state);
|
PML_CRCP_STATE_RETURN(pml_state);
|
||||||
@ -331,6 +340,7 @@ int mca_pml_crcpw_probe( int dst, int tag, struct ompi_communicator_t* comm, omp
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CLEANUP:
|
||||||
PML_CRCP_STATE_RETURN(pml_state);
|
PML_CRCP_STATE_RETURN(pml_state);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user