
Clean up the utility functions in tuned.

This commit was SVN r31987.
This commit is contained in:
George Bosilca 2014-06-13 16:04:45 +00:00
parent 0b856316f8
commit 542e4996a7
3 changed files with 90 additions and 234 deletions

View file

@@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -49,6 +49,61 @@ static mca_base_var_enum_value_t barrier_algorithms[] = {
{0, NULL}
};
/**
* A quick version of MPI_Sendrecv implemented for the barrier.
* No actual data is moved across the wire; we use 0-byte messages to
* signal a two-peer synchronization.
*/
static inline int
ompi_coll_tuned_sendrecv_zero(int dest, int stag,
int source, int rtag,
MPI_Comm comm)
{
int err, line = 0;
ompi_request_t* reqs[2];
ompi_status_public_t statuses[2];
/* post new irecv */
err = MCA_PML_CALL(irecv( NULL, 0, MPI_BYTE, source, rtag,
comm, &reqs[0]));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
/* send data to children */
err = MCA_PML_CALL(isend( NULL, 0, MPI_BYTE, dest, stag,
MCA_PML_BASE_SEND_STANDARD, comm, &reqs[1]));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
err = ompi_request_wait_all( 2, reqs, statuses );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
return (MPI_SUCCESS);
error_handler:
/* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error
* code that we can propagate up the stack. Instead, look for the real
* error code from the MPI_ERROR in the status.
*/
if( MPI_ERR_IN_STATUS == err ) {
/* At least we know the error was detected during the wait_all */
int err_index = 1;
if( MPI_SUCCESS == statuses[0].MPI_ERROR ) {
err_index = 0;
}
err = statuses[err_index].MPI_ERROR;
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred in the %s"
" stage of ompi_coll_tuned_sendrecv_zero\n",
__FILE__, line, err, (0 == err_index ? "receive" : "send")));
} else {
/* Error discovered during the posting of the irecv or isend,
* and no status is available.
*/
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
__FILE__, line, err));
}
return err;
}
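
For reference, the synchronization this helper performs is, at the public MPI level, a zero-byte MPI_Sendrecv with the peer. A minimal sketch under that assumption (sync_with_peer, peer, and tag are illustrative names; the tuned code path goes through the PML instead):

    #include <mpi.h>

    /* Zero-byte two-peer synchronization sketched with the public MPI
     * API: no payload crosses the wire, only matching envelopes. */
    static int sync_with_peer(MPI_Comm comm, int peer, int tag)
    {
        return MPI_Sendrecv(NULL, 0, MPI_BYTE, peer, tag,
                            NULL, 0, MPI_BYTE, peer, tag,
                            comm, MPI_STATUS_IGNORE);
    }
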
/*
* Barrier is meant to be a synchronous operation, as some BTLs can mark
* a request as done before it is passed to the NIC, and progress might not be made
@@ -157,11 +212,9 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
if (rank >= adjsize) {
/* send message to lower ranked node */
remote = rank - adjsize;
err = ompi_coll_tuned_sendrecv_actual(NULL, 0, MPI_BYTE, remote,
MCA_COLL_BASE_TAG_BARRIER,
NULL, 0, MPI_BYTE, remote,
MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE);
err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
remote, MCA_COLL_BASE_TAG_BARRIER,
comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
} else if (rank < (size - adjsize)) {
@@ -184,11 +237,9 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
if (remote >= adjsize) continue;
/* post receive from the remote node */
err = ompi_coll_tuned_sendrecv_actual(NULL, 0, MPI_BYTE, remote,
MCA_COLL_BASE_TAG_BARRIER,
NULL, 0, MPI_BYTE, remote,
MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE);
err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
remote, MCA_COLL_BASE_TAG_BARRIER,
comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
}
}
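
For context, the pairing logic of the recursive-doubling exchange above can be condensed as follows; a sketch assuming the usual mask-doubling loop shape (only the helper call shown in the hunk is taken verbatim):

    /* Recursive-doubling rounds for the adjsize power-of-two core:
     * at round k each rank synchronizes with the rank differing in
     * bit k. Loop shape assumed; helper call as in the hunk above. */
    for (int mask = 1; mask < adjsize; mask <<= 1) {
        int remote = rank ^ mask;
        if (remote >= adjsize) continue;
        err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
                                            remote, MCA_COLL_BASE_TAG_BARRIER,
                                            comm);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
    }
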
@@ -235,11 +286,9 @@ int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm,
to = (rank + distance) % size;
/* send message to lower ranked node */
err = ompi_coll_tuned_sendrecv_actual(NULL, 0, MPI_BYTE, to,
MCA_COLL_BASE_TAG_BARRIER,
NULL, 0, MPI_BYTE, from,
MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE);
err = ompi_coll_tuned_sendrecv_zero(to, MCA_COLL_BASE_TAG_BARRIER,
from, MCA_COLL_BASE_TAG_BARRIER,
comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
}
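
The dissemination (Bruck) pattern above doubles the distance each round; a sketch of the whole loop, assuming the conventional 'from' computation (only 'to' and the helper call appear in the hunk):

    /* ceil(log2(size)) dissemination rounds: send 'distance' ahead on
     * the ring, receive from 'distance' behind. 'from' is assumed. */
    for (int distance = 1; distance < size; distance <<= 1) {
        int from = (rank + size - distance) % size;
        int to   = (rank + distance) % size;
        err = ompi_coll_tuned_sendrecv_zero(to, MCA_COLL_BASE_TAG_BARRIER,
                                            from, MCA_COLL_BASE_TAG_BARRIER,
                                            comm);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
    }
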
@@ -266,11 +315,9 @@ int ompi_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm,
"ompi_coll_tuned_barrier_intra_two_procs rank %d", remote));
remote = (remote + 1) & 0x1;
err = ompi_coll_tuned_sendrecv_actual(NULL, 0, MPI_BYTE, remote,
MCA_COLL_BASE_TAG_BARRIER,
NULL, 0, MPI_BYTE, remote,
MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE);
err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
remote, MCA_COLL_BASE_TAG_BARRIER,
comm);
return (err);
}

View file

@@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -29,69 +29,6 @@
#include "ompi/mca/pml/pml.h"
#include "coll_tuned_util.h"
int ompi_coll_tuned_sendrecv_actual( void* sendbuf, size_t scount,
ompi_datatype_t* sdatatype,
int dest, int stag,
void* recvbuf, size_t rcount,
ompi_datatype_t* rdatatype,
int source, int rtag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status )
{ /* post receive first, then send, then waitall... should be fast (I hope) */
int err, line = 0;
ompi_request_t* reqs[2];
ompi_status_public_t statuses[2];
/* post new irecv */
err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag,
comm, &reqs[0]));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
/* send data to children */
err = MCA_PML_CALL(isend( sendbuf, scount, sdatatype, dest, stag,
MCA_PML_BASE_SEND_STANDARD, comm, &reqs[1]));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
err = ompi_request_wait_all( 2, reqs, statuses );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
if (MPI_STATUS_IGNORE != status) {
*status = statuses[0];
}
return (MPI_SUCCESS);
error_handler:
/* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error
* code that we can propagate up the stack. Instead, look for the real
* error code from the MPI_ERROR in the status.
*/
if( MPI_ERR_IN_STATUS == err ) {
/* At least we know the error was detected during the wait_all */
int err_index = 0;
if( MPI_SUCCESS == statuses[0].MPI_ERROR ) {
err_index = 1;
}
if (MPI_STATUS_IGNORE != status) {
*status = statuses[err_index];
}
err = statuses[err_index].MPI_ERROR;
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred (req index %d)\n",
__FILE__, line, err, err_index));
} else {
/* Error discovered during the posting of the irecv or isend,
* and no status is available.
*/
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
__FILE__, line, err));
if (MPI_STATUS_IGNORE != status) {
status->MPI_ERROR = err;
}
}
return (err);
}
int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
ompi_datatype_t* sdatatype,
int dest, int stag,
@@ -133,10 +70,10 @@ int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
*status = statuses[0];
}
} else {
/* FIXME this is currently unsupported but unused */
assert (MPI_STATUS_IGNORE == status);
if( MPI_STATUS_IGNORE != status )
*status = ompi_status_empty;
}
return (MPI_SUCCESS);
error_handler:
@@ -146,16 +83,17 @@ int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
*/
if( MPI_ERR_IN_STATUS == err ) {
/* At least we know the error was detected during the wait_all */
int err_index = 0;
int err_index = 1;
if( MPI_SUCCESS == statuses[0].MPI_ERROR ) {
err_index = 1;
err_index = 0;
}
if (MPI_STATUS_IGNORE != status) {
*status = statuses[err_index];
}
err = statuses[err_index].MPI_ERROR;
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred (req index %d)\n",
__FILE__, line, err, err_index));
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred in the %s"
" stage of ompi_coll_tuned_sendrecv_nonzero_actual\n",
__FILE__, line, err, (0 == err_index ? "receive" : "send")));
} else {
/* Error discovered during the posting of the irecv or isend,
* and no status is available.
@@ -168,73 +106,4 @@ int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
}
return (err);
}
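
The MPI_ERR_IN_STATUS recovery used throughout these helpers is standard MPI semantics: a waitall-style call reports MPI_ERR_IN_STATUS and leaves the real per-request error code in each status. A self-contained sketch of the same pattern with the public API (waitall_real_error is an illustrative name):

    #include <mpi.h>

    /* Recover the actual error code after MPI_Waitall has reported
     * MPI_ERR_IN_STATUS: scan the statuses for the failing request. */
    static int waitall_real_error(int n, MPI_Request reqs[], MPI_Status stats[])
    {
        int rc = MPI_Waitall(n, reqs, stats);
        if (MPI_ERR_IN_STATUS == rc) {
            for (int i = 0; i < n; i++) {
                if (MPI_SUCCESS != stats[i].MPI_ERROR) {
                    rc = stats[i].MPI_ERROR;  /* first failing request */
                    break;
                }
            }
        }
        return rc;
    }
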
/*
* localcompleted version that makes sure the send has completed locally
* Currently this is a sync call, but will change to locally completed
* version when available
*/
int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, size_t scount,
ompi_datatype_t* sdatatype,
int dest, int stag,
void* recvbuf, size_t rcount,
ompi_datatype_t* rdatatype,
int source, int rtag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status )
{ /* post receive first, then [local] sync send, then wait... should be fast (I hope) */
int err, line = 0;
ompi_request_t* req[2];
ompi_status_public_t statuses[2];
/* post new irecv */
err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag,
comm, &(req[0])));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
/* send data to children */
err = MCA_PML_CALL(isend( sendbuf, scount, sdatatype, dest, stag,
MCA_PML_BASE_SEND_SYNCHRONOUS, comm, &(req[1])));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
err = ompi_request_wait_all( 2, req, statuses );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
if (MPI_STATUS_IGNORE != status) {
*status = statuses[0];
}
return (MPI_SUCCESS);
error_handler:
/* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error
* code that we can propagate up the stack. Instead, look for the real
* error code from the MPI_ERROR in the status.
*/
if( MPI_ERR_IN_STATUS == err ) {
/* At least we know the error was detected during the wait_all */
int err_index = 0;
if( MPI_SUCCESS == statuses[0].MPI_ERROR ) {
err_index = 1;
}
if (MPI_STATUS_IGNORE != status) {
*status = statuses[err_index];
}
err = statuses[err_index].MPI_ERROR;
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred (req index %d)\n",
__FILE__,line,err, err_index));
} else {
/* Error discovered during the posting of the irecv or isend,
* and no status is available.
*/
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
__FILE__, line, err));
if (MPI_STATUS_IGNORE != status) {
status->MPI_ERROR = err;
}
}
return (err);
}

View file

@@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
@@ -31,16 +31,11 @@
BEGIN_C_DECLS
/* prototypes */
int ompi_coll_tuned_sendrecv_actual( void* sendbuf, size_t scount,
ompi_datatype_t* sdatatype,
int dest, int stag,
void* recvbuf, size_t rcount,
ompi_datatype_t* rdatatype,
int source, int rtag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status );
/**
* An MPI-like function doing a send and a receive simultaneously.
* If one of the communications results in a zero-byte message, that
* communication is skipped, and no message will cross to the peer.
*/
int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
ompi_datatype_t* sdatatype,
int dest, int stag,
@@ -51,8 +46,12 @@ int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
ompi_status_public_t* status );
/* inline functions */
/**
* Similar to the function above, this implementation of send-receive
* does not generate communications for zero-byte messages. Thus, it is
* improper to use in the context of some algorithms for collective
* communications.
*/
static inline int
ompi_coll_tuned_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdatatype,
int dest, int stag,
@@ -61,7 +60,7 @@ ompi_coll_tuned_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdataty
struct ompi_communicator_t* comm,
ompi_status_public_t* status, int myid )
{
if ((dest == myid) && (source == myid)) {
if ((dest == source) && (source == myid)) {
return (int) ompi_datatype_sndrcv(sendbuf, (int32_t) scount, sdatatype,
recvbuf, (int32_t) rcount, rdatatype);
}
@@ -71,65 +70,6 @@ ompi_coll_tuned_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdataty
source, rtag, comm, status);
}
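
With the corrected test, a rank that pairs with itself never touches the PML: the exchange degenerates to a typed local copy through ompi_datatype_sndrcv. A hypothetical call hitting that path (sbuf, rbuf, count, dtype, and tag are illustrative):

    /* Hypothetical self-exchange: dest == source == myid, so this is
     * served by ompi_datatype_sndrcv with no message traffic at all. */
    err = ompi_coll_tuned_sendrecv(sbuf, count, dtype, rank, tag,
                                   rbuf, count, dtype, rank, tag,
                                   comm, MPI_STATUS_IGNORE, rank);
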
int
ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, size_t scount,
ompi_datatype_t* sdatatype,
int dest, int stag,
void* recvbuf, size_t rcount,
ompi_datatype_t* rdatatype,
int source, int rtag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status );
/* inline functions */
static inline int
ompi_coll_tuned_sendrecv_localcompleted( void* sendbuf, size_t scount,
ompi_datatype_t* sdatatype,
int dest, int stag,
void* recvbuf, size_t rcount,
ompi_datatype_t* rdatatype,
int source, int rtag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status, int myid )
{
if ((dest == myid) && (source == myid)) {
return (int) ompi_datatype_sndrcv(sendbuf, (int32_t) scount, sdatatype,
recvbuf, (int32_t) rcount, rdatatype);
}
return ompi_coll_tuned_sendrecv_actual_localcompleted (sendbuf, scount,
sdatatype, dest,
stag,
recvbuf, rcount,
rdatatype,
source, rtag, comm,
status);
}
/* inline functions */
static inline int
ompi_coll_tuned_isendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdtype,
int dest, int stag, ompi_request_t** sreq,
void* recvbuf, size_t rcount, ompi_datatype_t* rdtype,
int source, int rtag, ompi_request_t** rreq,
struct ompi_communicator_t* comm ) {
int ret, line;
ret = MCA_PML_CALL(irecv(recvbuf, rcount, rdtype, source, rtag, comm, rreq));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_handler; }
ret = MCA_PML_CALL(isend(sendbuf, scount, sdtype, dest, stag,
MCA_PML_BASE_SEND_STANDARD, comm, sreq));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_handler; }
return MPI_SUCCESS;
error_handler:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%d\tError occurred %d\n",
__FILE__, line, ret));
return ret;
}
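
Callers of ompi_coll_tuned_isendrecv are expected to complete both requests themselves; a hedged usage sketch (buffers, peers, and tag are illustrative):

    /* Post the combined nonblocking exchange, then wait on both
     * requests; receive first in the array, matching the post order. */
    ompi_request_t *sreq, *rreq;
    err = ompi_coll_tuned_isendrecv(sbuf, scount, sdtype, dest, tag, &sreq,
                                    rbuf, rcount, rdtype, source, tag, &rreq,
                                    comm);
    if (MPI_SUCCESS == err) {
        ompi_request_t *reqs[2] = { rreq, sreq };
        err = ompi_request_wait_all(2, reqs, MPI_STATUSES_IGNORE);
    }
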
END_C_DECLS
#endif /* MCA_COLL_TUNED_UTIL_EXPORT_H */