
This is closely related to Platform-MPI's old -prot feature. The long-format of the tables it prints could look like this: > Host 0 [myhost001] ranks 0 - 1 > Host 1 [myhost002] ranks 2 - 3 > Host 2 [myhost003] ranks 4 > Host 3 [myhost004] ranks 5 > Host 4 [myhost005] ranks 6 > Host 5 [myhost006] ranks 7 > Host 6 [myhost007] ranks 8 > Host 7 [myhost008] ranks 9 > Host 8 [myhost009] ranks 10 > > host | 0 1 2 3 4 5 6 7 8 > ======|============================================== > 0 : sm tcp tcp tcp tcp tcp tcp tcp tcp > 1 : tcp sm tcp tcp tcp tcp tcp tcp tcp > 2 : tcp tcp self tcp tcp tcp tcp tcp tcp > 3 : tcp tcp tcp self tcp tcp tcp tcp tcp > 4 : tcp tcp tcp tcp self tcp tcp tcp tcp > 5 : tcp tcp tcp tcp tcp self tcp tcp tcp > 6 : tcp tcp tcp tcp tcp tcp self tcp tcp > 7 : tcp tcp tcp tcp tcp tcp tcp self tcp > 8 : tcp tcp tcp tcp tcp tcp tcp tcp self > > Connection summary: > on-host: all connections are sm or self > off-host: all connections are tcp In this example hosts 0 and 1 had multiple ranks so "sm" was more meaningful than "self" to identify how the ranks on the host are talking to each other. While host 2..8 were one rank per host so "self" was more meaningful as their btl. 
Above a certain number of hosts (12 by default) the above table gets too big so we shrink to a more abbreviated looking table that has the same data: > host | 0 1 2 3 4 8 > ======|==================== > 0 : A C C C C C C C C > 1 : C A C C C C C C C > 2 : C C B C C C C C C > 3 : C C C B C C C C C > 4 : C C C C B C C C C > 5 : C C C C C B C C C > 6 : C C C C C C B C C > 7 : C C C C C C C B C > 8 : C C C C C C C C B > key: A == sm > key: B == self > key: C == tcp Then above 36 hosts we stop printing the 2d table entirely and just print the summary: > Connection summary: > on-host: all connections are sm or self > off-host: all connections are tcp The options to control it are -mca comm_method 1 : print the above table at the end of MPI_Init -mca comm_method 2 : print the above table at the beginning of MPI_Finalize -mca comm_method_max <n> : number of hosts <n> for which to print a full size 2d -mca comm_method_brief 1 : only print summary output, no 2d table -mca comm_method_fakefile <filename> : for debugging only * printing at init vs finalize: The most important difference between these two is that when printing the table during MPI_Init(), we send extra messages to make sure all hosts are connected to each other. So the table ends up working against the idea of on-demand connections (although it's only forcing the n^2 connections in the number of hosts, not the total ranks). If printing at MPI_Finalize() we don't create any connections that aren't already connected, so the table is more likely to have "n/a" entries if some hosts never connected to each other. 
* how many hosts <n> for which to print a full size 2d table The option -mca comm_method_max <n> can be used to specify a number of hosts <n> (default 12) that controls at what host-count the unabbreviated / abbreviated 2d tables get printed: 1 - n : full size 2d table n+1 - 3n : shortened 2d table 3n+1 - inf : summary only, no 2d table * brief The option -mca comm_method_brief 1 can be used to skip the printing of the 2d table and only show the short summary * fakefile This is a debugging option that allows easier testing of all the printout routines by letting all the detected communication methods between the hosts be overridden by fake data from a file. The source of the information used in the table is the .mca_component_name In the case of BTLs, the module always had a .btl_component linking back to the component. The vars mca_pml_base_selected_component and ompi_mtl_base_selected_component offer similar functionality for pml/mtl. So with the ability to identify the component, we can then access the component name with code like this mca_pml_base_selected_component.pmlm_version.mca_component_name See the three lookup_{pml,mtl,btl}_name() functions in hook_comm_method_fns.c, and their use in comm_method() to parse the strings and produce an integer to represent the connection type being used. Signed-off-by: Mark Allen <markalle@us.ibm.com>
845 строки
26 KiB
C
845 строки
26 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2006 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
|
|
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* Copyright (c) 2018 IBM Corporation. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "opal/mca/btl/base/base.h"
|
|
#include "ompi/mca/pml/pml.h"
|
|
#include "ompi/mca/pml/crcpw/pml_crcpw.h"
|
|
#include "ompi/mca/bml/base/base.h"
|
|
|
|
#include "opal/class/opal_free_list.h"
|
|
|
|
mca_pml_crcpw_module_t mca_pml_crcpw_module = {
|
|
{
|
|
mca_pml_crcpw_add_procs,
|
|
mca_pml_crcpw_del_procs,
|
|
mca_pml_crcpw_enable,
|
|
mca_pml_crcpw_progress,
|
|
mca_pml_crcpw_add_comm,
|
|
mca_pml_crcpw_del_comm,
|
|
mca_pml_crcpw_irecv_init,
|
|
mca_pml_crcpw_irecv,
|
|
mca_pml_crcpw_recv,
|
|
mca_pml_crcpw_isend_init,
|
|
mca_pml_crcpw_isend,
|
|
mca_pml_crcpw_send,
|
|
mca_pml_crcpw_iprobe,
|
|
mca_pml_crcpw_probe,
|
|
mca_pml_crcpw_start,
|
|
mca_pml_crcpw_improbe,
|
|
mca_pml_crcpw_mprobe,
|
|
mca_pml_crcpw_imrecv,
|
|
mca_pml_crcpw_mrecv,
|
|
mca_pml_crcpw_dump,
|
|
mca_pml_crcpw_ft_event,
|
|
|
|
32768,
|
|
INT_MAX,
|
|
0 /* flags */
|
|
}
|
|
};
|
|
|
|
/*
 * Fetch a CRCP PML state object from pml_state_list into 'state_ptr'.
 *
 * The free list is only consulted while pml_crcpw_is_finalized is false.
 * Once the wrapper has been finalized 'state_ptr' is left untouched, so
 * callers should initialise it to NULL beforehand and be prepared for it
 * to still be NULL afterwards.
 *
 * The definition intentionally does NOT end in a semicolon: the caller
 * supplies it, which keeps the do/while(0) wrapper usable as a single
 * statement (e.g. in an unbraced if/else).
 */
#define PML_CRCP_STATE_ALLOC(state_ptr)                          \
    do {                                                         \
        if( !pml_crcpw_is_finalized ) {                          \
            (state_ptr) = (ompi_crcp_base_pml_state_t*)          \
                opal_free_list_wait (&pml_state_list);           \
        }                                                        \
    } while(0)
|
|
|
|
/*
 * Hand a CRCP PML state object back to pml_state_list.
 *
 * Nothing is returned once pml_crcpw_is_finalized is set; in that case the
 * macro is a no-op.
 *
 * The definition intentionally does NOT end in a semicolon: the caller
 * supplies it, which keeps the do/while(0) wrapper usable as a single
 * statement (e.g. in an unbraced if/else).
 */
#define PML_CRCP_STATE_RETURN(state_ptr)                         \
    do {                                                         \
        if( !pml_crcpw_is_finalized ) {                          \
            opal_free_list_return (&pml_state_list,              \
                (opal_free_list_item_t*)(state_ptr));            \
        }                                                        \
    } while(0)
|
|
|
|
int mca_pml_crcpw_enable(bool enable)
|
|
{
|
|
int ret;
|
|
ompi_crcp_base_pml_state_t * pml_state = NULL;
|
|
|
|
if( OPAL_UNLIKELY(NULL == ompi_crcp.pml_enable) ) {
|
|
return mca_pml_crcpw_module.wrapped_pml_module.pml_enable(enable);
|
|
}
|
|
|
|
PML_CRCP_STATE_ALLOC(pml_state);
|
|
|
|
pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
|
|
pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module);
|
|
|
|
pml_state->state = OMPI_CRCP_PML_PRE;
|
|
pml_state = ompi_crcp.pml_enable(enable, pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
if( OMPI_CRCP_PML_SKIP != pml_state->state) {
|
|
if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_enable(enable) ) ) {
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
pml_state->state = OMPI_CRCP_PML_POST;
|
|
pml_state = ompi_crcp.pml_enable(enable, pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/**
 * Register a communicator, bracketing the wrapped PML's pml_add_comm with
 * the ompi_crcp.pml_add_comm hook in its PRE and POST states.
 *
 * If ompi_crcp provides no pml_add_comm hook the call is forwarded
 * directly to the wrapped PML.
 *
 * @param comm  communicator handed to the hook and the wrapped PML
 * @return OMPI_SUCCESS, the error_code reported by a hook invocation, or
 *         the wrapped PML's non-success return value
 *
 * NOTE(review): the state pointer is dereferenced without a NULL check,
 * yet PML_CRCP_STATE_ALLOC leaves it NULL once pml_crcpw_is_finalized is
 * set -- confirm this cannot be reached after finalize.
 */
int mca_pml_crcpw_add_comm(ompi_communicator_t* comm)
{
    int rc;
    ompi_crcp_base_pml_state_t *crcp_state = NULL;

    if (OPAL_UNLIKELY(NULL == ompi_crcp.pml_add_comm)) {
        return mca_pml_crcpw_module.wrapped_pml_module.pml_add_comm(comm);
    }

    PML_CRCP_STATE_ALLOC(crcp_state);
    crcp_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
    crcp_state->wrapped_pml_module    = &(mca_pml_crcpw_module.wrapped_pml_module);

    /* PRE hook */
    crcp_state->state = OMPI_CRCP_PML_PRE;
    crcp_state = ompi_crcp.pml_add_comm(comm, crcp_state);
    rc = crcp_state->error_code;
    if (OMPI_SUCCESS != rc) {
        goto release;
    }

    /* Wrapped PML, unless the hook asked for it to be skipped */
    if (OMPI_CRCP_PML_SKIP != crcp_state->state) {
        rc = mca_pml_crcpw_module.wrapped_pml_module.pml_add_comm(comm);
        if (OMPI_SUCCESS != rc) {
            goto release;
        }
    }

    /* POST hook; its error_code is the final result */
    crcp_state->state = OMPI_CRCP_PML_POST;
    crcp_state = ompi_crcp.pml_add_comm(comm, crcp_state);
    rc = crcp_state->error_code;

release:
    PML_CRCP_STATE_RETURN(crcp_state);
    return rc;
}
|
|
|
|
/**
 * Remove a communicator, bracketing the wrapped PML's pml_del_comm with
 * the ompi_crcp.pml_del_comm hook in its PRE and POST states.
 *
 * The call is forwarded directly to the wrapped PML in two situations:
 * when ompi_crcp provides no pml_del_comm hook, and when no state object
 * could be obtained (PML_CRCP_STATE_ALLOC leaves the pointer NULL once
 * pml_crcpw_is_finalized is set).
 *
 * @param comm  communicator handed to the hook and the wrapped PML
 * @return OMPI_SUCCESS, the error_code reported by a hook invocation, or
 *         the wrapped PML's return value
 */
int mca_pml_crcpw_del_comm(ompi_communicator_t* comm)
{
    int rc;
    ompi_crcp_base_pml_state_t *crcp_state = NULL;

    if (OPAL_UNLIKELY(NULL == ompi_crcp.pml_del_comm)) {
        return mca_pml_crcpw_module.wrapped_pml_module.pml_del_comm(comm);
    }

    PML_CRCP_STATE_ALLOC(crcp_state);
    if (NULL == crcp_state) {
        /* No state available: bypass the hooks entirely */
        return mca_pml_crcpw_module.wrapped_pml_module.pml_del_comm(comm);
    }

    crcp_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
    crcp_state->wrapped_pml_module    = &(mca_pml_crcpw_module.wrapped_pml_module);

    /* PRE hook */
    crcp_state->state = OMPI_CRCP_PML_PRE;
    crcp_state = ompi_crcp.pml_del_comm(comm, crcp_state);
    rc = crcp_state->error_code;
    if (OMPI_SUCCESS != rc) {
        goto release;
    }

    /* Wrapped PML, unless the hook asked for it to be skipped */
    if (OMPI_CRCP_PML_SKIP != crcp_state->state) {
        rc = mca_pml_crcpw_module.wrapped_pml_module.pml_del_comm(comm);
        if (OMPI_SUCCESS != rc) {
            goto release;
        }
    }

    /* POST hook; its error_code is the final result */
    crcp_state->state = OMPI_CRCP_PML_POST;
    crcp_state = ompi_crcp.pml_del_comm(comm, crcp_state);
    rc = crcp_state->error_code;

release:
    PML_CRCP_STATE_RETURN(crcp_state);
    return rc;
}
|
|
|
|
/**
 * Add processes, bracketing the wrapped PML's pml_add_procs with the
 * ompi_crcp.pml_add_procs hook in its PRE and POST states.
 *
 * @param procs   process array handed to the hook and the wrapped PML
 * @param nprocs  element count handed along with procs
 * @return OMPI_SUCCESS, the error_code reported by a hook invocation, or
 *         the wrapped PML's non-success return value
 *
 * NOTE(review): the state pointer is dereferenced without a NULL check,
 * yet PML_CRCP_STATE_ALLOC leaves it NULL once pml_crcpw_is_finalized is
 * set -- confirm this cannot be reached after finalize.
 */
int mca_pml_crcpw_add_procs(ompi_proc_t** procs, size_t nprocs)
{
    int rc;
    ompi_crcp_base_pml_state_t *crcp_state = NULL;

    PML_CRCP_STATE_ALLOC(crcp_state);
    crcp_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
    crcp_state->wrapped_pml_module    = &(mca_pml_crcpw_module.wrapped_pml_module);

    /* PRE hook */
    crcp_state->state = OMPI_CRCP_PML_PRE;
    crcp_state = ompi_crcp.pml_add_procs(procs, nprocs, crcp_state);
    rc = crcp_state->error_code;
    if (OMPI_SUCCESS != rc) {
        goto release;
    }

    /* Wrapped PML, unless the hook asked for it to be skipped */
    if (OMPI_CRCP_PML_SKIP != crcp_state->state) {
        rc = mca_pml_crcpw_module.wrapped_pml_module.pml_add_procs(procs, nprocs);
        if (OMPI_SUCCESS != rc) {
            goto release;
        }
    }

    /* POST hook; its error_code is the final result */
    crcp_state->state = OMPI_CRCP_PML_POST;
    crcp_state = ompi_crcp.pml_add_procs(procs, nprocs, crcp_state);
    rc = crcp_state->error_code;

release:
    PML_CRCP_STATE_RETURN(crcp_state);
    return rc;
}
|
|
|
|
/**
 * Remove processes, bracketing the wrapped PML's pml_del_procs with the
 * ompi_crcp.pml_del_procs hook in its PRE and POST states.
 *
 * @param procs   process array handed to the hook and the wrapped PML
 * @param nprocs  element count handed along with procs
 * @return OMPI_SUCCESS, the error_code reported by a hook invocation, or
 *         the wrapped PML's non-success return value
 *
 * NOTE(review): the state pointer is dereferenced without a NULL check,
 * yet PML_CRCP_STATE_ALLOC leaves it NULL once pml_crcpw_is_finalized is
 * set -- confirm this cannot be reached after finalize.
 */
int mca_pml_crcpw_del_procs(ompi_proc_t** procs, size_t nprocs)
{
    int rc;
    ompi_crcp_base_pml_state_t *crcp_state = NULL;

    PML_CRCP_STATE_ALLOC(crcp_state);
    crcp_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
    crcp_state->wrapped_pml_module    = &(mca_pml_crcpw_module.wrapped_pml_module);

    /* PRE hook */
    crcp_state->state = OMPI_CRCP_PML_PRE;
    crcp_state = ompi_crcp.pml_del_procs(procs, nprocs, crcp_state);
    rc = crcp_state->error_code;
    if (OMPI_SUCCESS != rc) {
        goto release;
    }

    /* Wrapped PML, unless the hook asked for it to be skipped */
    if (OMPI_CRCP_PML_SKIP != crcp_state->state) {
        rc = mca_pml_crcpw_module.wrapped_pml_module.pml_del_procs(procs, nprocs);
        if (OMPI_SUCCESS != rc) {
            goto release;
        }
    }

    /* POST hook; its error_code is the final result */
    crcp_state->state = OMPI_CRCP_PML_POST;
    crcp_state = ompi_crcp.pml_del_procs(procs, nprocs, crcp_state);
    rc = crcp_state->error_code;

release:
    PML_CRCP_STATE_RETURN(crcp_state);
    return rc;
}
|
|
|
|
/**
 * Non-blocking probe, bracketing the wrapped PML's pml_iprobe with the
 * ompi_crcp.pml_iprobe hook in its PRE and POST states.
 *
 * If the PRE hook leaves the state at OMPI_CRCP_PML_DONE, neither the
 * wrapped PML nor the POST hook is invoked and OMPI_SUCCESS is returned.
 *
 * @param dst      forwarded unchanged
 * @param tag      forwarded unchanged
 * @param comm     forwarded unchanged
 * @param matched  forwarded unchanged
 * @param status   forwarded unchanged
 * @return OMPI_SUCCESS, the error_code reported by a hook invocation, or
 *         the wrapped PML's non-success return value
 *
 * NOTE(review): the state pointer is dereferenced without a NULL check,
 * yet PML_CRCP_STATE_ALLOC leaves it NULL once pml_crcpw_is_finalized is
 * set -- confirm this cannot be reached after finalize.
 */
int mca_pml_crcpw_iprobe(int dst, int tag, struct ompi_communicator_t* comm, int *matched, ompi_status_public_t* status )
{
    int rc;
    ompi_crcp_base_pml_state_t *crcp_state = NULL;

    PML_CRCP_STATE_ALLOC(crcp_state);
    crcp_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
    crcp_state->wrapped_pml_module    = &(mca_pml_crcpw_module.wrapped_pml_module);

    /* PRE hook */
    crcp_state->state = OMPI_CRCP_PML_PRE;
    crcp_state = ompi_crcp.pml_iprobe(dst, tag, comm, matched, status, crcp_state);
    rc = crcp_state->error_code;
    if (OMPI_SUCCESS != rc) {
        goto release;
    }

    /* Hook already completed the operation: rc is OMPI_SUCCESS here */
    if (OMPI_CRCP_PML_DONE == crcp_state->state) {
        goto release;
    }

    /* Wrapped PML, unless the hook asked for it to be skipped */
    if (OMPI_CRCP_PML_SKIP != crcp_state->state) {
        rc = mca_pml_crcpw_module.wrapped_pml_module.pml_iprobe(dst, tag, comm, matched, status);
        if (OMPI_SUCCESS != rc) {
            goto release;
        }
    }

    /* POST hook; its error_code is the final result */
    crcp_state->state = OMPI_CRCP_PML_POST;
    crcp_state = ompi_crcp.pml_iprobe(dst, tag, comm, matched, status, crcp_state);
    rc = crcp_state->error_code;

release:
    PML_CRCP_STATE_RETURN(crcp_state);
    return rc;
}
|
|
|
|
/**
 * Blocking probe, bracketing the wrapped PML's pml_probe with the
 * ompi_crcp.pml_probe hook in its PRE and POST states.
 *
 * If the PRE hook leaves the state at OMPI_CRCP_PML_DONE, neither the
 * wrapped PML nor the POST hook is invoked and OMPI_SUCCESS is returned.
 *
 * @param dst     forwarded unchanged
 * @param tag     forwarded unchanged
 * @param comm    forwarded unchanged
 * @param status  forwarded unchanged
 * @return OMPI_SUCCESS, the error_code reported by a hook invocation, or
 *         the wrapped PML's non-success return value
 *
 * NOTE(review): the state pointer is dereferenced without a NULL check,
 * yet PML_CRCP_STATE_ALLOC leaves it NULL once pml_crcpw_is_finalized is
 * set -- confirm this cannot be reached after finalize.
 */
int mca_pml_crcpw_probe( int dst, int tag, struct ompi_communicator_t* comm, ompi_status_public_t* status )
{
    int rc;
    ompi_crcp_base_pml_state_t *crcp_state = NULL;

    PML_CRCP_STATE_ALLOC(crcp_state);
    crcp_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
    crcp_state->wrapped_pml_module    = &(mca_pml_crcpw_module.wrapped_pml_module);

    /* PRE hook */
    crcp_state->state = OMPI_CRCP_PML_PRE;
    crcp_state = ompi_crcp.pml_probe(dst, tag, comm, status, crcp_state);
    rc = crcp_state->error_code;
    if (OMPI_SUCCESS != rc) {
        goto release;
    }

    /* Hook already completed the operation: rc is OMPI_SUCCESS here */
    if (OMPI_CRCP_PML_DONE == crcp_state->state) {
        goto release;
    }

    /* Wrapped PML, unless the hook asked for it to be skipped */
    if (OMPI_CRCP_PML_SKIP != crcp_state->state) {
        rc = mca_pml_crcpw_module.wrapped_pml_module.pml_probe(dst, tag, comm, status);
        if (OMPI_SUCCESS != rc) {
            goto release;
        }
    }

    /* POST hook; its error_code is the final result */
    crcp_state->state = OMPI_CRCP_PML_POST;
    crcp_state = ompi_crcp.pml_probe(dst, tag, comm, status, crcp_state);
    rc = crcp_state->error_code;

release:
    PML_CRCP_STATE_RETURN(crcp_state);
    return rc;
}
|
|
|
|
int mca_pml_crcpw_isend_init( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag,
|
|
mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request )
|
|
{
|
|
int ret;
|
|
ompi_crcp_base_pml_state_t * pml_state = NULL;
|
|
|
|
PML_CRCP_STATE_ALLOC(pml_state);
|
|
|
|
pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
|
|
pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module);
|
|
|
|
pml_state->state = OMPI_CRCP_PML_PRE;
|
|
pml_state = ompi_crcp.pml_isend_init(buf, count, datatype, dst, tag, mode, comm, request, pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
if( OMPI_CRCP_PML_SKIP != pml_state->state) {
|
|
if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_isend_init(buf, count, datatype, dst, tag, mode, comm, request) ) ) {
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
pml_state->state = OMPI_CRCP_PML_POST;
|
|
pml_state = ompi_crcp.pml_isend_init(buf, count, datatype, dst, tag, mode, comm, request, pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
int mca_pml_crcpw_isend( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag,
|
|
mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request )
|
|
{
|
|
int ret;
|
|
ompi_crcp_base_pml_state_t * pml_state = NULL;
|
|
|
|
PML_CRCP_STATE_ALLOC(pml_state);
|
|
|
|
pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
|
|
pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module);
|
|
|
|
pml_state->state = OMPI_CRCP_PML_PRE;
|
|
pml_state = ompi_crcp.pml_isend(buf, count, datatype, dst, tag, mode, comm, request, pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
if( OMPI_CRCP_PML_SKIP != pml_state->state) {
|
|
if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_isend(buf, count, datatype, dst, tag, mode, comm, request) ) ) {
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
pml_state->state = OMPI_CRCP_PML_POST;
|
|
pml_state = ompi_crcp.pml_isend(buf, count, datatype, dst, tag, mode, comm, request, pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
|
|
opal_cr_stall_check = false;
|
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
int mca_pml_crcpw_send( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag,
|
|
mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm )
|
|
{
|
|
int ret;
|
|
ompi_crcp_base_pml_state_t * pml_state = NULL;
|
|
|
|
PML_CRCP_STATE_ALLOC(pml_state);
|
|
|
|
pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
|
|
pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module);
|
|
|
|
pml_state->state = OMPI_CRCP_PML_PRE;
|
|
pml_state = ompi_crcp.pml_send(buf, count, datatype, dst, tag, mode, comm, pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
if( OMPI_CRCP_PML_SKIP != pml_state->state) {
|
|
if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_send(buf, count, datatype, dst, tag, mode, comm) ) ) {
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
pml_state->state = OMPI_CRCP_PML_POST;
|
|
pml_state = ompi_crcp.pml_send(buf, count, datatype, dst, tag, mode, comm, pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
|
|
opal_cr_stall_check = false;
|
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/**
 * Set up a receive request, bracketing the wrapped PML's pml_irecv_init
 * with the ompi_crcp.pml_irecv_init hook in its PRE and POST states.
 *
 * All arguments are forwarded unchanged to both the hook and the wrapped
 * PML.
 *
 * @return OMPI_SUCCESS, the error_code reported by a hook invocation, or
 *         the wrapped PML's non-success return value
 *
 * NOTE(review): the state pointer is dereferenced without a NULL check,
 * yet PML_CRCP_STATE_ALLOC leaves it NULL once pml_crcpw_is_finalized is
 * set -- confirm this cannot be reached after finalize.
 */
int mca_pml_crcpw_irecv_init( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag,
                              struct ompi_communicator_t* comm, struct ompi_request_t **request)
{
    int rc;
    ompi_crcp_base_pml_state_t *crcp_state = NULL;

    PML_CRCP_STATE_ALLOC(crcp_state);
    crcp_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
    crcp_state->wrapped_pml_module    = &(mca_pml_crcpw_module.wrapped_pml_module);

    /* PRE hook */
    crcp_state->state = OMPI_CRCP_PML_PRE;
    crcp_state = ompi_crcp.pml_irecv_init(buf, count, datatype, src, tag, comm, request, crcp_state);
    rc = crcp_state->error_code;
    if (OMPI_SUCCESS != rc) {
        goto release;
    }

    /* Wrapped PML, unless the hook asked for it to be skipped */
    if (OMPI_CRCP_PML_SKIP != crcp_state->state) {
        rc = mca_pml_crcpw_module.wrapped_pml_module.pml_irecv_init(buf, count, datatype, src, tag, comm, request);
        if (OMPI_SUCCESS != rc) {
            goto release;
        }
    }

    /* POST hook; its error_code is the final result */
    crcp_state->state = OMPI_CRCP_PML_POST;
    crcp_state = ompi_crcp.pml_irecv_init(buf, count, datatype, src, tag, comm, request, crcp_state);
    rc = crcp_state->error_code;

release:
    PML_CRCP_STATE_RETURN(crcp_state);
    return rc;
}
|
|
|
|
/**
 * Non-blocking receive, bracketing the wrapped PML's pml_irecv with the
 * ompi_crcp.pml_irecv hook in its PRE and POST states.
 *
 * All arguments are forwarded unchanged to both the hook and the wrapped
 * PML.  If the PRE hook leaves the state at OMPI_CRCP_PML_DONE, neither
 * the wrapped PML nor the POST hook is invoked and OMPI_SUCCESS is
 * returned.
 *
 * @return OMPI_SUCCESS, the error_code reported by a hook invocation, or
 *         the wrapped PML's non-success return value
 *
 * NOTE(review): the state pointer is dereferenced without a NULL check,
 * yet PML_CRCP_STATE_ALLOC leaves it NULL once pml_crcpw_is_finalized is
 * set -- confirm this cannot be reached after finalize.
 */
int mca_pml_crcpw_irecv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag,
                         struct ompi_communicator_t* comm, struct ompi_request_t **request )
{
    int rc;
    ompi_crcp_base_pml_state_t *crcp_state = NULL;

    PML_CRCP_STATE_ALLOC(crcp_state);
    crcp_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
    crcp_state->wrapped_pml_module    = &(mca_pml_crcpw_module.wrapped_pml_module);

    /* PRE hook */
    crcp_state->state = OMPI_CRCP_PML_PRE;
    crcp_state = ompi_crcp.pml_irecv(buf, count, datatype, src, tag, comm, request, crcp_state);
    rc = crcp_state->error_code;
    if (OMPI_SUCCESS != rc) {
        goto release;
    }

    /* Hook already completed the operation: rc is OMPI_SUCCESS here */
    if (OMPI_CRCP_PML_DONE == crcp_state->state) {
        goto release;
    }

    /* Wrapped PML, unless the hook asked for it to be skipped */
    if (OMPI_CRCP_PML_SKIP != crcp_state->state) {
        rc = mca_pml_crcpw_module.wrapped_pml_module.pml_irecv(buf, count, datatype, src, tag, comm, request);
        if (OMPI_SUCCESS != rc) {
            goto release;
        }
    }

    /* POST hook; its error_code is the final result */
    crcp_state->state = OMPI_CRCP_PML_POST;
    crcp_state = ompi_crcp.pml_irecv(buf, count, datatype, src, tag, comm, request, crcp_state);
    rc = crcp_state->error_code;

release:
    PML_CRCP_STATE_RETURN(crcp_state);
    return rc;
}
|
|
|
|
/**
 * Blocking receive, bracketing the wrapped PML's pml_recv with the
 * ompi_crcp.pml_recv hook in its PRE and POST states.
 *
 * The hook and the wrapped PML are always handed a non-NULL status: when
 * the caller passes NULL a function-local scratch status is substituted.
 * Being a stack object it needs no release on any exit path.
 *
 * If the PRE hook leaves the state at OMPI_CRCP_PML_DONE, the wrapped PML
 * and the POST hook are not invoked.  Only on a successful path is
 * opal_cr_stall_check cleared and OPAL_CR_TEST_CHECKPOINT_READY() invoked;
 * failure paths return without touching either.
 *
 * @param buf, count, datatype, src, tag, comm  forwarded unchanged
 * @param given_status  caller's status object, or NULL if not wanted
 * @return OMPI_SUCCESS, the error_code reported by a hook invocation, or
 *         the wrapped PML's non-success return value
 *
 * NOTE(review): the state pointer is dereferenced without a NULL check,
 * yet PML_CRCP_STATE_ALLOC leaves it NULL once pml_crcpw_is_finalized is
 * set -- confirm this cannot be reached after finalize.
 */
int mca_pml_crcpw_recv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag,
                        struct ompi_communicator_t* comm, ompi_status_public_t* given_status)
{
    int rc = OMPI_SUCCESS;
    ompi_status_public_t scratch_status;
    ompi_status_public_t *status = (NULL != given_status) ? given_status : &scratch_status;
    ompi_crcp_base_pml_state_t *pml_state = NULL;

    PML_CRCP_STATE_ALLOC(pml_state);

    pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
    pml_state->wrapped_pml_module    = &(mca_pml_crcpw_module.wrapped_pml_module);

    /* PRE hook */
    pml_state->state = OMPI_CRCP_PML_PRE;
    pml_state = ompi_crcp.pml_recv(buf, count, datatype, src, tag, comm, status, pml_state);
    rc = pml_state->error_code;
    if( OMPI_SUCCESS != rc ) {
        goto FAILED;
    }

    if( OMPI_CRCP_PML_DONE != pml_state->state ) {
        /* Wrapped PML, unless the hook asked for it to be skipped */
        if( OMPI_CRCP_PML_SKIP != pml_state->state ) {
            rc = mca_pml_crcpw_module.wrapped_pml_module.pml_recv(buf, count, datatype, src, tag, comm, status);
            if( OMPI_SUCCESS != rc ) {
                /* Report the wrapped PML's own failure code to the caller */
                goto FAILED;
            }
        }

        /* POST hook */
        pml_state->state = OMPI_CRCP_PML_POST;
        pml_state = ompi_crcp.pml_recv(buf, count, datatype, src, tag, comm, status, pml_state);
        rc = pml_state->error_code;
        if( OMPI_SUCCESS != rc ) {
            goto FAILED;
        }
    }

    /* Success: release the state, then run the checkpoint-ready test */
    PML_CRCP_STATE_RETURN(pml_state);

    opal_cr_stall_check = false;
    OPAL_CR_TEST_CHECKPOINT_READY();

    return OMPI_SUCCESS;

 FAILED:
    PML_CRCP_STATE_RETURN(pml_state);
    return rc;
}
|
|
|
|
int mca_pml_crcpw_dump( struct ompi_communicator_t* comm, int verbose )
|
|
{
|
|
int ret;
|
|
ompi_crcp_base_pml_state_t * pml_state = NULL;
|
|
|
|
PML_CRCP_STATE_ALLOC(pml_state);
|
|
|
|
pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
|
|
pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module);
|
|
|
|
pml_state->state = OMPI_CRCP_PML_PRE;
|
|
pml_state = ompi_crcp.pml_dump(comm, verbose, pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
if( OMPI_CRCP_PML_SKIP != pml_state->state) {
|
|
if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_dump(comm, verbose) ) ) {
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
pml_state->state = OMPI_CRCP_PML_POST;
|
|
pml_state = ompi_crcp.pml_dump(comm, verbose, pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
int mca_pml_crcpw_progress(void)
|
|
{
|
|
int ret;
|
|
ompi_crcp_base_pml_state_t * pml_state = NULL;
|
|
|
|
if( OPAL_LIKELY(NULL == ompi_crcp.pml_progress) ) {
|
|
return mca_pml_crcpw_module.wrapped_pml_module.pml_progress();
|
|
}
|
|
|
|
PML_CRCP_STATE_ALLOC(pml_state);
|
|
|
|
pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
|
|
pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module);
|
|
|
|
pml_state->state = OMPI_CRCP_PML_PRE;
|
|
pml_state = ompi_crcp.pml_progress(pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
if( OMPI_CRCP_PML_SKIP != pml_state->state) {
|
|
if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_progress() ) ) {
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
pml_state->state = OMPI_CRCP_PML_POST;
|
|
pml_state = ompi_crcp.pml_progress(pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/**
 * Start a set of requests, bracketing the wrapped PML's pml_start with the
 * ompi_crcp.pml_start hook in its PRE and POST states.
 *
 * If the PRE hook leaves the state at OMPI_CRCP_PML_DONE, neither the
 * wrapped PML nor the POST hook is invoked and OMPI_SUCCESS is returned.
 *
 * @param count     forwarded unchanged
 * @param requests  forwarded unchanged
 * @return OMPI_SUCCESS, the error_code reported by a hook invocation, or
 *         the wrapped PML's non-success return value
 *
 * NOTE(review): the state pointer is dereferenced without a NULL check,
 * yet PML_CRCP_STATE_ALLOC leaves it NULL once pml_crcpw_is_finalized is
 * set -- confirm this cannot be reached after finalize.
 */
int mca_pml_crcpw_start( size_t count, ompi_request_t** requests )
{
    int rc;
    ompi_crcp_base_pml_state_t *crcp_state = NULL;

    PML_CRCP_STATE_ALLOC(crcp_state);
    crcp_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
    crcp_state->wrapped_pml_module    = &(mca_pml_crcpw_module.wrapped_pml_module);

    /* PRE hook */
    crcp_state->state = OMPI_CRCP_PML_PRE;
    crcp_state = ompi_crcp.pml_start(count, requests, crcp_state);
    rc = crcp_state->error_code;
    if (OMPI_SUCCESS != rc) {
        goto release;
    }

    /* Hook already completed the operation: rc is OMPI_SUCCESS here */
    if (OMPI_CRCP_PML_DONE == crcp_state->state) {
        goto release;
    }

    /* Wrapped PML, unless the hook asked for it to be skipped */
    if (OMPI_CRCP_PML_SKIP != crcp_state->state) {
        rc = mca_pml_crcpw_module.wrapped_pml_module.pml_start(count, requests);
        if (OMPI_SUCCESS != rc) {
            goto release;
        }
    }

    /* POST hook; its error_code is the final result */
    crcp_state->state = OMPI_CRCP_PML_POST;
    crcp_state = ompi_crcp.pml_start(count, requests, crcp_state);
    rc = crcp_state->error_code;

release:
    PML_CRCP_STATE_RETURN(crcp_state);
    return rc;
}
|
|
|
|
|
|
int
|
|
mca_pml_crcpw_improbe(int dst,
|
|
int tag,
|
|
struct ompi_communicator_t* comm,
|
|
int *matched,
|
|
struct ompi_message_t **message,
|
|
ompi_status_public_t* status)
|
|
{
|
|
return OMPI_ERR_NOT_SUPPORTED;
|
|
}
|
|
|
|
|
|
int
|
|
mca_pml_crcpw_mprobe(int dst,
|
|
int tag,
|
|
struct ompi_communicator_t* comm,
|
|
struct ompi_message_t **message,
|
|
ompi_status_public_t* status)
|
|
{
|
|
return OMPI_ERR_NOT_SUPPORTED;
|
|
}
|
|
|
|
|
|
int
|
|
mca_pml_crcpw_imrecv(void *buf,
|
|
size_t count,
|
|
ompi_datatype_t *datatype,
|
|
struct ompi_message_t **message,
|
|
struct ompi_request_t **request)
|
|
{
|
|
return OMPI_ERR_NOT_SUPPORTED;
|
|
}
|
|
|
|
|
|
int
|
|
mca_pml_crcpw_mrecv(void *buf,
|
|
size_t count,
|
|
ompi_datatype_t *datatype,
|
|
struct ompi_message_t **message,
|
|
ompi_status_public_t* status)
|
|
{
|
|
return OMPI_ERR_NOT_SUPPORTED;
|
|
}
|
|
|
|
|
|
int mca_pml_crcpw_ft_event(int state)
|
|
{
|
|
int ret;
|
|
ompi_crcp_base_pml_state_t * pml_state = NULL;
|
|
|
|
PML_CRCP_STATE_ALLOC(pml_state);
|
|
|
|
pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
|
|
pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module);
|
|
|
|
pml_state->state = OMPI_CRCP_PML_PRE;
|
|
pml_state = ompi_crcp.pml_ft_event(state, pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
if( OMPI_CRCP_PML_SKIP != pml_state->state &&
|
|
NULL != mca_pml_crcpw_module.wrapped_pml_module.pml_ft_event) {
|
|
if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_ft_event(state) ) ) {
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
pml_state->state = OMPI_CRCP_PML_POST;
|
|
pml_state = ompi_crcp.pml_ft_event(state, pml_state);
|
|
if( OMPI_SUCCESS != pml_state->error_code) {
|
|
ret = pml_state->error_code;
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
return ret;
|
|
}
|
|
|
|
PML_CRCP_STATE_RETURN(pml_state);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|