2006-03-29 16:19:17 +00:00
|
|
|
|
2005-11-22 17:24:47 +00:00
|
|
|
/*
|
2007-03-16 23:11:45 +00:00
|
|
|
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
2005-11-22 17:24:47 +00:00
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2007-07-11 22:21:04 +00:00
|
|
|
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
2005-11-22 17:24:47 +00:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2006-03-27 22:44:26 +00:00
|
|
|
* Copyright (c) 2004-2006 The Regents of the University of California.
|
2005-11-22 17:24:47 +00:00
|
|
|
* All rights reserved.
|
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "ompi_config.h"
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "ompi/class/ompi_bitmap.h"
|
|
|
|
#include "ompi/mca/pml/pml.h"
|
|
|
|
#include "ompi/mca/btl/btl.h"
|
|
|
|
#include "ompi/mca/btl/base/base.h"
|
2005-11-22 17:24:47 +00:00
|
|
|
#include "pml_dr.h"
|
|
|
|
#include "pml_dr_component.h"
|
|
|
|
#include "pml_dr_comm.h"
|
|
|
|
#include "pml_dr_hdr.h"
|
|
|
|
#include "pml_dr_recvfrag.h"
|
|
|
|
#include "pml_dr_sendreq.h"
|
|
|
|
#include "pml_dr_recvreq.h"
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "ompi/mca/bml/base/base.h"
|
2006-03-29 16:19:17 +00:00
|
|
|
#include "orte/mca/ns/ns.h"
|
2006-08-16 20:21:38 +00:00
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
2007-02-09 16:38:16 +00:00
|
|
|
#include "ompi/mca/pml/base/base.h"
|
2005-11-22 17:24:47 +00:00
|
|
|
|
|
|
|
mca_pml_dr_t mca_pml_dr = {
|
|
|
|
{
|
|
|
|
mca_pml_dr_add_procs,
|
|
|
|
mca_pml_dr_del_procs,
|
|
|
|
mca_pml_dr_enable,
|
|
|
|
mca_pml_dr_progress,
|
|
|
|
mca_pml_dr_add_comm,
|
|
|
|
mca_pml_dr_del_comm,
|
|
|
|
mca_pml_dr_irecv_init,
|
|
|
|
mca_pml_dr_irecv,
|
|
|
|
mca_pml_dr_recv,
|
|
|
|
mca_pml_dr_isend_init,
|
|
|
|
mca_pml_dr_isend,
|
|
|
|
mca_pml_dr_send,
|
|
|
|
mca_pml_dr_iprobe,
|
|
|
|
mca_pml_dr_probe,
|
|
|
|
mca_pml_dr_start,
|
2006-03-17 18:46:48 +00:00
|
|
|
mca_pml_dr_dump,
|
2007-03-16 23:11:45 +00:00
|
|
|
NULL,
|
2005-11-22 17:24:47 +00:00
|
|
|
32768,
|
|
|
|
INT_MAX
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2007-07-10 23:45:23 +00:00
|
|
|
/*
 * Forward declaration: the handler is defined at the bottom of this
 * file but is handed to mca_bml.bml_register_error() earlier, in
 * mca_pml_dr_add_procs().
 */
void mca_pml_dr_error_handler(struct mca_btl_base_module_t* btl, int32_t flags);
|
2006-08-16 20:21:38 +00:00
|
|
|
|
2005-11-22 17:24:47 +00:00
|
|
|
int mca_pml_dr_enable(bool enable)
|
|
|
|
{
|
|
|
|
if( false == enable ) return OMPI_SUCCESS;
|
2007-07-10 23:45:23 +00:00
|
|
|
|
|
|
|
/* requests */
|
|
|
|
ompi_free_list_init( &mca_pml_base_send_requests,
|
|
|
|
sizeof(mca_pml_dr_send_request_t),
|
|
|
|
OBJ_CLASS(mca_pml_dr_send_request_t),
|
|
|
|
mca_pml_dr.free_list_num,
|
|
|
|
mca_pml_dr.free_list_max,
|
|
|
|
mca_pml_dr.free_list_inc,
|
|
|
|
NULL );
|
|
|
|
|
|
|
|
ompi_free_list_init( &mca_pml_base_recv_requests,
|
|
|
|
sizeof(mca_pml_dr_recv_request_t),
|
|
|
|
OBJ_CLASS(mca_pml_dr_recv_request_t),
|
|
|
|
mca_pml_dr.free_list_num,
|
|
|
|
mca_pml_dr.free_list_max,
|
|
|
|
mca_pml_dr.free_list_inc,
|
|
|
|
NULL );
|
|
|
|
|
|
|
|
/* fragments */
|
|
|
|
OBJ_CONSTRUCT(&mca_pml_dr.recv_frags, ompi_free_list_t);
|
|
|
|
ompi_free_list_init( &mca_pml_dr.recv_frags,
|
|
|
|
sizeof(mca_pml_dr_recv_frag_t),
|
|
|
|
OBJ_CLASS(mca_pml_dr_recv_frag_t),
|
|
|
|
mca_pml_dr.free_list_num,
|
|
|
|
mca_pml_dr.free_list_max,
|
|
|
|
mca_pml_dr.free_list_inc,
|
|
|
|
NULL );
|
|
|
|
|
|
|
|
OBJ_CONSTRUCT(&mca_pml_dr.vfrags, ompi_free_list_t);
|
|
|
|
ompi_free_list_init( &mca_pml_dr.vfrags,
|
|
|
|
sizeof(mca_pml_dr_vfrag_t),
|
|
|
|
OBJ_CLASS(mca_pml_dr_vfrag_t),
|
|
|
|
mca_pml_dr.free_list_num,
|
|
|
|
mca_pml_dr.free_list_max,
|
|
|
|
mca_pml_dr.free_list_inc,
|
|
|
|
NULL );
|
|
|
|
|
|
|
|
OBJ_CONSTRUCT(&mca_pml_dr.send_pending, opal_list_t);
|
|
|
|
OBJ_CONSTRUCT(&mca_pml_dr.send_active, opal_list_t);
|
|
|
|
OBJ_CONSTRUCT(&mca_pml_dr.acks_pending, opal_list_t);
|
|
|
|
OBJ_CONSTRUCT(&mca_pml_dr.buffers, ompi_free_list_t);
|
|
|
|
OBJ_CONSTRUCT(&mca_pml_dr.endpoints, ompi_pointer_array_t);
|
|
|
|
OBJ_CONSTRUCT(&mca_pml_dr.lock, opal_mutex_t);
|
|
|
|
|
2005-11-22 17:24:47 +00:00
|
|
|
mca_pml_dr.enabled = true;
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Attach DR-specific state to a communicator.
 *
 * Allocates a mca_pml_dr_comm_t, initializes it for 'comm', stores it
 * in comm->c_pml_comm, and records for every member of the
 * communicator's remote group the matching ompi_proc_t pointer.
 *
 * @param comm  communicator being set up.
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if the per-comm
 *         object cannot be allocated.
 */
int mca_pml_dr_add_comm(ompi_communicator_t* comm)
{
    mca_pml_dr_comm_t* dr_comm;
    int rank;

    /* allocate pml specific comm data */
    dr_comm = OBJ_NEW(mca_pml_dr_comm_t);
    if (NULL == dr_comm) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    mca_pml_dr_comm_init(dr_comm, comm);
    comm->c_pml_comm = dr_comm;

    /* one per-peer slot for each process in the remote group */
    for (rank = 0; rank < comm->c_remote_group->grp_proc_count; ++rank) {
        dr_comm->procs[rank].ompi_proc =
            comm->c_remote_group->grp_proc_pointers[rank];
    }

    return OMPI_SUCCESS;
}
|
|
|
|
|
|
|
|
/*
 * Detach the DR-specific state from a communicator.
 *
 * Drops this PML's reference on comm->c_pml_comm and clears the
 * pointer so the communicator no longer refers to the released object.
 *
 * @param comm  communicator being torn down.
 * @return OMPI_SUCCESS.
 */
int mca_pml_dr_del_comm(ompi_communicator_t* comm)
{
    OBJ_RELEASE(comm->c_pml_comm);
    comm->c_pml_comm = NULL;

    return OMPI_SUCCESS;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For each proc setup a datastructure that indicates the PTLs
|
|
|
|
* that can be used to reach the destination.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
int mca_pml_dr_add_procs(ompi_proc_t** procs, size_t nprocs)
|
|
|
|
{
|
|
|
|
ompi_bitmap_t reachable;
|
2006-07-04 01:20:20 +00:00
|
|
|
struct mca_bml_base_endpoint_t **bml_endpoints = NULL;
|
2005-11-22 17:24:47 +00:00
|
|
|
int rc;
|
2007-01-17 14:23:46 +00:00
|
|
|
size_t i;
|
2005-11-22 17:24:47 +00:00
|
|
|
|
|
|
|
if(nprocs == 0)
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
|
2006-12-30 16:17:56 +00:00
|
|
|
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
2007-01-08 22:02:17 +00:00
|
|
|
for (i = 0 ; i < nprocs ; ++i) {
|
|
|
|
if (procs[i]->proc_arch != ompi_proc_local()->proc_arch) {
|
2006-12-30 16:17:56 +00:00
|
|
|
return OMPI_ERR_NOT_SUPPORTED;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2007-02-09 16:38:16 +00:00
|
|
|
/* make sure remote procs are using the same PML as us */
|
|
|
|
if (OMPI_SUCCESS != (rc = mca_pml_base_pml_check_selected("dr",
|
|
|
|
procs,
|
|
|
|
nprocs))) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2005-11-22 17:24:47 +00:00
|
|
|
OBJ_CONSTRUCT(&reachable, ompi_bitmap_t);
|
2006-10-20 03:57:44 +00:00
|
|
|
rc = ompi_bitmap_init(&reachable, (int)nprocs);
|
2005-11-22 17:24:47 +00:00
|
|
|
if(OMPI_SUCCESS != rc)
|
|
|
|
return rc;
|
|
|
|
|
2006-08-24 16:38:08 +00:00
|
|
|
bml_endpoints = (mca_bml_base_endpoint_t**)malloc(nprocs * sizeof(struct mca_bml_base_endpoint_t*));
|
2006-07-04 01:20:20 +00:00
|
|
|
if (NULL == bml_endpoints) {
|
|
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
2005-11-22 17:24:47 +00:00
|
|
|
}
|
2006-03-29 16:19:17 +00:00
|
|
|
|
2006-07-04 01:20:20 +00:00
|
|
|
/* initialize bml endpoint data */
|
2005-11-22 17:24:47 +00:00
|
|
|
rc = mca_bml.bml_add_procs(
|
|
|
|
nprocs,
|
|
|
|
procs,
|
2006-07-04 01:20:20 +00:00
|
|
|
bml_endpoints,
|
2005-11-22 17:24:47 +00:00
|
|
|
&reachable
|
|
|
|
);
|
|
|
|
if(OMPI_SUCCESS != rc)
|
|
|
|
return rc;
|
2006-08-16 20:21:38 +00:00
|
|
|
|
|
|
|
/* register recv handler */
|
2005-11-22 17:24:47 +00:00
|
|
|
rc = mca_bml.bml_register(
|
|
|
|
MCA_BTL_TAG_PML,
|
|
|
|
mca_pml_dr_recv_frag_callback,
|
|
|
|
NULL);
|
|
|
|
|
2006-08-16 20:21:38 +00:00
|
|
|
if(OMPI_SUCCESS != rc)
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
/* register error handlers */
|
|
|
|
rc = mca_bml.bml_register_error(mca_pml_dr_error_handler);
|
|
|
|
|
|
|
|
if(OMPI_SUCCESS != rc)
|
|
|
|
return rc;
|
|
|
|
|
2005-11-22 17:24:47 +00:00
|
|
|
ompi_free_list_init(
|
|
|
|
&mca_pml_dr.buffers,
|
|
|
|
sizeof(mca_pml_dr_buffer_t) + mca_pml_dr.eager_limit,
|
|
|
|
OBJ_CLASS(mca_pml_dr_buffer_t),
|
|
|
|
0,
|
|
|
|
mca_pml_dr.free_list_max,
|
|
|
|
mca_pml_dr.free_list_inc,
|
|
|
|
NULL);
|
2006-07-04 01:20:20 +00:00
|
|
|
|
|
|
|
/* initialize pml endpoint data */
|
2007-01-18 17:14:06 +00:00
|
|
|
for (i = 0 ; i < nprocs ; ++i) {
|
2006-03-29 16:19:17 +00:00
|
|
|
int idx;
|
2006-07-04 01:20:20 +00:00
|
|
|
mca_pml_dr_endpoint_t *endpoint;
|
|
|
|
|
|
|
|
|
|
|
|
endpoint = OBJ_NEW(mca_pml_dr_endpoint_t);
|
|
|
|
endpoint->proc_ompi = procs[i];
|
|
|
|
procs[i]->proc_pml = (struct mca_pml_base_endpoint_t*) endpoint;
|
2007-01-30 20:56:31 +00:00
|
|
|
MCA_PML_DR_DEBUG(10, (0, "%s:%d: adding endpoint %p to proc_pml %p\n",
|
2007-02-01 19:27:11 +00:00
|
|
|
__FILE__, __LINE__, (void*)endpoint, (void*)procs[i]));
|
2006-08-15 21:44:55 +00:00
|
|
|
|
2006-05-20 02:39:05 +00:00
|
|
|
/* this won't work for comm spawn and other dynamic
|
2006-03-29 16:19:17 +00:00
|
|
|
processes, but will work for initial job start */
|
2006-05-20 02:39:05 +00:00
|
|
|
idx = ompi_pointer_array_add(&mca_pml_dr.endpoints,
|
2006-07-04 01:20:20 +00:00
|
|
|
(void*) endpoint);
|
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 19:34:59 +00:00
|
|
|
if(orte_ns.compare_fields(ORTE_NS_CMP_ALL,
|
2006-05-20 02:39:05 +00:00
|
|
|
orte_process_info.my_name,
|
Bring over the update to terminate orteds that are generated by a dynamic spawn such as comm_spawn. This introduces the concept of a job "family" - i.e., jobs that have a parent/child relationship. Comm_spawn'ed jobs have a parent (the one that spawned them). We track that relationship throughout the lineage - i.e., if a comm_spawned job in turn calls comm_spawn, then it has a parent (the one that spawned it) and a "root" job (the original job that started things).
Accordingly, there are new APIs to the name service to support the ability to get a job's parent, root, immediate children, and all its descendants. In addition, the terminate_job, terminate_orted, and signal_job APIs for the PLS have been modified to accept attributes that define the extent of their actions. For example, doing a "terminate_job" with an attribute of ORTE_NS_INCLUDE_DESCENDANTS will terminate the given jobid AND all jobs that descended from it.
I have tested this capability on a MacBook under rsh, Odin under SLURM, and LANL's Flash (bproc). It worked successfully on non-MPI jobs (both simple and including a spawn), and MPI jobs (again, both simple and with a spawn).
This commit was SVN r12597.
2006-11-14 19:34:59 +00:00
|
|
|
&(endpoint->proc_ompi->proc_name)) == ORTE_EQUAL) {
|
2006-03-29 16:19:17 +00:00
|
|
|
mca_pml_dr.my_rank = idx;
|
|
|
|
}
|
2006-07-04 01:20:20 +00:00
|
|
|
endpoint->local = endpoint->dst = idx;
|
2006-08-15 21:44:55 +00:00
|
|
|
MCA_PML_DR_DEBUG(10, (0, "%s:%d: setting endpoint->dst to %d\n",
|
|
|
|
__FILE__, __LINE__, idx));
|
|
|
|
|
2006-07-04 01:20:20 +00:00
|
|
|
endpoint->bml_endpoint = bml_endpoints[i];
|
2006-03-29 16:19:17 +00:00
|
|
|
}
|
2006-08-15 21:44:55 +00:00
|
|
|
|
2007-01-18 17:14:06 +00:00
|
|
|
for(i = 0; i < nprocs; i++) {
|
2006-08-15 21:44:55 +00:00
|
|
|
mca_pml_dr_endpoint_t* ep = (mca_pml_dr_endpoint_t*)
|
|
|
|
ompi_pointer_array_get_item(&mca_pml_dr.endpoints, i);
|
|
|
|
ep->src = mca_pml_dr.my_rank;
|
|
|
|
}
|
2006-03-29 16:19:17 +00:00
|
|
|
/* no longer need this */
|
2006-07-04 01:20:20 +00:00
|
|
|
if ( NULL != bml_endpoints ) {
|
2007-01-08 22:02:17 +00:00
|
|
|
free ( bml_endpoints);
|
2006-03-29 16:19:17 +00:00
|
|
|
}
|
2005-11-22 17:24:47 +00:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* iterate through each proc and notify any PTLs associated
|
|
|
|
* with the proc that it is/has gone away
|
|
|
|
*/
|
|
|
|
|
|
|
|
int mca_pml_dr_del_procs(ompi_proc_t** procs, size_t nprocs)
|
|
|
|
{
|
2006-07-04 01:20:20 +00:00
|
|
|
size_t i;
|
|
|
|
|
|
|
|
/* clean up pml endpoint data */
|
|
|
|
for (i = 0 ; i < nprocs ; ++i) {
|
|
|
|
if (NULL != procs[i]->proc_pml) {
|
|
|
|
OBJ_RELEASE(procs[i]->proc_pml);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-11-22 17:24:47 +00:00
|
|
|
return mca_bml.bml_del_procs(nprocs, procs);
|
|
|
|
}
|
|
|
|
|
2006-03-20 15:41:45 +00:00
|
|
|
int mca_pml_dr_dump(
|
2006-03-17 18:46:48 +00:00
|
|
|
struct ompi_communicator_t* comm,
|
|
|
|
int verbose)
|
|
|
|
{
|
2006-03-20 15:41:45 +00:00
|
|
|
return OMPI_SUCCESS;
|
2006-03-17 18:46:48 +00:00
|
|
|
}
|
|
|
|
|
2006-03-27 22:44:26 +00:00
|
|
|
|
|
|
|
|
2006-08-16 20:56:22 +00:00
|
|
|
void mca_pml_dr_error_handler(
|
|
|
|
struct mca_btl_base_module_t* btl,
|
|
|
|
int32_t flags) {
|
2006-11-03 15:40:26 +00:00
|
|
|
/* try failover ! */
|
2006-11-06 21:27:17 +00:00
|
|
|
opal_output(0, "%s:%d:%s: failing BTL: %s", __FILE__, __LINE__, __func__,
|
|
|
|
btl->btl_component->btl_version.mca_component_name);
|
2006-11-03 15:40:26 +00:00
|
|
|
mca_pml_dr_sendreq_cleanup_active(btl);
|
|
|
|
mca_bml.bml_del_btl(btl);
|
|
|
|
/* orte_errmgr.abort(); */
|
2006-08-16 20:56:22 +00:00
|
|
|
}
|
2006-11-03 15:40:26 +00:00
|
|
|
|
|
|
|
|