
Fold in the revised modex scheme. Move the ompi_proc_t modex portions to the RTE level since the daemons already have that info. Provide each process with the equivalent of a "nidmap" - both a map of what nodes are in the job, and a map of which node each process is on. This enables the use of static ports, though that hasn't been turned "on" in this commit.
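
For orientation, here is a minimal sketch of the two maps, with field names inferred from how the ess modules in this commit consume them (pmap[vpid].node, nids[node]->name, nids[node]->arch, local_rank, node_rank); the committed definitions live in the ORTE runtime headers and may differ:

    #include <stdint.h>

    /* Hypothetical sketch only - not the committed definitions */
    typedef struct {
        char     *name;  /* hostname of the node */
        uint32_t  arch;  /* architecture id of the node */
    } orte_nid_t;        /* one entry per node in the job */

    typedef struct {
        int32_t node;        /* index of the node (vpid of its daemon) */
        uint8_t local_rank;  /* rank among this job's procs on the node */
        uint8_t node_rank;   /* rank among all procs on the node */
    } orte_pmap_t;           /* indexed by process vpid */

With these two arrays in every process, proc_is_local() reduces to comparing pmap[vpid].node against the vpid of the local daemon, which is exactly what the ess modules below do.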

Update the rsh tree spawn capability so we spawn the next wave of daemons before launching our own local procs.
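
In rough terms, each daemon's launch sequence becomes the following (a hedged sketch with hypothetical helper names, not the actual plm/rsh code):

    #include <stddef.h>

    typedef struct { int vpid; } daemon_t;

    /* stand-ins for the real plm/rsh internals */
    extern void fork_exec_remote_daemon(daemon_t *child);
    extern void launch_local_procs(void);

    /* Spawn the next wave of the daemon tree before doing any local
     * fork/exec work, so the tree keeps fanning out while this node
     * starts its own procs.
     */
    static void daemon_launch_phase(daemon_t *children, size_t nchildren)
    {
        size_t i;
        for (i = 0; i < nchildren; ++i) {
            fork_exec_remote_daemon(&children[i]);  /* next wave first */
        }
        launch_local_procs();                       /* local procs last */
    }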

Add the ability to encode nodenames for large clusters with contiguous node name numbering schemes - this allows communication of all node names in a few bytes instead of tens-of-bytes/node.
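
To illustrate the idea (an assumption-laden sketch, not the encoder this commit adds alongside orte/util/nidmap.h): names like node0001..node1024 compress to a common prefix, a start number, a count, and the padding width.

    #include <stdio.h>
    #include <stdlib.h>

    /* Illustrative encoding for contiguous schemes such as
     * "node0001".."node1024": a few bytes total instead of
     * tens of bytes per node.
     */
    struct encoded_nodes {
        char prefix[16];  /* common prefix, e.g. "node" */
        int  start;       /* first node number */
        int  count;       /* number of contiguous nodes */
        int  width;       /* zero-padding width of the suffix */
    };

    /* recover the name of the idx-th node; caller frees the result */
    static char *decode_name(const struct encoded_nodes *e, int idx)
    {
        char *name;
        if (idx < 0 || idx >= e->count) {
            return NULL;
        }
        name = malloc(sizeof(e->prefix) + 16);
        if (NULL != name) {
            snprintf(name, sizeof(e->prefix) + 16, "%s%0*d",
                     e->prefix, e->width, e->start + idx);
        }
        return name;
    }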

This commit was SVN r18338.
This commit is contained in:
Ralph Castain 2008-04-30 19:49:53 +00:00
parent f5dfc005a4
Commit 3e55fe6f6d
66 changed files with 2407 additions and 1250 deletions

View file

@ -15,8 +15,8 @@ with_devel_headers=yes
enable_heterogeneous=no
enable_picky=yes
enable_debug=yes
enable_shared=yes
enable_static=no
enable_shared=no
enable_static=yes
with_slurm=no
enable_contrib_no_build=libnbc,vt
enable_visibility=yes

View file

@ -536,7 +536,7 @@ int mca_pml_ob1_ft_event( int state )
ret);
return ret;
}
#if 0
/*
* Fill in remote proc information
*/
@ -546,7 +546,7 @@ int mca_pml_ob1_ft_event( int state )
ret);
return ret;
}
#endif
/*
* Startup the PML stack now that the modex is running again
* Add the new procs (BTLs redo modex recv's)

View file

@ -28,6 +28,7 @@
#include "opal/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ess/ess.h"
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
@ -60,20 +61,20 @@ void ompi_proc_construct(ompi_proc_t* proc)
proc->proc_pml = NULL;
OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);
/* By default all processors are supposelly having the same architecture as me. Thus,
* by default we run in a homogeneous environment. Later when the registry callback
* get fired we will have to set the convertors to the correct architecture.
/* By default all processors are supposedly having the same architecture as me. Thus,
* by default we run in a homogeneous environment. Later, when the RTE can tell us
* the arch of the remote nodes, we will have to set the convertors to the correct
* architecture.
*/
proc->proc_arch = orte_process_info.arch;
proc->proc_convertor = ompi_mpi_local_convertor;
OBJ_RETAIN( ompi_mpi_local_convertor );
proc->proc_arch = ompi_mpi_local_arch;
proc->proc_flags = 0;
/* By default, put NULL in the hostname. It may or may not get
filled in later -- consumer of this field beware! */
/* initialize this pointer to NULL */
proc->proc_hostname = NULL;
OPAL_THREAD_LOCK(&ompi_proc_lock);
opal_list_append(&ompi_proc_list, (opal_list_item_t*)proc);
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
@ -88,9 +89,9 @@ void ompi_proc_destruct(ompi_proc_t* proc)
* destroyed here. It will be destroyed later when the ompi_ddt_finalize is called.
*/
OBJ_RELEASE( proc->proc_convertor );
if (NULL != proc->proc_hostname) {
free(proc->proc_hostname);
}
/* DO NOT FREE THE HOSTNAME FIELD AS THIS POINTS
* TO AN AREA ALLOCATED/FREE'D ELSEWHERE
*/
OPAL_THREAD_LOCK(&ompi_proc_lock);
opal_list_remove_item(&ompi_proc_list, (opal_list_item_t*)proc);
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
@ -101,8 +102,6 @@ void ompi_proc_destruct(ompi_proc_t* proc)
int ompi_proc_init(void)
{
orte_vpid_t i;
int rc;
uint32_t ui32;
OBJ_CONSTRUCT(&ompi_proc_list, opal_list_t);
OBJ_CONSTRUCT(&ompi_proc_lock, opal_mutex_t);
@ -112,178 +111,37 @@ int ompi_proc_init(void)
ompi_proc_t *proc = OBJ_NEW(ompi_proc_t);
proc->proc_name.jobid = ORTE_PROC_MY_NAME->jobid;
proc->proc_name.vpid = i;
if( i == ORTE_PROC_MY_NAME->vpid ) {
if (i == ORTE_PROC_MY_NAME->vpid) {
ompi_proc_local_proc = proc;
proc->proc_flags |= OMPI_PROC_FLAG_LOCAL;
}
}
/* Fill in our local information */
rc = opal_arch_compute_local_id(&ui32);
if (OMPI_SUCCESS != rc) return rc;
ompi_proc_local_proc->proc_nodeid = orte_process_info.nodeid;
ompi_proc_local_proc->proc_arch = ui32;
if (ompi_mpi_keep_peer_hostnames) {
if (ompi_mpi_keep_fqdn_hostnames) {
/* use the entire FQDN name */
ompi_proc_local_proc->proc_hostname = strdup(orte_process_info.nodename);
proc->proc_hostname = orte_process_info.nodename;
proc->proc_arch = orte_process_info.arch;
} else {
/* use the unqualified name */
char *tmp, *ptr;
tmp = strdup(orte_process_info.nodename);
if (NULL != (ptr = strchr(tmp, '.'))) {
*ptr = '\0';
if (orte_ess.proc_is_local(&proc->proc_name)) {
proc->proc_flags |= OMPI_PROC_FLAG_LOCAL;
}
ompi_proc_local_proc->proc_hostname = strdup(tmp);
free(tmp);
}
}
rc = ompi_proc_publish_info();
return rc;
}
int ompi_proc_publish_info(void)
{
orte_std_cntr_t datalen;
void *data;
opal_buffer_t* buf;
int rc;
/* pack our local data for others to use */
buf = OBJ_NEW(opal_buffer_t);
rc = ompi_proc_pack(&ompi_proc_local_proc, 1, buf);
if (OMPI_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* send our data into the ether */
rc = opal_dss.unload(buf, &data, &datalen);
if (OMPI_SUCCESS != rc) return rc;
OBJ_RELEASE(buf);
rc = ompi_modex_send_string("ompi-proc-info", data, datalen);
free(data);
return rc;
}
int
ompi_proc_get_info(void)
{
int ret = OMPI_SUCCESS;
opal_list_item_t *item;
OPAL_THREAD_LOCK(&ompi_proc_lock);
for (item = opal_list_get_first(&ompi_proc_list) ;
item != opal_list_get_end(&ompi_proc_list) ;
item = opal_list_get_next(item)) {
ompi_proc_t *proc = (ompi_proc_t*) item;
uint32_t arch;
char *hostname;
void *data;
size_t datalen;
orte_nodeid_t nodeid;
/* Don't reset the information determined about the current
process during the init step. Saves time and problems if
modex is unimplemented */
if (ompi_proc_local() == proc) continue;
if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_JOBID,
&ompi_proc_local_proc->proc_name,
&proc->proc_name)) {
/* not in our jobid -- this shouldn't happen */
ret = OMPI_ERR_FATAL;
goto out;
}
ret = ompi_modex_recv_string("ompi-proc-info", proc, &data, &datalen);
if (OMPI_SUCCESS == ret) {
opal_buffer_t *buf;
orte_std_cntr_t count=1;
orte_process_name_t name;
buf = OBJ_NEW(opal_buffer_t);
ret = opal_dss.load(buf, data, datalen);
if (OMPI_SUCCESS != ret)
goto out;
/* This isn't needed here, but packed just so that you
could, in theory, use the unpack code on this proc. We
don't, because we aren't adding procs, but need to
update them */
ret = opal_dss.unpack(buf, &name, &count, ORTE_NAME);
if (ret != ORTE_SUCCESS)
goto out;
ret = opal_dss.unpack(buf, &nodeid, &count, ORTE_NODEID);
if (ret != ORTE_SUCCESS) {
ORTE_ERROR_LOG(ret);
goto out;
}
ret = opal_dss.unpack(buf, &arch, &count, OPAL_UINT32);
if (ret != ORTE_SUCCESS) {
ORTE_ERROR_LOG(ret);
goto out;
}
ret = opal_dss.unpack(buf, &hostname, &count, OPAL_STRING);
if (ret != ORTE_SUCCESS) {
ORTE_ERROR_LOG(ret);
goto out;
}
/* Free the buffer for the next proc */
OBJ_RELEASE(buf);
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
nodeid = ORTE_NODEID_INVALID;
arch = ompi_proc_local_proc->proc_arch;
hostname = strdup("");
ret = ORTE_SUCCESS;
} else {
goto out;
}
proc->proc_nodeid = nodeid;
proc->proc_arch = arch;
/* if arch is different than mine, create a new convertor for this proc */
if (proc->proc_arch != ompi_proc_local_proc->proc_arch) {
proc->proc_hostname = orte_ess.proc_get_hostname(&proc->proc_name);
proc->proc_arch = orte_ess.proc_get_arch(&proc->proc_name);
/* if arch is different than mine, create a new convertor for this proc */
if (proc->proc_arch != orte_process_info.arch) {
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
OBJ_RELEASE(proc->proc_convertor);
proc->proc_convertor = ompi_convertor_create(proc->proc_arch, 0);
OBJ_RELEASE(proc->proc_convertor);
proc->proc_convertor = ompi_convertor_create(proc->proc_arch, 0);
#else
opal_show_help("help-mpi-runtime",
"heterogeneous-support-unavailable",
true, orte_process_info.nodename,
hostname == NULL ? "<hostname unavailable>" :
hostname);
ret = OMPI_ERR_NOT_SUPPORTED;
goto out;
opal_show_help("help-mpi-runtime",
"heterogeneous-support-unavailable",
true, orte_process_info.nodename,
proc->proc_hostname == NULL ? "<hostname unavailable>" :
proc->proc_hostname);
return OMPI_ERR_NOT_SUPPORTED;
#endif
}
if ((ompi_proc_local_proc->proc_nodeid == proc->proc_nodeid) &&
(proc->proc_nodeid != ORTE_NODEID_INVALID)) {
proc->proc_flags |= OMPI_PROC_FLAG_LOCAL;
}
}
/* Save the hostname. The dss code will have strdup'ed this
for us -- no need to do so again */
proc->proc_hostname = hostname;
}
out:
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
return ret;
return OMPI_SUCCESS;
}
int ompi_proc_finalize (void)
{
ompi_proc_t *proc, *nextproc, *endproc;
@ -408,46 +266,57 @@ ompi_proc_t * ompi_proc_find ( const orte_process_name_t * name )
}
static ompi_proc_t *
ompi_proc_find_and_add(const orte_process_name_t * name, bool* isnew)
{
ompi_proc_t *proc, *rproc = NULL;
orte_ns_cmp_bitmask_t mask;
int ompi_proc_refresh(void) {
ompi_proc_t *proc = NULL;
opal_list_item_t *item = NULL;
orte_vpid_t i = 0;
/* return the proc-struct which matches this jobid+process id */
mask = ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID;
OPAL_THREAD_LOCK(&ompi_proc_lock);
for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proc->proc_name, name)) {
rproc = proc;
*isnew = false;
break;
}
}
if (NULL == rproc) {
*isnew = true;
rproc = OBJ_NEW(ompi_proc_t);
if (NULL != rproc) {
rproc->proc_name = *name;
for( item = opal_list_get_first(&ompi_proc_list), i = 0;
item != opal_list_get_end(&ompi_proc_list);
item = opal_list_get_next(item), ++i ) {
proc = (ompi_proc_t*)item;
if (i == ORTE_PROC_MY_NAME->vpid) {
ompi_proc_local_proc = proc;
proc->proc_flags |= OMPI_PROC_FLAG_LOCAL;
proc->proc_hostname = orte_process_info.nodename;
proc->proc_arch = orte_process_info.arch;
} else {
if (orte_ess.proc_is_local(&proc->proc_name)) {
proc->proc_flags |= OMPI_PROC_FLAG_LOCAL;
}
proc->proc_hostname = orte_ess.proc_get_hostname(&proc->proc_name);
proc->proc_arch = orte_ess.proc_get_arch(&proc->proc_name);
/* if arch is different than mine, create a new convertor for this proc */
if (proc->proc_arch != orte_process_info.arch) {
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
OBJ_RELEASE(proc->proc_convertor);
proc->proc_convertor = ompi_convertor_create(proc->proc_arch, 0);
#else
opal_show_help("help-mpi-runtime",
"heterogeneous-support-unavailable",
true, orte_process_info.nodename,
proc->proc_hostname == NULL ? "<hostname unavailable>" :
proc->proc_hostname);
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
return OMPI_ERR_NOT_SUPPORTED;
#endif
}
}
/* caller had better fill in the rest of the proc, or there's
going to be pain later... */
}
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
return rproc;
return OMPI_SUCCESS;
}
int
ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, opal_buffer_t* buf)
{
int i, rc;
OPAL_THREAD_LOCK(&ompi_proc_lock);
for (i=0; i<proclistsize; i++) {
rc = opal_dss.pack(buf, &(proclist[i]->proc_name), 1, ORTE_NAME);
@ -456,12 +325,6 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, opal_buffer_t* buf)
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
return rc;
}
rc = opal_dss.pack(buf, &(proclist[i]->proc_nodeid), 1, ORTE_NODEID);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
return rc;
}
rc = opal_dss.pack(buf, &(proclist[i]->proc_arch), 1, OPAL_UINT32);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
@ -479,6 +342,40 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, opal_buffer_t* buf)
return OMPI_SUCCESS;
}
static ompi_proc_t *
ompi_proc_find_and_add(const orte_process_name_t * name, bool* isnew)
{
ompi_proc_t *proc, *rproc = NULL;
orte_ns_cmp_bitmask_t mask;
/* return the proc-struct which matches this jobid+process id */
mask = ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID;
OPAL_THREAD_LOCK(&ompi_proc_lock);
for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proc->proc_name, name)) {
rproc = proc;
*isnew = false;
break;
}
}
if (NULL == rproc) {
*isnew = true;
rproc = OBJ_NEW(ompi_proc_t);
if (NULL != rproc) {
rproc->proc_name = *name;
}
/* caller had better fill in the rest of the proc, or there's
going to be pain later... */
}
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
return rproc;
}
int
ompi_proc_unpack(opal_buffer_t* buf,
@ -488,9 +385,9 @@ ompi_proc_unpack(opal_buffer_t* buf,
int i;
size_t newprocs_len = 0;
ompi_proc_t **plist=NULL, **newprocs = NULL;
/* do not free plist *ever*, since it is used in the remote group
structure of a communicator */
structure of a communicator */
plist = (ompi_proc_t **) calloc (proclistsize, sizeof (ompi_proc_t *));
if ( NULL == plist ) {
return OMPI_ERR_OUT_OF_RESOURCE;
@ -500,7 +397,7 @@ ompi_proc_unpack(opal_buffer_t* buf,
if (NULL == newprocs) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
for ( i=0; i<proclistsize; i++ ){
orte_std_cntr_t count=1;
orte_process_name_t new_name;
@ -508,18 +405,12 @@ ompi_proc_unpack(opal_buffer_t* buf,
char *new_hostname;
bool isnew = false;
int rc;
orte_nodeid_t new_nodeid;
rc = opal_dss.unpack(buf, &new_name, &count, ORTE_NAME);
if (rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = opal_dss.unpack(buf, &new_nodeid, &count, ORTE_NODEID);
if (rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = opal_dss.unpack(buf, &new_arch, &count, OPAL_UINT32);
if (rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
@ -530,14 +421,13 @@ ompi_proc_unpack(opal_buffer_t* buf,
ORTE_ERROR_LOG(rc);
return rc;
}
plist[i] = ompi_proc_find_and_add(&new_name, &isnew);
if (isnew) {
newprocs[newprocs_len++] = plist[i];
plist[i]->proc_nodeid = new_nodeid;
plist[i]->proc_arch = new_arch;
/* if arch is different than mine, create a new convertor for this proc */
if (plist[i]->proc_arch != ompi_mpi_local_arch) {
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
@ -548,82 +438,26 @@ ompi_proc_unpack(opal_buffer_t* buf,
"heterogeneous-support-unavailable",
true, orte_process_info.nodename,
new_hostname == NULL ? "<hostname unavailable>" :
new_hostname);
new_hostname);
return OMPI_ERR_NOT_SUPPORTED;
#endif
}
if (ompi_proc_local_proc->proc_nodeid == plist[i]->proc_nodeid) {
if (0 == strcmp(ompi_proc_local_proc->proc_hostname,new_hostname)) {
plist[i]->proc_flags |= OMPI_PROC_FLAG_LOCAL;
}
/* Save the hostname */
plist[i]->proc_hostname = new_hostname;
}
}
if (NULL != newproclistsize) *newproclistsize = newprocs_len;
if (NULL != newproclist) {
*newproclist = newprocs;
} else if (newprocs != NULL) {
free(newprocs);
}
*proclist = plist;
return OMPI_SUCCESS;
}
int ompi_proc_refresh(void) {
ompi_proc_t *proc = NULL;
opal_list_item_t *item = NULL;
orte_vpid_t i = 0;
int rc;
uint32_t ui32;
OPAL_THREAD_LOCK(&ompi_proc_lock);
for( item = opal_list_get_first(&ompi_proc_list), i = 0;
item != opal_list_get_end(&ompi_proc_list);
item = opal_list_get_next(item), ++i ) {
proc = (ompi_proc_t*)item;
/* Does not change: orte_process_info.num_procs */
/* Does not change: proc->proc_name.vpid */
proc->proc_name.jobid = ORTE_PROC_MY_NAME->jobid;
if( i == ORTE_PROC_MY_NAME->vpid ) {
ompi_proc_local_proc = proc;
proc->proc_flags |= OMPI_PROC_FLAG_LOCAL;
} else {
proc->proc_flags = 0;
}
}
/* Fill in our local information */
rc = opal_arch_compute_local_id(&ui32);
if (OMPI_SUCCESS != rc) {
return rc;
}
ompi_proc_local_proc->proc_nodeid = orte_process_info.nodeid;
ompi_proc_local_proc->proc_arch = ui32;
if (ompi_mpi_keep_peer_hostnames) {
if (ompi_mpi_keep_fqdn_hostnames) {
/* use the entire FQDN name */
ompi_proc_local_proc->proc_hostname = strdup(orte_process_info.nodename);
} else {
/* use the unqualified name */
char *tmp, *ptr;
tmp = strdup(orte_process_info.nodename);
if (NULL != (ptr = strchr(tmp, '.'))) {
*ptr = '\0';
}
ompi_proc_local_proc->proc_hostname = strdup(tmp);
free(tmp);
}
}
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
rc = ompi_proc_publish_info();
return rc;
}

View file

@ -34,10 +34,10 @@
#include "ompi/types.h"
#include "opal/class/opal_list.h"
#include "opal/threads/mutex.h"
#include "orte/types.h"
#include "opal/dss/dss_types.h"
#include "orte/runtime/orte_globals.h"
BEGIN_C_DECLS
/* ******************************************************************** */
@ -54,8 +54,6 @@ struct ompi_proc_t {
opal_list_item_t super;
/** this process' name */
orte_process_name_t proc_name;
/** "nodeid" on which the proc resides - equiv to vpid of local daemon */
orte_nodeid_t proc_nodeid;
/** PML specific proc data */
struct mca_pml_base_endpoint_t* proc_pml;
/** BML specific proc data */
@ -66,7 +64,9 @@ struct ompi_proc_t {
struct ompi_convertor_t* proc_convertor;
/** Lock protecting data inside the given ompi_proc_t */
opal_mutex_t proc_lock;
/** Keep the hostname around for debugging purposes */
/** A pointer to the name of this host - data is
* actually stored in the RTE
*/
char* proc_hostname;
/** flags for this proc */
uint8_t proc_flags;
@ -119,35 +119,6 @@ OMPI_DECLSPEC extern ompi_proc_t* ompi_proc_local_proc;
*/
OMPI_DECLSPEC int ompi_proc_init(void);
/**
* Publish local process information
*
* Used by ompi_proc_init() and elsewhere in the code to refresh any
* local information not easily determined by the run-time ahead of time
* (architecture and hostname).
*
* @note While an ompi_proc_t will exist with mostly valid information
* for each process in the MPI_COMM_WORLD at the conclusion of this
* call, some information will not be immediately available. This
* includes the architecture and hostname, which will be available by
* the conclusion of the stage gate.
*
* @retval OMPI_SUCCESS Information available in the modex
* @retval OMPI_ERROR Failure due to unspecified error
*/
OMPI_DECLSPEC int ompi_proc_publish_info(void);
/**
* Get data exchange information from remote processes
*
* Get data exchanged from remote processes and populate the ompi proc
* structures for the associated processes.
*
* @retval OMPI_SUCCESS Information successfully received
* @retval OMPI_ERROR Information update failure
*/
OMPI_DECLSPEC int ompi_proc_get_info(void);
/**
* Finalize the OMPI Process subsystem
@ -249,7 +220,6 @@ static inline ompi_proc_t* ompi_proc_local(void)
*/
OMPI_DECLSPEC ompi_proc_t * ompi_proc_find ( const orte_process_name_t* name );
/**
* Pack proc list into portable buffer
*
@ -316,7 +286,6 @@ OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf,
int proclistsize, ompi_proc_t ***proclist,
int *newproclistsize, ompi_proc_t ***newproclist);
/**
* Refresh the OMPI process subsystem
*

View file

@ -65,3 +65,13 @@ environment.
WARNING: The MCA parameter mpi_use_sparse_group_storage has been set
to true, but sparse group support was not compiled into Open MPI. The
mpi_use_sparse_group_storage value has therefore been ignored.
#
[heterogeneous-support-unavailable]
This installation of Open MPI was configured without support for
heterogeneous architectures, but at least one node in the allocation
was detected to have a different architecture. The detected node was:
Node: %s
In order to operate in a heterogeneous environment, please reconfigure
Open MPI with --enable-heterogeneous.

View file

@ -521,13 +521,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
gettimeofday(&ompistart, NULL);
}
/* Fill in remote proc information */
if (OMPI_SUCCESS != (ret = ompi_proc_get_info())) {
ORTE_ERROR_LOG(ret);
error = "ompi_mpi_init: ompi_proc_get_info failed";
goto error;
}
/* Figure out the final MPI thread levels. If we were not
compiled for support for MPI threads, then don't allow
MPI_THREAD_MULTIPLE. */

View file

@ -154,11 +154,6 @@ int ompi_mpi_register_params(void)
false, false, 1, &value);
ompi_mpi_keep_peer_hostnames = OPAL_INT_TO_BOOL(value);
mca_base_param_reg_int_name("mpi", "keep_fqdn_hostnames",
"If nonzero, use the FQDN host name when saving hostnames. This can add quite a bit of memory usage to each MPI process.",
false, false, 1, &value);
ompi_mpi_keep_fqdn_hostnames = OPAL_INT_TO_BOOL(value);
/* MPI_ABORT controls */
mca_base_param_reg_int_name("mpi", "abort_delay",

View file

@ -103,14 +103,6 @@ OMPI_DECLSPEC extern char * ompi_mpi_show_mca_params_file;
*/
OMPI_DECLSPEC extern bool ompi_mpi_keep_peer_hostnames;
/**
* Whether or not to use the FQDN for the peer hostnames. This
* can eat up a good bit of memory as well as a lot of communication
* during startup - it can be reduced by just using the hostname
* instead of the FQDN
*/
OMPI_DECLSPEC extern bool ompi_mpi_keep_fqdn_hostnames;
/**
* Whether an MPI_ABORT should print out a stack trace or not.
*/

View file

@ -99,14 +99,6 @@ struct orte_process_name_t {
};
typedef struct orte_process_name_t orte_process_name_t;
/*
* define a generic id for nodes
*/
typedef int32_t orte_nodeid_t;
#define ORTE_NODEID OPAL_INT32
#define ORTE_NODEID_WILDCARD -1
#define ORTE_NODEID_INVALID INT32_MIN
/**
* handle differences in iovec

View file

@ -94,9 +94,13 @@ void orte_errmgr_default_proc_aborted(orte_process_name_t *name, int exit_code)
ORTE_ERROR_LOG(rc);
}
/* wakeup orterun so we can exit - the appropriate exit status
* for orterun will have been set by whomever called us
/* set the exit status, just in case whomever called us failed
* to do so - it can only be done once, so we are protected
* from overwriting it
*/
ORTE_UPDATE_EXIT_STATUS(exit_code);
/* wakeup orterun so we can exit */
if (ORTE_SUCCESS != (rc = orte_wakeup())) {
ORTE_ERROR_LOG(rc);
}
@ -133,9 +137,13 @@ void orte_errmgr_default_incomplete_start(orte_jobid_t job, int exit_code)
ORTE_ERROR_LOG(rc);
}
/* wakeup orterun so we can exit - the appropriate exit status
* for orterun will have been set by whomever called us
/* set the exit status, just in case whomever called us failed
* to do so - it can only be done once, so we are protected
* from overwriting it
*/
ORTE_UPDATE_EXIT_STATUS(exit_code);
/* wakeup orterun so we can exit */
if (ORTE_SUCCESS != (rc = orte_wakeup())) {
ORTE_ERROR_LOG(rc);
}

View file

@ -23,6 +23,8 @@
#include <catamount/cnos_mpi_os.h>
#include "opal/util/show_help.h"
#include "opal/util/argv.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/base/base.h"
@ -37,21 +39,34 @@ static int alps_set_name(void);
static int rte_init(char flags);
static int rte_finalize(void);
static bool proc_is_local(orte_process_name_t *proc);
static char* proc_get_hostname(orte_process_name_t *proc);
static uint32_t proc_get_arch(orte_process_name_t *proc);
static uint8_t proc_get_local_rank(orte_process_name_t *proc);
static uint8_t proc_get_node_rank(orte_process_name_t *proc);
orte_ess_base_module_t orte_ess_alps_module = {
rte_init,
rte_finalize,
orte_ess_base_app_abort,
proc_is_local,
proc_get_hostname,
proc_get_arch,
proc_get_local_rank,
proc_get_node_rank,
NULL /* ft_event */
};
static opal_pointer_array_t nidmap;
static orte_pmap_t *pmap;
static orte_vpid_t nprocs;
static int rte_init(char flags)
{
int ret;
char *error = NULL;
/* Start by getting a unique name */
alps_set_name();
@ -72,14 +87,26 @@ static int rte_init(char flags)
goto error;
}
} else {
/* otherwise, I must be an application process, so
* use that default procedure
*/
/* otherwise, I must be an application process - use
* the default procedure to finish my setup
*/
if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_app_setup";
goto error;
}
/* setup the nidmap arrays */
OBJ_CONSTRUCT(&nidmap, opal_pointer_array_t);
opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);
/* if one was provided, build my nidmap */
if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
&nidmap, &pmap, &nprocs))) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_build_nidmap";
goto error;
}
}
return ORTE_SUCCESS;
@ -95,6 +122,8 @@ error:
static int rte_finalize(void)
{
int ret;
orte_nid_t **nids;
int32_t i;
/* if I am a daemon, finalize using the default procedure */
if (orte_process_info.daemon) {
@ -107,9 +136,22 @@ static int rte_finalize(void)
ORTE_ERROR_LOG(ret);
}
} else {
/* otherwise, I must be an application process, so
* use that default procedure
*/
/* otherwise, I must be an application process - deconstruct
* my nidmap arrays
*/
nids = (orte_nid_t**)nidmap.addr;
for (i=0; i < nidmap.size; i++) {
if (NULL == nids[i]) {
break;
}
if (NULL != nids[i]->name) {
free(nids[i]->name);
}
}
OBJ_DESTRUCT(&nidmap);
free(pmap);
/* use the default procedure to finish */
if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
ORTE_ERROR_LOG(ret);
}
@ -118,6 +160,83 @@ static int rte_finalize(void)
return ret;
}
static bool proc_is_local(orte_process_name_t *proc)
{
if (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid) {
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:alps: proc %s is LOCAL",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
return true;
}
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:alps: proc %s is REMOTE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
return false;
}
static char* proc_get_hostname(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:alps: proc %s is on host %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
nids[node]->name));
return nids[node]->name;
}
static uint32_t proc_get_arch(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:alps: proc %s has arch %0x",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
nids[node]->arch));
return nids[node]->arch;
}
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
{
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:alps: proc %s has local rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
(int)pmap[proc->vpid].local_rank));
return pmap[proc->vpid].local_rank;
}
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
{
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:alps: proc %s has node rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
(int)pmap[proc->vpid].node_rank));
return pmap[proc->vpid].node_rank;
}
static int alps_set_name(void)
{

View file

@ -29,5 +29,6 @@ libmca_ess_la_SOURCES += \
base/ess_base_put.c \
base/ess_base_std_tool.c \
base/ess_base_std_app.c \
base/ess_base_std_orted.c
base/ess_base_std_orted.c \
base/ess_base_build_nidmap.c

View file

@ -25,6 +25,11 @@
#include "orte/types.h"
#include "opal/mca/mca.h"
#include "opal/dss/dss_types.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/ess/ess.h"
BEGIN_C_DECLS
@ -68,6 +73,9 @@ ORTE_DECLSPEC int orte_ess_base_tool_finalize(void);
ORTE_DECLSPEC int orte_ess_base_orted_setup(void);
ORTE_DECLSPEC int orte_ess_base_orted_finalize(void);
ORTE_DECLSPEC int orte_ess_base_build_nidmap(opal_buffer_t *buffer,
opal_pointer_array_t *nidmap,
orte_pmap_t **pmap, orte_vpid_t *num_procs);
/*
* Put functions

orte/mca/ess/base/ess_base_build_nidmap.c (new file, 80 lines)
View file

@ -0,0 +1,80 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include "opal/dss/dss.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/ess/base/base.h"
int orte_ess_base_build_nidmap(opal_buffer_t *buffer,
opal_pointer_array_t *nidmap,
orte_pmap_t **pmap, orte_vpid_t *num_procs)
{
int rc;
opal_byte_object_t *bo;
int32_t cnt;
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
"%s ess:build:nidmap: received buffer with %ld bytes",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(long)buffer->bytes_used));
/* it is okay if the buffer is empty - could be a non-MPI proc */
if (0 == buffer->bytes_used) {
return ORTE_SUCCESS;
}
/* extract the byte object holding the daemonmap */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &bo, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the node map */
if (ORTE_SUCCESS != (rc = orte_util_decode_nodemap(bo, nidmap))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* the bytes in the object were free'd by the decode */
/* extract the byte object holding the process map */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &bo, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the process map */
if (ORTE_SUCCESS != (rc = orte_util_decode_pidmap(bo, num_procs,
pmap, NULL, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* the bytes in the object were free'd by the decode */
return ORTE_SUCCESS;
}

View file

@ -57,14 +57,5 @@ int orte_ess_env_get(void)
true, false, (int)ORTE_VPID_INVALID, &num_procs);
orte_process_info.local_rank = (orte_vpid_t)num_procs;
/* it is okay for this param not to be found - for example, we don't bother
* to set it for orteds - so just set it to a value which indicates
* it wasn't found if it isn't there
*/
mca_base_param_reg_int_name("orte", "ess_num_local_procs",
"Used to discover the number of processes on a node",
true, false, -1, &num_procs);
orte_process_info.num_local_procs = (orte_std_cntr_t)num_procs;
return ORTE_SUCCESS;
}

View file

@ -37,18 +37,29 @@
static int rte_init(char flags);
static int rte_finalize(void);
static void rte_abort(int status, bool report) __opal_attribute_noreturn__;
static bool proc_is_local(orte_process_name_t *proc);
static char* proc_get_hostname(orte_process_name_t *proc);
static uint32_t proc_get_arch(orte_process_name_t *proc);
static uint8_t proc_get_local_rank(orte_process_name_t *proc);
static uint8_t proc_get_node_rank(orte_process_name_t *proc);
orte_ess_base_module_t orte_ess_cnos_module = {
rte_init,
rte_finalize,
rte_abort,
proc_is_local,
proc_get_hostname,
proc_get_arch,
proc_get_local_rank,
proc_get_node_rank,
NULL /* ft_event */
};
static cnos_nidpid_map_t *map;
static int rte_init(char flags)
{
int rc;
cnos_nidpid_map_t *map;
int nprocs;
/* Get our process information */
@ -66,14 +77,13 @@ static int rte_init(char flags)
/* Get the number of procs in the job from cnos */
orte_process_info.num_procs = (orte_std_cntr_t) cnos_get_size();
/* Set the nodeid to the machine nid */
/* Get the nid map */
nprocs = cnos_get_nidpid_map(&map);
if (nprocs <= 0) {
opal_output(0, "%5d: cnos_get_nidpid_map() returned %d",
cnos_get_rank(), nprocs);
return ORTE_ERR_FATAL;
}
orte_process_info.nodeid = map[cnos_get_rank()].nid;
/* MPI_Init needs the grpcomm framework, so we have to init it */
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_open())) {
@ -105,3 +115,39 @@ static void rte_abort(int status, bool report)
{
exit(status);
}
static bool proc_is_local(orte_process_name_t *proc)
{
if (map[ORTE_PROC_MY_NAME->vpid].nid ==
map[proc->vpid].nid) {
return true;
}
return false;
}
static char* proc_get_hostname(orte_process_name_t *proc)
{
return map[proc->vpid].nid;
}
static uint32_t proc_get_arch(orte_process_name_t *proc)
{
return 0;
}
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
{
/* RHC: someone more familiar with CNOS needs to
* fix this to return the correct value
*/
return 0;
}
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
{
/* RHC: someone more familiar with CNOS needs to
* fix this to return the correct value
*/
return 0;
}

orte/mca/ess/env/ess_env_module.c (vendored, 128 lines)
View file

@ -35,6 +35,7 @@
#include "opal/threads/mutex.h"
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_cr.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
@ -42,6 +43,7 @@
#include "opal/util/os_path.h"
#include "opal/util/cmd_line.h"
#include "opal/util/malloc.h"
#include "opal/util/argv.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/rml/base/rml_contact.h"
@ -79,6 +81,12 @@ static int env_set_name(void);
static int rte_init(char flags);
static int rte_finalize(void);
static bool proc_is_local(orte_process_name_t *proc);
static char* proc_get_hostname(orte_process_name_t *proc);
static uint32_t proc_get_arch(orte_process_name_t *proc);
static uint8_t proc_get_local_rank(orte_process_name_t *proc);
static uint8_t proc_get_node_rank(orte_process_name_t *proc);
#if OPAL_ENABLE_FT == 1
static int rte_ft_event(int state);
static int ess_env_ft_event_update_process_info(orte_process_name_t proc, pid_t pid);
@ -88,6 +96,11 @@ orte_ess_base_module_t orte_ess_env_module = {
rte_init,
rte_finalize,
orte_ess_base_app_abort,
proc_is_local,
proc_get_hostname,
proc_get_arch,
proc_get_local_rank,
proc_get_node_rank,
#if OPAL_ENABLE_FT == 1
rte_ft_event
#else
@ -95,6 +108,10 @@ orte_ess_base_module_t orte_ess_env_module = {
#endif
};
static opal_pointer_array_t nidmap;
static orte_pmap_t *pmap;
static orte_vpid_t nprocs;
static int rte_init(char flags)
{
int ret;
@ -122,8 +139,8 @@ static int rte_init(char flags)
}
} else {
/* otherwise, I must be an application process, so
* use that default procedure
/* otherwise, I must be an application process - use
* the default procedure to finish my setup
*/
if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) {
ORTE_ERROR_LOG(ret);
@ -131,6 +148,17 @@ static int rte_init(char flags)
goto error;
}
/* setup the nidmap arrays */
OBJ_CONSTRUCT(&nidmap, opal_pointer_array_t);
opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);
/* if one was provided, build my nidmap */
if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
&nidmap, &pmap, &nprocs))) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_build_nidmap";
goto error;
}
}
return ORTE_SUCCESS;
@ -146,6 +174,8 @@ error:
static int rte_finalize(void)
{
int ret;
orte_nid_t **nids;
int32_t i;
/* if I am a daemon, finalize using the default procedure */
if (orte_process_info.daemon) {
@ -158,9 +188,22 @@ static int rte_finalize(void)
ORTE_ERROR_LOG(ret);
}
} else {
/* otherwise, I must be an application process, so
* use that default procedure
/* otherwise, I must be an application process - deconstruct
* my nidmap arrays
*/
nids = (orte_nid_t**)nidmap.addr;
for (i=0; i < nidmap.size; i++) {
if (NULL == nids[i]) {
break;
}
if (NULL != nids[i]->name) {
free(nids[i]->name);
}
}
OBJ_DESTRUCT(&nidmap);
free(pmap);
/* use the default procedure to finish */
if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
ORTE_ERROR_LOG(ret);
}
@ -169,6 +212,83 @@ static int rte_finalize(void)
return ret;
}
static bool proc_is_local(orte_process_name_t *proc)
{
if (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid) {
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:env: proc %s is LOCAL",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
return true;
}
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:env: proc %s is REMOTE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
return false;
}
static char* proc_get_hostname(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:env: proc %s is on host %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
nids[node]->name));
return nids[node]->name;
}
static uint32_t proc_get_arch(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:env: proc %s has arch %0x",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
nids[node]->arch));
return nids[node]->arch;
}
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
{
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:env: proc %s has local rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
(int)pmap[proc->vpid].local_rank));
return pmap[proc->vpid].local_rank;
}
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
{
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:env: proc %s has node rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
(int)pmap[proc->vpid].node_rank));
return pmap[proc->vpid].node_rank;
}
static int env_set_name(void)
{
char *jobid_str, *procid_str;

View file

@ -24,6 +24,9 @@
#ifndef ORTE_ESS_H
#define ORTE_ESS_H
#include "orte_config.h"
#include "orte/types.h"
#include "opal/mca/mca.h"
BEGIN_C_DECLS
@ -72,6 +75,54 @@ typedef int (*orte_ess_base_module_finalize_fn_t)(void);
*/
typedef void (*orte_ess_base_module_abort_fn_t)(int status, bool report);
/**
* Determine if a process is local to me
*
* MPI procs need to know if a process is "local" or not - i.e.,
* if they share the same node. Different environments are capable
* of making that determination in different ways - e.g., they may
* provide a callable utility to return the answer, or download
* a map of information into each process. This API provides a
* means for each environment to do the "right thing".
*/
typedef bool (*orte_ess_base_module_proc_is_local_fn_t)(orte_process_name_t *proc);
/**
* Get the hostname where a proc resides
*
* MPI procs need to know the hostname where a specified proc resides.
* Different environments provide that info in different ways - e.g., they may
* provide a callable utility to return the answer, or download
* a map of information into each process. This API provides a
* means for each environment to do the "right thing".
*
* NOTE: To avoid memory waste, this function returns a pointer
* to static storage. IT MUST NOT BE FREED!
*/
typedef char* (*orte_ess_base_module_proc_get_hostname_fn_t)(orte_process_name_t *proc);
/**
* Determine the arch of the node where a specified proc resides
*
* MPI procs need to know the arch being used by a specified proc.
* Different environments provide that info in different ways - e.g., they may
* provide a callable utility to return the answer, or download
* a map of information into each process. This API provides a
* means for each environment to do the "right thing".
*/
typedef uint32_t (*orte_ess_base_module_proc_get_arch_fn_t)(orte_process_name_t *proc);
/**
* Get the local rank of a remote process
*/
typedef uint8_t (*orte_ess_base_module_proc_get_local_rank_fn_t)(orte_process_name_t *proc);
/**
* Get the node rank of a remote process
*/
typedef uint8_t (*orte_ess_base_module_proc_get_node_rank_fn_t)(orte_process_name_t *proc);
/**
* Handle fault tolerance updates
*
@ -86,10 +137,15 @@ typedef int (*orte_ess_base_module_ft_event_fn_t)(int state);
* the standard module data structure
*/
struct orte_ess_base_module_1_0_0_t {
orte_ess_base_module_init_fn_t init;
orte_ess_base_module_finalize_fn_t finalize;
orte_ess_base_module_abort_fn_t abort;
orte_ess_base_module_ft_event_fn_t ft_event;
orte_ess_base_module_init_fn_t init;
orte_ess_base_module_finalize_fn_t finalize;
orte_ess_base_module_abort_fn_t abort;
orte_ess_base_module_proc_is_local_fn_t proc_is_local;
orte_ess_base_module_proc_get_hostname_fn_t proc_get_hostname;
orte_ess_base_module_proc_get_arch_fn_t proc_get_arch;
orte_ess_base_module_proc_get_local_rank_fn_t get_local_rank;
orte_ess_base_module_proc_get_node_rank_fn_t get_node_rank;
orte_ess_base_module_ft_event_fn_t ft_event;
};
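
As a usage illustration (a hedged sketch; the calls mirror what ompi/proc.c does elsewhere in this commit, and proc is assumed to be an ompi_proc_t*):

    /* decide whether a peer shares our node, as ompi/proc.c does */
    if (orte_ess.proc_is_local(&proc->proc_name)) {
        proc->proc_flags |= OMPI_PROC_FLAG_LOCAL;
    }
    /* pointer into RTE storage - must not be free'd */
    proc->proc_hostname = orte_ess.proc_get_hostname(&proc->proc_name);
    proc->proc_arch = orte_ess.proc_get_arch(&proc->proc_name);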

View file

@ -77,6 +77,11 @@ orte_ess_base_module_t orte_ess_hnp_module = {
rte_init,
rte_finalize,
rte_abort,
NULL, /* don't need a proc_is_local fn */
NULL, /* don't need a proc_get_hostname fn */
NULL, /* don't need a proc_get_arch fn */
NULL, /* don't need a proc_get_local_rank fn */
NULL, /* don't need a proc_get_node_rank fn */
NULL /* ft_event */
};
@ -295,9 +300,8 @@ static int rte_init(char flags)
/* create and store a node object where we are */
node = OBJ_NEW(orte_node_t);
node->name = strdup(orte_process_info.nodename);
node->arch = orte_process_info.arch;
node->index = opal_pointer_array_add(orte_node_pool, node);
/* record our node */
orte_hnpnode = node;
/* create and store a proc object for us */
proc = OBJ_NEW(orte_proc_t);
@ -308,6 +312,7 @@ static int rte_init(char flags)
proc->state = ORTE_PROC_STATE_RUNNING;
OBJ_RETAIN(node); /* keep accounting straight */
proc->node = node;
proc->nodename = node->name;
opal_pointer_array_add(jdata->procs, proc);
/* record that the daemon (i.e., us) is on this node

View file

@ -33,6 +33,7 @@
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
@ -47,14 +48,29 @@ static int lsf_set_name(void);
static int rte_init(char flags);
static int rte_finalize(void);
static bool proc_is_local(orte_process_name_t *proc);
static char* proc_get_hostname(orte_process_name_t *proc);
static uint32_t proc_get_arch(orte_process_name_t *proc);
static uint8_t proc_get_local_rank(orte_process_name_t *proc);
static uint8_t proc_get_node_rank(orte_process_name_t *proc);
orte_ess_base_module_t orte_ess_lsf_module = {
rte_init,
rte_finalize,
orte_ess_base_app_abort,
proc_is_local,
proc_get_hostname,
proc_get_arch,
proc_get_local_rank,
proc_get_node_rank,
NULL /* ft_event */
};
static opal_pointer_array_t nidmap;
static orte_pmap_t *pmap;
static orte_vpid_t nprocs;
static int rte_init(char flags)
{
int ret;
@ -80,14 +96,27 @@ static int rte_init(char flags)
goto error;
}
} else {
/* otherwise, I must be an application process, so
* use that default procedure
/* otherwise, I must be an application process - use
* the default procedure to finish my setup
*/
if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_app_setup";
goto error;
}
/* setup the nidmap arrays */
OBJ_CONSTRUCT(&nidmap, opal_pointer_array_t);
opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);
/* if one was provided, build my nidmap */
if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
&nidmap, &pmap, &nprocs))) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_build_nidmap";
goto error;
}
}
return ORTE_SUCCESS;
@ -103,7 +132,9 @@ error:
static int rte_finalize(void)
{
int ret;
orte_nid_t **nids;
int32_t i;
/* if I am a daemon, finalize using the default procedure */
if (orte_process_info.daemon) {
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
@ -115,9 +146,22 @@ static int rte_finalize(void)
ORTE_ERROR_LOG(ret);
}
} else {
/* otherwise, I must be an application process, so
* use that default procedure
*/
/* otherwise, I must be an application process - deconstruct
* my nidmap arrays
*/
nids = (orte_nid_t**)nidmap.addr;
for (i=0; i < nidmap.size; i++) {
if (NULL == nids[i]) {
break;
}
if (NULL != nids[i]->name) {
free(nids[i]->name);
}
}
OBJ_DESTRUCT(&nidmap);
free(pmap);
/* use the default procedure to finish */
if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
ORTE_ERROR_LOG(ret);
}
@ -126,6 +170,84 @@ static int rte_finalize(void)
return ret;
}
static bool proc_is_local(orte_process_name_t *proc)
{
if (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid) {
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:lsf: proc %s is LOCAL",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
return true;
}
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:lsf: proc %s is REMOTE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
return false;
}
static char* proc_get_hostname(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:lsf: proc %s is on host %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
nids[node]->name));
return nids[node]->name;
}
static uint32_t proc_get_arch(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:lsf: proc %s has arch %0x",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
nids[node]->arch));
return nids[node]->arch;
}
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
{
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:lsf: proc %s has local rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
(int)pmap[proc->vpid].local_rank));
return pmap[proc->vpid].local_rank;
}
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
{
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:lsf: proc %s has node rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
(int)pmap[proc->vpid].node_rank));
return pmap[proc->vpid].node_rank;
}
static int lsf_set_name(void)
{
int rc;

View file

@ -23,6 +23,7 @@
#include <string.h>
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "orte/mca/errmgr/base/base.h"
#include "orte/util/name_fns.h"
@ -36,14 +37,26 @@
static int rte_init(char flags);
static int rte_finalize(void);
static void rte_abort(int status, bool report) __opal_attribute_noreturn__;
static bool proc_is_local(orte_process_name_t *proc);
static char* proc_get_hostname(orte_process_name_t *proc);
static uint32_t proc_get_arch(orte_process_name_t *proc);
static uint8_t proc_get_local_rank(orte_process_name_t *proc);
static uint8_t proc_get_node_rank(orte_process_name_t *proc);
orte_ess_base_module_t orte_ess_portals_utcp_module = {
rte_init,
rte_finalize,
rte_abort,
proc_is_local,
proc_get_hostname,
proc_get_arch,
proc_get_local_rank,
proc_get_node_rank,
NULL /* ft_event */
};
static char **nidmap=NULL;
static int rte_init(char flags)
{
int rc, i, len, num_procs;
@ -80,13 +93,9 @@ static int rte_init(char flags)
* : separated list of nids, and the utcp reference implementation
* assumes all will be present
*/
len = strlen(nidmap_string);
num_procs = 1;
for (i = 0 ; i < len ; ++i) {
if (nidmap_string[i] == ':') num_procs++;
}
orte_process_info.num_procs = (orte_std_cntr_t) num_procs;
/* split the nidmap string */
nidmap = opal_argv_split(nidmap_string, ':');
orte_process_info.num_procs = (orte_std_cntr_t) opal_argv_count(nidmap);
/* MPI_Init needs the grpcomm framework, so we have to init it */
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_open())) {
@ -143,6 +152,9 @@ static int rte_init(char flags)
static int rte_finalize(void)
{
/* destruct the nidmap */
opal_argv_free(nidmap);
/* just cleanup the things we used */
orte_grpcomm_base_close();
orte_session_dir_finalize(ORTE_PROC_MY_NAME);
@ -157,3 +169,41 @@ static void rte_abort(int status, bool report)
{
exit(status);
}
static bool proc_is_local(orte_process_name_t *proc)
{
if (NULL != nidmap[proc->vpid] &&
NULL != nidmap[ORTE_PROC_MY_NAME->vpid] &&
0 == strcmp(nidmap[proc->vpid],
nidmap[ORTE_PROC_MY_NAME->vpid])) {
return true;
}
return false;
}
static char* proc_get_hostname(orte_process_name_t *proc)
{
return nidmap[proc->vpid];
}
static uint32_t proc_get_arch(orte_process_name_t *proc)
{
return 0;
}
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
{
/* RHC: someone more familiar with CNOS needs to
* fix this to return the correct value
*/
return 0;
}
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
{
/* RHC: someone more familiar with CNOS needs to
* fix this to return the correct value
*/
return 0;
}

View file

@ -35,6 +35,7 @@
#include "opal/util/show_help.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/installdirs/installdirs.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/errmgr.h"
@ -65,14 +66,29 @@ static void set_handler_default(int sig)
}
static int rte_init(char flags);
static int rte_finalize(void);
static bool proc_is_local(orte_process_name_t *proc);
static char* proc_get_hostname(orte_process_name_t *proc);
static uint32_t proc_get_arch(orte_process_name_t *proc);
static uint8_t proc_get_local_rank(orte_process_name_t *proc);
static uint8_t proc_get_node_rank(orte_process_name_t *proc);
orte_ess_base_module_t orte_ess_singleton_module = {
rte_init,
orte_ess_base_app_finalize,
rte_finalize,
orte_ess_base_app_abort,
proc_is_local,
proc_get_hostname,
proc_get_arch,
proc_get_local_rank,
proc_get_node_rank,
NULL /* ft_event */
};
static opal_pointer_array_t nidmap;
static orte_pmap_t *pmap;
static orte_vpid_t nprocs;
static int rte_init(char flags)
{
int rc;
@ -118,6 +134,7 @@ static int rte_init(char flags)
}
orte_process_info.num_procs = 1;
/* since we are a singleton, we must have a local_rank of 0
* and only 1 local process
*/
@ -135,9 +152,48 @@ static int rte_init(char flags)
* library wrt pty's and stdin
*/
/* setup the nidmap arrays */
OBJ_CONSTRUCT(&nidmap, opal_pointer_array_t);
opal_pointer_array_init(&nidmap, 1,
INT32_MAX, 8);
/* if one was provided, build my nidmap */
if (ORTE_SUCCESS != (rc = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
&nidmap, &pmap, &nprocs))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}
static int rte_finalize(void)
{
int ret;
orte_nid_t **nids;
int32_t i;
/* deconstruct my nidmap arrays */
nids = (orte_nid_t**)nidmap.addr;
for (i=0; i < nidmap.size; i++) {
if (NULL == nids[i]) {
break;
}
if (NULL != nids[i]->name) {
free(nids[i]->name);
}
}
OBJ_DESTRUCT(&nidmap);
free(pmap);
/* use the default procedure to finish */
if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
ORTE_ERROR_LOG(ret);
}
return ret;
}
#define ORTE_URI_MSG_LGTH 256
@ -338,3 +394,52 @@ static int fork_hnp(void)
return ORTE_SUCCESS;
}
static bool proc_is_local(orte_process_name_t *proc)
{
if (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid) {
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:env: proc %s is LOCAL",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
return true;
}
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:env: proc %s is REMOTE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
return false;
}
static char* proc_get_hostname(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
nids = (orte_nid_t**)nidmap.addr;
return nids[node]->name;
}
static uint32_t proc_get_arch(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
nids = (orte_nid_t**)nidmap.addr;
return nids[node]->arch;
}
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
{
return pmap[proc->vpid].local_rank;
}
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
{
return pmap[proc->vpid].node_rank;
}

View file

@ -32,6 +32,7 @@
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/argv.h"
#include "opal/util/show_help.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/errmgr.h"
@ -47,16 +48,30 @@ static int slurm_set_name(void);
static int rte_init(char flags);
static int rte_finalize(void);
static bool proc_is_local(orte_process_name_t *proc);
static char* proc_get_hostname(orte_process_name_t *proc);
static uint32_t proc_get_arch(orte_process_name_t *proc);
static uint8_t proc_get_local_rank(orte_process_name_t *proc);
static uint8_t proc_get_node_rank(orte_process_name_t *proc);
orte_ess_base_module_t orte_ess_slurm_module = {
rte_init,
rte_finalize,
orte_ess_base_app_abort,
proc_is_local,
proc_get_hostname,
proc_get_arch,
proc_get_local_rank,
proc_get_node_rank,
NULL /* ft_event */
};
static opal_pointer_array_t nidmap;
static orte_pmap_t *pmap;
static orte_vpid_t nprocs;
static int rte_init(char flags)
{
int ret;
@ -82,14 +97,26 @@ static int rte_init(char flags)
goto error;
}
} else {
/* otherwise, I must be an application process, so
* use that default procedure
/* otherwise, I must be an application process - use
* the default procedure to finish my setup
*/
if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_app_setup";
goto error;
}
/* setup the nidmap arrays */
OBJ_CONSTRUCT(&nidmap, opal_pointer_array_t);
opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);
/* if one was provided, build my nidmap */
if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
&nidmap, &pmap, &nprocs))) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_build_nidmap";
goto error;
}
}
return ORTE_SUCCESS;
@ -105,7 +132,9 @@ error:
static int rte_finalize(void)
{
int ret;
orte_nid_t **nids;
int32_t i;
/* if I am a daemon, finalize using the default procedure */
if (orte_process_info.daemon) {
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
@ -117,9 +146,22 @@ static int rte_finalize(void)
ORTE_ERROR_LOG(ret);
}
} else {
/* otherwise, I must be an application process, so
* use that default procedure
/* otherwise, I must be an application process - deconstruct
* my nidmap arrays
*/
nids = (orte_nid_t**)nidmap.addr;
for (i=0; i < nidmap.size; i++) {
if (NULL == nids[i]) {
break;
}
if (NULL != nids[i]->name) {
free(nids[i]->name);
}
}
OBJ_DESTRUCT(&nidmap);
free(pmap);
/* use the default procedure to finish */
if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
ORTE_ERROR_LOG(ret);
}
@ -128,6 +170,83 @@ static int rte_finalize(void)
return ret;
}
static bool proc_is_local(orte_process_name_t *proc)
{
if (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid) {
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:slurm: proc %s is LOCAL",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
return true;
}
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:slurm: proc %s is REMOTE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
return false;
}
static char* proc_get_hostname(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:slurm: proc %s is on host %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
nids[node]->name));
return nids[node]->name;
}
static uint32_t proc_get_arch(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:slurm: proc %s has arch %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
nids[node]->arch));
return nids[node]->arch;
}
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
{
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:slurm: proc %s has local rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
(int)pmap[proc->vpid].local_rank));
return pmap[proc->vpid].local_rank;
}
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
{
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:slurm: proc %s has node rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
(int)pmap[proc->vpid].node_rank));
return pmap[proc->vpid].node_rank;
}
static int slurm_set_name(void)
{
int slurm_nodeid;

View file

@ -51,6 +51,11 @@ orte_ess_base_module_t orte_ess_tool_module = {
rte_init,
orte_ess_base_tool_finalize,
rte_abort,
NULL, /* don't need a local procs fn */
NULL, /* don't need a proc_get_hostname fn */
NULL, /* don't need a proc_get_arch fn */
NULL, /* don't need a proc_get_local_rank fn */
NULL, /* don't need a proc_get_node_rank fn */
NULL /* ft_event */
};

View file

@ -79,8 +79,7 @@ static int daemon_collective(orte_jobid_t jobid,
orte_std_cntr_t num_local_contributors,
orte_grpcomm_coll_t type,
opal_buffer_t *data,
orte_rmaps_dp_t flag,
opal_value_array_t *participants);
bool hnp_has_local_procs);
static int update_trees(void);
/* Module def */
@ -1116,8 +1115,7 @@ static int daemon_leader(orte_jobid_t jobid,
orte_std_cntr_t num_local_contributors,
orte_grpcomm_coll_t type,
opal_buffer_t *data,
orte_rmaps_dp_t flag,
opal_value_array_t *participants)
bool hnp_has_local_procs)
{
int rc;
opal_buffer_t buf;
@ -1127,13 +1125,13 @@ static int daemon_leader(orte_jobid_t jobid,
"%s grpcomm:basic daemon_collective - I am the leader!",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (ORTE_RMAPS_ALL_DAEMONS == flag) {
if (hnp_has_local_procs) {
/* if everyone is participating, then I must be the HNP,
* so the #children is just the #children determined for
* my outgoing xcast
*/
num_children = my_num_children;
} else if (ORTE_RMAPS_ALL_EXCEPT_HNP == flag) {
} else {
/* if the HNP has no local procs, then it won't
* know that a collective is underway, so that means
* I must be rank=1. The number of messages I must get
@ -1148,18 +1146,8 @@ static int daemon_leader(orte_jobid_t jobid,
* my peers sending to me, plus my own children
*/
num_children = num_children - 1 + my_num_children;
} else if (ORTE_RMAPS_DAEMON_SUBSET == flag) {
/* for this first cut, all members will send to me direct,
* so the #children I should hear from is just the
* size of the value array - 1
*/
num_children = opal_value_array_get_size(participants) - 1;
} else {
/* no idea */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
/* setup to recv the messages from my children */
collective_num_recvd = 0;
collective_failed = false;
@ -1240,44 +1228,35 @@ static int daemon_collective(orte_jobid_t jobid,
orte_std_cntr_t num_local_contributors,
orte_grpcomm_coll_t type,
opal_buffer_t *data,
orte_rmaps_dp_t flag,
opal_value_array_t *participants)
bool hnp_has_local_procs)
{
orte_process_name_t lead, parent;
orte_vpid_t *vptr;
int num_children;
opal_buffer_t buf;
int rc;
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
"%s grpcomm:basic daemon_collective entered with dp flag %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)flag));
"%s grpcomm:basic daemon_collective entered - %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
hnp_has_local_procs ? "HNP HAS LOCAL PROCS" : "HNP DOES NOT HAVE LOCAL PROCS"));
parent.jobid = ORTE_PROC_MY_NAME->jobid;
lead.jobid = ORTE_PROC_MY_NAME->jobid;
/* if the participation is full, then the HNP is the lead */
if (ORTE_RMAPS_ALL_DAEMONS == flag) {
if (hnp_has_local_procs) {
lead.vpid = ORTE_PROC_MY_HNP->vpid;
} else if (ORTE_RMAPS_ALL_EXCEPT_HNP == flag) {
} else {
/* if the HNP has no local procs, then it won't
* know that a collective is underway, so let
* rank=1 be the lead
*/
lead.vpid = 1;
} else if (ORTE_RMAPS_DAEMON_SUBSET == flag) {
/* let the first proc in the array be the lead */
vptr = (orte_vpid_t*)opal_value_array_get_item(participants, 0);
lead.vpid = *vptr;
} else {
/* no idea */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
/* if I am the lead, do my own thing */
if (ORTE_PROC_MY_NAME->vpid == lead.vpid) {
return daemon_leader(jobid, num_local_contributors, type, data, flag, participants);
return daemon_leader(jobid, num_local_contributors, type, data, hnp_has_local_procs);
}
@ -1285,13 +1264,13 @@ static int daemon_collective(orte_jobid_t jobid,
* I need to collect messages from and who my parent will be
*/
if (ORTE_RMAPS_ALL_DAEMONS == flag) {
if (hnp_has_local_procs) {
/* everyone is participating, so my parent and
* num_children can be as initially computed
*/
parent.vpid = my_parent.vpid;
num_children = my_num_children;
} else if (ORTE_RMAPS_ALL_EXCEPT_HNP == flag) {
} else {
/* if the HNP has no local procs, then it won't
* know that a collective is underway, so we need
* to send to rank=1 if our parent would have been
@ -1305,14 +1284,6 @@ static int daemon_collective(orte_jobid_t jobid,
parent.vpid = my_parent.vpid;
}
num_children = my_num_children;
} else if (ORTE_RMAPS_DAEMON_SUBSET == flag) {
/* regardless of mode, we always send direct */
num_children = 0;
parent.vpid = lead.vpid;
} else {
/* no idea */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
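
The leader-selection rule that replaces the old tri-state flag boils down to one conditional; schematically (a distillation of the branches above, not the verbatim routine):

/* Lead selection: if the HNP hosts local procs it knows about the
 * collective and leads; otherwise vpid 1 leads in its place. */
lead.vpid = hnp_has_local_procs ? ORTE_PROC_MY_HNP->vpid : 1;
if (ORTE_PROC_MY_NAME->vpid == lead.vpid) {
    return daemon_leader(jobid, num_local_contributors, type,
                         data, hnp_has_local_procs);
}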

View file

@ -79,8 +79,7 @@ typedef int (*orte_grpcomm_base_module_daemon_collective_fn_t)(orte_jobid_t jobi
orte_std_cntr_t num_local_contributors,
orte_grpcomm_coll_t type,
opal_buffer_t *data,
orte_rmaps_dp_t flag,
opal_value_array_t *participants);
bool hnp_has_local_procs);
/* update the xcast trees - called after a change to the number of daemons
* in the system

View file

@ -45,7 +45,10 @@ int orte_odls_base_close(void)
OBJ_DESTRUCT(&orte_odls_globals.cond);
OBJ_DESTRUCT(&orte_odls_globals.children);
OBJ_DESTRUCT(&orte_odls_globals.jobs);
if (NULL != orte_odls_globals.dmap && NULL != orte_odls_globals.dmap->bytes) {
free(orte_odls_globals.dmap->bytes);
free(orte_odls_globals.dmap);
}
nodes = (char**)orte_daemonmap.addr;
for (i=0; i < orte_daemonmap.size; i++) {
if (NULL != nodes[i]) {

View file

@ -55,6 +55,7 @@
#include "orte/util/name_fns.h"
#include "orte/util/session_dir.h"
#include "orte/util/proc_info.h"
#include "orte/util/nidmap.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
@ -67,6 +68,8 @@
#include "orte/mca/odls/base/odls_private.h"
static int8_t *app_idx;
static char **slot_str;
/* IT IS CRITICAL THAT ANY CHANGE IN THE ORDER OF THE INFO PACKED IN
* THIS FUNCTION BE REFLECTED IN THE CONSTRUCT_CHILD_LIST PARSER BELOW
@ -74,19 +77,38 @@
int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data,
orte_jobid_t job)
{
orte_node_t **nodes, *node;
orte_proc_t **procs, *proc;
int rc;
orte_job_t *jdata;
orte_job_map_t *map;
orte_std_cntr_t i;
orte_vpid_t j;
orte_vpid_t invalid_vpid=ORTE_VPID_INVALID;
char *nodename;
opal_buffer_t *wireup;
opal_byte_object_t bo, *boptr;
int32_t numbytes;
/* get the job data pointer */
if (NULL == (jdata = orte_get_job_data_object(job))) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
/* get a pointer to the job map */
map = jdata->map;
/* construct a nodemap */
if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(&bo))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* store it */
boptr = &bo;
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &boptr, 1, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* release the data since it has now been copied into our buffer */
free(bo.bytes);
/* get wireup info for daemons per the selected routing module */
wireup = OBJ_NEW(opal_buffer_t);
if (ORTE_SUCCESS != (rc = orte_routed.get_wireup_info(ORTE_PROC_MY_NAME->jobid, wireup))) {
@ -124,36 +146,18 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data,
}
OBJ_RELEASE(wireup);
/* get the job data pointer */
if (NULL == (jdata = orte_get_job_data_object(job))) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
/* pack the jobid so it can be extracted later */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &job, 1, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the number of procs in the job - equates to the vpid range for the job */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->num_procs, 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the total slots allocated to us */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->total_slots_alloc, 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the override_oversubscribed flag */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->oversubscribe_override, 1, OPAL_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the number of app_contexts for this job */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->num_apps, 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
@ -168,143 +172,38 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data,
return rc;
}
/* the remainder of our required info is in the node objects in this job's map,
* so pickup a pointer to that map
*/
map = jdata->map;
/* pack the flag indicating daemon participation in this launch */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &map->daemon_participation, 1, ORTE_RMAPS_DP_T))) {
/* encode the pidmap */
if (ORTE_SUCCESS != (rc = orte_util_encode_pidmap(jdata, &bo))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the number of nodes participating in this launch */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &map->num_nodes, 1, ORTE_STD_CNTR))) {
/* store it */
boptr = &bo;
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &boptr, 1, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* cycle through the participating nodes */
nodes = (orte_node_t**)map->nodes->addr;
for (i=0; i < map->num_nodes; i++) {
node = nodes[i];
/* PACK NODE-SPECIFIC DATA */
/* pack the vpid of the daemon on this node - this will be
* later used to tell the daemon it has something to do
*/
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &(node->daemon->name.vpid), 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if we are not keeping FQDN hostnames, abbreviate
* the nodename as required
*/
if (!orte_keep_fqdn_hostnames) {
char *ptr;
nodename = strdup(node->name);
if (NULL != (ptr = strchr(nodename, '.'))) {
*ptr = '\0';
}
} else {
nodename = strdup(node->name);
}
/* pack the nodename so that all daemons know where this one is located */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &nodename, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
free(nodename);
return rc;
}
free(nodename);
/* pack the number of procs on this node */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &node->num_procs, 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the oversubscribed flag for the node */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &node->oversubscribed, 1, OPAL_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* PACK THE PROC-SPECIFIC DATA FOR THE PROCS **TO BE LAUNCHED** ON THIS NODE
* FOR THIS JOB
*
* NOTE: The nodes object contains info on ALL procs on the node, not just those
* to be launched for the specified job. Thus, we must take care to CLEARLY
* demarcate info on those procs to be launched, or else we will get
* duplicate processes!!
*/
/* we already packed the number of procs on the node, so cycle
* through them and pack each one's launch data
*/
procs = (orte_proc_t**)node->procs->addr;
for (j=0; j < node->num_procs; j++) {
proc = procs[j]; /* convenience */
/* the mapped node includes ALL procs on it, not just those for the
* job to be launched. Hence, check first to see if this proc is
* part of the indicated job - if not, don't include it here
*/
if (proc->name.jobid != job) {
continue;
}
/* pack the vpid for this proc */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &(proc->name.vpid), 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the app_context index */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &proc->app_idx, 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the local rank */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &proc->local_rank, 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the cpu_list string */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &proc->slot_list, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
/* pack an INVALID vpid as a flag that we are done with procs for this node */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &invalid_vpid, 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return ORTE_ERR_OUT_OF_RESOURCE;
}
OBJ_RELEASE(wireup);
return rc;
}
/* release the data since it has now been copied into our buffer */
free(bo.bytes);
return ORTE_SUCCESS;
}
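
Since the pack order here must stay in lock-step with the parser in construct_child_list below, the launch message layout is worth spelling out once (field list distilled from the unpack sequence below, not a separate spec):

/* Launch message layout - keep get_add_procs_data (pack) and
 * construct_child_list (unpack) in sync:
 *   1. nodemap                 OPAL_BYTE_OBJECT
 *   2. wireup byte count       OPAL_INT32
 *   3. wireup payload          OPAL_BYTE_OBJECT (only if count > 0)
 *   4. jobid                   ORTE_JOBID
 *   5. total slots allocated   ORTE_STD_CNTR
 *   6. number of app_contexts  ORTE_STD_CNTR
 *   7. app_contexts            ORTE_APP_CONTEXT (one per app)
 *   8. pidmap                  OPAL_BYTE_OBJECT
 */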
int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
orte_jobid_t *job,
orte_std_cntr_t *num_local_procs,
orte_vpid_t *vpid_range,
orte_std_cntr_t *total_slots_alloc,
bool *node_included,
bool *oversubscribed,
bool *override_oversubscribed,
orte_std_cntr_t *num_contexts,
orte_app_context_t ***app_contexts)
orte_jobid_t *job)
{
int rc;
orte_vpid_t local_rank, num_procs;
int rc, ret;
orte_vpid_t j;
orte_odls_child_t *child;
orte_std_cntr_t cnt, j, num_nodes, app_idx;
orte_std_cntr_t cnt;
orte_process_name_t proc, daemon;
char *slot_str, *nodename;
bool node_oversubscribed;
orte_odls_job_t *jobdat;
opal_buffer_t wireup;
opal_byte_object_t *bo;
int32_t numbytes;
orte_nid_t *node;
opal_buffer_t alert;
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:constructing child list",
@ -318,18 +217,29 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
/* set the default values since they may not be included in the data */
*job = ORTE_JOBID_INVALID;
*num_local_procs = 0;
*vpid_range = ORTE_VPID_INVALID;
*total_slots_alloc = 0;
*node_included = false;
*oversubscribed = false;
*override_oversubscribed = false;
/* extract the byte object holding the daemonmap */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &bo, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
goto REPORT_ERROR;
}
/* retain a copy for downloading to child processes */
opal_dss.copy((void**)&orte_odls_globals.dmap, bo, OPAL_BYTE_OBJECT);
/* construct the daemon map, if required - the decode function
* knows what to do - it will also free the bytes in the bo
*/
if (ORTE_SUCCESS != (rc = orte_util_decode_nodemap(bo, &orte_daemonmap))) {
ORTE_ERROR_LOG(rc);
goto REPORT_ERROR;
}
/* unpack the #bytes of daemon wireup info in the message */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &numbytes, &cnt, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
goto REPORT_ERROR;
}
/* any bytes there? */
if (0 < numbytes) {
@ -337,7 +247,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &bo, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
return rc;
goto REPORT_ERROR;
}
/* load it into a buffer */
OBJ_CONSTRUCT(&wireup, opal_buffer_t);
@ -346,7 +256,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
if (ORTE_SUCCESS != (rc = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, &wireup))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&wireup);
return rc;
goto REPORT_ERROR;
}
/* done with the buffer - dump it */
OBJ_DESTRUCT(&wireup);
@ -356,213 +266,162 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, job, &cnt, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
goto REPORT_ERROR;
}
/* setup jobdat object for this job */
jobdat = OBJ_NEW(orte_odls_job_t);
jobdat->jobid = *job;
opal_list_append(&orte_odls_globals.jobs, &jobdat->super);
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:construct_child_list unpacking data to launch job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job)));
/* setup jobdat object for this job */
jobdat = OBJ_NEW(orte_odls_job_t);
jobdat->jobid = *job;
opal_list_append(&orte_odls_globals.jobs, &jobdat->super);
/* UNPACK JOB-SPECIFIC DATA */
/* unpack the number of procs in the job - equates to the vpid range */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, vpid_range, &cnt, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the total slots allocated to us */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, total_slots_alloc, &cnt, ORTE_STD_CNTR))) {
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->total_slots_alloc, &cnt, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the override_oversubscribed flag */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, override_oversubscribed, &cnt, OPAL_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
goto REPORT_ERROR;
}
/* unpack the number of app_contexts for this job */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, num_contexts, &cnt, ORTE_STD_CNTR))) {
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->num_apps, &cnt, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
goto REPORT_ERROR;
}
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:construct_child_list unpacking %ld app_contexts",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)*num_contexts));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)jobdat->num_apps));
/* allocate space and unpack the app_contexts for this job - the HNP checked
* that there must be at least one, so don't bother checking here again
*/
*app_contexts = (orte_app_context_t**)malloc(*num_contexts * sizeof(orte_app_context_t*));
if (NULL == *app_contexts) {
jobdat->apps = (orte_app_context_t**)malloc(jobdat->num_apps * sizeof(orte_app_context_t*));
if (NULL == jobdat->apps) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
goto REPORT_ERROR;
}
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, *app_contexts, num_contexts, ORTE_APP_CONTEXT))) {
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, jobdat->apps, &jobdat->num_apps, ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc);
return rc;
goto REPORT_ERROR;
}
/* UNPACK THE JOB MAP DATA */
/* unpack the flag indicating daemon participation in this launch */
/* unpack the pidmap byte object */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->dp, &cnt, ORTE_RMAPS_DP_T))) {
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &bo, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
return rc;
goto REPORT_ERROR;
}
/* unpack the number of nodes participating in this launch */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &num_nodes, &cnt, ORTE_STD_CNTR))) {
/* retain a copy for downloading to child processes */
opal_dss.copy((void**)&jobdat->pmap, bo, OPAL_BYTE_OBJECT);
/* decode the pidmap - this will also free the bytes in bo */
if (ORTE_SUCCESS != (rc = orte_util_decode_pidmap(bo, &jobdat->num_procs, &jobdat->procmap, &app_idx, &slot_str))) {
ORTE_ERROR_LOG(rc);
return rc;
goto REPORT_ERROR;
}
/* set the size of the daemonmap to minimize realloc's */
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(&orte_daemonmap, num_nodes))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* setup the proc and daemon names */
/* cycle through the procs and find mine */
proc.jobid = *job;
daemon.jobid = ORTE_PROC_MY_NAME->jobid;
/* cycle through them */
for (j=0; j < num_nodes; j++) {
/* unpack the vpid of the daemon */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &daemon.vpid, &cnt, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
for (j=0; j < jobdat->num_procs; j++) {
proc.vpid = j;
/* ident this proc's node */
node = (orte_nid_t*)orte_daemonmap.addr[jobdat->procmap[j].node];
/* is this proc on the HNP? */
if (0 == jobdat->procmap[j].node) {
jobdat->hnp_has_local_procs = true;
}
/* unpack the name of the node so we know where this daemon is located */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &nodename, &cnt, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* is this daemon already known to us? */
if (NULL == opal_pointer_array_get_item(&orte_daemonmap, daemon.vpid)) {
/* record it */
if (ORTE_SUCCESS != (opal_pointer_array_set_item(&orte_daemonmap, daemon.vpid, strdup(nodename)))) {
/* does this data belong to us? */
if ((int32_t)ORTE_PROC_MY_NAME->vpid == jobdat->procmap[j].node) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:constructing child list - found proc %s for me!",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_VPID_PRINT(j)));
/* keep tabs of the number of local procs */
jobdat->num_local_procs++;
/* add this proc to our child list */
child = OBJ_NEW(orte_odls_child_t);
/* copy the name to preserve it */
if (ORTE_SUCCESS != (rc = opal_dss.copy((void**)&child->name, &proc, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
goto REPORT_ERROR;
}
}
/* if daemon participation is sparse, add this daemon to the
* list of those participating
*/
if (ORTE_RMAPS_DAEMON_SUBSET == jobdat->dp) {
opal_value_array_append_item(&jobdat->daemons, &daemon.vpid);
}
/* unpack the number of procs on this node */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &num_procs, &cnt, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the oversubscribed flag for the node */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &node_oversubscribed, &cnt, OPAL_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* cycle through the procs and unpack their data */
/* unpack the vpid for this proc */
cnt=1;
while (ORTE_SUCCESS == (rc = opal_dss.unpack(data, &(proc.vpid), &cnt, ORTE_VPID))) {
if (ORTE_VPID_INVALID == proc.vpid) {
/* this flags that all data from this node has been read - there
* will be no further entries for it
*/
break;
child->app_idx = app_idx[j]; /* save the index into the app_context objects */
child->local_rank = jobdat->procmap[j].local_rank; /* save the local_rank */
if (NULL != slot_str && NULL != slot_str[j]) {
child->slot_list = strdup(slot_str[j]);
}
/* unpack the app_context index */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &app_idx, &cnt, ORTE_STD_CNTR))) {
/* protect operation on the global list of children */
OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
opal_list_append(&orte_odls_globals.children, &child->super);
opal_condition_signal(&orte_odls_globals.cond);
OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex);
/* set the routing info to be direct - we need to do this
* prior to launch as the procs may want to communicate right away
*/
if (ORTE_SUCCESS != (rc = orte_routed.update_route(&proc, &proc))) {
ORTE_ERROR_LOG(rc);
return rc;
goto REPORT_ERROR;
}
/* unpack the local rank */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &local_rank, &cnt, ORTE_VPID))) {
} else {
/* set the routing info through the other daemon - we need to do this
* prior to launch as the procs may want to communicate right away
*/
daemon.vpid = jobdat->procmap[j].node;
if (ORTE_SUCCESS != (rc = orte_routed.update_route(&proc, &daemon))) {
ORTE_ERROR_LOG(rc);
return rc;
goto REPORT_ERROR;
}
/* unpack the cpu_list string */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &slot_str, &cnt, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* does this data belong to us? */
if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
/* yes it does - add this proc to our child list */
child = OBJ_NEW(orte_odls_child_t);
/* copy the name to preserve it */
if (ORTE_SUCCESS != (rc = opal_dss.copy((void**)&child->name, &proc, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
}
child->app_idx = app_idx; /* save the index into the app_context objects */
child->local_rank = local_rank; /* save the local_rank */
if (NULL != slot_str) {
child->slot_list = strdup(slot_str);
free(slot_str);
}
/* protect operation on the global list of children */
OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
opal_list_append(&orte_odls_globals.children, &child->super);
opal_condition_signal(&orte_odls_globals.cond);
OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex);
/* pass along the local info */
*num_local_procs = num_procs;
*oversubscribed = node_oversubscribed;
/* set the routing info to be direct - we need to do this
* prior to launch as the procs may want to communicate right away
*/
if (ORTE_SUCCESS != (rc = orte_routed.update_route(&proc, &proc))) {
ORTE_ERROR_LOG(rc);
return rc;
}
} else {
/* set the routing info through the other daemon - we need to do this
* prior to launch as the procs may want to communicate right away
*/
if (ORTE_SUCCESS != (rc = orte_routed.update_route(&proc, &daemon))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
if (ORTE_SUCCESS != rc && ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
ORTE_ERROR_LOG(rc);
}
/* do we have any launching to do? */
if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
/* pass that along for later */
*node_included = true;
}
}
if (NULL != app_idx) {
free(app_idx);
app_idx = NULL;
}
if (NULL != slot_str) {
for (j=0; j < jobdat->num_procs; j++) {
free(slot_str[j]);
}
free(slot_str);
slot_str = NULL;
}
return ORTE_SUCCESS;
REPORT_ERROR:
/* we have to report an error back to the HNP so we don't just
* hang. Although there shouldn't be any errors once this is
* all debugged, it is still good practice to have a way
* to report the failure - especially so developers don't have to
* deal with the hang!
*/
OBJ_CONSTRUCT(&alert, opal_buffer_t);
*job = ORTE_JOBID_INVALID;
opal_dss.pack(&alert, job, 1, ORTE_JOBID);
/* if we are the HNP, then we would rather not send this to ourselves -
* instead, we queue it up for local processing
*/
if (orte_process_info.hnp) {
ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert,
ORTE_RML_TAG_APP_LAUNCH_CALLBACK,
orte_plm_base_app_report_launch);
} else {
/* go ahead and send the update to the HNP */
if (0 > (ret = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &alert, ORTE_RML_TAG_APP_LAUNCH_CALLBACK, 0))) {
ORTE_ERROR_LOG(ret);
}
}
OBJ_DESTRUCT(&alert);
return rc;
}
static int odls_base_default_setup_fork(orte_app_context_t *context,
orte_std_cntr_t num_local_procs,
uint8_t num_local_procs,
orte_vpid_t vpid_range,
orte_std_cntr_t total_slots_alloc,
bool oversubscribed, char ***environ_copy)
@ -686,18 +545,6 @@ static int odls_base_default_setup_fork(orte_app_context_t *context,
opal_setenv(param, orte_process_info.my_hnp_uri, true, environ_copy);
free(param);
/* pass our vpid to the process as a "nodeid" so it can
* identify which procs are local to it
*/
if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&param2, ORTE_PROC_MY_NAME->vpid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
param = mca_base_param_environ_variable("orte","nodeid",NULL);
opal_setenv(param, param2, true, environ_copy);
free(param);
free(param2);
/* setup yield schedule and processor affinity
* We default here to always setting the affinity processor if we want
* it. The processor affinity system then determines
@ -738,11 +585,6 @@ static int odls_base_default_setup_fork(orte_app_context_t *context,
opal_setenv("OMPI_UNIVERSE_SIZE", param2, true, environ_copy);
free(param2);
/* use same nodename as the starting daemon (us) */
param = mca_base_param_environ_variable("orte", "base", "nodename");
opal_setenv(param, orte_process_info.nodename, true, environ_copy);
free(param);
/* push data into environment - don't push any single proc
* info, though. We are setting the environment up on a
* per-context basis, and will add the individual proc
@ -819,18 +661,12 @@ static int pack_state_update(opal_buffer_t *alert, bool pack_pid, orte_jobid_t j
int orte_odls_base_default_launch_local(orte_jobid_t job,
orte_std_cntr_t num_apps,
orte_app_context_t **apps,
orte_std_cntr_t num_local_procs,
orte_vpid_t vpid_range,
orte_std_cntr_t total_slots_alloc,
bool node_oversubscribed,
bool override_oversubscribed,
orte_odls_base_fork_local_proc_fn_t fork_local)
{
char *job_str, *vpid_str, *param, *value;
opal_list_item_t *item;
orte_app_context_t *app;
orte_app_context_t *app, **apps;
orte_std_cntr_t num_apps;
orte_odls_child_t *child=NULL;
int i, num_processors, int_value;
bool want_processor, oversubscribed;
@ -843,6 +679,26 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
/* protect operations involving the global list of children */
OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
/* find the jobdat for this job */
jobdat = NULL;
for (item = opal_list_get_first(&orte_odls_globals.jobs);
item != opal_list_get_end(&orte_odls_globals.jobs);
item = opal_list_get_next(item)) {
jobdat = (orte_odls_job_t*)item;
/* is this the specified job? */
if (jobdat->jobid == job) {
break;
}
}
if (NULL == jobdat) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto CLEANUP;
}
apps = jobdat->apps;
num_apps = jobdat->num_apps;
#if OPAL_ENABLE_FT == 1
/*
* Notify the local SnapC component regarding new job
@ -861,14 +717,11 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
NULL != apps[i]->preload_files) {
if( ORTE_SUCCESS != (rc = orte_odls_base_preload_files_app_context(apps[i])) ) {
ORTE_ERROR_LOG(rc);
goto unlock;
goto CLEANUP;
}
}
}
/* default oversubscribe to what the mapper told us */
oversubscribed = node_oversubscribed;
/* setup for processor affinity. If there are enough physical processors on this node, then
* we indicate which processor each process should be assigned to, IFF the user has requested
* processor affinity be used - the paffinity subsystem will make that final determination. All
@ -879,6 +732,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
* settings
*/
want_processor = false; /* default to not being a hog */
oversubscribed = true;
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:launch could not get number of processors - using conservative settings",
@ -890,38 +744,26 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
"%s odls:launch got %ld processors",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)num_processors));
/* only do this if we can actually get info on the number of processors */
/* grab a processor if we can */
if (opal_list_get_size(&orte_odls_globals.children) > (size_t)num_processors) {
want_processor = false;
} else {
want_processor = true;
}
/* now let's deal with the oversubscribed flag - and the use-case where a hostfile or some
* other non-guaranteed-accurate method was used to inform us about our allocation. Since
* the information on the number of slots on this node could have been incorrect, we need
* to check it against the local number of processors to ensure we don't overload them
*/
if (override_oversubscribed) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:launch overriding oversubscription",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (opal_list_get_size(&orte_odls_globals.children) > (size_t)num_processors) {
/* if the #procs > #processors, declare us oversubscribed regardless
* of what the mapper claimed - the user may have told us something
* incorrect
*/
oversubscribed = true;
} else {
/* likewise, if there are more processors here than we were told,
* declare us to not be oversubscribed so we can be aggressive. This
* covers the case where the user didn't tell us anything about the
* number of available slots, so we defaulted to a value of 1
*/
oversubscribed = false;
}
if (opal_list_get_size(&orte_odls_globals.children) > (size_t)num_processors) {
/* if the #procs > #processors, declare us oversubscribed regardless
* of what the mapper claimed - the user may have told us something
* incorrect
*/
oversubscribed = true;
} else {
/* likewise, if there are more processors here than we were told,
* declare us to not be oversubscribed so we can be aggressive. This
* covers the case where the user didn't tell us anything about the
* number of available slots, so we defaulted to a value of 1
*/
oversubscribed = false;
}
}
@ -932,31 +774,13 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
/* setup to report the proc state to the HNP */
OBJ_CONSTRUCT(&alert, opal_buffer_t);
/* find the jobdat for this job */
jobdat = NULL;
for (item = opal_list_get_first(&orte_odls_globals.jobs);
item != opal_list_get_end(&orte_odls_globals.jobs);
item = opal_list_get_next(item)) {
jobdat = (orte_odls_job_t*)item;
/* is this the specified job? */
if (jobdat->jobid == job) {
break;
}
}
if (NULL == jobdat) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto unlock;
}
/* setup the environment for each context */
for (i=0; i < num_apps; i++) {
if (ORTE_SUCCESS != (rc = odls_base_default_setup_fork(apps[i],
num_local_procs,
vpid_range,
total_slots_alloc,
jobdat->num_local_procs,
jobdat->num_procs,
jobdat->total_slots_alloc,
oversubscribed,
&apps[i]->env))) {
@ -1203,7 +1027,6 @@ CLEANUP:
}
OBJ_DESTRUCT(&alert);
unlock:
if (!launch_failed) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:launch setting waitpids",
@ -1390,12 +1213,13 @@ static int pack_child_contact_info(orte_jobid_t job, opal_buffer_t *buf)
}
int orte_odls_base_default_require_sync(orte_process_name_t *proc, opal_buffer_t *buf)
int orte_odls_base_default_require_sync(orte_process_name_t *proc,
opal_buffer_t *buf,
bool drop_nidmap)
{
opal_buffer_t buffer;
opal_list_item_t *item;
orte_odls_child_t *child;
int8_t dummy;
orte_std_cntr_t cnt;
int rc;
bool found=false;
@ -1439,9 +1263,14 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc, opal_buffer_t
/* setup jobdat object for its job so daemon collectives work */
jobdat = OBJ_NEW(orte_odls_job_t);
jobdat->jobid = proc->jobid;
jobdat->dp = ORTE_RMAPS_ALL_DAEMONS;
if (orte_process_info.hnp) {
jobdat->hnp_has_local_procs = true;
}
jobdat->procmap = (orte_pmap_t*)malloc(sizeof(orte_pmap_t));
jobdat->procmap[0].node = ORTE_PROC_MY_NAME->vpid;
jobdat->procmap[0].local_rank = 0;
jobdat->procmap[0].node_rank = opal_list_get_size(&orte_odls_globals.children);
opal_list_append(&orte_odls_globals.jobs, &jobdat->super);
}
/* if the contact info is already set, then we are "de-registering" the child
@ -1462,17 +1291,41 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc, opal_buffer_t
/* ack the call */
OBJ_CONSTRUCT(&buffer, opal_buffer_t);
opal_dss.pack(&buffer, &dummy, 1, OPAL_INT8); /* put anything in */
/* do they want the nidmap? */
if (drop_nidmap) {
/* get the jobdata object */
for (item = opal_list_get_first(&orte_odls_globals.jobs);
item != opal_list_get_end(&orte_odls_globals.jobs);
item = opal_list_get_next(item)) {
jobdat = (orte_odls_job_t*)item;
if (jobdat->jobid == child->name->jobid) {
break;
}
}
if (NULL == jobdat) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
goto CLEANUP;
}
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:sync nidmap requested for job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jobdat->jobid)));
/* the proc needs a copy of both the daemon/node map, and
* the process map for its peers
*/
opal_dss.pack(&buffer, &orte_odls_globals.dmap, 1, OPAL_BYTE_OBJECT);
opal_dss.pack(&buffer, &jobdat->pmap, 1, OPAL_BYTE_OBJECT);
}
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls: sending sync ack to child %s",
"%s odls: sending sync ack to child %s with %ld bytes of data",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
ORTE_NAME_PRINT(proc), (long)buffer.bytes_used));
if (0 > (rc = orte_rml.send_buffer(proc, &buffer, ORTE_RML_TAG_SYNC, 0))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buffer);
return rc;
goto CLEANUP;
}
OBJ_DESTRUCT(&buffer);
@ -2133,6 +1986,16 @@ int orte_odls_base_default_collect_data(orte_process_name_t *proc,
* alive
*/
child->alive = true;
/* setup a jobdat for it */
jobdat = OBJ_NEW(orte_odls_job_t);
jobdat->jobid = child->name->jobid;
if (orte_process_info.hnp) {
jobdat->hnp_has_local_procs = true;
}
jobdat->procmap = (orte_pmap_t*)malloc(sizeof(orte_pmap_t));
jobdat->procmap[0].node = ORTE_PROC_MY_NAME->vpid;
jobdat->procmap[0].local_rank = 0;
opal_list_append(&orte_odls_globals.jobs, &jobdat->super);
}
/* unpack the collective type */
@ -2182,7 +2045,7 @@ int orte_odls_base_default_collect_data(orte_process_name_t *proc,
if (ORTE_SUCCESS != (rc = orte_grpcomm.daemon_collective(proc->jobid, num_local_contributors,
collective_type, collection_bucket,
jobdat->dp, &jobdat->daemons))) {
jobdat->hnp_has_local_procs))) {
ORTE_ERROR_LOG(rc);
}

View file

@ -67,7 +67,6 @@ static void orte_odls_child_constructor(orte_odls_child_t *ptr)
*/
ptr->state = ORTE_PROC_STATE_FAILED_TO_START;
ptr->exit_code = 0;
ptr->cpu_set = 0xffffffff;
ptr->rml_uri = NULL;
ptr->slot_list = NULL;
}
@ -82,17 +81,39 @@ OBJ_CLASS_INSTANCE(orte_odls_child_t,
orte_odls_child_constructor,
orte_odls_child_destructor);
/* instance the job list object */
static void orte_odls_job_constructor(orte_odls_job_t *ptr)
{
ptr->jobid = ORTE_JOBID_INVALID;
ptr->dp = 0;
OBJ_CONSTRUCT(&ptr->daemons, opal_value_array_t);
opal_value_array_init(&ptr->daemons, sizeof(orte_vpid_t));
ptr->apps = NULL;
ptr->num_apps = 0;
ptr->total_slots_alloc = 0;
ptr->num_procs = 0;
ptr->num_local_procs = 0;
ptr->hnp_has_local_procs = false;
ptr->procmap = NULL;
ptr->pmap = NULL;
}
static void orte_odls_job_destructor(orte_odls_job_t *ptr)
{
OBJ_DESTRUCT(&ptr->daemons);
orte_std_cntr_t i;
if (NULL != ptr->apps) {
for (i=0; i < ptr->num_apps; i++) {
OBJ_RELEASE(ptr->apps[i]);
}
if (NULL != ptr->apps) {
free(ptr->apps);
}
}
if (NULL != ptr->procmap) {
free(ptr->procmap);
}
if (NULL != ptr->pmap && NULL != ptr->pmap->bytes) {
free(ptr->pmap->bytes);
free(ptr->pmap);
}
}
OBJ_CLASS_INSTANCE(orte_odls_job_t,
opal_list_item_t,
@ -124,7 +145,8 @@ int orte_odls_base_open(void)
OBJ_CONSTRUCT(&orte_odls_globals.cond, opal_condition_t);
OBJ_CONSTRUCT(&orte_odls_globals.children, opal_list_t);
OBJ_CONSTRUCT(&orte_odls_globals.jobs, opal_list_t);
orte_odls_globals.dmap = NULL;
/* initialize and setup the daemonmap */
OBJ_CONSTRUCT(&orte_daemonmap, opal_pointer_array_t);
opal_pointer_array_init(&orte_daemonmap, 8, INT32_MAX, 8);

View file

@ -61,21 +61,30 @@ typedef struct {
bool coll_recvd; /* collective operation recvd */
orte_proc_state_t state; /* the state of the process */
orte_exit_code_t exit_code; /* process exit code */
unsigned long cpu_set;
char *rml_uri; /* contact info for this child */
char *slot_list; /* list of slots for this child */
} orte_odls_child_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_odls_child_t);
typedef struct {
/*
* List object to locally store job related info
*/
typedef struct orte_odls_job_t {
opal_list_item_t super; /* required to place this on a list */
orte_jobid_t jobid; /* jobid for this job */
orte_rmaps_dp_t dp; /* daemon participation for this job */
opal_value_array_t daemons; /* vpids of participating daemons */
orte_jobid_t jobid; /* jobid for this data */
orte_app_context_t **apps; /* app_contexts for this job */
orte_std_cntr_t num_apps; /* number of app_contexts */
orte_std_cntr_t total_slots_alloc;
orte_vpid_t num_procs;
uint8_t num_local_procs;
bool hnp_has_local_procs;
orte_pmap_t *procmap; /* map of procs/node, local ranks */
opal_byte_object_t *pmap; /* byte object version of procmap */
} orte_odls_job_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_odls_job_t);
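
A jobdat travels with its daemon for the life of the job; roughly (a lifecycle sketch under the constructor/destructor semantics above):

/* Sketch: typical jobdat lifecycle on a daemon. OBJ_RELEASE runs
 * the destructor, which frees apps, procmap, and pmap. */
orte_odls_job_t *jobdat = OBJ_NEW(orte_odls_job_t);
jobdat->jobid = job;
opal_list_append(&orte_odls_globals.jobs, &jobdat->super);
/* ...populate from the launch message, run the job... */
opal_list_remove_item(&orte_odls_globals.jobs, &jobdat->super);
OBJ_RELEASE(jobdat);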
typedef struct {
/** Verbose/debug output stream */
int output;
@ -87,8 +96,10 @@ typedef struct {
opal_condition_t cond;
/* list of children for this orted */
opal_list_t children;
/* list of jobs for this orted */
/* list of job data for this orted */
opal_list_t jobs;
/* byte object to store daemon map for later xmit to procs */
opal_byte_object_t *dmap;
} orte_odls_globals_t;
ORTE_DECLSPEC extern orte_odls_globals_t orte_odls_globals;
@ -105,15 +116,7 @@ orte_odls_base_default_get_add_procs_data(opal_buffer_t *data,
ORTE_DECLSPEC int
orte_odls_base_default_construct_child_list(opal_buffer_t *data,
orte_jobid_t *job,
orte_std_cntr_t *num_local_procs,
orte_vpid_t *vpid_range,
orte_std_cntr_t *total_slots_allocated,
bool *node_included,
bool *oversubscribed,
bool *override_oversubscribed,
orte_std_cntr_t *num_contexts,
orte_app_context_t ***app_contexts);
orte_jobid_t *job);
/* define a function that will fork a local proc */
typedef int (*orte_odls_base_fork_local_proc_fn_t)(orte_app_context_t *context,
@ -122,13 +125,6 @@ typedef int (*orte_odls_base_fork_local_proc_fn_t)(orte_app_context_t *context,
ORTE_DECLSPEC int
orte_odls_base_default_launch_local(orte_jobid_t job,
orte_std_cntr_t num_apps,
orte_app_context_t **apps,
orte_std_cntr_t num_local_procs,
orte_vpid_t vpid_range,
orte_std_cntr_t total_slots_allocated,
bool oversubscribed,
bool override_oversubscribed,
orte_odls_base_fork_local_proc_fn_t fork_local);
ORTE_DECLSPEC int
@ -154,7 +150,9 @@ orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state,
orte_odls_base_kill_local_fn_t kill_local,
orte_odls_base_child_died_fn_t child_died);
ORTE_DECLSPEC int orte_odls_base_default_require_sync(orte_process_name_t *proc, opal_buffer_t *buf);
ORTE_DECLSPEC int orte_odls_base_default_require_sync(orte_process_name_t *proc,
opal_buffer_t *buffer,
bool drop_nidmap);
/*
* Preload binary/files functions

View file

@ -348,45 +348,18 @@ static int odls_default_fork_local_proc(
int orte_odls_default_launch_local_procs(opal_buffer_t *data)
{
int rc;
orte_std_cntr_t total_slots_alloc, num_local_procs;
orte_jobid_t job;
orte_vpid_t range;
bool node_included;
bool override_oversubscribed;
bool oversubscribed;
orte_std_cntr_t i, num_contexts;
orte_app_context_t **app_contexts;
/* construct the list of children we are to launch */
if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job,
&num_local_procs,
&range,
&total_slots_alloc,
&node_included,
&oversubscribed,
&override_oversubscribed,
&num_contexts,
&app_contexts))) {
if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job))) {
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
"%s odls:default:launch:local failed to construct child list on error %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
goto CLEANUP;
}
/* if there is nothing for us to do, just return */
if (!node_included) {
rc = ORTE_SUCCESS;
goto CLEANUP;
}
/* launch the local procs */
if (ORTE_SUCCESS != (rc = orte_odls_base_default_launch_local(job,
num_contexts, app_contexts,
num_local_procs,
range, total_slots_alloc,
oversubscribed,
override_oversubscribed,
odls_default_fork_local_proc))) {
if (ORTE_SUCCESS != (rc = orte_odls_base_default_launch_local(job, odls_default_fork_local_proc))) {
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
"%s odls:default:launch:local failed to launch on error %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
@ -394,12 +367,7 @@ int orte_odls_default_launch_local_procs(opal_buffer_t *data)
}
CLEANUP:
/* cleanup */
for (i=0; i < num_contexts; i++) {
if (NULL != app_contexts[i]) OBJ_RELEASE(app_contexts[i]);
};
if (NULL != app_contexts) free(app_contexts);
return rc;
}

View file

@ -82,7 +82,9 @@ typedef int (*orte_odls_base_module_deliver_message_fn_t)(orte_jobid_t job, opal
/**
* Register to require sync before termination
*/
typedef int (*orte_odls_base_module_require_sync_fn_t)(orte_process_name_t *proc, opal_buffer_t *buffer);
typedef int (*orte_odls_base_module_require_sync_fn_t)(orte_process_name_t *proc,
opal_buffer_t *buffer,
bool drop_nidmap);
/**
* Collect data as part of a collective operation by the procs

View file

@ -43,21 +43,22 @@ typedef uint8_t orte_daemon_cmd_flag_t;
#define ORTE_DAEMON_MESSAGE_LOCAL_PROCS (orte_daemon_cmd_flag_t) 9
#define ORTE_DAEMON_NULL_CMD (orte_daemon_cmd_flag_t) 10
#define ORTE_DAEMON_SYNC_BY_PROC (orte_daemon_cmd_flag_t) 11
#define ORTE_DAEMON_SYNC_WANT_NIDMAP (orte_daemon_cmd_flag_t) 12
/* commands for use by tools */
#define ORTE_DAEMON_REPORT_JOB_INFO_CMD (orte_daemon_cmd_flag_t) 12
#define ORTE_DAEMON_REPORT_NODE_INFO_CMD (orte_daemon_cmd_flag_t) 13
#define ORTE_DAEMON_REPORT_PROC_INFO_CMD (orte_daemon_cmd_flag_t) 14
#define ORTE_DAEMON_ATTACH_STDOUT_CMD (orte_daemon_cmd_flag_t) 15
#define ORTE_DAEMON_ATTACH_STDERR_CMD (orte_daemon_cmd_flag_t) 16
#define ORTE_DAEMON_DETACH_STDOUT_CMD (orte_daemon_cmd_flag_t) 17
#define ORTE_DAEMON_DETACH_STDERR_CMD (orte_daemon_cmd_flag_t) 18
#define ORTE_DAEMON_SPAWN_JOB_CMD (orte_daemon_cmd_flag_t) 19
#define ORTE_DAEMON_TERMINATE_JOB_CMD (orte_daemon_cmd_flag_t) 20
#define ORTE_DAEMON_HALT_VM_CMD (orte_daemon_cmd_flag_t) 21
#define ORTE_DAEMON_REPORT_JOB_INFO_CMD (orte_daemon_cmd_flag_t) 13
#define ORTE_DAEMON_REPORT_NODE_INFO_CMD (orte_daemon_cmd_flag_t) 14
#define ORTE_DAEMON_REPORT_PROC_INFO_CMD (orte_daemon_cmd_flag_t) 15
#define ORTE_DAEMON_ATTACH_STDOUT_CMD (orte_daemon_cmd_flag_t) 16
#define ORTE_DAEMON_ATTACH_STDERR_CMD (orte_daemon_cmd_flag_t) 17
#define ORTE_DAEMON_DETACH_STDOUT_CMD (orte_daemon_cmd_flag_t) 18
#define ORTE_DAEMON_DETACH_STDERR_CMD (orte_daemon_cmd_flag_t) 19
#define ORTE_DAEMON_SPAWN_JOB_CMD (orte_daemon_cmd_flag_t) 20
#define ORTE_DAEMON_TERMINATE_JOB_CMD (orte_daemon_cmd_flag_t) 21
#define ORTE_DAEMON_HALT_VM_CMD (orte_daemon_cmd_flag_t) 22
/* collective-based cmds */
#define ORTE_DAEMON_COLL_CMD (orte_daemon_cmd_flag_t) 22
#define ORTE_DAEMON_COLL_CMD (orte_daemon_cmd_flag_t) 23
END_C_DECLS
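
The new sync variant lets an application proc request the maps inside the same ack it already waits for. A hypothetical sender-side sketch (the command constant is from the list above; the tag and dss type are assumptions based on the usual orted command path):

/* Sketch: a proc asking its local daemon for the nidmap at sync. */
opal_buffer_t buf;
orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_SYNC_WANT_NIDMAP;

OBJ_CONSTRUCT(&buf, opal_buffer_t);
opal_dss.pack(&buf, &cmd, 1, ORTE_DAEMON_CMD);
orte_rml.send_buffer(ORTE_PROC_MY_DAEMON, &buf, ORTE_RML_TAG_DAEMON, 0);
OBJ_DESTRUCT(&buf);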

View file

@ -179,45 +179,18 @@ static int odls_process_fork_local_proc(
static int odls_process_launch_local_procs(opal_buffer_t *data)
{
int rc;
orte_std_cntr_t total_slots_alloc, num_local_procs;
orte_jobid_t job;
orte_vpid_t range;
bool node_included;
bool override_oversubscribed;
bool oversubscribed;
orte_std_cntr_t i, num_contexts;
orte_app_context_t **app_contexts;
/* construct the list of children we are to launch */
if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job,
&num_local_procs,
&range,
&total_slots_alloc,
&node_included,
&oversubscribed,
&override_oversubscribed,
&num_contexts,
&app_contexts))) {
if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job))) {
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
"%s odls:process:launch:local failed to construct child list on error %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
goto CLEANUP;
}
/* if there is nothing for us to do, just return */
if (!node_included) {
rc = ORTE_SUCCESS;
goto CLEANUP;
}
/* launch the local procs */
if (ORTE_SUCCESS != (rc = orte_odls_base_default_launch_local(job,
num_contexts, app_contexts,
num_local_procs,
range, total_slots_alloc,
oversubscribed,
override_oversubscribed,
odls_process_fork_local_proc))) {
if (ORTE_SUCCESS != (rc = orte_odls_base_default_launch_local(job, odls_process_fork_local_proc))) {
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
"%s odls:process:launch:local failed to launch on error %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
@ -225,12 +198,7 @@ static int odls_process_launch_local_procs(opal_buffer_t *data)
}
CLEANUP:
/* cleanup */
for (i=0; i < num_contexts; i++) {
if (NULL != app_contexts[i]) OBJ_RELEASE(app_contexts[i]);
};
if (NULL != app_contexts) free(app_contexts);
return rc;
}
@ -256,5 +224,6 @@ orte_odls_base_module_t orte_odls_process_module = {
odls_process_kill_local_procs,
odls_process_signal_local_proc,
orte_odls_base_default_deliver_message,
orte_odls_base_default_require_sync
orte_odls_base_default_require_sync,
orte_odls_base_default_collect_data
};

View file

@ -590,6 +590,16 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
}
#endif /* OPAL_WANT_IPV6 */
#if 0
/* flag whether or not static ports are in use so that other
* parts of ORTE can act appropriately
* LEAVE OFF FOR MOMENT PENDING FURTHER TEST
*/
if (0 != port) {
orte_static_ports = true;
}
#endif
for (index = 0; index < range; index++ ) {
if (AF_INET == af_family) {
((struct sockaddr_in*) &inaddr)->sin_port = port + index;
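
With static ports the listener walks a fixed range instead of taking an ephemeral port; stripped of the OOB machinery, the scan looks roughly like this (plain-sockets sketch; base_port and range are stand-ins):

/* Sketch: probe ports [base_port, base_port + range) until one binds. */
int sd = socket(AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
uint16_t idx;

memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = INADDR_ANY;
for (idx = 0; idx < range; idx++) {
    sin.sin_port = htons(base_port + idx);
    if (0 == bind(sd, (struct sockaddr*)&sin, sizeof(sin))) {
        break;   /* success - this becomes our static port */
    }
}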

View file

@ -49,6 +49,11 @@
#include "orte/runtime/orte_wait.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/tools/orterun/totalview.h"
#include "orte/mca/plm/base/plm_private.h"
@ -73,12 +78,35 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_rmaps.map_job(jdata))) {
ORTE_ERROR_LOG(rc);
return rc;
}
#if 0
/* RHC: Please leave this code here - it is needed for
* rare debugging that doesn't merit a separate debug-flag,
* but is a pain to have to replicate when needed
*/
{
opal_byte_object_t bo;
/* construct a nodemap */
if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(&bo))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* construct the daemon map, if required - the decode function
* knows what to do
*/
if (ORTE_SUCCESS != (rc = orte_util_decode_nodemap(&bo, &orte_daemonmap))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
#endif
/* if we don't want to launch, now is the time to leave */
if (orte_do_not_launch) {
orte_finalize();
@ -247,6 +275,8 @@ static void process_orted_launch_report(int fd, short event, void *data)
opal_buffer_t *buffer = mev->buffer;
char *rml_uri;
int rc, idx;
int32_t arch;
orte_node_t **nodes;
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch from daemon %s",
@ -279,6 +309,23 @@ static void process_orted_launch_report(int fd, short event, void *data)
goto CLEANUP;
}
/* get the remote arch */
idx = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &arch, &idx, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
orted_failed_launch = true;
goto CLEANUP;
}
/* lookup the node */
nodes = (orte_node_t**)orte_node_pool->addr;
if (NULL == nodes[mev->sender.vpid]) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
orted_failed_launch = true;
goto CLEANUP;
}
/* store the arch */
nodes[mev->sender.vpid]->arch = arch;
CLEANUP:
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
@ -405,6 +452,17 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
orte_errmgr.incomplete_start(-1, -1); /* no way to know the jobid or exit code */
return;
}
/* if the jobid is invalid, then we know that this is a failed
* launch report from before we could even attempt to launch the
* procs - most likely, while we were attempting to unpack the
* launch cmd itself. In this case, just abort
*/
if (ORTE_JOBID_INVALID == jobid) {
jdata = NULL;
app_launch_failed = true;
goto CLEANUP;
}
/* get the job data object */
if (NULL == (jdata = orte_get_job_data_object(jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
@ -483,7 +541,11 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
CLEANUP:
if (app_launch_failed) {
orte_errmgr.incomplete_start(jdata->jobid, jdata->aborted_proc->exit_code);
if (NULL == jdata) {
orte_errmgr.incomplete_start(ORTE_JOBID_INVALID, ORTE_ERROR_DEFAULT_EXIT_CODE);
} else {
orte_errmgr.incomplete_start(jdata->jobid, jdata->aborted_proc->exit_code);
}
}
}
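
The unpack sequence above fixes the daemon's side of the report: URI first, then the arch as a 32-bit int. The sender would look roughly like this (sketch; where the daemon keeps its local arch value is an assumption):

/* Sketch: orted side of the launch report, mirroring the unpack
 * order in process_orted_launch_report. my_arch is assumed to hold
 * the local 32-bit arch value. */
opal_dss.pack(buffer, &rml_uri, 1, OPAL_STRING);
opal_dss.pack(buffer, &my_arch, 1, OPAL_INT32);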

View file

@ -78,6 +78,7 @@
#include "orte/runtime/orte_wakeup.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
@ -751,6 +752,29 @@ static void ssh_child(int argc, char **argv,
static opal_buffer_t collected_uris;
static int construct_daemonmap(opal_buffer_t *data)
{
opal_byte_object_t *bo;
orte_std_cntr_t cnt;
int rc;
/* extract the byte object holding the daemonmap */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &bo, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the nodemap */
if (ORTE_SUCCESS != (rc = orte_util_decode_nodemap(bo, &orte_daemonmap))) {
ORTE_ERROR_LOG(rc);
return rc;
}
free(bo->bytes);
return ORTE_SUCCESS;
}
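
remote_spawn below consumes the prefix and nodemap from the head of the launch buffer, then rewinds it so every child receives the complete message. The buffer discipline in miniature (a sketch, not the full routine):

/* Sketch: read our own fields, rewind, then copy for the children. */
opal_dss.unpack(launch, &prefix, &n, OPAL_STRING); /* consumes bytes  */
construct_daemonmap(launch);                       /* consumes more   */
launch->unpack_ptr = launch->base_ptr;             /* rewind to start */
launch_cmd = OBJ_NEW(opal_buffer_t);
opal_dss.copy_payload(launch_cmd, launch);         /* pristine copy   */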
/*
* launch a set of daemons from a remote daemon
*/
@ -758,7 +782,7 @@ static int remote_spawn(opal_buffer_t *launch)
{
opal_list_item_t *item;
orte_vpid_t vpid;
char **nodes;
orte_nid_t **nodes;
int node_name_index1;
int node_name_index2;
int proc_vpid_index;
@ -775,27 +799,34 @@ static int remote_spawn(opal_buffer_t *launch)
int num_children;
orte_std_cntr_t n;
nodes = (char**)orte_daemonmap.addr;
vpid=ORTE_PROC_MY_NAME->vpid;
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
"%s plm:rsh: remote spawn called",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* extract the prefix from the launch buffer */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(launch, &prefix, &n, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
goto cleanup;
}
/* construct the daemonmap, if required - the decode function
* will know what to do
*/
if (ORTE_SUCCESS != (rc = construct_daemonmap(launch))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
nodes = (orte_nid_t**)orte_daemonmap.addr;
vpid=ORTE_PROC_MY_NAME->vpid;
/* rewind the buffer for use by our children */
launch->unpack_ptr = launch->base_ptr;
/* setup the launch cmd */
launch_cmd = OBJ_NEW(opal_buffer_t);
opal_dss.copy_payload(launch_cmd, launch);
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
"%s plm:rsh: remote spawn called",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* clear out any previous child info */
while (NULL != (item = opal_list_remove_first(&mca_plm_rsh_component.children))) {
OBJ_RELEASE(item);
@ -808,12 +839,13 @@ static int remote_spawn(opal_buffer_t *launch)
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
"%s plm:rsh: remote spawn - have no children!",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
return ORTE_SUCCESS;
failed_launch = false;
rc = ORTE_SUCCESS;
goto cleanup;
}
/* setup the launch */
if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, nodes[0], &node_name_index1, &node_name_index2,
if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, orte_process_info.nodename, &node_name_index1, &node_name_index2,
&local_exec_index, &proc_vpid_index, &lib_base, &bin_base,
&remote_sh, &remote_csh))) {
ORTE_ERROR_LOG(rc);
@ -843,14 +875,15 @@ static int remote_spawn(opal_buffer_t *launch)
if (NULL == nodes[vpid]) {
opal_output(0, "%s NULL in daemonmap at position %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)vpid);
rc = ORTE_ERR_NOT_FOUND;
goto cleanup;
}
free(argv[node_name_index1]);
argv[node_name_index1] = strdup(nodes[vpid]);
argv[node_name_index1] = strdup(nodes[vpid]->name);
free(argv[node_name_index2]);
argv[node_name_index2] = strdup(nodes[vpid]);
argv[node_name_index2] = strdup(nodes[vpid]->name);
/* fork a child to exec the rsh/ssh session */
pid = fork();
@ -865,7 +898,7 @@ static int remote_spawn(opal_buffer_t *launch)
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
"%s plm:rsh: launching on node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
nodes[vpid]));
nodes[vpid]->name));
/* do the ssh launch - this will exit if it fails */
ssh_child(argc, argv, vpid,

View file

@ -93,21 +93,6 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
while (NULL != (item = opal_list_remove_first(nodes))) {
node = (orte_node_t*)item;
#if 0
/* if we are not keeping FQDN hostnames, abbreviate
* the nodename as required
*/
if (!orte_keep_fqdn_hostnames) {
char *tmp, *ptr;
tmp = strdup(node->name);
if (NULL != (ptr = strchr(tmp, '.'))) {
*ptr = '\0';
free(node->name);
node->name = strdup(tmp);
}
free(tmp);
}
#endif
/* the HNP had to already enter its node on the array - that entry is in the
* first position since it is the first one entered. We need to check to see
* if this node is the same as the HNP's node so we don't double-enter it

View file

@ -88,17 +88,18 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
ORTE_ERROR_LOG(rc);
return rc;
}
/** check that anything is here */
if (0 == opal_list_get_size(allocated_nodes)) {
opal_show_help("help-orte-rmaps-base.txt",
"orte-rmaps-base:no-available-resources",
true);
return ORTE_ERR_SILENT;
}
}
/** check that anything is here */
if (0 == opal_list_get_size(allocated_nodes)) {
opal_show_help("help-orte-rmaps-base.txt",
"orte-rmaps-base:no-available-resources",
true);
return ORTE_ERR_SILENT;
}
/* did the app_context contain a hostfile? */
if (NULL != app && NULL != app->hostfile) {
if (NULL != app->hostfile) {
/* yes - filter the node list through the file, removing
* any nodes not found in the file
*/
@ -107,27 +108,27 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
ORTE_ERROR_LOG(rc);
return rc;
}
/** check that anything is here */
if (0 == opal_list_get_size(allocated_nodes)) {
opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
true, app->app, app->hostfile);
return ORTE_ERR_SILENT;
}
}
/* now filter the list through any -host specification */
if (NULL != app) {
if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(allocated_nodes,
app->dash_host))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/** check that anything is left! */
if (0 == opal_list_get_size(allocated_nodes)) {
opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
true, app->app, "");
return ORTE_ERR_SILENT;
}
/** check that anything is here */
if (0 == opal_list_get_size(allocated_nodes)) {
opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
true, app->app, app->hostfile);
return ORTE_ERR_SILENT;
}
/* now filter the list through any -host specification */
if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(allocated_nodes,
app->dash_host))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/** check that anything is left! */
if (0 == opal_list_get_size(allocated_nodes)) {
opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
true, app->app, "");
return ORTE_ERR_SILENT;
}
/* If the "no local" option was set, then remove the local node
@ -242,10 +243,6 @@ PROCESS:
OBJ_RETAIN(proc);
++node->num_procs;
/* if this is the HNP, flag that the HNP has local procs */
if (node == orte_hnpnode) {
map->hnp_has_local_procs = true;
}
return ORTE_SUCCESS;
}
@ -283,9 +280,7 @@ int orte_rmaps_base_claim_slot(orte_job_t *jdata,
}
current_node->slot_list = NULL;
proc->node = current_node;
if (NULL != current_node->name) {
proc->nodename = strdup(current_node->name);
}
proc->nodename = current_node->name;
/* add this proc to the job's data - we don't have to worry here
* about keeping the array left-justified as all vpids
@ -344,10 +339,12 @@ int orte_rmaps_base_claim_slot(orte_job_t *jdata,
int orte_rmaps_base_compute_usage(orte_job_t *jdata)
{
orte_std_cntr_t i, j;
orte_std_cntr_t i;
orte_vpid_t j, k;
orte_node_t **nodes;
orte_proc_t **procs, *psave = NULL;
orte_vpid_t minv, local_rank;
orte_proc_t **procs, *psave, *psave2;
orte_vpid_t minv, minv2;
uint8_t local_rank;
orte_job_map_t *map;
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
@ -360,45 +357,47 @@ int orte_rmaps_base_compute_usage(orte_job_t *jdata)
/* for each node in the map... */
nodes = (orte_node_t**)map->nodes->addr;
for (i=0; i < map->num_nodes; i++) {
/* cycle through the array of procs IN THIS JOB on this node, looking for
* the minimum vpid one and setting that local rank, until we
* have done so for all procs on the node and/or in the job
/* cycle through the array of procs on this node, setting
* local and node ranks, until we
* have done so for all procs on nodes in this map
*/
/* init search values */
procs = (orte_proc_t**)nodes[i]->procs->addr;
local_rank = 0;
while (local_rank < nodes[i]->num_procs) {
for (k=0; k < nodes[i]->num_procs; k++) {
minv = ORTE_VPID_MAX;
minv2 = ORTE_VPID_MAX;
psave = NULL;
/* find the minimum vpid proc IN THIS JOB */
for (j=0; j < nodes[i]->procs->size; j++) {
if (NULL == procs[j]) {
/* the array is left justified, so this
* means we are done
*/
break;
}
if (procs[j]->name.jobid != jdata->jobid) {
/* not in our job */
continue;
}
if (ORTE_VPID_INVALID != procs[j]->local_rank) {
/* already did this one */
continue;
}
if (procs[j]->name.vpid < minv) {
psave2 = NULL;
/* find the minimum vpid proc */
for (j=0; j < nodes[i]->num_procs; j++) {
if (procs[j]->name.jobid == jdata->jobid &&
UINT8_MAX == procs[j]->local_rank &&
procs[j]->name.vpid < minv) {
minv = procs[j]->name.vpid;
psave = procs[j];
}
/* no matter what job...still have to handle node_rank */
if (UINT8_MAX == procs[j]->node_rank &&
procs[j]->name.vpid < minv2) {
minv2 = procs[j]->name.vpid;
psave2 = procs[j];
}
}
if (NULL == psave) {
if (NULL == psave && NULL == psave2) {
/* we must have processed them all! */
goto DONE;
}
psave->local_rank = local_rank;
++local_rank;
if (NULL != psave) {
psave->local_rank = local_rank;
++local_rank;
}
if (NULL != psave2) {
psave2->node_rank = nodes[i]->next_node_rank;
nodes[i]->next_node_rank++;
}
}
}
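To make the two rank notions concrete, here is a minimal standalone sketch of the selection loop above (the struct and all values are hypothetical, not ORTE code): local_rank counts only procs of the job being mapped, in vpid order, while node_rank counts every proc on the node regardless of job, carried forward via next_node_rank.

#include <stdio.h>
#include <stdint.h>

/* hypothetical miniature of the selection loop above: one node hosting
 * vpids 7 and 3 of job 1 plus vpid 2 of job 2; we are mapping job 1 */
struct mini_proc { int jobid; int vpid; uint8_t local_rank; uint8_t node_rank; };

int main(void)
{
    struct mini_proc p[] = { {1, 7, UINT8_MAX, UINT8_MAX},
                             {2, 2, UINT8_MAX, UINT8_MAX},
                             {1, 3, UINT8_MAX, UINT8_MAX} };
    int n = 3, target_job = 1;
    uint8_t local_rank = 0, next_node_rank = 0;

    for (int pass = 0; pass < n; pass++) {
        int lmin = -1, nmin = -1;
        for (int j = 0; j < n; j++) {
            /* lowest unranked vpid IN THE JOB BEING MAPPED -> local_rank */
            if (p[j].jobid == target_job && UINT8_MAX == p[j].local_rank &&
                (lmin < 0 || p[j].vpid < p[lmin].vpid)) lmin = j;
            /* lowest unranked vpid of ANY job -> node_rank */
            if (UINT8_MAX == p[j].node_rank &&
                (nmin < 0 || p[j].vpid < p[nmin].vpid)) nmin = j;
        }
        if (lmin >= 0) p[lmin].local_rank = local_rank++;
        if (nmin >= 0) p[nmin].node_rank = next_node_rank++;
    }
    /* job 1 vpid 3 -> local 0/node 1, job 1 vpid 7 -> local 1/node 2,
     * job 2 vpid 2 -> local unset (ranked when its own job is mapped)/node 0 */
    for (int j = 0; j < n; j++)
        printf("job %d vpid %d: local_rank %u node_rank %u\n",
               p[j].jobid, p[j].vpid,
               (unsigned)p[j].local_rank, (unsigned)p[j].node_rank);
    return 0;
}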
@ -450,6 +449,7 @@ int orte_rmaps_base_define_daemons(orte_job_map_t *map)
}
proc->name.vpid = daemons->num_procs; /* take the next available vpid */
proc->node = node;
proc->nodename = node->name;
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
"%s rmaps:base:define_daemons add new daemon %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@ -483,18 +483,5 @@ int orte_rmaps_base_define_daemons(orte_job_map_t *map)
}
}
/* check how many daemons we are using and set flag accordingly - this
* is required so that daemon-based collectives can correctly operate
*/
if (numdaemons == daemons->num_procs) {
/* everyone is being used */
map->daemon_participation = ORTE_RMAPS_ALL_DAEMONS;
} else if (numdaemons == daemons->num_procs-1 &&
!map->hnp_has_local_procs) {
map->daemon_participation = ORTE_RMAPS_ALL_EXCEPT_HNP;
} else {
map->daemon_participation = ORTE_RMAPS_DAEMON_SUBSET;
}
return ORTE_SUCCESS;
}

View file

@ -372,6 +372,9 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
map = jdata->map;
apps = (orte_app_context_t**)jdata->apps->addr;
/* flag the map as containing cpu_lists */
map->cpu_lists = true;
/* start at the beginning... */
vpid_start = 0;

View file

@ -41,18 +41,6 @@ BEGIN_C_DECLS
#define ORTE_RMAPS_NO_USE_LOCAL 0x08
/*
* Define a flag that indicates the level of daemon participation
* in a launch
*/
typedef uint8_t orte_rmaps_dp_t;
#define ORTE_RMAPS_DP_T OPAL_UINT8
#define ORTE_RMAPS_ALL_DAEMONS 0x01
#define ORTE_RMAPS_ALL_EXCEPT_HNP 0x02
#define ORTE_RMAPS_DAEMON_SUBSET 0x04
/*
* Structure that represents the mapping of a job to an
* allocated set of resources.
@ -61,12 +49,11 @@ struct orte_job_map_t {
opal_object_t super;
/* save the mapping configuration */
uint8_t policy;
bool hnp_has_local_procs;
bool pernode;
orte_std_cntr_t npernode;
bool oversubscribe;
bool display_map;
orte_rmaps_dp_t daemon_participation;
bool cpu_lists;
/* *** */
/* number of new daemons required to be launched
* to support this job map

View file

@ -53,6 +53,10 @@ int orte_rml_base_get_contact_info(orte_jobid_t job, opal_buffer_t *data)
/* cycle through all procs in the job, adding their contact info to the buffer */
procs = (orte_proc_t**)jdata->procs->addr;
for (i=0; i < jdata->num_procs; i++) {
/* if this proc doesn't have any contact info, ignore it */
if (NULL == procs[i]->rml_uri) {
continue;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &procs[i]->rml_uri, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;

View file

@ -29,7 +29,7 @@ ORTE_DECLSPEC int orte_routed_base_close(void);
ORTE_DECLSPEC extern int orte_routed_base_output;
ORTE_DECLSPEC extern opal_list_t orte_routed_base_components;
ORTE_DECLSPEC extern int orte_routed_base_register_sync(void);
ORTE_DECLSPEC extern int orte_routed_base_register_sync(bool setup);
ORTE_DECLSPEC int orte_routed_base_comm_start(void);
ORTE_DECLSPEC int orte_routed_base_comm_stop(void);

View file

@ -26,23 +26,42 @@
#include "orte/mca/odls/odls_types.h"
#include "orte/mca/rml/rml.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
#include "orte/mca/routed/base/base.h"
int orte_routed_base_register_sync(void)
static bool sync_recvd;
static void report_sync(int status, orte_process_name_t* sender,
opal_buffer_t *buffer,
orte_rml_tag_t tag, void *cbdata)
{
opal_buffer_t buffer, ack;
/* just copy the payload to the sync_buf */
opal_dss.copy_payload(orte_process_info.sync_buf, buffer);
/* flag as complete */
sync_recvd = true;
}
int orte_routed_base_register_sync(bool setup)
{
opal_buffer_t buffer;
int rc;
orte_daemon_cmd_flag_t command=ORTE_DAEMON_SYNC_BY_PROC;
char *rml_uri;
/* we need to send a very small message to get the oob to establish
/* we need to get the oob to establish
* the connection - the oob will leave the connection "alive"
* thereafter so we can communicate readily
*/
OBJ_CONSTRUCT(&buffer, opal_buffer_t);
/* if we are setting up, tell the daemon to send back a nidmap */
if (setup) {
command = ORTE_DAEMON_SYNC_WANT_NIDMAP;
}
/* tell the daemon to sync */
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buffer, &command, 1, ORTE_DAEMON_CMD))) {
ORTE_ERROR_LOG(rc);
@ -74,13 +93,15 @@ int orte_routed_base_register_sync(void)
* gets serviced by the event library on the orted prior to the
* process exiting
*/
OBJ_CONSTRUCT(&ack, opal_buffer_t);
if (0 > orte_rml.recv_buffer(ORTE_PROC_MY_DAEMON, &ack, ORTE_RML_TAG_SYNC, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_DESTRUCT(&ack);
return ORTE_ERR_COMM_FAILURE;
sync_recvd = false;
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SYNC,
ORTE_RML_NON_PERSISTENT, report_sync, NULL);
if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
ORTE_ERROR_LOG(rc);
return rc;
}
OBJ_DESTRUCT(&ack);
ORTE_PROGRESSED_WAIT(sync_recvd, 0, 1);
return ORTE_SUCCESS;
}
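The blocking recv_buffer is gone: the sync now posts a non-persistent, non-blocking receive and waits for report_sync to fire, so the event library keeps servicing other traffic in the meantime. A simplified sketch of what the wait amounts to, assuming the ORTE_PROGRESSED_WAIT macro reduces to a progress loop (this is not the macro's actual definition):

/* spin the progress engine until the recv callback sets the flag */
while (!sync_recvd) {
    opal_progress();
}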

View file

@ -100,7 +100,7 @@ static int finalize(void)
if (!orte_process_info.hnp &&
!orte_process_info.daemon &&
!orte_process_info.tool) {
if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync())) {
if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -541,7 +541,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
* is attempted until the overall ORTE system knows how to talk to everyone -
* otherwise, the system can just hang.
*/
if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync())) {
if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(true))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -576,6 +576,13 @@ static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
{
int rc;
/* if we are not using static ports, then we need to share the
* comm info - otherwise, just return
*/
if (orte_static_ports) {
return ORTE_SUCCESS;
}
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buf);

View file

@ -97,7 +97,7 @@ static int finalize(void)
if (!orte_process_info.hnp &&
!orte_process_info.daemon &&
!orte_process_info.tool) {
if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync())) {
if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -519,7 +519,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
* This also will cause the local orted to send our contact
* info to the HNP once all my local peers have registered
*/
if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync())) {
if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(true))) {
ORTE_ERROR_LOG(rc);
return rc;
}

View file

@ -379,6 +379,16 @@ static int process_commands(orte_process_name_t* sender,
}
/* save our current buffer location */
save_buf = buffer->unpack_ptr;
/* if the PLM supports remote spawn, pass it all along */
if (NULL != orte_plm.remote_spawn) {
if (ORTE_SUCCESS != (ret = orte_plm.remote_spawn(buffer))) {
ORTE_ERROR_LOG(ret);
}
} else {
opal_output(0, "%s remote spawn is NULL!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
}
/* rewind the buffer so we can reuse it */
buffer->unpack_ptr = save_buf;
/* unpack the prefix and throw it away - we don't need it here */
n = 1;
if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &prefix, &n, OPAL_STRING))) {
@ -391,16 +401,6 @@ static int process_commands(orte_process_name_t* sender,
"%s orted:comm:add_procs failed to launch on error %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(ret)));
}
/* rewind the buffer so the plm can reuse it */
buffer->unpack_ptr = save_buf;
/* if the PLM supports remote spawn, pass it all along */
if (NULL != orte_plm.remote_spawn) {
if (ORTE_SUCCESS != (ret = orte_plm.remote_spawn(buffer))) {
ORTE_ERROR_LOG(ret);
}
} else {
opal_output(0, "%s remote spawn is NULL!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
}
break;
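Taken together, the two hunks above move the remote-spawn relay from after the local launch to before it: a daemon in a tree spawn now forwards the launch buffer to its children first, kicking off the next wave of daemons, and only then rewinds the buffer, unpacks it, and launches its own local procs, overlapping the remote spawns with the local fork/exec work.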
/**** DELIVER A MESSAGE TO THE LOCAL PROCS ****/
@ -708,11 +708,11 @@ static int process_commands(orte_process_name_t* sender,
/* if we are the HNP, process the request */
orte_std_cntr_t i, num_nodes=0;
orte_node_t **nodes;
orte_nodeid_t nid;
char *nid;
/* unpack the nodeid */
/* unpack the nodename */
n = 1;
if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &nid, &n, ORTE_NODEID))) {
if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &nid, &n, OPAL_STRING))) {
ORTE_ERROR_LOG(ret);
goto CLEANUP;
}
@ -721,12 +721,12 @@ static int process_commands(orte_process_name_t* sender,
answer = OBJ_NEW(opal_buffer_t);
/* if they asked for a specific node, then just get that info */
if (ORTE_NODEID_WILDCARD != nid) {
if (NULL != nid) {
/* find this node */
nodes = (orte_node_t**)orte_node_pool->addr;
for (i=0; i < orte_node_pool->size; i++) {
if (NULL == nodes[i]) break; /* stop when we get past the end of data */
if (nid == nodes[i]->nodeid) {
if (0 == strcmp(nid, nodes[i]->name)) {
nodes = &nodes[i];
num_nodes = 1;
break;
@ -966,7 +966,19 @@ SEND_ANSWER:
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender));
}
if (ORTE_SUCCESS != (ret = orte_odls.require_sync(sender, buffer))) {
if (ORTE_SUCCESS != (ret = orte_odls.require_sync(sender, buffer, false))) {
ORTE_ERROR_LOG(ret);
goto CLEANUP;
}
break;
case ORTE_DAEMON_SYNC_WANT_NIDMAP:
if (orte_debug_daemons_flag) {
opal_output(0, "%s orted_recv: received sync+nidmap from local proc %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender));
}
if (ORTE_SUCCESS != (ret = orte_odls.require_sync(sender, buffer, true))) {
ORTE_ERROR_LOG(ret);
goto CLEANUP;
}

View file

@ -527,8 +527,24 @@ int orte_daemon(int argc, char *argv[])
orte_process_name_t parent;
buffer = OBJ_NEW(opal_buffer_t);
rml_uri = orte_rml.get_contact_info();
if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &rml_uri, 1, OPAL_STRING))) {
/* if we are using static ports, there is no need to send our
* contact info back to HNP - it already knows how to reach us
* Instead, just send a zero-byte buffer for barrier purposes
*/
if (!orte_static_ports) {
if (orte_debug_daemons_flag) {
fprintf(stderr, "Daemon %s not using static ports\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
}
rml_uri = orte_rml.get_contact_info();
if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &rml_uri, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(buffer);
return ret;
}
}
/* send our architecture */
if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &orte_process_info.arch, 1, OPAL_INT32))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(buffer);
return ret;

View file

@ -271,7 +271,6 @@ int orte_dt_copy_map(orte_job_map_t **dest, orte_job_map_t *src, opal_data_type_
/* copy data into it */
(*dest)->policy = src->policy;
(*dest)->hnp_has_local_procs = src->hnp_has_local_procs;
(*dest)->pernode = src->pernode;
(*dest)->npernode = src->npernode;
(*dest)->oversubscribe = src->oversubscribe;

View file

@ -308,13 +308,6 @@ int orte_dt_pack_node(opal_buffer_t *buffer, const void *src,
return rc;
}
/* pack the nodeid */
if (ORTE_SUCCESS != (rc = opal_dss.pack_buffer(buffer,
(void*)(&(nodes[i]->nodeid)), 1, ORTE_NODEID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* do not pack the allocate flag, daemon name, or launch id */
/* pack the number of procs on the node */
@ -410,7 +403,14 @@ int orte_dt_pack_proc(opal_buffer_t *buffer, const void *src,
/* pack the local rank */
if (ORTE_SUCCESS != (rc = opal_dss.pack_buffer(buffer,
(void*)(&(procs[i]->local_rank)), 1, ORTE_VPID))) {
(void*)(&(procs[i]->local_rank)), 1, OPAL_UINT8))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the node rank */
if (ORTE_SUCCESS != (rc = opal_dss.pack_buffer(buffer,
(void*)(&(procs[i]->node_rank)), 1, OPAL_UINT8))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -739,12 +739,6 @@ int orte_dt_pack_map(opal_buffer_t *buffer, const void *src,
return rc;
}
/* pack the hnp_has_local_procs flag */
if (ORTE_SUCCESS != (rc = opal_dss.pack_buffer(buffer, &(maps[i]->hnp_has_local_procs), 1, OPAL_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the pernode flag */
if (ORTE_SUCCESS != (rc = opal_dss.pack_buffer(buffer, &(maps[i]->pernode), 1, OPAL_BOOL))) {
ORTE_ERROR_LOG(rc);

View file

@ -253,7 +253,7 @@ int orte_dt_print_job(char **output, char *prefix, orte_job_t *src, opal_data_ty
}
asprintf(&tmp2, "%s\n%s\tNum launched: %ld\tNum reported: %ld\n%s\tNum terminated: %ld\tOversubscribe override?: %s",
tmp, pfx2, (long)src->num_launched, (long)src->num_reported, pfx2,
tmp, pfx, (long)src->num_launched, (long)src->num_reported, pfx,
(long)src->num_terminated, src->oversubscribe_override ? "True" : "False");
free(tmp);
tmp = tmp2;
@ -284,8 +284,8 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "\n%sData for node: Name: %s\tNode id: %s\tAllocate: %s\n%s\tLaunch id: %ld\tArch: %0x\tState: %0x",
pfx2, src->name, ORTE_NODEID_PRINT(src->nodeid), (src->allocate) ? "Yes" : "No",
asprintf(&tmp, "\n%sData for node: Name: %s\tAllocate: %s\n%s\tLaunch id: %ld\tArch: %0x\tState: %0x",
pfx2, src->name, (src->allocate) ? "Yes" : "No",
pfx2, (long)src->launch_id,
src->arch, src->state);
@ -314,7 +314,8 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
free(tmp);
tmp = tmp2;
asprintf(&tmp2, "%s\n%s\tNum procs: %ld", tmp, pfx2, (long)src->num_procs);
asprintf(&tmp2, "%s\n%s\tNum procs: %ld\tNext node_rank: %ld", tmp, pfx2,
(long)src->num_procs, (long)src->next_node_rank);
free(tmp);
tmp = tmp2;
@ -360,8 +361,8 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_
asprintf(&tmp, "\n%sData for proc: %s", pfx2, ORTE_NAME_PRINT(&src->name));
asprintf(&tmp2, "%s\n%s\tPid: %ld\tLocal rank: %ld", tmp, pfx2,
(long)src->pid, (ORTE_VPID_INVALID == src->local_rank) ? -1 : (long)src->local_rank);
asprintf(&tmp2, "%s\n%s\tPid: %ld\tLocal rank: %ld\tNode rank: %ld", tmp, pfx2,
(long)src->pid, (long)src->local_rank, (long)src->node_rank);
free(tmp);
tmp = tmp2;
@ -453,12 +454,11 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat
}
asprintf(&pfx, "%s\t", pfx2);
asprintf(&tmp, "\n%sMap generated by mapping policy: %x\n%sHNP has local procs: %s\tPernode: %s\tNpernode: %ld\tOversubscribe allowed: %s\tDisplay: %s",
pfx2, src->policy,
pfx, (src->hnp_has_local_procs) ? "TRUE" : "FALSE",
asprintf(&tmp, "\n%sMap generated by mapping policy: %x\n%s\tPernode: %s\tNpernode: %ld\tOversubscribe allowed: %s\tCPU Lists: %s",
pfx2, src->policy, pfx2,
(src->pernode) ? "TRUE" : "FALSE", (long)src->npernode,
(src->oversubscribe) ? "TRUE" : "FALSE",
(src->display_map) ? "TRUE" : "FALSE");
(src->cpu_lists) ? "TRUE" : "FALSE");
free(pfx2);
if (ORTE_VPID_INVALID == src->daemon_vpid_start) {

View file

@ -330,13 +330,6 @@ int orte_dt_unpack_node(opal_buffer_t *buffer, void *dest,
return rc;
}
/* unpack the nodeid */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack_buffer(buffer,
&(nodes[i]->nodeid), &n, ORTE_NODEID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* do not unpack the allocate flag, daemon name, or launch id */
/* unpack the number of procs on the node */
@ -447,7 +440,15 @@ int orte_dt_unpack_proc(opal_buffer_t *buffer, void *dest,
/* unpack the local rank */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack_buffer(buffer,
(&(procs[i]->local_rank)), &n, ORTE_VPID))) {
(&(procs[i]->local_rank)), &n, OPAL_UINT8))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the node rank */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack_buffer(buffer,
(&(procs[i]->node_rank)), &n, OPAL_UINT8))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -811,14 +812,6 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest,
return rc;
}
/* unpack the hnp_has_local_procs flag */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack_buffer(buffer,
&(maps[i]->hnp_has_local_procs), &n, OPAL_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the pernode flag */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack_buffer(buffer,

View file

@ -52,6 +52,7 @@ bool orted_spin_flag = false;
bool orte_static_ports = false;
bool orte_keep_fqdn_hostnames = false;
int32_t orte_contiguous_nodes;
int orte_debug_output = -1;
char **orte_launch_environ;
opal_pointer_array_t orte_daemonmap;
@ -59,7 +60,6 @@ char **orted_cmd_line=NULL;
int orte_exit, orteds_exit;
int orte_exit_status = 0;
bool orte_abnormal_term_ordered = false;
orte_node_t *orte_hnpnode = NULL;
int orte_timeout_usec_per_proc;
float orte_max_timeout;
@ -171,7 +171,18 @@ int orte_register_params(void)
"Whether or not to keep FQDN hostnames [default: no]",
false, false, (int)false, &value);
orte_keep_fqdn_hostnames = OPAL_INT_TO_BOOL(value);
/* whether or not static ports exist */
mca_base_param_reg_int_name("orte", "static_ports",
"Whether or not static ports are in use [default: no]",
false, false, (int)false, &value);
orte_static_ports = OPAL_INT_TO_BOOL(value);
/* whether or not contiguous nodenames are in use */
mca_base_param_reg_int_name("orte", "contiguous_nodes",
"Number of nodes after which contiguous nodenames will be used [default: INT_MAX]",
false, false, INT32_MAX, &orte_contiguous_nodes);
/* All done */
params_set = true;
return ORTE_SUCCESS;
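Both new parameters follow the standard MCA registration pattern, so they can presumably be set like any other MCA parameter, e.g. on the command line (mpirun --mca orte_contiguous_nodes 64 ...) or via the environment (OMPI_MCA_orte_static_ports=1); the values shown here are purely illustrative.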

View file

@ -67,7 +67,7 @@ typedef struct {
/** Parent object */
opal_object_t super;
/** Unique index when multiple apps per job */
orte_std_cntr_t idx;
int8_t idx;
/** Absolute pathname of argv[0] */
char *app;
/** Number of copies of this process that are to be launched */
@ -107,11 +107,9 @@ typedef struct {
orte_std_cntr_t index;
/** String node name */
char *name;
/* an id for the node in case we need it */
orte_nodeid_t nodeid;
/* whether or not this node is available for allocation */
bool allocate;
/* daemon on this node - it's vpid equates to the nodeid in many environments */
/* daemon on this node */
struct orte_proc_t *daemon;
/* whether or not this daemon has been launched */
bool daemon_launched;
@ -121,6 +119,8 @@ typedef struct {
orte_vpid_t num_procs;
/* array of pointers to procs on this node */
opal_pointer_array_t *procs;
/* next node rank on this node */
uint8_t next_node_rank;
/* whether or not we are oversubscribed */
bool oversubscribed;
/** The node architecture, as reported by the remote node. This
@ -224,14 +224,25 @@ struct orte_proc_t {
orte_process_name_t name;
/* pid */
pid_t pid;
/* local rank on the node where this is running */
orte_vpid_t local_rank;
/* local rank amongst my peers on the node
* where this is running - this value is
* needed by MPI procs so that the lowest
* rank on a node can perform certain fns -
* e.g., open an sm backing file
*/
uint8_t local_rank;
/* local rank on the node across all procs
* and jobs known to this HNP - this is
* needed so that procs can do things like
* know which static IP port to use
*/
uint8_t node_rank;
/* process state */
orte_proc_state_t state;
/* exit code */
orte_exit_code_t exit_code;
/* the app_context that generated this proc */
orte_std_cntr_t app_idx;
int8_t app_idx;
/* a cpu list, if specified by the user */
char *slot_list;
/* pointer to the node where this proc is executing */
@ -255,6 +266,23 @@ struct orte_proc_t {
typedef struct orte_proc_t orte_proc_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_proc_t);
typedef struct {
/* nodename */
char *name;
/* arch of node */
uint32_t arch;
} orte_nid_t;
typedef struct {
/* index to node */
int32_t node;
/* local rank */
uint8_t local_rank;
/* node rank */
uint8_t node_rank;
} orte_pmap_t;
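These two structs are the per-process view of the new scheme: the nidmap (an array of orte_nid_t) lists the nodes in the job, and the pidmap (an array of orte_pmap_t, indexed by vpid) points each peer at its node. A hedged sketch of how a proc could resolve a peer once both maps are decoded; the storage names are hypothetical, and the decode routines that would fill them appear in orte/util/nidmap.c below:

#include <stdio.h>
#include "orte/runtime/orte_globals.h"

static orte_nid_t **nidmap;   /* hypothetical: filled from orte_util_decode_nodemap() */
static orte_pmap_t *pidmap;   /* hypothetical: filled from orte_util_decode_pidmap() */

static void print_peer(orte_vpid_t vpid)
{
    orte_pmap_t *p = &pidmap[vpid];
    orte_nid_t *n = nidmap[p->node];
    printf("vpid %lu on node %s (arch %0x) local_rank %u node_rank %u\n",
           (unsigned long)vpid, n->name, n->arch,
           (unsigned)p->local_rank, (unsigned)p->node_rank);
}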
/**
* Get a job data object
* We cannot just reference a job data object with its jobid as
@ -288,6 +316,7 @@ ORTE_DECLSPEC extern bool orte_debug_daemons_flag, orte_debug_daemons_file_flag;
ORTE_DECLSPEC extern bool orte_do_not_launch;
ORTE_DECLSPEC extern bool orted_spin_flag;
ORTE_DECLSPEC extern bool orte_static_ports;
ORTE_DECLSPEC extern int32_t orte_contiguous_nodes;
ORTE_DECLSPEC extern int orte_debug_output;
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
@ -298,7 +327,6 @@ ORTE_DECLSPEC extern char **orted_cmd_line;
ORTE_DECLSPEC extern int orte_exit, orteds_exit;
ORTE_DECLSPEC extern int orte_exit_status;
ORTE_DECLSPEC extern bool orte_abnormal_term_ordered;
ORTE_DECLSPEC extern orte_node_t *orte_hnpnode;
ORTE_DECLSPEC extern int orte_timeout_usec_per_proc;
ORTE_DECLSPEC extern float orte_max_timeout;

View file

@ -180,7 +180,6 @@ OBJ_CLASS_INSTANCE(orte_job_t,
static void orte_node_construct(orte_node_t* node)
{
node->name = NULL;
node->nodeid = ORTE_NODEID_INVALID;
node->allocate = false;
node->index = -1;
node->daemon = NULL;
@ -193,6 +192,7 @@ static void orte_node_construct(orte_node_t* node)
ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
ORTE_GLOBAL_ARRAY_MAX_SIZE,
ORTE_GLOBAL_ARRAY_BLOCK_SIZE);
node->next_node_rank = 0;
node->oversubscribed = false;
node->arch = 0;
@ -236,7 +236,8 @@ static void orte_proc_construct(orte_proc_t* proc)
{
proc->name = *ORTE_NAME_INVALID;
proc->pid = 0;
proc->local_rank = ORTE_VPID_INVALID;
proc->local_rank = UINT8_MAX;
proc->node_rank = UINT8_MAX;
proc->state = ORTE_PROC_STATE_UNDEF;
proc->app_idx = -1;
proc->slot_list = NULL;
@ -252,14 +253,18 @@ static void orte_proc_construct(orte_proc_t* proc)
static void orte_proc_destruct(orte_proc_t* proc)
{
/* do NOT free the nodename field as this is
* simply a pointer to a field in the
* associated node object - the node object
* will free it
*/
if (NULL != proc->slot_list) {
free(proc->slot_list);
}
if (NULL != proc->node) OBJ_RELEASE(proc->node);
if (NULL != proc->nodename) free(proc->nodename);
if (NULL != proc->rml_uri) free(proc->rml_uri);
#if OPAL_ENABLE_FT == 1
@ -280,11 +285,11 @@ OBJ_CLASS_INSTANCE(orte_proc_t,
static void orte_job_map_construct(orte_job_map_t* map)
{
map->policy = ORTE_RMAPS_BYSLOT; /* default to byslot mapping as per orterun options */
map->hnp_has_local_procs = false;
map->pernode = false;
map->npernode = 0;
map->oversubscribe = true; /* default to allowing oversubscribe */
map->display_map = false;
map->cpu_lists = false;
map->num_new_daemons = 0;
map->daemon_vpid_start = ORTE_VPID_INVALID;
map->num_nodes = 0;

View file

@ -39,9 +39,9 @@
BEGIN_C_DECLS
/* some convenience definitions for code clarity */
#define ORTE_NON_TOOL 0x00
#define ORTE_TOOL 0x01
#define ORTE_TOOL_WITH_NAME 0x02
#define ORTE_NON_TOOL 0x00
#define ORTE_TOOL 0x01
#define ORTE_TOOL_WITH_NAME 0x02
/**
* Initialize the Open Run Time Environment

View file

@ -418,7 +418,6 @@ static int pretty_print(orte_ps_mpirun_info_t *hnpinfo) {
static int pretty_print_nodes(orte_node_t **nodes, orte_std_cntr_t num_nodes) {
int line_len;
int len_name = 0,
len_id = 0,
len_arch = 0,
len_state = 0,
len_slots = 0,
@ -426,13 +425,11 @@ static int pretty_print_nodes(orte_node_t **nodes, orte_std_cntr_t num_nodes) {
len_slots_m = 0;
orte_node_t *node;
orte_std_cntr_t i;
char *nid=NULL;
/*
* Calculate segment lengths
*/
len_name = (int) strlen("Node Name");
len_id = (int) strlen("Node ID");
len_arch = (int) strlen("Arch");
len_state = (int) strlen("State");
len_slots = (int) strlen("Slots");
@ -445,19 +442,12 @@ static int pretty_print_nodes(orte_node_t **nodes, orte_std_cntr_t num_nodes) {
if( NULL != node->name &&
(int)strlen(node->name) > len_name)
len_name = (int) strlen(node->name);
/* setup the printed nodeid - do -not- free this! */
nid = ORTE_NODEID_PRINT(node->nodeid);
if ((int)strlen(nid) > len_id)
len_id = (int)strlen(nid);
if( (int)strlen(pretty_node_state(node->state)) > len_state )
len_state = (int)strlen(pretty_node_state(node->state));
}
line_len = (len_name + 3 +
len_id + 3 +
len_arch + 3 +
len_state + 3 +
len_slots + 3 +
@ -468,7 +458,6 @@ static int pretty_print_nodes(orte_node_t **nodes, orte_std_cntr_t num_nodes) {
* Print the header
*/
printf("%*s | ", len_name, "Node Name");
printf("%*s | ", len_id, "Node ID");
printf("%*s | ", len_arch, "Arch");
printf("%*s | ", len_state, "State");
printf("%*s | ", len_slots, "Slots");
@ -488,7 +477,6 @@ static int pretty_print_nodes(orte_node_t **nodes, orte_std_cntr_t num_nodes) {
node = nodes[i];
printf("%*s | ", len_name, node->name);
printf("%*s | ", len_id, nid);
printf("%*x | ", len_arch, node->arch);
printf("%*s | ", len_state, pretty_node_state(node->state));
printf("%*d | ", len_slots, (uint)node->slots);
@ -675,9 +663,6 @@ static int pretty_print_vpids(orte_job_t *job) {
if ((int)strlen(o_proc_name) > len_o_proc_name)
len_o_proc_name = strlen(o_proc_name);
if( ORTE_VPID_INVALID == vpid->local_rank ) {
vpid->local_rank = vpid->name.vpid;
}
asprintf(&rankstr, "%u", (uint)vpid->local_rank);
if ((int)strlen(rankstr) > len_rank)
len_rank = strlen(rankstr);
@ -825,7 +810,7 @@ static int gather_active_jobs(orte_ps_mpirun_info_t *hnpinfo) {
static int gather_nodes(orte_ps_mpirun_info_t *hnpinfo) {
int ret;
if (ORTE_SUCCESS != (ret = orte_util_comm_query_node_info(&(hnpinfo->hnp->name), ORTE_NODEID_WILDCARD,
if (ORTE_SUCCESS != (ret = orte_util_comm_query_node_info(&(hnpinfo->hnp->name), NULL,
&hnpinfo->num_nodes, &hnpinfo->nodes))) {
ORTE_ERROR_LOG(ret);
}

View file

@ -34,7 +34,8 @@ headers += \
util/hostfile/hostfile.h \
util/hostfile/hostfile_lex.h \
util/dash_host/dash_host.h \
util/comm/comm.h
util/comm/comm.h \
util/nidmap.h
libopen_rte_la_SOURCES += \
util/error_strings.c \
@ -47,5 +48,5 @@ libopen_rte_la_SOURCES += \
util/hostfile/hostfile_lex.l \
util/hostfile/hostfile.c \
util/dash_host/dash_host.c \
util/comm/comm.c
util/comm/comm.c \
util/nidmap.c

View file

@ -97,7 +97,7 @@ int orte_util_comm_query_job_info(const orte_process_name_t *hnp, orte_jobid_t j
return ORTE_SUCCESS;
}
int orte_util_comm_query_node_info(const orte_process_name_t *hnp, orte_nodeid_t nodeid,
int orte_util_comm_query_node_info(const orte_process_name_t *hnp, char *node,
int *num_nodes, orte_node_t ***node_info_array)
{
int ret;
@ -117,7 +117,7 @@ int orte_util_comm_query_node_info(const orte_process_name_t *hnp, orte_nodeid_t
OBJ_DESTRUCT(&cmd);
return ret;
}
if (ORTE_SUCCESS != (ret = opal_dss.pack(&cmd, &nodeid, 1, ORTE_NODEID))) {
if (ORTE_SUCCESS != (ret = opal_dss.pack(&cmd, &node, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(ret);
OBJ_DESTRUCT(&cmd);
return ret;

View file

@ -37,7 +37,7 @@ BEGIN_C_DECLS
ORTE_DECLSPEC int orte_util_comm_query_job_info(const orte_process_name_t *hnp, orte_jobid_t job,
int *num_jobs, orte_job_t ***job_info_array);
ORTE_DECLSPEC int orte_util_comm_query_node_info(const orte_process_name_t *hnp, orte_nodeid_t nodeid,
ORTE_DECLSPEC int orte_util_comm_query_node_info(const orte_process_name_t *hnp, char *node,
int *num_nodes, orte_node_t ***node_info_array);
ORTE_DECLSPEC int orte_util_comm_query_proc_info(const orte_process_name_t *hnp, orte_jobid_t job, orte_vpid_t vpid,

View file

@ -230,44 +230,6 @@ char* orte_util_print_vpids(const orte_vpid_t vpid)
return ptr->buffers[ptr->cntr-1];
}
char* orte_util_print_nodeids(const orte_nodeid_t nodeid)
{
orte_print_args_buffers_t *ptr;
int rc;
if (!fns_init) {
/* setup the print_args function */
if (ORTE_SUCCESS != (rc = opal_tsd_key_create(&print_args_tsd_key, buffer_cleanup))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
fns_init = true;
}
ptr = get_print_name_buffer();
if (NULL == ptr) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return orte_print_args_null;
}
/* cycle around the ring */
if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) {
ptr->cntr = 0;
}
if (ORTE_NODEID_INVALID == nodeid) {
snprintf(ptr->buffers[ptr->cntr++], ORTE_PRINT_NAME_ARGS_MAX_SIZE, "INVALID");
} else if (ORTE_NODEID_WILDCARD == nodeid) {
snprintf(ptr->buffers[ptr->cntr++], ORTE_PRINT_NAME_ARGS_MAX_SIZE, "WILDCARD");
} else {
snprintf(ptr->buffers[ptr->cntr++],
ORTE_PRINT_NAME_ARGS_MAX_SIZE,
"%ld", (long)nodeid);
}
return ptr->buffers[ptr->cntr-1];
}
/*** STRING FUNCTIONS ***/

View file

@ -53,15 +53,13 @@ ORTE_DECLSPEC char* orte_util_print_vpids(const orte_vpid_t vpid);
#define ORTE_VPID_PRINT(n) \
orte_util_print_vpids(n)
ORTE_DECLSPEC char* orte_util_print_nodeids(const orte_nodeid_t nodeid);
#define ORTE_NODEID_PRINT(n) \
orte_util_print_nodeids(n)
/* a macro for identifying the job family - i.e., for
* extracting the mpirun-specific id field of the jobid
*/
#define ORTE_JOB_FAMILY(n) \
(((n) >> 16) & 0x0000ffff)
(((n) >> 16) & 0x0000ffff)
/* List of names for general use */
struct orte_namelist_t {

571
orte/util/nidmap.c Normal file
View file

@ -0,0 +1,571 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/types.h"
#include "orte/constants.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/nidmap.h"
int orte_util_encode_nodemap(opal_byte_object_t *boptr)
{
orte_job_t *jdata;
orte_proc_t **procs;
char prefix[ORTE_MAX_NODE_PREFIX], *tmp;
int32_t i, len, firstnode, lastnode, nodenum, num_nodes;
uint8_t command = ORTE_CONTIG_NODE_CMD;
uint8_t num_digs;
uint8_t incdec;
int rc;
char *nodename;
opal_buffer_t buf;
int step;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
int32_t *arch;
#endif
/* get the daemon job's data */
if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
procs = (orte_proc_t**)(jdata->procs)->addr;
OBJ_CONSTRUCT(&buf, opal_buffer_t);
/* indicate number of nodes */
num_nodes = jdata->num_procs;
opal_dss.pack(&buf, &num_nodes, 1, OPAL_INT32);
/* pack the HNP's node name - don't mess with
* trying to encode it - it could be different
*/
/* if we are not keeping FQDN hostnames, abbreviate
* the nodename as required
*/
if (!orte_keep_fqdn_hostnames) {
char *ptr;
nodename = strdup(procs[0]->nodename);
if (NULL != (ptr = strchr(nodename, '.'))) {
*ptr = '\0';
}
opal_dss.pack(&buf, &nodename, 1, OPAL_STRING);
free(nodename);
} else {
opal_dss.pack(&buf, &procs[0]->nodename, 1, OPAL_STRING);
}
/* see if the cluster is configured with contiguous
* node names and we have more than the HNP
*/
if (orte_contiguous_nodes < num_nodes) {
/* discover the prefix - find first non-alpha character */
len = strlen(procs[1]->nodename);
memset(prefix, 0, ORTE_MAX_NODE_PREFIX);
prefix[0] = procs[1]->nodename[0]; /* must start with alpha */
for (i=1; i < len; i++) {
if (!isalpha(procs[1]->nodename[i])) {
/* found a non-alpha char */
if (!isdigit(procs[1]->nodename[i])) {
/* if it is anything but a digit,
* then that's not good
*/
opal_output(0, "%s encode:nidmap Nodename pattern is nonstandard",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
return ORTE_ERROR;
}
/* okay, this defines end of the prefix.
* convert rest of name to an offset
*/
firstnode = strtol(&(procs[1]->nodename[i]), NULL, 10);
/* figure out how many digits are in the index */
for (num_digs=0; isdigit(procs[1]->nodename[i+num_digs]); num_digs++);
goto PACK;
}
prefix[i] = procs[1]->nodename[i];
}
PACK:
/* begin encoding rest of map by indicating that this will
* be a contiguous node map
*/
opal_dss.pack(&buf, &command, 1, OPAL_UINT8);
/* pack the prefix */
tmp = &prefix[0];
opal_dss.pack(&buf, &tmp, 1, OPAL_STRING);
len = strlen(prefix);
/* pack the number of digits in the index */
opal_dss.pack(&buf, &num_digs, 1, OPAL_UINT8);
/* and the starting offset */
opal_dss.pack(&buf, &firstnode, 1, OPAL_INT32);
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
"%s encode:nidmap:contig_nodes prefix %s num_digits %d offset %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), prefix, num_digs, firstnode));
lastnode = strtol(&(procs[2]->nodename[i]), NULL, 10);
if ((lastnode - firstnode) < 0) {
/* we are decrementing */
incdec = 0;
opal_dss.pack(&buf, &incdec, 1, OPAL_INT8);
} else {
/* we are incrementing */
incdec = 1;
opal_dss.pack(&buf, &incdec, 1, OPAL_INT8);
}
lastnode = firstnode;
/* cycle through the nodes - pack the starting offset
* and total number of nodes in each contiguous range
*/
for (i=2; i < (int)jdata->num_procs; i++) {
nodenum = strtol(&(procs[i]->nodename[len]), NULL, 10);
step = nodenum -lastnode;
if (step < 0) {
/* we are decrementing */
step = lastnode - nodenum;
}
if (step > 1) {
/* have a break - indicate end of range */
opal_dss.pack(&buf, &lastnode, 1, OPAL_INT32);
/* indicate start of new range */
opal_dss.pack(&buf, &nodenum, 1, OPAL_INT32);
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
"%s encode:nidmap:contig_nodes end range %d start next range %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lastnode, nodenum));
}
lastnode = nodenum;
}
/* pack end of range */
opal_dss.pack(&buf, &lastnode, 1, OPAL_INT32);
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
"%s encode:nidmap:contig_nodes end range %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lastnode));
/* pack flag end of ranges */
lastnode = -1;
opal_dss.pack(&buf, &lastnode, 1, OPAL_INT32);
} else {
/* if the nodes aren't contiguous, then we need
* to simply pack every nodename individually
*/
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
"%s encode:nidmap non_contig_nodes - packing all names",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* indicate that this will not be a contiguous node map */
command = ORTE_NON_CONTIG_NODE_CMD;
opal_dss.pack(&buf, &command, 1, OPAL_UINT8);
for (i=1; i < num_nodes; i++) {
if (!orte_keep_fqdn_hostnames) {
char *ptr;
nodename = strdup(procs[i]->nodename);
if (NULL != (ptr = strchr(nodename, '.'))) {
*ptr = '\0';
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &nodename, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
free(nodename);
} else {
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &procs[i]->nodename, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
}
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
/* allocate space for the node arch */
arch = (int32_t*)malloc(num_nodes * 4);
/* transfer the data from the nodes */
for (i=0; i < num_nodes; i++) {
arch[i] = procs[i]->node->arch;
}
/* pack the values */
opal_dss.pack(&buf, arch, num_nodes, OPAL_INT32);
free(arch);
#endif
/* transfer the payload to the byte object */
opal_dss.unload(&buf, (void**)&boptr->bytes, &boptr->size);
OBJ_DESTRUCT(&buf);
return ORTE_SUCCESS;
}
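As a worked (hypothetical) example of the wire format built above: an HNP on hnp01 with daemons on node001-node004 and node006-node008 would serialize as roughly

num_nodes = 8
"hnp01"                          (HNP nodename, always sent verbatim)
ORTE_CONTIG_NODE_CMD
prefix = "node", num_digs = 3, firstnode = 1, incdec = 1
4                                (end of first range)
6                                (start of next range)
8                                (end of final range)
-1                               (no more ranges)

a handful of integers in place of every hostname, which is the point of the encoding.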
int orte_util_decode_nodemap(opal_byte_object_t *bo, opal_pointer_array_t *nodes)
{
int n, loc, k, diglen, namelen;
char *prefix, digits[10];
int32_t num_nodes, lastnode, endrange, i;
orte_nid_t *node;
uint8_t command, num_digs;
orte_nid_t **nd;
uint8_t incdec;
int32_t index, step;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
int32_t *arch;
#endif
opal_buffer_t buf;
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
"%s decode:nidmap decoding nodemap",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* xfer the byte object to a buffer for unpacking */
/* load it into a buffer */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
opal_dss.load(&buf, bo->bytes, bo->size);
/* unpack number of nodes */
n=1;
opal_dss.unpack(&buf, &num_nodes, &n, OPAL_INT32);
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
"%s decode:nidmap decoding %d nodes with %d already loaded",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_nodes, nodes->lowest_free));
/* is this greater than the number of entries in nodes? if so, then
* we will update the node array. if not, then we can return now
*/
if (num_nodes <= nodes->lowest_free) {
/* nothing more to do */
return ORTE_SUCCESS;
}
/* set the size of the nidmap storage so we minimize
* realloc's
*/
opal_pointer_array_set_size(nodes, num_nodes);
/* create the struct for the HNP's node */
node = (orte_nid_t*)malloc(sizeof(orte_nid_t));
node->name = NULL;
/* default the arch to our arch so that non-hetero
* case will yield correct behavior
*/
node->arch = orte_process_info.arch;
opal_pointer_array_set_item(nodes, 0, node);
/* unpack the name of the HNP's node */
n=1;
opal_dss.unpack(&buf, &(node->name), &n, OPAL_STRING);
/* unpack flag to see if this is a contiguous node map or not */
n=1;
opal_dss.unpack(&buf, &command, &n, OPAL_UINT8);
if (ORTE_CONTIG_NODE_CMD == command) {
/* unpack the prefix */
n=1;
opal_dss.unpack(&buf, &prefix, &n, OPAL_STRING);
/* the number of digits in the index */
n=1;
opal_dss.unpack(&buf, &num_digs, &n, OPAL_UINT8);
/* and the starting offset */
n=1;
opal_dss.unpack(&buf, &lastnode, &n, OPAL_INT32);
/* unpack increment/decrement flag */
n=1;
opal_dss.unpack(&buf, &incdec, &n, OPAL_INT8);
/* unpack the end of the range */
n=1;
opal_dss.unpack(&buf, &endrange, &n, OPAL_INT32);
/* setup loop params */
if (0 == incdec) {
endrange -= 1;
step = -1;
} else {
endrange += 1;
step = 1;
}
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
"%s decode:nidmap:contig_nodes prefix %s num_digits %d offset %d endrange %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), prefix, num_digs, lastnode, endrange));
namelen = strlen(prefix) + num_digs + 1;
/* cycle through the ranges */
index = 1;
while (1) {
for (i=lastnode; i != endrange; i += step) {
node = (orte_nid_t*)malloc(sizeof(orte_nid_t));
/* allocate space for the nodename */
node->name = (char*)malloc(namelen);
memset(node->name, 0, namelen);
loc = snprintf(node->name, namelen, "%s", prefix);
diglen = num_digs - snprintf(digits, 10, "%d", i);
for (k=0; k < diglen && loc < namelen; k++) {
node->name[loc] = '0';
loc++;
}
strncat(node->name, digits, num_digs);
/* default the arch to our arch so that non-hetero
* case will yield correct behavior
*/
node->arch = orte_process_info.arch;
opal_pointer_array_set_item(nodes, index, node);
index++;
}
/* unpack start of new range */
n=1;
opal_dss.unpack(&buf, &lastnode, &n, OPAL_INT32);
/* if that is -1, then it flags no more ranges */
if (-1 == lastnode) {
goto arch;
}
n=1;
opal_dss.unpack(&buf, &endrange, &n, OPAL_INT32);
if (0 == incdec) {
endrange -= 1;
} else {
endrange += 1;
}
}
} else {
/* not contiguous - just loop over nodes and
* unpack the raw nodename
*/
for (i=1; i < num_nodes; i++) {
node = (orte_nid_t*)malloc(sizeof(orte_nid_t));
node->name = NULL;
/* default the arch to our arch so that non-hetero
* case will yield correct behavior
*/
node->arch = orte_process_info.arch;
opal_pointer_array_set_item(nodes, i, node);
/* unpack the node's name */
n=1;
opal_dss.unpack(&buf, &(node->name), &n, OPAL_STRING);
}
}
arch:
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
/* allocate space for the node arch */
arch = (int32_t*)malloc(num_nodes * 4);
/* unpack the values */
n=num_nodes;
opal_dss.unpack(&buf, arch, &n, OPAL_INT32);
/* transfer the data to the nodes */
nd = (orte_nid_t**)nodes->addr;
for (i=0; i < num_nodes; i++) {
nd[i]->arch = arch[i];
}
free(arch);
#endif
if (0 < opal_output_get_verbosity(orte_debug_output)) {
nd = (orte_nid_t**)nodes->addr;
for (i=0; i < num_nodes; i++) {
opal_output(0, "%s node[%d].name %s arch %0x",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i,
(NULL == nd[i]) ? "NULL" : nd[i]->name,
(NULL == nd[i]) ? 0 : nd[i]->arch);
}
}
OBJ_DESTRUCT(&buf);
return ORTE_SUCCESS;
}
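The zero-padding dance in the decode loop (snprintf the prefix, hand-pad with '0', strncat the digits) is equivalent to a single padded-format print. A tiny self-contained check of the reconstruction, with hypothetical inputs:

#include <stdio.h>

/* rebuild a nodename the way the loop above does: prefix + zero-padded index */
static void make_node_name(char *out, size_t outlen,
                           const char *prefix, int num_digs, int index)
{
    snprintf(out, outlen, "%s%0*d", prefix, num_digs, index);
}

int main(void)
{
    char name[64];
    make_node_name(name, sizeof(name), "c", 4, 7);
    printf("%s\n", name);   /* -> c0007 */
    return 0;
}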
int orte_util_encode_pidmap(orte_job_t *jdata, opal_byte_object_t *boptr)
{
int32_t *nodes;
orte_proc_t **procs;
orte_vpid_t i;
int8_t *tmp, flag;
opal_buffer_t buf;
/* setup the working buffer */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
/* pack the number of procs */
opal_dss.pack(&buf, &jdata->num_procs, 1, ORTE_VPID);
/* allocate memory for the nodes */
nodes = (int32_t*)malloc(jdata->num_procs * 4);
/* transfer and pack the node info in one pack */
procs = (orte_proc_t**)jdata->procs->addr;
for (i=0; i < jdata->num_procs; i++) {
nodes[i] = procs[i]->node->index;
}
opal_dss.pack(&buf, nodes, jdata->num_procs, OPAL_INT32);
/* free node storage */
free(nodes);
/* allocate memory for the local_ranks */
tmp = (int8_t*)malloc(jdata->num_procs);
/* transfer and pack them in one pack */
for (i=0; i < jdata->num_procs; i++) {
tmp[i] = procs[i]->local_rank;
}
opal_dss.pack(&buf, tmp, jdata->num_procs, OPAL_UINT8);
/* transfer and pack the node ranks in one pack */
for (i=0; i < jdata->num_procs; i++) {
tmp[i] = procs[i]->node_rank;
}
opal_dss.pack(&buf, tmp, jdata->num_procs, OPAL_UINT8);
/* transfer and pack the app_idx in one pack */
for (i=0; i < jdata->num_procs; i++) {
tmp[i] = procs[i]->app_idx;
}
opal_dss.pack(&buf, tmp, jdata->num_procs, OPAL_INT8);
/* free the storage */
free(tmp);
/* are there cpu_list strings? */
if (jdata->map->cpu_lists) {
flag = (int)true;
opal_dss.pack(&buf, &flag, 1, OPAL_INT8);
for (i=0; i < jdata->num_procs; i++) {
opal_dss.pack(&buf, procs[i]->slot_list, 1, OPAL_STRING);
}
} else {
flag = (int)false;
opal_dss.pack(&buf, &flag, 1, OPAL_INT8);
}
/* transfer the payload to the byte object */
opal_dss.unload(&buf, (void**)&boptr->bytes, &boptr->size);
OBJ_DESTRUCT(&buf);
return ORTE_SUCCESS;
}
int orte_util_decode_pidmap(opal_byte_object_t *bo, orte_vpid_t *nprocs,
orte_pmap_t **procs, int8_t **app_idx,
char ***slot_str)
{
orte_vpid_t i, num_procs;
orte_pmap_t *pmap;
int32_t *nodes;
int8_t *tmp;
int8_t flag;
char **slots;
orte_std_cntr_t n;
opal_buffer_t buf;
/* xfer the byte object to a buffer for unpacking */
/* load it into a buffer */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
opal_dss.load(&buf, bo->bytes, bo->size);
/* unpack the number of procs */
n=1;
opal_dss.unpack(&buf, &num_procs, &n, ORTE_VPID);
*nprocs = num_procs;
/* allocate memory for the procs array */
pmap = (orte_pmap_t*)malloc(num_procs * sizeof(orte_pmap_t));
*procs = pmap;
/* allocate memory for the node info */
nodes = (int32_t*)malloc(num_procs * 4);
/* unpack it in one shot */
n=num_procs;
opal_dss.unpack(&buf, nodes, &n, OPAL_INT32);
/* store it */
for (i=0; i < num_procs; i++) {
pmap[i].node = nodes[i];
}
free(nodes);
/* allocate memory for local ranks */
tmp = (int8_t*)malloc(num_procs);
/* unpack them in one shot */
n=num_procs;
opal_dss.unpack(&buf, tmp, &n, OPAL_UINT8);
/* store them */
for (i=0; i < num_procs; i++) {
pmap[i].local_rank = tmp[i];
}
/* unpack node ranks in one shot */
n=num_procs;
opal_dss.unpack(&buf, tmp, &n, OPAL_UINT8);
/* store it */
for (i=0; i < num_procs; i++) {
pmap[i].node_rank = tmp[i];
}
/* only daemons/HNPs need the rest of the data, so if
* we aren't one of those, we are done!
*/
if (!orte_process_info.hnp &&
!orte_process_info.daemon) {
OBJ_DESTRUCT(&buf);
return ORTE_SUCCESS;
}
/* unpack app_idx in one shot */
n=num_procs;
opal_dss.unpack(&buf, tmp, &n, OPAL_INT8);
/* hand the array back to the caller */
*app_idx = tmp;
/* unpack flag to indicate if slot_strings are present */
n=1;
opal_dss.unpack(&buf, &flag, &n, OPAL_INT8);
if (flag) {
/* allocate space */
slots = (char**)malloc(num_procs * sizeof(char*));
for (i=0; i < num_procs; i++) {
n=1;
opal_dss.unpack(&buf, &slots[i], &n, OPAL_STRING);
}
*slot_str = slots;
}
OBJ_DESTRUCT(&buf);
return ORTE_SUCCESS;
}
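For completeness, a hedged sketch of the receiving side (the surrounding buffer handling is assumed, not shown in this commit): the byte object travels packed as OPAL_BYTE_OBJECT, so a proc would pull it out of the received buffer and hand it to the decoder.

/* assumed context: "buffer" is an opal_buffer_t* received from the daemon */
opal_byte_object_t *bo;
orte_vpid_t nprocs;
orte_pmap_t *pmap = NULL;
int8_t *app_idx = NULL;
char **slot_str = NULL;
orte_std_cntr_t n = 1;

opal_dss.unpack(buffer, &bo, &n, OPAL_BYTE_OBJECT);
orte_util_decode_pidmap(bo, &nprocs, &pmap, &app_idx, &slot_str);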

56
orte/util/nidmap.h Normal file
View file

@ -0,0 +1,56 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*
* Populates global structure with system-specific information.
*
* Notes: add limits.h, compute size of integer and other types via sizeof(type)*CHAR_BIT
*
*/
#ifndef _ORTE_NIDMAP_H_
#define _ORTE_NIDMAP_H_
#include "orte_config.h"
#include "orte/types.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/dss/dss_types.h"
#include "orte/runtime/orte_globals.h"
BEGIN_C_DECLS
#define ORTE_MAX_NODE_PREFIX 50
#define ORTE_CONTIG_NODE_CMD 0x01
#define ORTE_NON_CONTIG_NODE_CMD 0x02
ORTE_DECLSPEC int orte_util_encode_nodemap(opal_byte_object_t *boptr);
ORTE_DECLSPEC int orte_util_decode_nodemap(opal_byte_object_t *boptr, opal_pointer_array_t *nodes);
ORTE_DECLSPEC int orte_util_encode_pidmap(orte_job_t *jdata, opal_byte_object_t *boptr);
ORTE_DECLSPEC int orte_util_decode_pidmap(opal_byte_object_t *boptr, orte_vpid_t *num_procs,
orte_pmap_t **procs, int8_t **app_idx,
char ***slot_str);
END_C_DECLS
#endif

View file

@ -32,6 +32,7 @@
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
#include "opal/util/arch.h"
#include "orte/runtime/orte_globals.h"
@ -47,15 +48,19 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
/* ,app_num = */ -1,
/* ,universe_size = */ -1,
/* .num_procs = */ 1,
/* .local_rank = */ ORTE_VPID_INVALID,
/* .local_rank = */ UINT8_MAX,
/* .node_rank = */ UINT8_MAX,
/* .num_local_procs = */ 0,
/* .nodeid = */ ORTE_NODEID_INVALID,
/* .local_procs = */ NULL,
/* .nodename = */ NULL,
/* .arch = */ 0,
/* .pid = */ 0,
/* .singleton = */ false,
/* .daemon = */ false,
/* .hnp = */ false,
/* .tool = */ false,
/* .mpi_proc = */ false,
/* .sync_buf = */ NULL,
/* .tmpdir_base = */ NULL,
/* .top_session_dir = */ NULL,
/* .job_session_dir = */ NULL,
@ -136,18 +141,18 @@ int orte_proc_info(void)
/* get the process id */
orte_process_info.pid = getpid();
/* get the nodeid */
mca_base_param_reg_int_name("orte", "nodeid",
"ORTE ID for this node",
false, false, ORTE_NODEID_INVALID, &tmp);
orte_process_info.nodeid = (orte_nodeid_t)tmp;
/* get the nodename */
gethostname(hostname, ORTE_MAX_HOSTNAME_SIZE);
/* overwrite with value if in environment */
mca_base_param_reg_string_name("orte", "base_nodename",
"Name of this node, as provided in environment",
false, false, hostname, &(orte_process_info.nodename));
orte_process_info.nodename = strdup(hostname);
/* get the arch */
if (ORTE_SUCCESS != opal_arch_compute_local_id(&orte_process_info.arch)) {
opal_output(0, "Process on node %s could not obtain local architecture - aborting", orte_process_info.nodename);
exit(ORTE_ERROR_DEFAULT_EXIT_CODE);
}
/* setup the sync buffer */
orte_process_info.sync_buf = OBJ_NEW(opal_buffer_t);
return ORTE_SUCCESS;
}

View file

@ -33,6 +33,8 @@
#include <sys/types.h>
#endif
#include "opal/dss/dss_types.h"
BEGIN_C_DECLS
/**
@ -54,15 +56,19 @@ struct orte_proc_info_t {
orte_std_cntr_t app_num; /**< our index into the app_context array */
orte_std_cntr_t universe_size; /**< the size of the universe we are in */
orte_vpid_t num_procs; /**< number of processes in this job */
orte_vpid_t local_rank; /**< local rank on this node */
uint8_t local_rank; /**< local rank on this node */
uint8_t node_rank; /**< rank on this node across all jobs */
orte_std_cntr_t num_local_procs; /**< total number of procs on this node */
orte_nodeid_t nodeid; /**< numerical id for this node */
char *local_procs; /**< comma-delimited list of local procs */
char *nodename; /**< string name for this node */
uint32_t arch; /**< arch for this node */
pid_t pid; /**< Local process ID for this process */
bool singleton; /**< I am a singleton */
bool daemon; /**< Indicate whether or not I am a daemon */
bool hnp; /**< Indicate whether or not I am the HNP (orterun) */
bool tool; /**< I am a tool or not */
bool mpi_proc; /**< I am an MPI process */
opal_buffer_t *sync_buf; /**< buffer to store sync response */
/* The session directory has the form
* <prefix>/<openmpi-sessions-user>/<jobid>/<procid>, where the prefix
* can either be provided by the user via the