Repair the MPI-2 dynamic operations. This includes:
1. repair of the linear and direct routed modules 2. repair of the ompi/pubsub/orte module to correctly init routes to the ompi-server, and correctly handle failure to correctly parse the provided ompi-server URI 3. modification of orterun to accept both "file" and "FILE" for designating where the ompi-server URI is to be found - purely a convenience feature 4. resolution of a message ordering problem during the connect/accept handshake that allowed the "send-first" proc to attempt to send to the "recv-first" proc before the HNP had actually updated its routes. Let this be a further reminder to all - message ordering is NOT guaranteed in the OOB 5. Repair the ompi/dpm/orte module to correctly init routes during connect/accept. Reminder to all: messages sent to procs in another job family (i.e., started by a different mpirun) are ALWAYS routed through the respective HNPs. As per the comments in orte/routed, this is REQUIRED to maintain connect/accept (where only the root proc on each side is capable of init'ing the routes), allow communication between mpirun's using different routing modules, and to minimize connections on tools such as ompi-server. It is all taken care of "under the covers" by the OOB to ensure that a route back to the sender is maintained, even when the different mpirun's are using different routed modules. 6. corrections in the orte/odls to ensure proper identification of daemons participating in a dynamic launch 7. corrections in build/nidmap to support update of an existing nidmap during dynamic launch 8. corrected implementation of the update_arch function in the ESS, along with consolidation of a number of ESS operations into base functions for easier maintenance. The ability to support info from multiple jobs was added, although we don't currently do so - this will come later to support further fault recovery strategies 9. 
minor updates to several functions to remove unnecessary and/or no longer used variables and envar's, add some debugging output, etc. 10. addition of a new macro ORTE_PROC_IS_DAEMON that resolves to true if the provided proc is a daemon There is still more cleanup to be done for efficiency, but this at least works. Tested on single-node Mac, multi-node SLURM via odin. Tests included connect/accept, publish/lookup/unpublish, comm_spawn, comm_spawn_multiple, and singleton comm_spawn. Fixes ticket #1256 This commit was SVN r18804.
This commit is contained in:
parent
1ed465326b
commit
ba5498cdc6
@ -81,6 +81,10 @@
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#if HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#include "mpi.h"
|
||||
|
||||
#include "ompi/attribute/attribute.h"
|
||||
@ -105,6 +109,8 @@ static int set_f(int keyval, MPI_Fint value);
|
||||
int ompi_attr_create_predefined(void)
|
||||
{
|
||||
int ret;
|
||||
char *univ_size;
|
||||
int usize;
|
||||
|
||||
/* Create all the keyvals */
|
||||
|
||||
@ -158,10 +164,11 @@ int ompi_attr_create_predefined(void)
|
||||
|
||||
/* If the universe size is set, then use it. Otherwise default
|
||||
* to the size of MPI_COMM_WORLD */
|
||||
if(orte_process_info.universe_size > 0) {
|
||||
ret = set_f(MPI_UNIVERSE_SIZE, orte_process_info.universe_size);
|
||||
} else {
|
||||
univ_size = getenv("OMPI_UNIVERSE_SIZE");
|
||||
if (NULL == univ_size || (usize = strtol(univ_size, NULL, 0)) <= 0) {
|
||||
ret = set_f(MPI_UNIVERSE_SIZE, ompi_comm_size(MPI_COMM_WORLD));
|
||||
} else {
|
||||
ret = set_f(MPI_UNIVERSE_SIZE, usize);
|
||||
}
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
return ret;
|
||||
|
@ -92,7 +92,6 @@ int ompi_dpm_base_null_dyn_init(void);
|
||||
int ompi_dpm_base_null_dyn_finalize (void);
|
||||
void ompi_dpm_base_null_mark_dyncomm (ompi_communicator_t *comm);
|
||||
int ompi_dpm_base_null_open_port(char *port_name, orte_rml_tag_t given_tag);
|
||||
char* ompi_dpm_base_null_parse_port (char *port_name, orte_rml_tag_t *tag);
|
||||
int ompi_dpm_base_null_close_port(char *port_name);
|
||||
|
||||
/* useful globals */
|
||||
|
@ -70,11 +70,6 @@ int ompi_dpm_base_null_open_port(char *port_name, orte_rml_tag_t given_tag)
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
char* ompi_dpm_base_null_parse_port (char *port_name, orte_rml_tag_t *tag)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int ompi_dpm_base_null_close_port(char *port_name)
|
||||
{
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
|
@ -42,7 +42,6 @@ OMPI_DECLSPEC ompi_dpm_base_module_t ompi_dpm = {
|
||||
ompi_dpm_base_null_dyn_finalize,
|
||||
ompi_dpm_base_null_mark_dyncomm,
|
||||
ompi_dpm_base_null_open_port,
|
||||
ompi_dpm_base_null_parse_port,
|
||||
ompi_dpm_base_null_close_port,
|
||||
NULL
|
||||
};
|
||||
|
@ -115,11 +115,6 @@ typedef void (*ompi_dpm_base_module_mark_dyncomm_fn_t)(ompi_communicator_t *comm
|
||||
*/
|
||||
typedef int (*ompi_dpm_base_module_open_port_fn_t)(char *port_name, orte_rml_tag_t tag);
|
||||
|
||||
/*
|
||||
* Parse a port name to get the contact info and tag
|
||||
*/
|
||||
typedef char* (*ompi_dpm_base_module_parse_port_fn_t)(char *port_name, orte_rml_tag_t *tag);
|
||||
|
||||
/*
|
||||
* Close a port
|
||||
*/
|
||||
@ -150,8 +145,6 @@ struct ompi_dpm_base_module_1_0_0_t {
|
||||
ompi_dpm_base_module_mark_dyncomm_fn_t mark_dyncomm;
|
||||
/* open port */
|
||||
ompi_dpm_base_module_open_port_fn_t open_port;
|
||||
/* parse port */
|
||||
ompi_dpm_base_module_parse_port_fn_t parse_port;
|
||||
/* close port */
|
||||
ompi_dpm_base_module_close_port_fn_t close_port;
|
||||
/* finalize */
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/runtime/orte_data_server.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
@ -51,6 +52,18 @@
|
||||
/* Local static variables */
|
||||
static opal_mutex_t ompi_dpm_port_mutex;
|
||||
static orte_rml_tag_t next_tag;
|
||||
static bool recv_completed;
|
||||
static opal_buffer_t *cabuf=NULL;
|
||||
static orte_process_name_t carport;
|
||||
|
||||
/* Local static functions */
|
||||
static void recv_cb(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag, void *cbdata);
|
||||
static void process_cb(int fd, short event, void *data);
|
||||
static int parse_port_name(char *port_name,
|
||||
orte_process_name_t *rproc,
|
||||
orte_rml_tag_t *tag);
|
||||
|
||||
/* API functions */
|
||||
static int init(void);
|
||||
@ -65,7 +78,6 @@ static int spawn(int count, char **array_of_commands,
|
||||
char *port_name);
|
||||
static int dyn_init(void);
|
||||
static int open_port(char *port_name, orte_rml_tag_t given_tag);
|
||||
static char *parse_port (char *port_name, orte_rml_tag_t *tag);
|
||||
static int close_port(char *port_name);
|
||||
static int finalize(void);
|
||||
|
||||
@ -81,7 +93,6 @@ ompi_dpm_base_module_t ompi_dpm_orte_module = {
|
||||
ompi_dpm_base_dyn_finalize,
|
||||
ompi_dpm_base_mark_dyncomm,
|
||||
open_port,
|
||||
parse_port,
|
||||
close_port,
|
||||
finalize
|
||||
};
|
||||
@ -98,11 +109,6 @@ static int init(void)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int get_rport (orte_process_name_t *port,
|
||||
int send_first, struct ompi_proc_t *proc,
|
||||
orte_rml_tag_t tag, orte_process_name_t *rport);
|
||||
|
||||
|
||||
static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
char *port_string, bool send_first,
|
||||
ompi_communicator_t **newcomm )
|
||||
@ -116,7 +122,7 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
ompi_communicator_t *newcomp=MPI_COMM_NULL;
|
||||
ompi_proc_t **rprocs=NULL;
|
||||
ompi_group_t *group=comm->c_local_group;
|
||||
orte_process_name_t port, *rport=NULL, tmp_port_name;
|
||||
orte_process_name_t port;
|
||||
orte_rml_tag_t tag=ORTE_RML_TAG_INVALID;
|
||||
opal_buffer_t *nbuf=NULL, *nrbuf=NULL;
|
||||
ompi_proc_t **proc_list=NULL, **new_proc_list;
|
||||
@ -139,30 +145,13 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
* set us up to communicate with it
|
||||
*/
|
||||
if (NULL != port_string && 0 < strlen(port_string)) {
|
||||
char *rml_uri;
|
||||
/* separate the string into the RML URI and tag */
|
||||
rml_uri = parse_port(port_string, &tag);
|
||||
/* extract the process name from the rml_uri */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(rml_uri, &port, NULL))) {
|
||||
free(rml_uri);
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* update the local hash table */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(rml_uri))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(rml_uri);
|
||||
return rc;
|
||||
}
|
||||
/* update the route as "direct" - the selected routed
|
||||
* module will handle this appropriate to its methods
|
||||
/* separate the string into the RML URI and tag - this function performs
|
||||
* whatever route initialization is required by the selected routed module
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_routed.update_route(&port, &port))) {
|
||||
if (ORTE_SUCCESS != (rc = parse_port_name(port_string, &port, &tag))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(rml_uri);
|
||||
return rc;
|
||||
}
|
||||
free(rml_uri);
|
||||
}
|
||||
|
||||
/* tell the progress engine to tick the event library more
|
||||
@ -170,12 +159,21 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
opal_progress_event_users_increment();
|
||||
|
||||
if ( rank == root ) {
|
||||
/* The process receiving first does not have yet the contact
|
||||
information of the remote process. Therefore, we have to
|
||||
exchange that.
|
||||
*/
|
||||
|
||||
if(!OMPI_GROUP_IS_DENSE(group)) {
|
||||
/* Generate the message buffer containing the number of processes and the list of
|
||||
participating processes */
|
||||
nbuf = OBJ_NEW(opal_buffer_t);
|
||||
if (NULL == nbuf) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(nbuf, &size, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if(OMPI_GROUP_IS_DENSE(group)) {
|
||||
ompi_proc_pack(group->grp_proc_pointers, size, nbuf);
|
||||
} else {
|
||||
proc_list = (ompi_proc_t **) calloc (group->grp_proc_count,
|
||||
sizeof (ompi_proc_t *));
|
||||
for(i=0 ; i<group->grp_proc_count ; i++)
|
||||
@ -185,48 +183,15 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
"%s dpm:orte:connect_accept adding %s to proc list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc_list[i]->proc_name)));
|
||||
}
|
||||
|
||||
if ( OMPI_COMM_JOIN_TAG != tag ) {
|
||||
if(OMPI_GROUP_IS_DENSE(group)){
|
||||
rc = get_rport(&port,send_first,
|
||||
group->grp_proc_pointers[rank], tag,
|
||||
&tmp_port_name);
|
||||
}
|
||||
else {
|
||||
rc = get_rport(&port,send_first,
|
||||
proc_list[rank], tag,
|
||||
&tmp_port_name);
|
||||
}
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
rport = &tmp_port_name;
|
||||
} else {
|
||||
rport = &port;
|
||||
}
|
||||
|
||||
/* Generate the message buffer containing the number of processes and the list of
|
||||
participating processes */
|
||||
nbuf = OBJ_NEW(opal_buffer_t);
|
||||
if (NULL == nbuf) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(nbuf, &size, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if(OMPI_GROUP_IS_DENSE(group)) {
|
||||
ompi_proc_pack(group->grp_proc_pointers, size, nbuf);
|
||||
}
|
||||
else {
|
||||
ompi_proc_pack(proc_list, size, nbuf);
|
||||
}
|
||||
|
||||
nrbuf = OBJ_NEW(opal_buffer_t);
|
||||
if (NULL == nrbuf ) {
|
||||
if (NULL != cabuf) {
|
||||
OBJ_RELEASE(cabuf);
|
||||
}
|
||||
|
||||
cabuf = OBJ_NEW(opal_buffer_t);
|
||||
if (NULL == cabuf ) {
|
||||
rc = OMPI_ERROR;
|
||||
goto exit;
|
||||
}
|
||||
@ -236,19 +201,41 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept sending first to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(rport)));
|
||||
rc = orte_rml.send_buffer(rport, nbuf, tag, 0);
|
||||
rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0);
|
||||
ORTE_NAME_PRINT(&port)));
|
||||
rc = orte_rml.send_buffer(&port, nbuf, tag, 0);
|
||||
/* setup to recv */
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept waiting for response",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
recv_completed = false;
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, tag,
|
||||
ORTE_RML_NON_PERSISTENT, recv_cb, NULL);
|
||||
/* wait for response */
|
||||
ORTE_PROGRESSED_WAIT(recv_completed, 0, 1);
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept got data from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&carport)));
|
||||
|
||||
} else {
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept recving first from %s",
|
||||
"%s dpm:orte:connect_accept recving first",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
/* setup to recv */
|
||||
recv_completed = false;
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, tag,
|
||||
ORTE_RML_NON_PERSISTENT, recv_cb, NULL);
|
||||
/* wait for response */
|
||||
ORTE_PROGRESSED_WAIT(recv_completed, 0, 1);
|
||||
/* now send our info */
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept sending info to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(rport)));
|
||||
rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0);
|
||||
rc = orte_rml.send_buffer(rport, nbuf, tag, 0);
|
||||
ORTE_NAME_PRINT(&carport)));
|
||||
rc = orte_rml.send_buffer(&carport, nbuf, tag, 0);
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unload(nrbuf, &rnamebuf, &rnamebuflen))) {
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unload(cabuf, &rnamebuf, &rnamebuflen))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto exit;
|
||||
}
|
||||
@ -262,6 +249,9 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
rnamebuflen_int = (int)rnamebuflen;
|
||||
|
||||
/* bcast the buffer-length to all processes in the local comm */
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept bcast buffer length",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
rc = comm->c_coll.coll_bcast (&rnamebuflen_int, 1, MPI_INT, root, comm,
|
||||
comm->c_coll.coll_bcast_module);
|
||||
if ( OMPI_SUCCESS != rc ) {
|
||||
@ -283,6 +273,9 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
adds processes, which were not known yet to our
|
||||
process pool.
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept bcast proc list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
rc = comm->c_coll.coll_bcast (rnamebuf, rnamebuflen_int, MPI_BYTE, root, comm,
|
||||
comm->c_coll.coll_bcast_module);
|
||||
if ( OMPI_SUCCESS != rc ) {
|
||||
@ -309,6 +302,10 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
goto exit;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept unpacked %d new procs",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), new_proc_len));
|
||||
|
||||
/* If we added new procs, we need to do the modex and then call
|
||||
PML add_procs */
|
||||
if (new_proc_len > 0) {
|
||||
@ -419,7 +416,7 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
comm, /* old communicator */
|
||||
NULL, /* bridge comm */
|
||||
&root, /* local leader */
|
||||
rport, /* remote leader */
|
||||
&carport, /* remote leader */
|
||||
OMPI_COMM_CID_INTRA_OOB, /* mode */
|
||||
send_first ); /* send or recv first */
|
||||
if ( OMPI_SUCCESS != rc ) {
|
||||
@ -431,7 +428,7 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
comm, /* old communicator */
|
||||
NULL, /* bridge comm */
|
||||
&root, /* local leader */
|
||||
rport, /* remote leader */
|
||||
&carport, /* remote leader */
|
||||
OMPI_COMM_CID_INTRA_OOB, /* mode */
|
||||
send_first, /* send or recv first */
|
||||
0); /* sync_flag */
|
||||
@ -476,89 +473,6 @@ static void disconnect(ompi_communicator_t *comm)
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************/
|
||||
/**********************************************************************/
|
||||
/**********************************************************************/
|
||||
/*
|
||||
* This routine is necessary, since in the connect/accept case, the processes
|
||||
* executing the connect operation have the OOB contact information of the
|
||||
* leader of the remote group, however, the processes executing the
|
||||
* accept get their own port_name = OOB contact information passed in as
|
||||
* an argument. This is however useless.
|
||||
*
|
||||
* Therefore, the two root processes exchange this information at this
|
||||
* point.
|
||||
*
|
||||
*/
|
||||
static int get_rport(orte_process_name_t *port, int send_first,
|
||||
ompi_proc_t *proc, orte_rml_tag_t tag,
|
||||
orte_process_name_t *rport_name)
|
||||
{
|
||||
int rc;
|
||||
orte_std_cntr_t num_vals;
|
||||
|
||||
if ( send_first ) {
|
||||
opal_buffer_t *sbuf;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output,
|
||||
"%s dpm:orte:get_rport sending to %s tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(port), (int)tag));
|
||||
|
||||
sbuf = OBJ_NEW(opal_buffer_t);
|
||||
if (NULL == sbuf) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(sbuf, &(proc->proc_name), 1, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(sbuf);
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = orte_rml.send_buffer(port, sbuf, tag, 0);
|
||||
OBJ_RELEASE(sbuf);
|
||||
if ( 0 > rc ) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
*rport_name = *port;
|
||||
} else {
|
||||
opal_buffer_t *rbuf;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output,
|
||||
"%s dpm:orte:get_rport waiting to recv on tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)tag));
|
||||
|
||||
rbuf = OBJ_NEW(opal_buffer_t);
|
||||
if (NULL == rbuf) {
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, rbuf, tag, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(rbuf);
|
||||
return rc;
|
||||
}
|
||||
|
||||
num_vals = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(rbuf, rport_name, &num_vals, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(rbuf);
|
||||
return rc;
|
||||
}
|
||||
OBJ_RELEASE(rbuf);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output,
|
||||
"%s dpm:orte:get_rport recv'd name %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(rport_name)));
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int spawn(int count, char **array_of_commands,
|
||||
char ***array_of_argv,
|
||||
int *array_of_maxprocs,
|
||||
@ -787,76 +701,121 @@ static int spawn(int count, char **array_of_commands,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* optionally can provide a tag to be used - otherwise, we supply the
|
||||
/*
|
||||
* The port_name is constructed to support the ability
|
||||
* to route messages between different jobs. Messages
|
||||
* between job families are routed via their respective HNPs
|
||||
* to reduce connection count and to support connect/accept.
|
||||
* Thus, the port_name consists of three fields:
|
||||
* (a) the contact info of the process opening the port. This
|
||||
* is provided in case the routed module wants to communicate
|
||||
* directly between the procs.
|
||||
* (b) the tag of the port. The reason for adding the tag is
|
||||
* to make the port unique for multi-threaded scenarios.
|
||||
* (c) the contact info for the job's HNP. This will be
|
||||
* used to route messages between job families
|
||||
*
|
||||
* Construction of the port name is done here - as opposed to
|
||||
* in the routed module itself - because two mpiruns using different
|
||||
* routed modules could exchange the port name (via pubsub). The
|
||||
* format of the port name must, therefore, be universal.
|
||||
*
|
||||
* Optionally can provide a tag to be used - otherwise, we supply the
|
||||
* next dynamically assigned tag
|
||||
*/
|
||||
static int open_port(char *port_name, orte_rml_tag_t given_tag)
|
||||
{
|
||||
char *rml_uri, *ptr, tag[12];
|
||||
int rc;
|
||||
char *rml_uri=NULL;
|
||||
int rc, len;
|
||||
char tag[12];
|
||||
|
||||
OPAL_THREAD_LOCK(&ompi_dpm_port_mutex);
|
||||
|
||||
/*
|
||||
* The port_name is equal to the OOB-contact information
|
||||
* and an RML tag. The reason for adding the tag is
|
||||
* to make the port unique for multi-threaded scenarios.
|
||||
*/
|
||||
if (NULL == orte_process_info.my_hnp_uri) {
|
||||
rc = ORTE_ERR_NOT_AVAILABLE;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (NULL == (rml_uri = orte_rml.get_contact_info())) {
|
||||
rc = OMPI_ERR_NOT_AVAILABLE;
|
||||
rc = ORTE_ERROR;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_RML_TAG_INVALID == given_tag) {
|
||||
snprintf(tag, 12, "%d", (int)next_tag);
|
||||
snprintf(tag, 12, "%d", next_tag);
|
||||
next_tag++;
|
||||
} else {
|
||||
/* use the given tag */
|
||||
snprintf(tag, 12, "%d", (int)given_tag);
|
||||
snprintf(tag, 12, "%d", given_tag);
|
||||
}
|
||||
|
||||
/* if the overall port name is too long, we try to truncate the rml uri */
|
||||
rc = 0;
|
||||
while ((strlen(rml_uri)+strlen(tag)) > (MPI_MAX_PORT_NAME-2)) {
|
||||
/* if we have already tried several times, punt! */
|
||||
if (4 < rc) {
|
||||
free(rml_uri);
|
||||
rc = OMPI_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
/* find the trailing uri and truncate there */
|
||||
ptr = strrchr(rml_uri, ';');
|
||||
*ptr = '\0';
|
||||
++rc;
|
||||
|
||||
len = strlen(orte_process_info.my_hnp_uri) + strlen(rml_uri) + strlen(tag);
|
||||
|
||||
/* if the overall port name is too long, we abort */
|
||||
if (len > (MPI_MAX_PORT_NAME-1)) {
|
||||
rc = OMPI_ERR_VALUE_OUT_OF_BOUNDS;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
snprintf (port_name, MPI_MAX_PORT_NAME, "%s:%s", rml_uri, tag);
|
||||
|
||||
free ( rml_uri );
|
||||
/* assemble the port name */
|
||||
snprintf(port_name, MPI_MAX_PORT_NAME, "%s+%s:%s", orte_process_info.my_hnp_uri, rml_uri, tag);
|
||||
rc = OMPI_SUCCESS;
|
||||
|
||||
cleanup:
|
||||
if (NULL != rml_uri) {
|
||||
free(rml_uri);
|
||||
}
|
||||
|
||||
OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* takes a port_name and separates it into the RML URI
|
||||
* and the tag
|
||||
*/
|
||||
static char *parse_port (char *port_name, orte_rml_tag_t *tag)
|
||||
|
||||
/* HANDLE ACK MESSAGES FROM AN HNP */
|
||||
static bool ack_recvd;
|
||||
|
||||
static void release_ack(int fd, short event, void *data)
|
||||
{
|
||||
char *tmp_string, *ptr;
|
||||
|
||||
/* copy the RML uri so we can return a malloc'd value
|
||||
* that can later be free'd
|
||||
orte_message_event_t *mev = (orte_message_event_t*)data;
|
||||
ack_recvd = true;
|
||||
OBJ_RELEASE(mev);
|
||||
}
|
||||
|
||||
static void recv_ack(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message as it may ask us to do something that involves
|
||||
* more messaging! Instead, setup an event so that the message gets processed
|
||||
* as soon as we leave the recv.
|
||||
*
|
||||
* The macro makes a copy of the buffer, which we release above - the incoming
|
||||
* buffer, however, is NOT released here, although its payload IS transferred
|
||||
* to the message buffer for later processing
|
||||
*/
|
||||
tmp_string = strdup(port_name);
|
||||
ORTE_MESSAGE_EVENT(sender, buffer, tag, release_ack);
|
||||
}
|
||||
|
||||
|
||||
static int parse_port_name(char *port_name,
|
||||
orte_process_name_t *rproc,
|
||||
orte_rml_tag_t *tag)
|
||||
{
|
||||
char *tmpstring=NULL, *ptr, *rml_uri=NULL;
|
||||
orte_rml_cmd_flag_t cmd = ORTE_RML_UPDATE_CMD;
|
||||
int rc;
|
||||
opal_buffer_t route;
|
||||
|
||||
/* don't mangle the port name */
|
||||
tmpstring = strdup(port_name);
|
||||
|
||||
/* find the ':' demarking the RML tag we added to the end */
|
||||
if (NULL == (ptr = strrchr(tmp_string, ':'))) {
|
||||
free(tmp_string);
|
||||
return NULL;
|
||||
if (NULL == (ptr = strrchr(tmpstring, ':'))) {
|
||||
rc = ORTE_ERR_NOT_FOUND;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* terminate the port_name at that location */
|
||||
@ -866,14 +825,95 @@ static char *parse_port (char *port_name, orte_rml_tag_t *tag)
|
||||
/* convert the RML tag */
|
||||
sscanf(ptr,"%d", (int*)tag);
|
||||
|
||||
/* see if the length of the RML uri is too long - if so,
|
||||
* truncate it
|
||||
*/
|
||||
if (strlen(tmp_string) > MPI_MAX_PORT_NAME) {
|
||||
tmp_string[MPI_MAX_PORT_NAME] = '\0';
|
||||
/* now split out the second field - the uri of the remote proc */
|
||||
if (NULL == (ptr = strchr(tmpstring, '+'))) {
|
||||
rc = ORTE_ERR_NOT_FOUND;
|
||||
goto cleanup;
|
||||
}
|
||||
*ptr = '\0';
|
||||
ptr++;
|
||||
|
||||
/* save that info */
|
||||
rml_uri = strdup(ptr);
|
||||
|
||||
/* extract the originating proc's name */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(ptr, rproc, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* if this proc is part of my job family, then I need to
|
||||
* update my RML contact hash table and my routes
|
||||
*/
|
||||
if (ORTE_JOB_FAMILY(rproc->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm_parse_port: same job family - updating route",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return tmp_string;
|
||||
/* set the contact info into the hash table */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(rml_uri))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_routed.update_route(rproc, rproc))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* the proc must be part of another job family. In this case, we
|
||||
* will route any messages to the proc through our HNP. We need
|
||||
* to update the HNP, though, so it knows how to reach the
|
||||
* HNP of the rproc's job family
|
||||
*/
|
||||
/* pack a cmd so the buffer can be unpacked correctly */
|
||||
OBJ_CONSTRUCT(&route, opal_buffer_t);
|
||||
opal_dss.pack(&route, &cmd, 1, ORTE_RML_CMD);
|
||||
|
||||
/* pack the HNP uri */
|
||||
opal_dss.pack(&route, &tmpstring, 1, OPAL_STRING);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm_parse_port: %s in diff job family - sending update to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(rproc),
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_HNP)));
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &route,
|
||||
ORTE_RML_TAG_RML_INFO_UPDATE, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&route);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* wait right here until the HNP acks the update to ensure that
|
||||
* any subsequent messaging can succeed
|
||||
*/
|
||||
ack_recvd = false;
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_UPDATE_ROUTE_ACK,
|
||||
ORTE_RML_NON_PERSISTENT, recv_ack, NULL);
|
||||
|
||||
ORTE_PROGRESSED_WAIT(ack_recvd, 0, 1);
|
||||
OBJ_DESTRUCT(&route);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm_parse_port: ack recvd",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* our get_route function automatically routes all messages for
|
||||
* other job families via the HNP, so nothing more to do here
|
||||
*/
|
||||
rc = ORTE_SUCCESS;
|
||||
|
||||
cleanup:
|
||||
/* release the tmp storage */
|
||||
if (NULL != tmpstring) {
|
||||
free(tmpstring);
|
||||
}
|
||||
if (NULL != rml_uri) {
|
||||
free(rml_uri);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int close_port(char *port_name)
|
||||
@ -941,3 +981,37 @@ static int finalize(void)
|
||||
}
|
||||
|
||||
|
||||
static void recv_cb(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag, void *cbdata)
|
||||
{
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message as it may ask us to do something that involves
|
||||
* more messaging! Instead, setup an event so that the message gets processed
|
||||
* as soon as we leave the recv.
|
||||
*
|
||||
* The macro makes a copy of the buffer, which we release when processed - the incoming
|
||||
* buffer, however, is NOT released here, although its payload IS transferred
|
||||
* to the message buffer for later processing
|
||||
*/
|
||||
ORTE_MESSAGE_EVENT(sender, buffer, tag, process_cb);
|
||||
|
||||
|
||||
}
|
||||
static void process_cb(int fd, short event, void *data)
|
||||
{
|
||||
orte_message_event_t *mev = (orte_message_event_t*)data;
|
||||
|
||||
/* copy the payload to the global buffer */
|
||||
opal_dss.copy_payload(cabuf, mev->buffer);
|
||||
|
||||
/* flag the identity of the remote proc */
|
||||
carport.jobid = mev->sender.jobid;
|
||||
carport.vpid = mev->sender.vpid;
|
||||
|
||||
/* release the event */
|
||||
OBJ_RELEASE(mev);
|
||||
|
||||
/* flag complete */
|
||||
recv_completed = true;
|
||||
}
|
||||
|
@ -91,16 +91,22 @@ static void setup_server(void)
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
opal_dss.pack(&buf, &cmd, 1, ORTE_RML_CMD);
|
||||
opal_dss.pack(&buf, &mca_pubsub_orte_component.server_uri, 1, OPAL_STRING);
|
||||
if (ORTE_SUCCESS != (rc = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, &buf))) {
|
||||
/* extract the server's name so we have its jobid */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(mca_pubsub_orte_component.server_uri,
|
||||
&mca_pubsub_orte_component.server, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
mca_pubsub_orte_component.server_found = false;
|
||||
return;
|
||||
}
|
||||
/* init routes to the server's job */
|
||||
if (ORTE_SUCCESS != (rc = orte_routed.init_routes(mca_pubsub_orte_component.server.jobid, &buf))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
mca_pubsub_orte_component.server_found = false;
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return;
|
||||
}
|
||||
OBJ_DESTRUCT(&buf);
|
||||
|
||||
/* extract the server's name */
|
||||
orte_rml_base_parse_uris(mca_pubsub_orte_component.server_uri, &mca_pubsub_orte_component.server, NULL);
|
||||
|
||||
/* flag the server as found */
|
||||
mca_pubsub_orte_component.server_found = true;
|
||||
|
@ -453,6 +453,7 @@ ompi_proc_unpack(opal_buffer_t* buf,
|
||||
if (isnew) {
|
||||
newprocs[newprocs_len++] = plist[i];
|
||||
|
||||
/* update all the values */
|
||||
plist[i]->proc_arch = new_arch;
|
||||
|
||||
/* if arch is different than mine, create a new convertor for this proc */
|
||||
|
@ -60,7 +60,7 @@ orte_ess_base_module_t orte_ess_alps_module = {
|
||||
};
|
||||
|
||||
static opal_pointer_array_t nidmap;
|
||||
static orte_pmap_t *pmap;
|
||||
static opal_pointer_array_t jobmap;
|
||||
static orte_vpid_t nprocs;
|
||||
|
||||
|
||||
@ -68,6 +68,7 @@ static int rte_init(char flags)
|
||||
{
|
||||
int ret;
|
||||
char *error = NULL;
|
||||
orte_jmap_t *jmap;
|
||||
|
||||
/* run the prolog */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
|
||||
@ -108,9 +109,15 @@ static int rte_init(char flags)
|
||||
OBJ_CONSTRUCT(&nidmap, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);
|
||||
|
||||
/* setup array of jmaps */
|
||||
OBJ_CONSTRUCT(&jobmap, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&jobmap, 1, INT32_MAX, 1);
|
||||
jmap = OBJ_NEW(orte_jmap_t);
|
||||
opal_pointer_array_add(&jobmap, jmap);
|
||||
|
||||
/* if one was provided, build my nidmap */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
|
||||
&nidmap, &pmap, &nprocs))) {
|
||||
&nidmap, &jmap->pmap, &nprocs))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_ess_base_build_nidmap";
|
||||
goto error;
|
||||
@ -131,6 +138,7 @@ static int rte_finalize(void)
|
||||
{
|
||||
int ret;
|
||||
orte_nid_t **nids;
|
||||
orte_jmap_t **jmaps;
|
||||
int32_t i;
|
||||
|
||||
/* if I am a daemon, finalize using the default procedure */
|
||||
@ -145,19 +153,18 @@ static int rte_finalize(void)
|
||||
}
|
||||
} else {
|
||||
/* otherwise, I must be an application process - deconstruct
|
||||
* my nidmap arrays
|
||||
*/
|
||||
* my nidmap and jobmap arrays
|
||||
*/
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
for (i=0; i < nidmap.size; i++) {
|
||||
if (NULL == nids[i]) {
|
||||
break;
|
||||
}
|
||||
if (NULL != nids[i]->name) {
|
||||
free(nids[i]->name);
|
||||
}
|
||||
for (i=0; i < nidmap.size && NULL != nids[i]; i++) {
|
||||
OBJ_RELEASE(nids[i]);
|
||||
}
|
||||
OBJ_DESTRUCT(&nidmap);
|
||||
free(pmap);
|
||||
jmaps = (orte_jmap_t**)jobmap.addr;
|
||||
for (i=0; i < jobmap.size && NULL != jmaps[i]; i++) {
|
||||
OBJ_RELEASE(jmaps[i]);
|
||||
}
|
||||
OBJ_DESTRUCT(&jobmap);
|
||||
|
||||
/* use the default procedure to finish */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
|
||||
@ -170,7 +177,14 @@ static int rte_finalize(void)
|
||||
|
||||
static bool proc_is_local(orte_process_name_t *proc)
|
||||
{
|
||||
if (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid) {
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (nid->daemon == ORTE_PROC_MY_DAEMON->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: proc %s is LOCAL",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
@ -187,112 +201,50 @@ static bool proc_is_local(orte_process_name_t *proc)
|
||||
|
||||
}
|
||||
|
||||
/* the daemon's vpid does not necessairly correlate
|
||||
* to the node's index in the node array since
|
||||
* some nodes may not have a daemon on them. Thus,
|
||||
* we have to search for the daemon in the array.
|
||||
* Fortunately, this is rarely done
|
||||
*/
|
||||
static int32_t find_daemon_node(orte_vpid_t vpid)
|
||||
{
|
||||
int32_t i;
|
||||
orte_nid_t **nids;
|
||||
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
for (i=0; i < nidmap.size; i++) {
|
||||
if (NULL == nids[i]) {
|
||||
break;
|
||||
}
|
||||
if (vpid == nids[i]->daemon) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* the daemon's vpid does not necessairly correlate
|
||||
* to the node's index in the node array since
|
||||
* some nodes may not have a daemon on them. Thus,
|
||||
* we have to search for the daemon in the array.
|
||||
* Fortunately, this is rarely done
|
||||
*/
|
||||
static int32_t find_daemon_node(orte_vpid_t vpid)
|
||||
{
|
||||
int32_t i;
|
||||
orte_nid_t **nids;
|
||||
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
for (i=0; i < nidmap.size; i++) {
|
||||
if (NULL == nids[i]) {
|
||||
break;
|
||||
}
|
||||
if (vpid == nids[i]->daemon) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
{
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
|
||||
/* looking for the daemon's hostname */
|
||||
node = find_daemon_node(proc->vpid);
|
||||
if (0 > node) {
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
node = pmap[proc->vpid].node;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return NULL;
|
||||
}
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
nids[node]->name));
|
||||
nid->name));
|
||||
|
||||
return nids[node]->name;
|
||||
return nid->name;
|
||||
}
|
||||
|
||||
static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
{
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
|
||||
/* looking for the daemon's arch */
|
||||
node = find_daemon_node(proc->vpid);
|
||||
if (0 > node) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
node = pmap[proc->vpid].node;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return 0;
|
||||
}
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: proc %s has arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
nids[node]->arch));
|
||||
nid->arch));
|
||||
|
||||
return nids[node]->arch;
|
||||
return nid->arch;
|
||||
}
|
||||
|
||||
static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
{
|
||||
orte_nid_t *nid;
|
||||
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
|
||||
node = pmap[proc->vpid].node;
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: updating proc %s to arch %0x",
|
||||
@ -300,32 +252,45 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
ORTE_NAME_PRINT(proc),
|
||||
arch));
|
||||
|
||||
nids[node]->arch = arch;
|
||||
nid->arch = arch;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
{
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
if (NULL == (pmap = orte_ess_base_lookup_pmap(&jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return UINT8_MAX;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
(int)pmap[proc->vpid].local_rank));
|
||||
(int)pmap->local_rank));
|
||||
|
||||
return pmap[proc->vpid].local_rank;
|
||||
return pmap->local_rank;
|
||||
}
|
||||
|
||||
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
{
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
if (NULL == (pmap = orte_ess_base_lookup_pmap(&jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return UINT8_MAX;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
(int)pmap[proc->vpid].node_rank));
|
||||
(int)pmap->node_rank));
|
||||
|
||||
return pmap[proc->vpid].node_rank;
|
||||
return pmap->node_rank;
|
||||
}
|
||||
|
||||
|
||||
|
@ -34,7 +34,7 @@ libmca_ess_la_SOURCES += \
|
||||
base/ess_base_std_tool.c \
|
||||
base/ess_base_std_app.c \
|
||||
base/ess_base_std_orted.c \
|
||||
base/ess_base_build_nidmap.c \
|
||||
base/ess_base_nidmap.c \
|
||||
base/ess_base_std_prolog.c
|
||||
endif
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/dss/dss_types.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
#include "opal/class/opal_value_array.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
@ -79,9 +80,18 @@ ORTE_DECLSPEC int orte_ess_base_tool_finalize(void);
|
||||
ORTE_DECLSPEC int orte_ess_base_orted_setup(void);
|
||||
ORTE_DECLSPEC int orte_ess_base_orted_finalize(void);
|
||||
|
||||
/*
|
||||
* Job/nid/pmap support
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_ess_base_build_nidmap(opal_buffer_t *buffer,
|
||||
opal_pointer_array_t *nidmap,
|
||||
orte_pmap_t **pmap, orte_vpid_t *num_procs);
|
||||
opal_value_array_t *pmap, orte_vpid_t *num_procs);
|
||||
|
||||
ORTE_DECLSPEC orte_pmap_t* orte_ess_base_lookup_pmap(opal_pointer_array_t *jobmap, orte_process_name_t *proc);
|
||||
|
||||
ORTE_DECLSPEC orte_nid_t* orte_ess_base_lookup_nid(opal_pointer_array_t *nidmap,
|
||||
opal_pointer_array_t *jobmap,
|
||||
orte_process_name_t *proc);
|
||||
|
||||
/*
|
||||
* Put functions
|
||||
|
@ -48,14 +48,5 @@ int orte_ess_env_get(void)
|
||||
}
|
||||
orte_process_info.num_procs = (orte_std_cntr_t)num_procs;
|
||||
|
||||
/* it is okay for this param not to be found - for example, we don't bother
|
||||
* to set it for orteds - so just set it to an invalid value which indicates
|
||||
* it wasn't found if it isn't there
|
||||
*/
|
||||
mca_base_param_reg_int_name("orte", "ess_local_rank",
|
||||
"Used to discover the local rank of a process on a node",
|
||||
true, false, (int)ORTE_VPID_INVALID, &num_procs);
|
||||
orte_process_info.local_rank = (orte_vpid_t)num_procs;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -23,6 +23,7 @@
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
#include "opal/class/opal_value_array.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
@ -34,7 +35,7 @@
|
||||
|
||||
int orte_ess_base_build_nidmap(opal_buffer_t *buffer,
|
||||
opal_pointer_array_t *nidmap,
|
||||
orte_pmap_t **pmap, orte_vpid_t *num_procs)
|
||||
opal_value_array_t *pmap, orte_vpid_t *num_procs)
|
||||
{
|
||||
int rc;
|
||||
opal_byte_object_t *bo;
|
||||
@ -79,3 +80,81 @@ int orte_ess_base_build_nidmap(opal_buffer_t *buffer,
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
orte_pmap_t* orte_ess_base_lookup_pmap(opal_pointer_array_t *jobmap, orte_process_name_t *proc)
|
||||
{
|
||||
int i;
|
||||
orte_jmap_t **jmaps;
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
jmaps = (orte_jmap_t**)jobmap->addr;
|
||||
for (i=0; i < jobmap->size && NULL != jmaps[i]; i++) {
|
||||
if (proc->jobid == jmaps[i]->job) {
|
||||
pmap = (orte_pmap_t*)opal_value_array_get_item(&jmaps[i]->pmap, proc->vpid);
|
||||
return pmap;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* the daemon's vpid does not necessarily correlate
|
||||
* to the node's index in the node array since
|
||||
* some nodes may not have a daemon on them. Thus,
|
||||
* we have to search for the daemon in the array.
|
||||
* Fortunately, this is rarely done
|
||||
*/
|
||||
static orte_nid_t* find_daemon_node(opal_pointer_array_t *nidmap,
|
||||
orte_process_name_t *proc)
|
||||
{
|
||||
int32_t i;
|
||||
orte_nid_t **nids;
|
||||
|
||||
nids = (orte_nid_t**)nidmap->addr;
|
||||
for (i=0; i < nidmap->size && NULL != nids[i]; i++) {
|
||||
if (nids[i]->daemon == proc->vpid) {
|
||||
return nids[i];
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
orte_nid_t* orte_ess_base_lookup_nid(opal_pointer_array_t *nidmap,
|
||||
opal_pointer_array_t *jobmap,
|
||||
orte_process_name_t *proc)
|
||||
{
|
||||
orte_nid_t *nid;
|
||||
orte_nid_t **nids;
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
if (ORTE_PROC_IS_DAEMON(proc->jobid)) {
|
||||
if (ORTE_JOB_FAMILY(proc->jobid) !=
|
||||
ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
|
||||
return NULL;
|
||||
}
|
||||
/* looking for a daemon in my family */
|
||||
if (NULL == (nid = find_daemon_node(nidmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
}
|
||||
return nid;
|
||||
}
|
||||
|
||||
/* looking for an application proc */
|
||||
if (NULL == (pmap = orte_ess_base_lookup_pmap(jobmap, proc))) {
|
||||
opal_output(0, "proc: %s not found", ORTE_NAME_PRINT(proc));
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (nidmap->size < pmap->node ||
|
||||
pmap->node < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
nids = (orte_nid_t**)nidmap->addr;
|
||||
return nids[pmap->node];
|
||||
}
|
||||
|
@ -76,13 +76,6 @@ int orte_ess_env_put(orte_std_cntr_t num_procs,
|
||||
opal_setenv("OMPI_COMM_WORLD_SIZE", value, true, env);
|
||||
free(value);
|
||||
|
||||
asprintf(&value, "%ld", (long) num_local_procs);
|
||||
if(NULL == (param = mca_base_param_environ_variable("orte","ess","num_local_procs"))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
opal_setenv(param, value, true, env);
|
||||
free(param);
|
||||
/* users would appreciate being given a public environmental variable
|
||||
* that also represents this value - something MPI specific - so
|
||||
* do that here.
|
||||
@ -90,6 +83,7 @@ int orte_ess_env_put(orte_std_cntr_t num_procs,
|
||||
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT.
|
||||
* We know - just live with it
|
||||
*/
|
||||
asprintf(&value, "%ld", (long) num_local_procs);
|
||||
opal_setenv("OMPI_COMM_WORLD_LOCAL_SIZE", value, true, env);
|
||||
free(value);
|
||||
|
||||
|
144
orte/mca/ess/env/ess_env_module.c
поставляемый
144
orte/mca/ess/env/ess_env_module.c
поставляемый
@ -110,13 +110,14 @@ orte_ess_base_module_t orte_ess_env_module = {
|
||||
};
|
||||
|
||||
static opal_pointer_array_t nidmap;
|
||||
static orte_pmap_t *pmap = NULL;
|
||||
static opal_pointer_array_t jobmap;
|
||||
static orte_vpid_t nprocs;
|
||||
|
||||
static int rte_init(char flags)
|
||||
{
|
||||
int ret;
|
||||
char *error = NULL;
|
||||
orte_jmap_t *jmap;
|
||||
|
||||
/* run the prolog */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
|
||||
@ -159,9 +160,16 @@ static int rte_init(char flags)
|
||||
OBJ_CONSTRUCT(&nidmap, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);
|
||||
|
||||
/* setup array of jmaps */
|
||||
OBJ_CONSTRUCT(&jobmap, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&jobmap, 1, INT32_MAX, 1);
|
||||
jmap = OBJ_NEW(orte_jmap_t);
|
||||
jmap->job = ORTE_PROC_MY_NAME->jobid;
|
||||
opal_pointer_array_add(&jobmap, jmap);
|
||||
|
||||
/* if one was provided, build my nidmap */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
|
||||
&nidmap, &pmap, &nprocs))) {
|
||||
&nidmap, &jmap->pmap, &nprocs))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_ess_base_build_nidmap";
|
||||
goto error;
|
||||
@ -182,6 +190,7 @@ static int rte_finalize(void)
|
||||
{
|
||||
int ret;
|
||||
orte_nid_t **nids;
|
||||
orte_jmap_t **jmaps;
|
||||
int32_t i;
|
||||
|
||||
/* if I am a daemon, finalize using the default procedure */
|
||||
@ -196,22 +205,19 @@ static int rte_finalize(void)
|
||||
}
|
||||
} else {
|
||||
/* otherwise, I must be an application process - deconstruct
|
||||
* my nidmap arrays
|
||||
* my nidmap and jobmap arrays
|
||||
*/
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
for (i=0; i < nidmap.size; i++) {
|
||||
if (NULL == nids[i]) {
|
||||
break;
|
||||
}
|
||||
if (NULL != nids[i]->name) {
|
||||
free(nids[i]->name);
|
||||
nids[i]->name = NULL;
|
||||
}
|
||||
for (i=0; i < nidmap.size && NULL != nids[i]; i++) {
|
||||
OBJ_RELEASE(nids[i]);
|
||||
}
|
||||
OBJ_DESTRUCT(&nidmap);
|
||||
free(pmap);
|
||||
pmap = NULL;
|
||||
|
||||
jmaps = (orte_jmap_t**)jobmap.addr;
|
||||
for (i=0; i < jobmap.size && NULL != jmaps[i]; i++) {
|
||||
OBJ_RELEASE(jmaps[i]);
|
||||
}
|
||||
OBJ_DESTRUCT(&jobmap);
|
||||
|
||||
/* use the default procedure to finish */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -223,14 +229,21 @@ static int rte_finalize(void)
|
||||
|
||||
static bool proc_is_local(orte_process_name_t *proc)
|
||||
{
|
||||
if (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid) {
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (nid->daemon == ORTE_PROC_MY_DAEMON->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s is LOCAL",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s is REMOTE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
@ -240,88 +253,50 @@ static bool proc_is_local(orte_process_name_t *proc)
|
||||
|
||||
}
|
||||
|
||||
/* the daemon's vpid does not necessairly correlate
|
||||
* to the node's index in the node array since
|
||||
* some nodes may not have a daemon on them. Thus,
|
||||
* we have to search for the daemon in the array.
|
||||
* Fortunately, this is rarely done
|
||||
*/
|
||||
static int32_t find_daemon_node(orte_vpid_t vpid)
|
||||
{
|
||||
int32_t i;
|
||||
orte_nid_t **nids;
|
||||
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
for (i=0; i < nidmap.size; i++) {
|
||||
if (NULL == nids[i]) {
|
||||
break;
|
||||
}
|
||||
if (vpid == nids[i]->daemon) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
{
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
|
||||
/* looking for the daemon's hostname */
|
||||
node = find_daemon_node(proc->vpid);
|
||||
if (0 > node) {
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
node = pmap[proc->vpid].node;
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return NULL;
|
||||
}
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
nids[node]->name));
|
||||
nid->name));
|
||||
|
||||
return nids[node]->name;
|
||||
return nid->name;
|
||||
}
|
||||
|
||||
static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
{
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
|
||||
/* looking for the daemon's arch */
|
||||
node = find_daemon_node(proc->vpid);
|
||||
if (0 > node) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
node = pmap[proc->vpid].node;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return 0;
|
||||
}
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s has arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
nids[node]->arch));
|
||||
nid->arch));
|
||||
|
||||
return nids[node]->arch;
|
||||
return nid->arch;
|
||||
}
|
||||
|
||||
static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
{
|
||||
orte_nid_t *nid;
|
||||
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
|
||||
node = pmap[proc->vpid].node;
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: updating proc %s to arch %0x",
|
||||
@ -329,32 +304,45 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
ORTE_NAME_PRINT(proc),
|
||||
arch));
|
||||
|
||||
nids[node]->arch = arch;
|
||||
nid->arch = arch;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
{
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
if (NULL == (pmap = orte_ess_base_lookup_pmap(&jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return UINT8_MAX;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
(int)pmap[proc->vpid].local_rank));
|
||||
(int)pmap->local_rank));
|
||||
|
||||
return pmap[proc->vpid].local_rank;
|
||||
return pmap->local_rank;
|
||||
}
|
||||
|
||||
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
{
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
if (NULL == (pmap = orte_ess_base_lookup_pmap(&jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return UINT8_MAX;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
(int)pmap[proc->vpid].node_rank));
|
||||
(int)pmap->node_rank));
|
||||
|
||||
return pmap[proc->vpid].node_rank;
|
||||
return pmap->node_rank;
|
||||
}
|
||||
|
||||
static int env_set_name(void)
|
||||
|
@ -82,7 +82,7 @@ orte_ess_base_module_t orte_ess_hnp_module = {
|
||||
NULL, /* don't need a proc_get_arch fn */
|
||||
NULL, /* don't need a proc_get_local_rank fn */
|
||||
NULL, /* don't need a proc_get_node_rank fn */
|
||||
NULL, /* don't need to update_arch */
|
||||
NULL, /* don't need to update_nidmap */
|
||||
NULL /* ft_event */
|
||||
};
|
||||
|
||||
|
@ -69,7 +69,7 @@ orte_ess_base_module_t orte_ess_lsf_module = {
|
||||
};
|
||||
|
||||
static opal_pointer_array_t nidmap;
|
||||
static orte_pmap_t *pmap;
|
||||
static opal_pointer_array_t jobmap;
|
||||
static orte_vpid_t nprocs;
|
||||
|
||||
|
||||
@ -77,7 +77,8 @@ static int rte_init(char flags)
|
||||
{
|
||||
int ret;
|
||||
char *error = NULL;
|
||||
|
||||
orte_jmap_t *jmap;
|
||||
|
||||
/* run the prolog */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
|
||||
error = "orte_ess_base_std_prolog";
|
||||
@ -117,9 +118,16 @@ static int rte_init(char flags)
|
||||
OBJ_CONSTRUCT(&nidmap, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);
|
||||
|
||||
/* setup array of jmaps */
|
||||
OBJ_CONSTRUCT(&jobmap, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&jobmap, 1, INT32_MAX, 1);
|
||||
jmap = OBJ_NEW(orte_jmap_t);
|
||||
jmap->job = ORTE_PROC_MY_NAME->jobid;
|
||||
opal_pointer_array_add(&jobmap, jmap);
|
||||
|
||||
/* if one was provided, build my nidmap */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
|
||||
&nidmap, &pmap, &nprocs))) {
|
||||
&nidmap, &jmap->pmap, &nprocs))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_ess_base_build_nidmap";
|
||||
goto error;
|
||||
@ -141,6 +149,7 @@ static int rte_finalize(void)
|
||||
{
|
||||
int ret;
|
||||
orte_nid_t **nids;
|
||||
orte_jmap_t **jmaps;
|
||||
int32_t i;
|
||||
|
||||
/* if I am a daemon, finalize using the default procedure */
|
||||
@ -155,19 +164,18 @@ static int rte_finalize(void)
|
||||
}
|
||||
} else {
|
||||
/* otherwise, I must be an application process - deconstruct
|
||||
* my nidmap arrays
|
||||
*/
|
||||
* my nidmap and jobmap arrays
|
||||
*/
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
for (i=0; i < nidmap.size; i++) {
|
||||
if (NULL == nids[i]) {
|
||||
break;
|
||||
}
|
||||
if (NULL != nids[i]->name) {
|
||||
free(nids[i]->name);
|
||||
}
|
||||
for (i=0; i < nidmap.size && NULL != nids[i]; i++) {
|
||||
OBJ_RELEASE(nids[i]);
|
||||
}
|
||||
OBJ_DESTRUCT(&nidmap);
|
||||
free(pmap);
|
||||
jmaps = (orte_jmap_t**)jobmap.addr;
|
||||
for (i=0; i < jobmap.size && NULL != jmaps[i]; i++) {
|
||||
OBJ_RELEASE(jmaps[i]);
|
||||
}
|
||||
OBJ_DESTRUCT(&jobmap);
|
||||
|
||||
/* use the default procedure to finish */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
|
||||
@ -180,7 +188,14 @@ static int rte_finalize(void)
|
||||
|
||||
static bool proc_is_local(orte_process_name_t *proc)
|
||||
{
|
||||
if (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid) {
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (nid->daemon == ORTE_PROC_MY_DAEMON->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: proc %s is LOCAL",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
@ -197,88 +212,50 @@ static bool proc_is_local(orte_process_name_t *proc)
|
||||
|
||||
}
|
||||
|
||||
/* the daemon's vpid does not necessairly correlate
|
||||
* to the node's index in the node array since
|
||||
* some nodes may not have a daemon on them. Thus,
|
||||
* we have to search for the daemon in the array.
|
||||
* Fortunately, this is rarely done
|
||||
*/
|
||||
static int32_t find_daemon_node(orte_vpid_t vpid)
|
||||
{
|
||||
int32_t i;
|
||||
orte_nid_t **nids;
|
||||
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
for (i=0; i < nidmap.size; i++) {
|
||||
if (NULL == nids[i]) {
|
||||
break;
|
||||
}
|
||||
if (vpid == nids[i]->daemon) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
{
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
|
||||
/* looking for the daemon's hostname */
|
||||
node = find_daemon_node(proc->vpid);
|
||||
if (0 > node) {
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
node = pmap[proc->vpid].node;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return NULL;
|
||||
}
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
nids[node]->name));
|
||||
nid->name));
|
||||
|
||||
return nids[node]->name;
|
||||
return nid->name;
|
||||
}
|
||||
|
||||
static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
{
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
|
||||
/* looking for the daemon's arch */
|
||||
node = find_daemon_node(proc->vpid);
|
||||
if (0 > node) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
node = pmap[proc->vpid].node;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return 0;
|
||||
}
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: proc %s has arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
nids[node]->arch));
|
||||
nid->arch));
|
||||
|
||||
return nids[node]->arch;
|
||||
return nid->arch;
|
||||
}
|
||||
|
||||
static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
{
|
||||
orte_nid_t *nid;
|
||||
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
|
||||
node = pmap[proc->vpid].node;
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: updating proc %s to arch %0x",
|
||||
@ -286,32 +263,45 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
ORTE_NAME_PRINT(proc),
|
||||
arch));
|
||||
|
||||
nids[node]->arch = arch;
|
||||
nid->arch = arch;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
{
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
if (NULL == (pmap = orte_ess_base_lookup_pmap(&jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return UINT8_MAX;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
(int)pmap[proc->vpid].local_rank));
|
||||
(int)pmap->local_rank));
|
||||
|
||||
return pmap[proc->vpid].local_rank;
|
||||
return pmap->local_rank;
|
||||
}
|
||||
|
||||
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
{
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
if (NULL == (pmap = orte_ess_base_lookup_pmap(&jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return UINT8_MAX;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
(int)pmap[proc->vpid].node_rank));
|
||||
(int)pmap->node_rank));
|
||||
|
||||
return pmap[proc->vpid].node_rank;
|
||||
return pmap->node_rank;
|
||||
}
|
||||
|
||||
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
#include "opal/class/opal_value_array.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
@ -87,13 +88,15 @@ orte_ess_base_module_t orte_ess_singleton_module = {
|
||||
};
|
||||
|
||||
static opal_pointer_array_t nidmap;
|
||||
static orte_pmap_t *pmap;
|
||||
static opal_pointer_array_t jobmap;
|
||||
static orte_vpid_t nprocs;
|
||||
|
||||
static int rte_init(char flags)
|
||||
{
|
||||
int rc;
|
||||
orte_nid_t *node;
|
||||
orte_jmap_t *jmap;
|
||||
orte_pmap_t pmap;
|
||||
|
||||
/* run the prolog */
|
||||
if (ORTE_SUCCESS != (rc = orte_ess_base_std_prolog())) {
|
||||
@ -143,18 +146,6 @@ static int rte_init(char flags)
|
||||
|
||||
orte_process_info.num_procs = 1;
|
||||
|
||||
/* since we are a singleton, then we must have a local_rank of 0
|
||||
* and only 1 local process
|
||||
*/
|
||||
orte_process_info.local_rank = 0;
|
||||
orte_process_info.num_local_procs = 1;
|
||||
|
||||
/* use the std app init to complete the procedure */
|
||||
if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup())) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* NOTE: do not wireup our io - let the fork'd orted serve
|
||||
* as our io handler. This prevents issues with the event
|
||||
* library wrt pty's and stdin
|
||||
@ -165,26 +156,39 @@ static int rte_init(char flags)
|
||||
opal_pointer_array_init(&nidmap, 1,
|
||||
INT32_MAX, 8);
|
||||
|
||||
/* setup array of jmaps */
|
||||
OBJ_CONSTRUCT(&jobmap, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&jobmap, 1, INT32_MAX, 1);
|
||||
jmap = OBJ_NEW(orte_jmap_t);
|
||||
jmap->job = ORTE_PROC_MY_NAME->jobid;
|
||||
opal_pointer_array_add(&jobmap, jmap);
|
||||
|
||||
/* we cannot use the std nidmap construction in the ess/base because
|
||||
* the daemon couldn't pass us the info! Since we are a singleton, we
|
||||
* already -know- the info, so we will construct it ourselves
|
||||
*/
|
||||
|
||||
/* create a nidmap entry for this node */
|
||||
node = (orte_nid_t*)malloc(sizeof(orte_nid_t));
|
||||
node = OBJ_NEW(orte_nid_t);
|
||||
node->name = strdup(orte_process_info.nodename);
|
||||
node->daemon = 0; /* the HNP co-occupies our node */
|
||||
node->arch = orte_process_info.arch;
|
||||
opal_pointer_array_set_item(&nidmap, 0, node);
|
||||
nprocs = 1;
|
||||
|
||||
/* likewise, we need to construct our own pidmap. Again, since we are
|
||||
/* likewise, we need to construct our own jobmap. Again, since we are
|
||||
* a singleton, this is rather trivial
|
||||
*/
|
||||
pmap = (orte_pmap_t*)malloc(sizeof(orte_pmap_t));
|
||||
pmap[0].local_rank = 0;
|
||||
pmap[0].node_rank = 0;
|
||||
pmap[0].node = 0;
|
||||
pmap.local_rank = 0;
|
||||
pmap.node_rank = 0;
|
||||
pmap.node = 0;
|
||||
opal_value_array_set_item(&jmap->pmap, 0, &pmap);
|
||||
|
||||
/* use the std app init to complete the procedure */
|
||||
if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup())) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -193,20 +197,20 @@ static int rte_finalize(void)
|
||||
{
|
||||
int ret;
|
||||
orte_nid_t **nids;
|
||||
orte_jmap_t **jmaps;
|
||||
int32_t i;
|
||||
|
||||
/* deconstruct my nidmap arrays */
|
||||
/* deconstruct my nidmap and jobmap arrays */
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
for (i=0; i < nidmap.size; i++) {
|
||||
if (NULL == nids[i]) {
|
||||
break;
|
||||
}
|
||||
if (NULL != nids[i]->name) {
|
||||
free(nids[i]->name);
|
||||
}
|
||||
for (i=0; i < nidmap.size && NULL != nids[i]; i++) {
|
||||
OBJ_RELEASE(nids[i]);
|
||||
}
|
||||
OBJ_DESTRUCT(&nidmap);
|
||||
free(pmap);
|
||||
jmaps = (orte_jmap_t**)jobmap.addr;
|
||||
for (i=0; i < jobmap.size && NULL != jmaps[i]; i++) {
|
||||
OBJ_RELEASE(jmaps[i]);
|
||||
}
|
||||
OBJ_DESTRUCT(&jobmap);
|
||||
|
||||
/* use the default procedure to finish */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
|
||||
@ -419,7 +423,14 @@ static int fork_hnp(void)
|
||||
|
||||
static bool proc_is_local(orte_process_name_t *proc)
|
||||
{
|
||||
if (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid) {
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (nid->daemon == ORTE_PROC_MY_DAEMON->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s is LOCAL",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
@ -436,74 +447,50 @@ static bool proc_is_local(orte_process_name_t *proc)
|
||||
|
||||
}
|
||||
|
||||
/* the daemon's vpid does not necessairly correlate
|
||||
* to the node's index in the node array since
|
||||
* some nodes may not have a daemon on them. Thus,
|
||||
* we have to search for the daemon in the array.
|
||||
* Fortunately, this is rarely done
|
||||
*/
|
||||
static int32_t find_daemon_node(orte_vpid_t vpid)
|
||||
{
|
||||
int32_t i;
|
||||
orte_nid_t **nids;
|
||||
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
for (i=0; i < nidmap.size; i++) {
|
||||
if (NULL == nids[i]) {
|
||||
break;
|
||||
}
|
||||
if (vpid == nids[i]->daemon) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
{
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
|
||||
/* looking for the daemon's hostname */
|
||||
node = find_daemon_node(proc->vpid);
|
||||
if (0 > node) {
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
node = pmap[proc->vpid].node;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return NULL;
|
||||
}
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
return nids[node]->name;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
nid->name));
|
||||
|
||||
return nid->name;
|
||||
}
|
||||
|
||||
static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
{
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
|
||||
/* looking for the daemon's arch */
|
||||
node = find_daemon_node(proc->vpid);
|
||||
if (0 > node) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
node = pmap[proc->vpid].node;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return 0;
|
||||
}
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
return nids[node]->arch;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: proc %s has arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
nid->arch));
|
||||
|
||||
return nid->arch;
|
||||
}
|
||||
|
||||
static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
{
|
||||
orte_nid_t *nid;
|
||||
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
|
||||
node = pmap[proc->vpid].node;
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: updating proc %s to arch %0x",
|
||||
@ -511,16 +498,43 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
ORTE_NAME_PRINT(proc),
|
||||
arch));
|
||||
|
||||
nids[node]->arch = arch;
|
||||
nid->arch = arch;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
{
|
||||
return pmap[proc->vpid].local_rank;
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
if (NULL == (pmap = orte_ess_base_lookup_pmap(&jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return UINT8_MAX;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
(int)pmap->local_rank));
|
||||
|
||||
return pmap->local_rank;
|
||||
}
|
||||
|
||||
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
{
|
||||
return pmap[proc->vpid].node_rank;
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
if (NULL == (pmap = orte_ess_base_lookup_pmap(&jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return UINT8_MAX;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
(int)pmap->node_rank));
|
||||
|
||||
return pmap->node_rank;
|
||||
}
|
||||
|
@ -72,14 +72,15 @@ orte_ess_base_module_t orte_ess_slurm_module = {
|
||||
|
||||
|
||||
static opal_pointer_array_t nidmap;
|
||||
static orte_pmap_t *pmap;
|
||||
static opal_pointer_array_t jobmap;
|
||||
static orte_vpid_t nprocs;
|
||||
|
||||
static int rte_init(char flags)
|
||||
{
|
||||
int ret;
|
||||
char *error = NULL;
|
||||
|
||||
orte_jmap_t *jmap;
|
||||
|
||||
/* run the prolog */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
|
||||
error = "orte_ess_base_std_prolog";
|
||||
@ -119,9 +120,16 @@ static int rte_init(char flags)
|
||||
OBJ_CONSTRUCT(&nidmap, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);
|
||||
|
||||
/* setup array of jmaps */
|
||||
OBJ_CONSTRUCT(&jobmap, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&jobmap, 1, INT32_MAX, 1);
|
||||
jmap = OBJ_NEW(orte_jmap_t);
|
||||
jmap->job = ORTE_PROC_MY_NAME->jobid;
|
||||
opal_pointer_array_add(&jobmap, jmap);
|
||||
|
||||
/* if one was provided, build my nidmap */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
|
||||
&nidmap, &pmap, &nprocs))) {
|
||||
&nidmap, &jmap->pmap, &nprocs))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_ess_base_build_nidmap";
|
||||
goto error;
|
||||
@ -142,6 +150,7 @@ static int rte_finalize(void)
|
||||
{
|
||||
int ret;
|
||||
orte_nid_t **nids;
|
||||
orte_jmap_t **jmaps;
|
||||
int32_t i;
|
||||
|
||||
/* if I am a daemon, finalize using the default procedure */
|
||||
@ -156,19 +165,18 @@ static int rte_finalize(void)
|
||||
}
|
||||
} else {
|
||||
/* otherwise, I must be an application process - deconstruct
|
||||
* my nidmap arrays
|
||||
* my nidmap and jobmap arrays
|
||||
*/
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
for (i=0; i < nidmap.size; i++) {
|
||||
if (NULL == nids[i]) {
|
||||
break;
|
||||
}
|
||||
if (NULL != nids[i]->name) {
|
||||
free(nids[i]->name);
|
||||
}
|
||||
for (i=0; i < nidmap.size && NULL != nids[i]; i++) {
|
||||
OBJ_RELEASE(nids[i]);
|
||||
}
|
||||
OBJ_DESTRUCT(&nidmap);
|
||||
free(pmap);
|
||||
jmaps = (orte_jmap_t**)jobmap.addr;
|
||||
for (i=0; i < jobmap.size && NULL != jmaps[i]; i++) {
|
||||
OBJ_RELEASE(jmaps[i]);
|
||||
}
|
||||
OBJ_DESTRUCT(&jobmap);
|
||||
|
||||
/* use the default procedure to finish */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
|
||||
@ -181,7 +189,14 @@ static int rte_finalize(void)
|
||||
|
||||
static bool proc_is_local(orte_process_name_t *proc)
|
||||
{
|
||||
if (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid) {
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (nid->daemon == ORTE_PROC_MY_DAEMON->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: proc %s is LOCAL",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
@ -198,88 +213,50 @@ static bool proc_is_local(orte_process_name_t *proc)
|
||||
|
||||
}
|
||||
|
||||
/* the daemon's vpid does not necessairly correlate
|
||||
* to the node's index in the node array since
|
||||
* some nodes may not have a daemon on them. Thus,
|
||||
* we have to search for the daemon in the array.
|
||||
* Fortunately, this is rarely done
|
||||
*/
|
||||
static int32_t find_daemon_node(orte_vpid_t vpid)
|
||||
{
|
||||
int32_t i;
|
||||
orte_nid_t **nids;
|
||||
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
for (i=0; i < nidmap.size; i++) {
|
||||
if (NULL == nids[i]) {
|
||||
break;
|
||||
}
|
||||
if (vpid == nids[i]->daemon) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
{
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
|
||||
/* looking for the daemon's hostname */
|
||||
node = find_daemon_node(proc->vpid);
|
||||
if (0 > node) {
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
node = pmap[proc->vpid].node;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return NULL;
|
||||
}
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
nids[node]->name));
|
||||
nid->name));
|
||||
|
||||
return nids[node]->name;
|
||||
return nid->name;
|
||||
}
|
||||
|
||||
static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
{
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
orte_nid_t *nid;
|
||||
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
|
||||
/* looking for the daemon's arch */
|
||||
node = find_daemon_node(proc->vpid);
|
||||
if (0 > node) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
node = pmap[proc->vpid].node;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return 0;
|
||||
}
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: proc %s has arch %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
nids[node]->arch));
|
||||
nid->arch));
|
||||
|
||||
return nids[node]->arch;
|
||||
return nid->arch;
|
||||
}
|
||||
|
||||
static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
{
|
||||
orte_nid_t *nid;
|
||||
|
||||
int32_t node;
|
||||
orte_nid_t **nids;
|
||||
|
||||
node = pmap[proc->vpid].node;
|
||||
nids = (orte_nid_t**)nidmap.addr;
|
||||
if (NULL == (nid = orte_ess_base_lookup_nid(&nidmap, &jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: updating proc %s to arch %0x",
|
||||
@ -287,32 +264,45 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
ORTE_NAME_PRINT(proc),
|
||||
arch));
|
||||
|
||||
nids[node]->arch = arch;
|
||||
nid->arch = arch;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
{
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
if (NULL == (pmap = orte_ess_base_lookup_pmap(&jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return UINT8_MAX;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
(int)pmap[proc->vpid].local_rank));
|
||||
(int)pmap->local_rank));
|
||||
|
||||
return pmap[proc->vpid].local_rank;
|
||||
return pmap->local_rank;
|
||||
}
|
||||
|
||||
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
{
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
if (NULL == (pmap = orte_ess_base_lookup_pmap(&jobmap, proc))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return UINT8_MAX;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
(int)pmap[proc->vpid].node_rank));
|
||||
(int)pmap->node_rank));
|
||||
|
||||
return pmap[proc->vpid].node_rank;
|
||||
return pmap->node_rank;
|
||||
}
|
||||
|
||||
static int slurm_set_name(void)
|
||||
|
@ -55,7 +55,7 @@ orte_ess_base_module_t orte_ess_tool_module = {
|
||||
NULL, /* don't need a proc_get_arch fn */
|
||||
NULL, /* don't need a proc_get_local_rank fn */
|
||||
NULL, /* don't need a proc_get_node_rank fn */
|
||||
NULL, /* don't need to update_arch */
|
||||
NULL, /* don't need to update_nidmap */
|
||||
NULL /* ft_event */
|
||||
};
|
||||
|
||||
|
@ -543,12 +543,24 @@ static int modex(opal_list_t *procs)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* update the arch in the ESS */
|
||||
if (ORTE_SUCCESS != (rc = orte_ess.update_arch(&proc_name, arch))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
/* update the arch in the ESS
|
||||
* RHC: DO NOT UPDATE ARCH IF THE PROC IS NOT IN OUR JOB. THIS IS A TEMPORARY
|
||||
* FIX TO COMPENSATE FOR A PROBLEM IN THE CONNECT/ACCEPT CODE WHERE WE EXCHANGE
|
||||
* INFO INCLUDING THE ARCH, BUT THEN DO A MODEX THAT ALSO INCLUDES THE ARCH. WE
|
||||
* CANNOT UPDATE THE ARCH FOR JOBS OUTSIDE OUR OWN AS THE ESS HAS NO INFO ON
|
||||
* THOSE PROCS/NODES - AND DOESN'T NEED IT AS THE MPI LAYER HAS ALREADY SET
|
||||
* ITSELF UP AND DOES NOT NEED ESS SUPPORT FOR PROCS IN THE OTHER JOB
|
||||
*
|
||||
* EVENTUALLY, WE WILL SUPPORT THE ESS HAVING INFO ON OTHER JOBS FOR
|
||||
* FAULT TOLERANCE PURPOSES - BUT NOT RIGHT NOW
|
||||
*/
|
||||
if (proc_name.jobid == ORTE_PROC_MY_NAME->jobid) {
|
||||
if (ORTE_SUCCESS != (rc = orte_ess.update_arch(&proc_name, arch))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* update the modex database */
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_update_modex_entries(&proc_name, &rbuf))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -331,7 +331,13 @@ int orte_grpcomm_base_get_proc_attr(const orte_process_name_t proc,
|
||||
modex_attr_data_t *attr_data;
|
||||
|
||||
proc_data = modex_lookup_orte_proc(&proc);
|
||||
if (NULL == proc_data) return ORTE_ERR_NOT_FOUND;
|
||||
if (NULL == proc_data) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:get_proc_attr: no modex entry for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc)));
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&proc_data->modex_lock);
|
||||
|
||||
@ -342,8 +348,9 @@ int orte_grpcomm_base_get_proc_attr(const orte_process_name_t proc,
|
||||
if ((NULL == attr_data) ||
|
||||
(attr_data->attr_data_size == 0)) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:get_proc_attr: no attr avail or zero byte size",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
"%s grpcomm:get_proc_attr: no attr avail or zero byte size for proc %s attribute %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc), attribute_name));
|
||||
*val = NULL;
|
||||
*size = 0;
|
||||
} else {
|
||||
|
@ -205,6 +205,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
opal_byte_object_t *bo;
|
||||
int32_t numbytes;
|
||||
orte_nid_t *node;
|
||||
orte_pmap_t *pmap;
|
||||
opal_buffer_t alert;
|
||||
opal_list_item_t *item;
|
||||
orte_namelist_t *nm;
|
||||
@ -367,12 +368,18 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
for (j=0; j < jobdat->num_procs; j++) {
|
||||
proc.vpid = j;
|
||||
/* ident this proc's node */
|
||||
node = (orte_nid_t*)orte_daemonmap.addr[jobdat->procmap[j].node];
|
||||
pmap = opal_value_array_get_item(&jobdat->procmap, j);
|
||||
if (pmap->node < 0 || pmap->node >= orte_daemonmap.size) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
|
||||
rc = ORTE_ERR_VALUE_OUT_OF_BOUNDS;
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
node = (orte_nid_t*)orte_daemonmap.addr[pmap->node];
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:constructing child list - checking proc %s on node %d with daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_VPID_PRINT(j),
|
||||
jobdat->procmap[j].node, ORTE_VPID_PRINT(node->daemon)));
|
||||
pmap->node, ORTE_VPID_PRINT(node->daemon)));
|
||||
|
||||
/* does this data belong to us? */
|
||||
if (ORTE_PROC_MY_NAME->vpid == node->daemon) {
|
||||
@ -391,7 +398,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
child->app_idx = app_idx[j]; /* save the index into the app_context objects */
|
||||
child->local_rank = jobdat->procmap[j].local_rank; /* save the local_rank */
|
||||
if (NULL != slot_str && NULL != slot_str[j]) {
|
||||
child->slot_list = strdup(slot_str[j]);
|
||||
}
|
||||
@ -413,8 +419,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
item != opal_list_get_end(&daemon_tree);
|
||||
item = opal_list_get_next(item)) {
|
||||
nm = (orte_namelist_t*)item;
|
||||
if ((int)nm->name.vpid == jobdat->procmap[j].node ||
|
||||
nm->name.vpid == ORTE_VPID_WILDCARD) {
|
||||
if (nm->name.vpid == node->daemon) {
|
||||
/* add to the count for collectives */
|
||||
jobdat->num_participating++;
|
||||
/* remove this node from the tree so we don't count it again */
|
||||
@ -427,7 +432,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
/* set the routing info through the other daemon - we need to do this
|
||||
* prior to launch as the procs may want to communicate right away
|
||||
*/
|
||||
daemon.vpid = jobdat->procmap[j].node;
|
||||
daemon.vpid = node->daemon;
|
||||
if (ORTE_SUCCESS != (rc = orte_routed.update_route(&proc, &daemon))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
@ -440,6 +445,10 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
jobdat->num_participating++;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:construct:child: num_participating %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), jobdat->num_participating));
|
||||
|
||||
if (NULL != app_idx) {
|
||||
free(app_idx);
|
||||
app_idx = NULL;
|
||||
@ -652,20 +661,15 @@ static int odls_base_default_setup_fork(orte_app_context_t *context,
|
||||
free(param);
|
||||
free(param2);
|
||||
|
||||
/* set the universe size in the environment */
|
||||
param = mca_base_param_environ_variable("orte","universe","size");
|
||||
asprintf(¶m2, "%ld", (long)total_slots_alloc);
|
||||
opal_setenv(param, param2, true, environ_copy);
|
||||
free(param);
|
||||
|
||||
/* although the total_slots_alloc is the universe size, users
|
||||
* would appreciate being given a public environmental variable
|
||||
* that also represents this value - something MPI specific - so
|
||||
* do that here.
|
||||
* do that here. Also required by the ompi_attributes code!
|
||||
*
|
||||
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT.
|
||||
* We know - just live with it
|
||||
*/
|
||||
asprintf(¶m2, "%ld", (long)total_slots_alloc);
|
||||
opal_setenv("OMPI_UNIVERSE_SIZE", param2, true, environ_copy);
|
||||
free(param2);
|
||||
|
||||
@ -759,6 +763,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
|
||||
opal_buffer_t alert;
|
||||
orte_std_cntr_t proc_rank;
|
||||
orte_odls_job_t *jobdat;
|
||||
orte_pmap_t *pmap;
|
||||
|
||||
/* protect operations involving the global list of children */
|
||||
OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
|
||||
@ -983,21 +988,15 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
|
||||
opal_setenv("OMPI_COMM_WORLD_RANK", vpid_str, true, &app->env);
|
||||
free(vpid_str); /* done with this now */
|
||||
|
||||
asprintf(&value, "%lu", (unsigned long) child->local_rank);
|
||||
if(NULL == (param = mca_base_param_environ_variable("orte","ess","local_rank"))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
goto CLEANUP;
|
||||
}
|
||||
opal_setenv(param, value, true, &app->env);
|
||||
free(param);
|
||||
/* users would appreciate being given a public environmental variable
|
||||
* that also represents this value - something MPI specific - so
|
||||
* that also represents the local rank value - something MPI specific - so
|
||||
* do that here.
|
||||
*
|
||||
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT.
|
||||
* We know - just live with it
|
||||
*/
|
||||
pmap = (orte_pmap_t*)opal_value_array_get_item(&jobdat->procmap, child->name->vpid);
|
||||
asprintf(&value, "%lu", (unsigned long) pmap->local_rank);
|
||||
opal_setenv("OMPI_COMM_WORLD_LOCAL_RANK", value, true, &app->env);
|
||||
free(value);
|
||||
|
||||
@ -1299,6 +1298,7 @@ static int pack_child_contact_info(orte_jobid_t job, opal_buffer_t *buf)
|
||||
static void setup_singleton_jobdat(orte_jobid_t jobid)
|
||||
{
|
||||
orte_odls_job_t *jobdat;
|
||||
orte_pmap_t pmap;
|
||||
int32_t one32;
|
||||
int8_t one8;
|
||||
opal_buffer_t buffer;
|
||||
@ -1308,10 +1308,10 @@ static void setup_singleton_jobdat(orte_jobid_t jobid)
|
||||
jobdat->jobid = jobid;
|
||||
jobdat->num_procs = 1;
|
||||
jobdat->num_local_procs = 1;
|
||||
jobdat->procmap = (orte_pmap_t*)malloc(sizeof(orte_pmap_t));
|
||||
jobdat->procmap[0].node = ORTE_PROC_MY_NAME->vpid;
|
||||
jobdat->procmap[0].local_rank = 0;
|
||||
jobdat->procmap[0].node_rank = opal_list_get_size(&orte_odls_globals.children);
|
||||
pmap.node = 0; /* since it is a singleton, it must be on the first node in array */
|
||||
pmap.local_rank = 0;
|
||||
pmap.node_rank = opal_list_get_size(&orte_odls_globals.children);
|
||||
opal_value_array_set_item(&jobdat->procmap, 0, &pmap);
|
||||
/* also need to setup a pidmap for it */
|
||||
OBJ_CONSTRUCT(&buffer, opal_buffer_t);
|
||||
opal_dss.pack(&buffer, &(ORTE_PROC_MY_NAME->vpid), 1, ORTE_VPID); /* num_procs */
|
||||
|
@ -72,7 +72,6 @@ orte_odls_base_module_t orte_odls;
|
||||
static void orte_odls_child_constructor(orte_odls_child_t *ptr)
|
||||
{
|
||||
ptr->name = NULL;
|
||||
ptr->local_rank = ORTE_VPID_INVALID;
|
||||
ptr->pid = 0;
|
||||
ptr->app_idx = -1;
|
||||
ptr->alive = false;
|
||||
@ -105,7 +104,8 @@ static void orte_odls_job_constructor(orte_odls_job_t *ptr)
|
||||
ptr->total_slots_alloc = 0;
|
||||
ptr->num_procs = 0;
|
||||
ptr->num_local_procs = 0;
|
||||
ptr->procmap = NULL;
|
||||
OBJ_CONSTRUCT(&ptr->procmap, opal_value_array_t);
|
||||
opal_value_array_init(&ptr->procmap, sizeof(orte_pmap_t));
|
||||
ptr->pmap = NULL;
|
||||
OBJ_CONSTRUCT(&ptr->collection_bucket, opal_buffer_t);
|
||||
OBJ_CONSTRUCT(&ptr->local_collection, opal_buffer_t);
|
||||
@ -127,9 +127,7 @@ static void orte_odls_job_destructor(orte_odls_job_t *ptr)
|
||||
}
|
||||
}
|
||||
|
||||
if (NULL != ptr->procmap) {
|
||||
free(ptr->procmap);
|
||||
}
|
||||
OBJ_DESTRUCT(&ptr->procmap);
|
||||
|
||||
if (NULL != ptr->pmap && NULL != ptr->pmap->bytes) {
|
||||
free(ptr->pmap->bytes);
|
||||
|
@ -29,6 +29,7 @@
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
#include "opal/class/opal_value_array.h"
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "opal/threads/condition.h"
|
||||
#include "opal/dss/dss_types.h"
|
||||
@ -55,7 +56,6 @@ BEGIN_C_DECLS
|
||||
typedef struct {
|
||||
opal_list_item_t super; /* required to place this on a list */
|
||||
orte_process_name_t *name; /* the OpenRTE name of the proc */
|
||||
orte_vpid_t local_rank; /* local rank of the proc on this node */
|
||||
pid_t pid; /* local pid of the proc */
|
||||
orte_std_cntr_t app_idx; /* index of the app_context for this proc */
|
||||
bool alive; /* is this proc alive? */
|
||||
@ -78,7 +78,7 @@ typedef struct orte_odls_job_t {
|
||||
orte_std_cntr_t total_slots_alloc;
|
||||
orte_vpid_t num_procs;
|
||||
int32_t num_local_procs;
|
||||
orte_pmap_t *procmap; /* map of procs/node, local ranks */
|
||||
opal_value_array_t procmap; /* map of procs/node, local ranks */
|
||||
opal_byte_object_t *pmap; /* byte object version of procmap */
|
||||
opal_buffer_t collection_bucket;
|
||||
opal_buffer_t local_collection;
|
||||
|
@ -86,8 +86,10 @@ static void process_message(int fd, short event, void *data)
|
||||
orte_message_event_t *mev = (orte_message_event_t*)data;
|
||||
orte_rml_cmd_flag_t command;
|
||||
orte_std_cntr_t count;
|
||||
opal_buffer_t buf;
|
||||
int rc;
|
||||
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_output,
|
||||
"%s rml:base:recv: processing message from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
@ -107,6 +109,25 @@ static void process_message(int fd, short event, void *data)
|
||||
default:
|
||||
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
|
||||
}
|
||||
|
||||
/* send an ack back - this is REQUIRED to ensure that the routing
|
||||
* info gets updated -before- a message intending to use that info
|
||||
* arrives. Because message ordering is NOT preserved in the OOB, it
|
||||
* is possible for code that updates our contact info and then sends
|
||||
* a message to fail because the update contact info message is
|
||||
* processed too late
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_output,
|
||||
"%s rml:base:recv: sending ack to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&mev->sender)));
|
||||
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
if (0 > (rc = orte_rml.send_buffer(&mev->sender, &buf, ORTE_RML_TAG_UPDATE_ROUTE_ACK, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
OBJ_DESTRUCT(&buf);
|
||||
|
||||
OBJ_RELEASE(mev);
|
||||
}
|
||||
|
||||
|
@ -73,7 +73,7 @@ BEGIN_C_DECLS
|
||||
#define ORTE_RML_TAG_BARRIER 17
|
||||
|
||||
#define ORTE_RML_TAG_INIT_ROUTES 18
|
||||
#define ORTE_RML_TAG_UPDATE_ROUTES 19
|
||||
#define ORTE_RML_TAG_UPDATE_ROUTE_ACK 19
|
||||
#define ORTE_RML_TAG_SYNC 20
|
||||
|
||||
/* For FileM Base */
|
||||
|
@ -49,11 +49,6 @@ static int warmup_routes(void);
|
||||
static int binomial_ft_event(int state);
|
||||
#endif
|
||||
|
||||
static orte_process_name_t *lifeline=NULL;
|
||||
static orte_process_name_t my_parent;
|
||||
static int num_children;
|
||||
static opal_list_t my_children;
|
||||
|
||||
orte_routed_module_t orte_routed_binomial_module = {
|
||||
init,
|
||||
finalize,
|
||||
@ -79,6 +74,11 @@ static opal_hash_table_t vpid_wildcard_list;
|
||||
static orte_process_name_t wildcard_route;
|
||||
static opal_condition_t cond;
|
||||
static opal_mutex_t lock;
|
||||
static orte_process_name_t *lifeline=NULL;
|
||||
static orte_process_name_t my_parent;
|
||||
static int num_children;
|
||||
static opal_list_t my_children;
|
||||
static bool ack_recvd;
|
||||
|
||||
|
||||
static int init(void)
|
||||
@ -131,9 +131,9 @@ static int finalize(void)
|
||||
}
|
||||
|
||||
/* don't destruct the routes until *after* we send the
|
||||
* sync as the oob will be asking us how to route
|
||||
* the message!
|
||||
*/
|
||||
* sync as the oob will be asking us how to route
|
||||
* the message!
|
||||
*/
|
||||
rc = opal_hash_table_get_first_key_uint64(&peer_list,
|
||||
&key, &value, &node);
|
||||
while(OPAL_SUCCESS == rc) {
|
||||
@ -166,13 +166,21 @@ static int update_route(orte_process_name_t *target,
|
||||
orte_process_name_t *route)
|
||||
{
|
||||
int rc;
|
||||
orte_process_name_t * route_copy;
|
||||
orte_process_name_t *route_copy;
|
||||
|
||||
if (target->jobid == ORTE_JOBID_INVALID ||
|
||||
target->vpid == ORTE_VPID_INVALID) {
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* if I am an application process, we don't update the route since
|
||||
* we automatically route everything through the local daemon
|
||||
*/
|
||||
if (!orte_process_info.hnp && !orte_process_info.daemon &&
|
||||
!orte_process_info.tool) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial_update: %s --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
@ -180,28 +188,49 @@ static int update_route(orte_process_name_t *target,
|
||||
ORTE_NAME_PRINT(route)));
|
||||
|
||||
|
||||
/* if I am an application process, we don't update the route unless
|
||||
* the conditions dictate it. This is done to avoid creating large
|
||||
* hash tables when they aren't needed
|
||||
/* if this is from a different job family, then I need to
|
||||
* track how to send messages to it
|
||||
*/
|
||||
if (!orte_process_info.hnp && !orte_process_info.daemon &&
|
||||
!orte_process_info.tool) {
|
||||
/* if the route is the daemon, then do nothing - we already route
|
||||
* everything through the daemon anyway
|
||||
if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
|
||||
/* if I am a daemon, then I will automatically route
|
||||
* anything to this job family via my HNP - so nothing to do
|
||||
* here, just return
|
||||
*/
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route,
|
||||
ORTE_PROC_MY_DAEMON)) {
|
||||
if (orte_process_info.daemon) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* if this is for my own job family, then do nothing - we -always- route
|
||||
* our own job family through the daemons
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial_update: diff job family routing job %s --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(target->jobid),
|
||||
ORTE_NAME_PRINT(route)));
|
||||
|
||||
/* see if this target is already present - it will have a wildcard vpid,
|
||||
* so we have to look for it with that condition
|
||||
*/
|
||||
if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
rc = opal_hash_table_get_value_uint32(&vpid_wildcard_list,
|
||||
ORTE_JOB_FAMILY(target->jobid),
|
||||
(void**)&route_copy);
|
||||
if (ORTE_SUCCESS == rc && NULL != route_copy) {
|
||||
/* target already present - no need for duplicate entry */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* not there, so add the route FOR THE JOB FAMILY*/
|
||||
route_copy = malloc(sizeof(orte_process_name_t));
|
||||
*route_copy = *route;
|
||||
rc = opal_hash_table_set_value_uint32(&vpid_wildcard_list,
|
||||
ORTE_JOB_FAMILY(target->jobid), route_copy);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* THIS CAME FROM OUR OWN JOB FAMILY... */
|
||||
|
||||
route_copy = malloc(sizeof(orte_process_name_t));
|
||||
*route_copy = *route;
|
||||
/* exact match */
|
||||
@ -236,12 +265,49 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
orte_process_name_t *ret;
|
||||
int rc;
|
||||
|
||||
if (target->jobid == ORTE_JOBID_INVALID ||
|
||||
target->vpid == ORTE_VPID_INVALID) {
|
||||
ret = ORTE_NAME_INVALID;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* if it is me, then the route is just direct */
|
||||
if (OPAL_EQUAL == opal_dss.compare(ORTE_PROC_MY_NAME, target, ORTE_NAME)) {
|
||||
ret = target;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* if I am an application process, always route via my local daemon */
|
||||
if (!orte_process_info.hnp && !orte_process_info.daemon &&
|
||||
!orte_process_info.tool) {
|
||||
ret = ORTE_PROC_MY_DAEMON;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* IF THIS IS FOR A DIFFERENT JOB FAMILY... */
|
||||
if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
/* if I am a daemon, route this via the HNP */
|
||||
if (orte_process_info.daemon) {
|
||||
ret = ORTE_PROC_MY_HNP;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* if I am the HNP or a tool, then I stored a route to
|
||||
* this job family, so look it up
|
||||
*/
|
||||
rc = opal_hash_table_get_value_uint32(&vpid_wildcard_list,
|
||||
ORTE_JOB_FAMILY(target->jobid), (void**)&ret);
|
||||
if (ORTE_SUCCESS == rc) {
|
||||
/* got a good result - return it */
|
||||
goto found;
|
||||
}
|
||||
/* not found - so we have no route */
|
||||
ret = ORTE_NAME_INVALID;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* THIS CAME FROM OUR OWN JOB FAMILY... */
|
||||
|
||||
/* check exact matches */
|
||||
rc = opal_hash_table_get_value_uint64(&peer_list,
|
||||
orte_util_hash_name(target), (void**)&ret);
|
||||
@ -262,7 +328,6 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
ret = &wildcard_route;
|
||||
|
||||
found:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_binomial_get(%s) --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
@ -337,6 +402,31 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* HANDLE ACK MESSAGES FROM AN HNP */
|
||||
static void release_ack(int fd, short event, void *data)
|
||||
{
|
||||
orte_message_event_t *mev = (orte_message_event_t*)data;
|
||||
ack_recvd = true;
|
||||
OBJ_RELEASE(mev);
|
||||
}
|
||||
|
||||
static void recv_ack(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message as it may ask us to do something that involves
|
||||
* more messaging! Instead, setup an event so that the message gets processed
|
||||
* as soon as we leave the recv.
|
||||
*
|
||||
* The macro makes a copy of the buffer, which we release above - the incoming
|
||||
* buffer, however, is NOT released here, although its payload IS transferred
|
||||
* to the message buffer for later processing
|
||||
*/
|
||||
ORTE_MESSAGE_EVENT(sender, buffer, tag, release_ack);
|
||||
}
|
||||
|
||||
|
||||
static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
{
|
||||
/* the binomial module routes all proc communications through
|
||||
@ -483,14 +573,65 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
"%s routed_binomial: init routes w/non-NULL data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* send the buffer to the proper tag on the daemon */
|
||||
/* if this is for my job family, then we send the buffer
|
||||
* to the proper tag on the daemon
|
||||
*/
|
||||
if (ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid) == ORTE_JOB_FAMILY(job)) {
|
||||
/* send the buffer to the proper tag on the daemon */
|
||||
if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_DAEMON, ndat,
|
||||
ORTE_RML_TAG_RML_INFO_UPDATE, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* wait right here until the daemon acks the update to ensure that
|
||||
* any subsequent messaging can succeed
|
||||
*/
|
||||
ack_recvd = false;
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_UPDATE_ROUTE_ACK,
|
||||
ORTE_RML_NON_PERSISTENT, recv_ack, NULL);
|
||||
|
||||
ORTE_PROGRESSED_WAIT(ack_recvd, 0, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial_init_routes: ack recvd",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* we already have defined our routes to everyone to
|
||||
* be through the local daemon, so nothing further to do
|
||||
*/
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* if this is for a different job family, then we route via our HNP
|
||||
* to minimize connection counts to entities such as ompi-server, so
|
||||
* start by sending the contact info to the HNP for update
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial_init_routes: diff job family - sending update to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_HNP)));
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, ndat,
|
||||
ORTE_RML_TAG_RML_INFO_UPDATE, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* we already have defined our routes to everyone to
|
||||
* be through the local daemon, so nothing further to do
|
||||
|
||||
/* wait right here until the HNP acks the update to ensure that
|
||||
* any subsequent messaging can succeed
|
||||
*/
|
||||
ack_recvd = false;
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_UPDATE_ROUTE_ACK,
|
||||
ORTE_RML_NON_PERSISTENT, recv_ack, NULL);
|
||||
|
||||
ORTE_PROGRESSED_WAIT(ack_recvd, 0, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial_init_routes: ack recvd",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* our get_route function automatically routes all messages for
|
||||
* other job families via the HNP, so nothing more to do here
|
||||
*/
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -732,7 +873,6 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job,
|
||||
return my_parent.vpid;
|
||||
}
|
||||
|
||||
|
||||
static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
|
||||
{
|
||||
int rc;
|
||||
|
@ -11,12 +11,12 @@
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "opal/threads/condition.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal_stdint.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
@ -26,16 +26,23 @@
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
|
||||
#include "orte/mca/routed/base/base.h"
|
||||
#include "routed_direct.h"
|
||||
|
||||
static opal_condition_t cond;
|
||||
static opal_mutex_t lock;
|
||||
static opal_hash_table_t peer_list;
|
||||
/* Local static variables */
|
||||
static opal_condition_t cond;
|
||||
static opal_mutex_t lock;
|
||||
static opal_hash_table_t peer_list;
|
||||
static opal_buffer_t *recv_buf=NULL;
|
||||
static bool ack_recvd, msg_recvd;
|
||||
static orte_process_name_t *lifeline=NULL;
|
||||
|
||||
|
||||
/* API functions */
|
||||
static int init(void);
|
||||
static int finalize(void);
|
||||
static int update_route(orte_process_name_t *target,
|
||||
@ -53,8 +60,6 @@ static int warmup_routes(void);
|
||||
static int direct_ft_event(int state);
|
||||
#endif
|
||||
|
||||
static orte_process_name_t *lifeline=NULL;
|
||||
|
||||
orte_routed_module_t orte_routed_direct_module = {
|
||||
init,
|
||||
finalize,
|
||||
@ -91,8 +96,8 @@ static int init(void)
|
||||
static int finalize(void)
|
||||
{
|
||||
int rc;
|
||||
uint64_t key;
|
||||
void * value, *node, *next_node;
|
||||
uint32_t key;
|
||||
void *value, *node, *next_node;
|
||||
|
||||
/* if I am the HNP, I need to stop the comm recv */
|
||||
if (orte_process_info.hnp) {
|
||||
@ -114,15 +119,18 @@ static int finalize(void)
|
||||
* sync as the oob will be asking us how to route
|
||||
* the message!
|
||||
*/
|
||||
rc = opal_hash_table_get_first_key_uint64(&peer_list, &key, &value, &node);
|
||||
rc = opal_hash_table_get_first_key_uint32(&peer_list,
|
||||
&key, &value, &node);
|
||||
while(OPAL_SUCCESS == rc) {
|
||||
if(NULL != value) {
|
||||
free(value);
|
||||
}
|
||||
rc = opal_hash_table_get_next_key_uint64(&peer_list, &key, &value, node, &next_node);
|
||||
rc = opal_hash_table_get_next_key_uint32(&peer_list,
|
||||
&key, &value, node, &next_node);
|
||||
node = next_node;
|
||||
}
|
||||
OBJ_DESTRUCT(&peer_list);
|
||||
|
||||
/* cleanup the global condition */
|
||||
OBJ_DESTRUCT(&cond);
|
||||
OBJ_DESTRUCT(&lock);
|
||||
@ -136,42 +144,54 @@ static int finalize(void)
|
||||
static int update_route(orte_process_name_t *target,
|
||||
orte_process_name_t *route)
|
||||
{
|
||||
orte_process_name_t *route_copy;
|
||||
int rc;
|
||||
orte_process_name_t * route_copy;
|
||||
|
||||
if (target->jobid == ORTE_JOBID_INVALID ||
|
||||
target->vpid == ORTE_VPID_INVALID) {
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* if this is from a different job family, then I need to
|
||||
* track how to send messages to it
|
||||
*/
|
||||
if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
/* this message came from a different job family, so we will update
|
||||
* our local route table so we know how to get there
|
||||
*/
|
||||
|
||||
/* if the route is direct, do nothing - we default to direct routing */
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
target, route)) {
|
||||
goto direct;
|
||||
/* if I am -not- the HNP or a tool, then I will automatically route
|
||||
* anything to this job family via my HNP - so nothing to do
|
||||
* here, just return
|
||||
*/
|
||||
if (!orte_process_info.hnp && !orte_process_info.tool) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_direct_update: diff job family routing %s --> %s",
|
||||
"%s routed_direct_update: diff job family routing job %s --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(target),
|
||||
ORTE_JOBID_PRINT(target->jobid),
|
||||
ORTE_NAME_PRINT(route)));
|
||||
|
||||
|
||||
/* see if this target is already present */
|
||||
rc = opal_hash_table_get_value_uint32(&peer_list,
|
||||
ORTE_JOB_FAMILY(target->jobid),
|
||||
(void**)&route_copy);
|
||||
if (ORTE_SUCCESS == rc && NULL != route_copy) {
|
||||
/* target already present - no need for duplicate entry */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* not there, so add the route FOR THE JOB FAMILY*/
|
||||
route_copy = malloc(sizeof(orte_process_name_t));
|
||||
*route_copy = *route;
|
||||
/* if we are routing everything for this target through one place,
|
||||
* then the target vpid is ORTE_VPID_WILDCARD. So no need for
|
||||
* special cases, just add it
|
||||
*/
|
||||
rc = opal_hash_table_set_value_uint64(&peer_list, orte_util_hash_name(target),
|
||||
route_copy);
|
||||
rc = opal_hash_table_set_value_uint32(&peer_list,
|
||||
ORTE_JOB_FAMILY(target->jobid), route_copy);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
direct:
|
||||
/* if it came from our own job family or was direct, there is nothing to do */
|
||||
/* if it came from our own job family, there is nothing to do */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_direct_update: %s --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
@ -184,30 +204,43 @@ direct:
|
||||
|
||||
static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
{
|
||||
orte_process_name_t *ret, lookup;
|
||||
orte_process_name_t *ret;
|
||||
int rc;
|
||||
|
||||
if (target->jobid == ORTE_JOBID_INVALID ||
|
||||
target->vpid == ORTE_VPID_INVALID) {
|
||||
ret = ORTE_NAME_INVALID;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* if it is me, then the route is just direct */
|
||||
if (OPAL_EQUAL == opal_dss.compare(ORTE_PROC_MY_NAME, target, ORTE_NAME)) {
|
||||
ret = target;
|
||||
goto found;
|
||||
}
|
||||
|
||||
if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
rc = opal_hash_table_get_value_uint64(&peer_list, orte_util_hash_name(target),
|
||||
(void**)&ret);
|
||||
if (ORTE_SUCCESS == rc) {
|
||||
/* got a good result - return it */
|
||||
goto found;
|
||||
}
|
||||
/* check to see if we specified the route to be for all vpids in the job */
|
||||
lookup = *target;
|
||||
lookup.vpid = ORTE_VPID_WILDCARD;
|
||||
rc = opal_hash_table_get_value_uint64(&peer_list, orte_util_hash_name(&lookup),
|
||||
(void**)&ret);
|
||||
/* if I am -not- the HNP or a tool, route this via the HNP */
|
||||
if (!orte_process_info.hnp && !orte_process_info.tool) {
|
||||
ret = ORTE_PROC_MY_HNP;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* if I am the HNP or a tool, then I stored a route to this proc, so look it up */
|
||||
rc = opal_hash_table_get_value_uint32(&peer_list,
|
||||
ORTE_JOB_FAMILY(target->jobid), (void**)&ret);
|
||||
if (ORTE_SUCCESS == rc) {
|
||||
/* got a good result - return it */
|
||||
goto found;
|
||||
}
|
||||
/* not found - so we have no route */
|
||||
ret = ORTE_NAME_INVALID;
|
||||
goto found;
|
||||
} else {
|
||||
/* if it is our own job family, just go direct */
|
||||
ret = target;
|
||||
}
|
||||
|
||||
/* if it is our own job family, or we didn't find it on the list, just go direct */
|
||||
ret = target;
|
||||
|
||||
found:
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
|
||||
"%s routed_direct_get(%s) --> %s",
|
||||
@ -306,6 +339,62 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* HANDLE ACK MESSAGES FROM AN HNP */
|
||||
static void release_ack(int fd, short event, void *data)
|
||||
{
|
||||
orte_message_event_t *mev = (orte_message_event_t*)data;
|
||||
ack_recvd = true;
|
||||
OBJ_RELEASE(mev);
|
||||
}
|
||||
|
||||
static void recv_ack(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message as it may ask us to do something that involves
|
||||
* more messaging! Instead, setup an event so that the message gets processed
|
||||
* as soon as we leave the recv.
|
||||
*
|
||||
* The macro makes a copy of the buffer, which we release above - the incoming
|
||||
* buffer, however, is NOT released here, although its payload IS transferred
|
||||
* to the message buffer for later processing
|
||||
*/
|
||||
ORTE_MESSAGE_EVENT(sender, buffer, tag, release_ack);
|
||||
}
|
||||
|
||||
/* HANDLE PEER CONTACT INFO MESSAGE */
|
||||
static void process_msg(int fd, short event, void *data)
|
||||
{
|
||||
orte_message_event_t *mev = (orte_message_event_t*)data;
|
||||
|
||||
/* copy the data to the recv buffer */
|
||||
opal_dss.copy_payload(recv_buf, mev->buffer);
|
||||
|
||||
/* acknowledge receipt */
|
||||
msg_recvd = true;
|
||||
|
||||
/* cleanup event */
|
||||
OBJ_RELEASE(mev);
|
||||
}
|
||||
|
||||
static void recv_msg(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message as it may ask us to do something that involves
|
||||
* more messaging! Instead, setup an event so that the message gets processed
|
||||
* as soon as we leave the recv.
|
||||
*
|
||||
* The macro makes a copy of the buffer, which we release above - the incoming
|
||||
* buffer, however, is NOT released here, although its payload IS transferred
|
||||
* to the message buffer for later processing
|
||||
*/
|
||||
ORTE_MESSAGE_EVENT(sender, buffer, tag, process_msg);
|
||||
}
|
||||
|
||||
|
||||
static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
{
|
||||
/* the direct module just sends direct to everyone, so it requires
|
||||
@ -421,24 +510,63 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
orte_rml_cmd_flag_t command;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_direct: init routes w/non-NULL data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
"%s routed_direct: init routes to jobid %s w/non-NULL data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
|
||||
/* extract the RML command from the buffer and discard it - this
|
||||
* command is in there for compatibility with other routed
|
||||
* components but is not needed here
|
||||
/* if this is for my job family, then we update my contact info
|
||||
* so I can talk directly to my fellow family members
|
||||
*/
|
||||
cnt=1;
|
||||
opal_dss.unpack(ndata, &command, &cnt, ORTE_RML_CMD);
|
||||
if (ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid) == ORTE_JOB_FAMILY(job)) {
|
||||
/* extract the RML command from the buffer and discard it - this
|
||||
* command is in there for compatibility with other routed
|
||||
* components but is not needed here
|
||||
*/
|
||||
cnt=1;
|
||||
opal_dss.unpack(ndata, &command, &cnt, ORTE_RML_CMD);
|
||||
|
||||
/* Set the contact info in the RML - this won't actually establish
|
||||
* the connection, but just tells the RML how to reach the
|
||||
* target proc(s)
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(ndata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* Set the contact info in the RML - this won't actually establish
|
||||
* the connection, but just tells the RML how to reach the
|
||||
* target proc(s)
|
||||
/* if this is for a different job family, then we route via our HNP
|
||||
* to minimize connection counts to entities such as ompi-server, so
|
||||
* start by sending the contact info to the HNP for update
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(ndata))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_direct_init_routes: diff job family - sending update to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_HNP)));
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, ndata,
|
||||
ORTE_RML_TAG_RML_INFO_UPDATE, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* wait right here until the HNP acks the update to ensure that
|
||||
* any subsequent messaging can succeed
|
||||
*/
|
||||
ack_recvd = false;
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_UPDATE_ROUTE_ACK,
|
||||
ORTE_RML_NON_PERSISTENT, recv_ack, NULL);
|
||||
|
||||
ORTE_PROGRESSED_WAIT(ack_recvd, 0, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_direct_init_routes: ack recvd",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* our get_route function automatically routes all messages for
|
||||
* other job families via the HNP, so nothing more to do here
|
||||
*/
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -447,7 +575,6 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
* case, we need to setup a few critical pieces of info
|
||||
*/
|
||||
int rc;
|
||||
opal_buffer_t buf;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_direct: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
|
||||
@ -539,25 +666,26 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
/* now setup a blocking receive and wait right here until we get
|
||||
* the contact info for all of our peers
|
||||
*/
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
rc = orte_rml.recv_buffer(ORTE_NAME_WILDCARD, &buf, ORTE_RML_TAG_INIT_ROUTES, 0);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return rc;
|
||||
if (NULL != recv_buf) {
|
||||
OBJ_RELEASE(recv_buf);
|
||||
}
|
||||
recv_buf = OBJ_NEW(opal_buffer_t);
|
||||
msg_recvd = false;
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_INIT_ROUTES,
|
||||
ORTE_RML_NON_PERSISTENT, recv_msg, NULL);
|
||||
|
||||
ORTE_PROGRESSED_WAIT(msg_recvd, 0, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_direct_init: peer contact info recvd",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* process it */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(&buf))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(recv_buf))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return rc;
|
||||
}
|
||||
OBJ_DESTRUCT(&buf);
|
||||
OBJ_RELEASE(recv_buf);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -577,6 +705,8 @@ static int warmup_routes(void)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
/* I am an application process. In this case, we
|
||||
* do a semi-intelligent messaging scheme to
|
||||
* force the sockets to be opened
|
||||
@ -641,27 +771,26 @@ static int route_lost(const orte_process_name_t *route)
|
||||
|
||||
static bool route_is_defined(const orte_process_name_t *target)
|
||||
{
|
||||
orte_process_name_t *ret, lookup;
|
||||
orte_process_name_t *ret;
|
||||
int rc;
|
||||
|
||||
if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid))
|
||||
if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
/* we always have a route to our own job */
|
||||
return true;
|
||||
else {
|
||||
rc = opal_hash_table_get_value_uint64(&peer_list, orte_util_hash_name(target),
|
||||
(void**)&ret);
|
||||
if (ORTE_SUCCESS == rc) {
|
||||
return true;
|
||||
}
|
||||
/* check to see if we specified the route to be for all vpids in the job */
|
||||
lookup = *target;
|
||||
lookup.vpid = ORTE_VPID_WILDCARD;
|
||||
rc = opal_hash_table_get_value_uint64(&peer_list, orte_util_hash_name(&lookup),
|
||||
(void**)&ret);
|
||||
if (ORTE_SUCCESS == rc) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* if the job family is different, check the peer list to see if a route
|
||||
* has been defined
|
||||
*/
|
||||
rc = opal_hash_table_get_value_uint32(&peer_list,
|
||||
ORTE_JOB_FAMILY(target->jobid),
|
||||
(void**)&ret);
|
||||
if (ORTE_SUCCESS == rc && NULL != ret) {
|
||||
/* target present - we have a route */
|
||||
return true;
|
||||
}
|
||||
|
||||
/* if we get here, then we don't have a route */
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -685,6 +814,8 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job,
|
||||
opal_list_t *children)
|
||||
{
|
||||
orte_namelist_t *nm;
|
||||
orte_job_t *jdata;
|
||||
orte_vpid_t i, start;
|
||||
|
||||
/* if I am anything other than a daemon or the HNP, this
|
||||
* is a meaningless command as I am not allowed to route
|
||||
@ -706,11 +837,24 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job,
|
||||
* HNP is capable of looking up the vpid range for this job
|
||||
*/
|
||||
if (NULL != children) {
|
||||
nm = OBJ_NEW(orte_namelist_t);
|
||||
nm->name.jobid = job;
|
||||
nm->name.vpid = ORTE_VPID_WILDCARD;
|
||||
opal_list_append(children, &nm->item);
|
||||
if (NULL == (jdata = orte_get_job_data_object(job))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_VPID_INVALID;
|
||||
}
|
||||
/* if this is the daemon job, don't include myself */
|
||||
if (ORTE_PROC_MY_NAME->jobid == job) {
|
||||
start = 1;
|
||||
} else {
|
||||
start = 0;
|
||||
}
|
||||
for (i=start; i < jdata->num_procs; i++) {
|
||||
nm = OBJ_NEW(orte_namelist_t);
|
||||
nm->name.jobid = job;
|
||||
nm->name.vpid = i;
|
||||
opal_list_append(children, &nm->item);
|
||||
}
|
||||
}
|
||||
|
||||
/* the parent of the HNP is invalid */
|
||||
return ORTE_VPID_INVALID;
|
||||
}
|
||||
|
@ -48,8 +48,6 @@ static int warmup_routes(void);
|
||||
static int linear_ft_event(int state);
|
||||
#endif
|
||||
|
||||
static orte_process_name_t *lifeline=NULL;
|
||||
|
||||
orte_routed_module_t orte_routed_linear_module = {
|
||||
init,
|
||||
finalize,
|
||||
@ -75,6 +73,9 @@ static opal_hash_table_t vpid_wildcard_list;
|
||||
static orte_process_name_t wildcard_route;
|
||||
static opal_condition_t cond;
|
||||
static opal_mutex_t lock;
|
||||
static orte_process_name_t *lifeline=NULL;
|
||||
static bool ack_recvd;
|
||||
|
||||
|
||||
|
||||
static int init(void)
|
||||
@ -156,6 +157,14 @@ static int update_route(orte_process_name_t *target,
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* if I am an application process, we don't update the route since
|
||||
* we automatically route everything through the local daemon
|
||||
*/
|
||||
if (!orte_process_info.hnp && !orte_process_info.daemon &&
|
||||
!orte_process_info.tool) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_linear_update: %s --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
@ -163,28 +172,49 @@ static int update_route(orte_process_name_t *target,
|
||||
ORTE_NAME_PRINT(route)));
|
||||
|
||||
|
||||
/* if I am an application process, we don't update the route unless
|
||||
* the conditions dictate it. This is done to avoid creating large
|
||||
* hash tables when they aren't needed
|
||||
/* if this is from a different job family, then I need to
|
||||
* track how to send messages to it
|
||||
*/
|
||||
if (!orte_process_info.hnp && !orte_process_info.daemon &&
|
||||
!orte_process_info.tool) {
|
||||
/* if the route is the daemon, then do nothing - we already route
|
||||
* everything through the daemon anyway
|
||||
if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
|
||||
/* if I am a daemon, then I will automatically route
|
||||
* anything to this job family via my HNP - so nothing to do
|
||||
* here, just return
|
||||
*/
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route,
|
||||
ORTE_PROC_MY_DAEMON)) {
|
||||
if (orte_process_info.daemon) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* if this is for my own job family, then do nothing - we -always- route
|
||||
* our own job family through the daemons
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_linear_update: diff job family routing job %s --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(target->jobid),
|
||||
ORTE_NAME_PRINT(route)));
|
||||
|
||||
/* see if this target is already present - it will have a wildcard vpid,
|
||||
* so we have to look for it with that condition
|
||||
*/
|
||||
if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
rc = opal_hash_table_get_value_uint32(&vpid_wildcard_list,
|
||||
ORTE_JOB_FAMILY(target->jobid),
|
||||
(void**)&route_copy);
|
||||
if (ORTE_SUCCESS == rc && NULL != route_copy) {
|
||||
/* target already present - no need for duplicate entry */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* not there, so add the route FOR THE JOB FAMILY*/
|
||||
route_copy = malloc(sizeof(orte_process_name_t));
|
||||
*route_copy = *route;
|
||||
rc = opal_hash_table_set_value_uint32(&vpid_wildcard_list,
|
||||
ORTE_JOB_FAMILY(target->jobid), route_copy);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* THIS CAME FROM OUR OWN JOB FAMILY... */
|
||||
|
||||
route_copy = malloc(sizeof(orte_process_name_t));
|
||||
*route_copy = *route;
|
||||
/* exact match */
|
||||
@ -225,6 +255,36 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* if I am an application process, always route via my local daemon */
|
||||
if (!orte_process_info.hnp && !orte_process_info.daemon &&
|
||||
!orte_process_info.tool) {
|
||||
ret = ORTE_PROC_MY_DAEMON;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* IF THIS IS FOR A DIFFERENT JOB FAMILY... */
|
||||
if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
/* if I am a daemon, route this via the HNP */
|
||||
if (orte_process_info.daemon) {
|
||||
ret = ORTE_PROC_MY_HNP;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* if I am the HNP or a tool, then I stored a route to
|
||||
* this job family, so look it up
|
||||
*/
|
||||
rc = opal_hash_table_get_value_uint32(&vpid_wildcard_list,
|
||||
ORTE_JOB_FAMILY(target->jobid), (void**)&ret);
|
||||
if (ORTE_SUCCESS == rc) {
|
||||
/* got a good result - return it */
|
||||
goto found;
|
||||
}
|
||||
/* not found - so we have no route */
|
||||
ret = ORTE_NAME_INVALID;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* THIS CAME FROM OUR OWN JOB FAMILY... */
|
||||
/* check exact matches */
|
||||
rc = opal_hash_table_get_value_uint64(&peer_list,
|
||||
orte_util_hash_name(target), (void**)&ret);
|
||||
@ -320,6 +380,31 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* HANDLE ACK MESSAGES FROM AN HNP */
|
||||
static void release_ack(int fd, short event, void *data)
|
||||
{
|
||||
orte_message_event_t *mev = (orte_message_event_t*)data;
|
||||
ack_recvd = true;
|
||||
OBJ_RELEASE(mev);
|
||||
}
|
||||
|
||||
static void recv_ack(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message as it may ask us to do something that involves
|
||||
* more messaging! Instead, setup an event so that the message gets processed
|
||||
* as soon as we leave the recv.
|
||||
*
|
||||
* The macro makes a copy of the buffer, which we release above - the incoming
|
||||
* buffer, however, is NOT released here, although its payload IS transferred
|
||||
* to the message buffer for later processing
|
||||
*/
|
||||
ORTE_MESSAGE_EVENT(sender, buffer, tag, release_ack);
|
||||
}
|
||||
|
||||
|
||||
static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
{
|
||||
/* the linear module routes all proc communications through
|
||||
@ -466,14 +551,65 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
"%s routed_linear: init routes w/non-NULL data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* send the buffer to the proper tag on the daemon */
|
||||
/* if this is for my job family, then we send the buffer
|
||||
* to the proper tag on the daemon
|
||||
*/
|
||||
if (ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid) == ORTE_JOB_FAMILY(job)) {
|
||||
/* send the buffer to the proper tag on the daemon */
|
||||
if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_DAEMON, ndat,
|
||||
ORTE_RML_TAG_RML_INFO_UPDATE, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* wait right here until the daemon acks the update to ensure that
|
||||
* any subsequent messaging can succeed
|
||||
*/
|
||||
ack_recvd = false;
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_UPDATE_ROUTE_ACK,
|
||||
ORTE_RML_NON_PERSISTENT, recv_ack, NULL);
|
||||
|
||||
ORTE_PROGRESSED_WAIT(ack_recvd, 0, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_linear_init_routes: ack recvd",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* we already have defined our routes to everyone to
|
||||
* be through the local daemon, so nothing further to do
|
||||
*/
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* if this is for a different job family, then we route via our HNP
|
||||
* to minimize connection counts to entities such as ompi-server, so
|
||||
* start by sending the contact info to the HNP for update
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_linear_init_routes: diff job family - sending update to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_HNP)));
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, ndat,
|
||||
ORTE_RML_TAG_RML_INFO_UPDATE, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* we already have defined our routes to everyone to
|
||||
* be through the local daemon, so nothing further to do
|
||||
|
||||
/* wait right here until the HNP acks the update to ensure that
|
||||
* any subsequent messaging can succeed
|
||||
*/
|
||||
ack_recvd = false;
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_UPDATE_ROUTE_ACK,
|
||||
ORTE_RML_NON_PERSISTENT, recv_ack, NULL);
|
||||
|
||||
ORTE_PROGRESSED_WAIT(ack_recvd, 0, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_linear_init_routes: ack recvd",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* our get_route function automatically routes all messages for
|
||||
* other job families via the HNP, so nothing more to do here
|
||||
*/
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -639,25 +775,25 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job,
|
||||
return ORTE_VPID_INVALID;
|
||||
}
|
||||
|
||||
/* if I am a daemon, I have no children and my
|
||||
* parent is the HNP
|
||||
/* the linear routing tree consists of a chain of daemons
|
||||
* extending from the HNP to orte_process_info.num_procs-1.
|
||||
* Accordingly, my child is just the my_vpid+1 daemon
|
||||
*/
|
||||
if (orte_process_info.daemon) {
|
||||
return ORTE_PROC_MY_HNP->vpid;
|
||||
}
|
||||
|
||||
/* if we are the HNP, then the linear routing tree
|
||||
* consists of every daemon - indicate that by
|
||||
* adding a proc name of our jobid and a wildcard vpid
|
||||
*/
|
||||
if (NULL != children) {
|
||||
if (NULL != children &&
|
||||
ORTE_PROC_MY_NAME->vpid < orte_process_info.num_procs-1) {
|
||||
nm = OBJ_NEW(orte_namelist_t);
|
||||
nm->name.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
nm->name.vpid = ORTE_VPID_WILDCARD;
|
||||
nm->name.vpid = ORTE_PROC_MY_NAME->vpid + 1;
|
||||
opal_list_append(children, &nm->item);
|
||||
}
|
||||
/* the parent of the HNP is invalid */
|
||||
return ORTE_VPID_INVALID;
|
||||
|
||||
if (orte_process_info.hnp) {
|
||||
/* the parent of the HNP is invalid */
|
||||
return ORTE_VPID_INVALID;
|
||||
}
|
||||
|
||||
/* my parent is the my_vpid-1 daemon */
|
||||
return (ORTE_PROC_MY_NAME->vpid - 1);
|
||||
}
|
||||
|
||||
|
||||
|
@ -290,13 +290,16 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_proc_t);
|
||||
|
||||
|
||||
typedef struct {
|
||||
/* base object */
|
||||
opal_object_t super;
|
||||
/* nodename */
|
||||
char *name;
|
||||
/* vpid of daemon on this node */
|
||||
/* vpid of this job family's daemon on this node */
|
||||
orte_vpid_t daemon;
|
||||
/* arch of node */
|
||||
uint32_t arch;
|
||||
} orte_nid_t;
|
||||
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_nid_t);
|
||||
|
||||
typedef struct {
|
||||
/* index to node */
|
||||
@ -307,6 +310,16 @@ typedef struct {
|
||||
uint8_t node_rank;
|
||||
} orte_pmap_t;
|
||||
|
||||
typedef struct {
|
||||
/* base object */
|
||||
opal_object_t super;
|
||||
/* jobid */
|
||||
orte_jobid_t job;
|
||||
/* array of data for procs */
|
||||
opal_value_array_t pmap;
|
||||
} orte_jmap_t;
|
||||
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_jmap_t);
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
|
||||
/**
|
||||
|
@ -283,6 +283,57 @@ OBJ_CLASS_INSTANCE(orte_proc_t,
|
||||
orte_proc_construct,
|
||||
orte_proc_destruct);
|
||||
|
||||
static void orte_nid_construct(orte_nid_t *ptr)
|
||||
{
|
||||
ptr->name = NULL;
|
||||
ptr->daemon = ORTE_VPID_INVALID;
|
||||
ptr->arch = orte_process_info.arch;
|
||||
}
|
||||
|
||||
static void orte_nid_destruct(orte_nid_t *ptr)
|
||||
{
|
||||
if (NULL != ptr->name) {
|
||||
free(ptr->name);
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_nid_t,
|
||||
opal_object_t,
|
||||
orte_nid_construct,
|
||||
orte_nid_destruct);
|
||||
|
||||
static void orte_pmap_construct(orte_pmap_t *ptr)
|
||||
{
|
||||
ptr->node = -1;
|
||||
ptr->local_rank = 0;
|
||||
ptr->node_rank = 0;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_pmap_t,
|
||||
opal_object_t,
|
||||
orte_pmap_construct,
|
||||
NULL);
|
||||
|
||||
|
||||
static void orte_jmap_construct(orte_jmap_t *ptr)
|
||||
{
|
||||
ptr->job = ORTE_JOBID_INVALID;
|
||||
OBJ_CONSTRUCT(&ptr->pmap, opal_value_array_t);
|
||||
opal_value_array_init(&ptr->pmap, sizeof(orte_pmap_t));
|
||||
}
|
||||
|
||||
static void orte_jmap_destruct(orte_jmap_t *ptr)
|
||||
{
|
||||
OBJ_DESTRUCT(&ptr->pmap);
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_jmap_t,
|
||||
opal_object_t,
|
||||
orte_jmap_construct,
|
||||
orte_jmap_destruct);
|
||||
|
||||
|
||||
|
||||
static void orte_job_map_construct(orte_job_map_t* map)
|
||||
{
|
||||
map->policy = ORTE_RMAPS_BYSLOT; /* default to byslot mapping as per orterun options */
|
||||
|
@ -21,6 +21,7 @@ int main(int argc, char* argv[])
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &size);
|
||||
|
||||
printf("Hello, World, I am %d of %d\n", rank, size);
|
||||
fflush(stdout);
|
||||
|
||||
MPI_Info_create(&info);
|
||||
MPI_Info_set(info, "ompi_global_scope", "true");
|
||||
|
@ -1174,7 +1174,8 @@ static int parse_locals(int argc, char* argv[])
|
||||
* of an ORTE-standard string. Note that this is NOT a standard
|
||||
* uri as it starts with the process name!
|
||||
*/
|
||||
if (0 == strncmp(orterun_globals.ompi_server, "file", strlen("file"))) {
|
||||
if (0 == strncmp(orterun_globals.ompi_server, "file", strlen("file")) ||
|
||||
0 == strncmp(orterun_globals.ompi_server, "FILE", strlen("FILE"))) {
|
||||
char input[1024], *filename;
|
||||
FILE *fp;
|
||||
|
||||
|
@ -61,6 +61,10 @@ ORTE_DECLSPEC char* orte_util_print_vpids(const orte_vpid_t vpid);
|
||||
(((n) >> 16) & 0x0000ffff)
|
||||
|
||||
|
||||
/* a macro for identifying that a proc is a daemon */
|
||||
#define ORTE_PROC_IS_DAEMON(n) \
|
||||
!((n) & 0x0000ffff)
|
||||
|
||||
/* List of names for general use */
|
||||
struct orte_namelist_t {
|
||||
opal_list_item_t item; /**< Allows this item to be placed on a list */
|
||||
|
@ -289,8 +289,21 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo, opal_pointer_array_t *nodes
|
||||
"%s decode:nidmap decoding nodemap",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if there are any entries already in the node array, clear it out */
|
||||
if (0 < nodes->size) {
|
||||
/* unfortunately, the opal function "remove_all" doesn't release
|
||||
* the memory pointed to by the elements in the array, so we need
|
||||
* to release those first
|
||||
*/
|
||||
nd = (orte_nid_t**)nodes->addr;
|
||||
for (i=0; i < nodes->size && NULL != nd[i]; i++) {
|
||||
OBJ_RELEASE(nd[i]);
|
||||
}
|
||||
/* now use the opal function to reset the internal pointers */
|
||||
opal_pointer_array_remove_all(nodes);
|
||||
}
|
||||
|
||||
/* xfer the byte object to a buffer for unpacking */
|
||||
/* load it into a buffer */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
opal_dss.load(&buf, bo->bytes, bo->size);
|
||||
|
||||
@ -302,26 +315,14 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo, opal_pointer_array_t *nodes
|
||||
"%s decode:nidmap decoding %d nodes with %d already loaded",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_nodes, nodes->lowest_free));
|
||||
|
||||
/* is this greater than the number of entries in nodes? if so, then
|
||||
* we will update the node array. if not, then we can return now
|
||||
*/
|
||||
if (num_nodes <= nodes->lowest_free) {
|
||||
/* nothing more to do */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* set the size of the nidmap storage so we minimize
|
||||
* realloc's
|
||||
*/
|
||||
/* set the size of the nidmap storage so we minimize realloc's */
|
||||
opal_pointer_array_set_size(nodes, num_nodes);
|
||||
|
||||
/* create the struct for the HNP's node */
|
||||
node = (orte_nid_t*)malloc(sizeof(orte_nid_t));
|
||||
node->name = NULL;
|
||||
/* default the arch to our arch so that non-hetero
|
||||
node = OBJ_NEW(orte_nid_t);
|
||||
/* the arch defaults to our arch so that non-hetero
|
||||
* case will yield correct behavior
|
||||
*/
|
||||
node->arch = orte_process_info.arch;
|
||||
opal_pointer_array_set_item(nodes, 0, node);
|
||||
|
||||
/* unpack the name of the HNP's node */
|
||||
@ -371,7 +372,7 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo, opal_pointer_array_t *nodes
|
||||
index = 1;
|
||||
while (1) {
|
||||
for (i=lastnode; i != endrange; i += step) {
|
||||
node = (orte_nid_t*)malloc(sizeof(orte_nid_t));
|
||||
node = OBJ_NEW(orte_nid_t);
|
||||
/* allocate space for the nodename */
|
||||
node->name = (char*)malloc(namelen);
|
||||
memset(node->name, 0, namelen);
|
||||
@ -382,11 +383,9 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo, opal_pointer_array_t *nodes
|
||||
loc++;
|
||||
}
|
||||
strncat(node->name, digits, num_digs);
|
||||
node->daemon = ORTE_VPID_INVALID;
|
||||
/* default the arch to our arch so that non-hetero
|
||||
/* the arch defaults to our arch so that non-hetero
|
||||
* case will yield correct behavior
|
||||
*/
|
||||
node->arch = orte_process_info.arch;
|
||||
opal_pointer_array_set_item(nodes, index, node);
|
||||
index++;
|
||||
}
|
||||
@ -395,7 +394,7 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo, opal_pointer_array_t *nodes
|
||||
opal_dss.unpack(&buf, &lastnode, &n, OPAL_INT32);
|
||||
/* if that is -1, then it flags no more ranges */
|
||||
if (-1 == lastnode) {
|
||||
goto vpids;
|
||||
goto process_daemons;
|
||||
}
|
||||
n=1;
|
||||
opal_dss.unpack(&buf, &endrange, &n, OPAL_INT32);
|
||||
@ -410,13 +409,10 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo, opal_pointer_array_t *nodes
|
||||
* unpack the raw nodename
|
||||
*/
|
||||
for (i=1; i < num_nodes; i++) {
|
||||
node = (orte_nid_t*)malloc(sizeof(orte_nid_t));
|
||||
node->name = NULL;
|
||||
node->daemon = ORTE_VPID_INVALID;
|
||||
/* default the arch to our arch so that non-hetero
|
||||
node = OBJ_NEW(orte_nid_t);
|
||||
/* the arch defaults to our arch so that non-hetero
|
||||
* case will yield correct behavior
|
||||
*/
|
||||
node->arch = orte_process_info.arch;
|
||||
opal_pointer_array_set_item(nodes, i, node);
|
||||
|
||||
/* unpack the node's name */
|
||||
@ -425,16 +421,16 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo, opal_pointer_array_t *nodes
|
||||
}
|
||||
}
|
||||
|
||||
vpids:
|
||||
/* unpack the daemon vpids */
|
||||
process_daemons:
|
||||
/* unpack the daemon names */
|
||||
vpids = (orte_vpid_t*)malloc(num_nodes * sizeof(orte_vpid_t));
|
||||
n=num_nodes;
|
||||
opal_dss.unpack(&buf, vpids, &n, ORTE_VPID);
|
||||
nd = (orte_nid_t**)nodes->addr;
|
||||
/* transfer the data to the nidmap, counting the number of
|
||||
* daemons in the system
|
||||
*/
|
||||
num_daemons = 0;
|
||||
nd = (orte_nid_t**)nodes->addr;
|
||||
for (i=0; i < num_nodes; i++) {
|
||||
nd[i]->daemon = vpids[i];
|
||||
if (ORTE_VPID_INVALID != vpids[i]) {
|
||||
@ -560,13 +556,13 @@ int orte_util_encode_pidmap(orte_job_t *jdata, opal_byte_object_t *boptr)
|
||||
|
||||
|
||||
int orte_util_decode_pidmap(opal_byte_object_t *bo, orte_vpid_t *nprocs,
|
||||
orte_pmap_t **procs, int8_t **app_idx,
|
||||
opal_value_array_t *procs, int8_t **app_idx,
|
||||
char ***slot_str)
|
||||
{
|
||||
orte_vpid_t i, num_procs;
|
||||
orte_pmap_t *pmap;
|
||||
orte_pmap_t pmap;
|
||||
int32_t *nodes;
|
||||
int8_t *tmp;
|
||||
int8_t *local_rank, *node_rank, *idx;
|
||||
int8_t flag;
|
||||
char **slots;
|
||||
orte_std_cntr_t n;
|
||||
@ -583,40 +579,39 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo, orte_vpid_t *nprocs,
|
||||
*nprocs = num_procs;
|
||||
|
||||
/* allocate memory for the procs array */
|
||||
pmap = (orte_pmap_t*)malloc(num_procs * sizeof(orte_pmap_t));
|
||||
*procs = pmap;
|
||||
|
||||
opal_value_array_set_size(procs, num_procs);
|
||||
|
||||
/* allocate memory for the node info */
|
||||
nodes = (int32_t*)malloc(num_procs * 4);
|
||||
|
||||
/* unpack it in one shot */
|
||||
n=num_procs;
|
||||
opal_dss.unpack(&buf, nodes, &n, OPAL_INT32);
|
||||
/* store it */
|
||||
for (i=0; i < num_procs; i++) {
|
||||
pmap[i].node = nodes[i];
|
||||
}
|
||||
free(nodes);
|
||||
|
||||
/* allocate memory for local ranks */
|
||||
tmp = (int8_t*)malloc(num_procs);
|
||||
|
||||
local_rank = (int8_t*)malloc(num_procs);
|
||||
/* unpack them in one shot */
|
||||
n=num_procs;
|
||||
opal_dss.unpack(&buf, tmp, &n, OPAL_UINT8);
|
||||
/* store them */
|
||||
for (i=0; i < num_procs; i++) {
|
||||
pmap[i].local_rank = tmp[i];
|
||||
}
|
||||
opal_dss.unpack(&buf, local_rank, &n, OPAL_UINT8);
|
||||
|
||||
/* allocate memory for node ranks */
|
||||
node_rank = (int8_t*)malloc(num_procs);
|
||||
/* unpack node ranks in one shot */
|
||||
n=num_procs;
|
||||
opal_dss.unpack(&buf, tmp, &n, OPAL_UINT8);
|
||||
/* store it */
|
||||
opal_dss.unpack(&buf, node_rank, &n, OPAL_UINT8);
|
||||
|
||||
/* store the data */
|
||||
for (i=0; i < num_procs; i++) {
|
||||
pmap[i].node_rank = tmp[i];
|
||||
pmap.node = nodes[i];
|
||||
pmap.local_rank = local_rank[i];
|
||||
pmap.node_rank = node_rank[i];
|
||||
opal_value_array_set_item(procs, i, &pmap);
|
||||
}
|
||||
|
||||
/* release data */
|
||||
free(nodes);
|
||||
free(local_rank);
|
||||
free(node_rank);
|
||||
|
||||
/* only daemons/HNPs need the rest of the data, so if
|
||||
* we aren't one of those, we are done!
|
||||
*/
|
||||
@ -626,12 +621,14 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo, orte_vpid_t *nprocs,
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* allocate memory for app_idx */
|
||||
idx = (int8_t*)malloc(num_procs);
|
||||
/* unpack app_idx in one shot */
|
||||
n=num_procs;
|
||||
opal_dss.unpack(&buf, tmp, &n, OPAL_INT8);
|
||||
opal_dss.unpack(&buf, idx, &n, OPAL_INT8);
|
||||
/* hand the array back to the caller */
|
||||
*app_idx = tmp;
|
||||
|
||||
*app_idx = idx;
|
||||
|
||||
/* unpack flag to indicate if slot_strings are present */
|
||||
n=1;
|
||||
opal_dss.unpack(&buf, &flag, &n, OPAL_INT8);
|
||||
|
@ -32,6 +32,7 @@
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
#include "opal/class/opal_value_array.h"
|
||||
#include "opal/dss/dss_types.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
@ -48,7 +49,7 @@ ORTE_DECLSPEC int orte_util_decode_nodemap(opal_byte_object_t *boptr, opal_point
|
||||
|
||||
ORTE_DECLSPEC int orte_util_encode_pidmap(orte_job_t *jdata, opal_byte_object_t *boptr);
|
||||
ORTE_DECLSPEC int orte_util_decode_pidmap(opal_byte_object_t *boptr, orte_vpid_t *num_procs,
|
||||
orte_pmap_t **procs, int8_t **app_idx,
|
||||
opal_value_array_t *procs, int8_t **app_idx,
|
||||
char ***slot_str);
|
||||
|
||||
|
||||
|
@ -44,12 +44,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
|
||||
/* .my_hnp_uri = */ NULL,
|
||||
/* .hnp_pid = */ 0,
|
||||
/* .app_num = */ -1,
|
||||
/* .universe_size = */ -1,
|
||||
/* .num_procs = */ 1,
|
||||
/* .local_rank = */ UINT8_MAX,
|
||||
/* .node_rank = */ UINT8_MAX,
|
||||
/* .num_local_procs = */ 0,
|
||||
/* .local_procs = */ NULL,
|
||||
/* .nodename = */ NULL,
|
||||
/* .arch = */ 0,
|
||||
/* .pid = */ 0,
|
||||
@ -122,11 +117,6 @@ int orte_proc_info(void)
|
||||
true, false, -1, &tmp);
|
||||
orte_process_info.app_num = tmp;
|
||||
|
||||
mca_base_param_reg_int_name("orte", "universe_size",
|
||||
"Total number of process slots allocated to this job",
|
||||
true, false, -1, &tmp);
|
||||
orte_process_info.universe_size = tmp;
|
||||
|
||||
/* get the process id */
|
||||
orte_process_info.pid = getpid();
|
||||
|
||||
|
@ -54,12 +54,7 @@ struct orte_proc_info_t {
|
||||
char *my_hnp_uri; /**< Contact info for my hnp */
|
||||
pid_t hnp_pid; /**< hnp pid - used if singleton */
|
||||
orte_std_cntr_t app_num; /**< our index into the app_context array */
|
||||
orte_std_cntr_t universe_size; /**< the size of the universe we are in */
|
||||
orte_vpid_t num_procs; /**< number of processes in this job */
|
||||
uint8_t local_rank; /**< local rank on this node */
|
||||
uint8_t node_rank; /**< local rank on this node */
|
||||
orte_std_cntr_t num_local_procs; /**< total number of procs on this node */
|
||||
char *local_procs; /**< comma-delimited list of local procs */
|
||||
char *nodename; /**< string name for this node */
|
||||
uint32_t arch; /**< arch for this node */
|
||||
pid_t pid; /**< Local process ID for this process */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user