
Merge pull request #2916 from rhc54/topic/sim

Create an alternative mapping method
This commit is contained in:
Ralph Castain 2017-03-08 07:08:51 -08:00 committed by GitHub
parent dc12ae008b 48fc339718
commit 97287f6568
39 changed files with 1076 additions and 783 deletions

View file

@ -8,7 +8,7 @@ enable_heterogeneous=no
enable_picky=yes
enable_debug=yes
enable_shared=yes
enable_static=no
enable_static=yes
enable_memchecker=no
enable_ipv6=no
enable_mpi_fortran=no
@ -18,7 +18,10 @@ enable_cxx_exceptions=no
enable_oshmem=no
enable_mpi_java=no
enable_io_romio=no
enable_builtin_atomics=no
enable_contrib_no_build=libnbc
enable_mca_no_build=btl-tcp,btl-sm,rcache-udreg
enable_mca_direct=pml-ob1
with_memory_manager=no
with_tm=no
with_verbs=no

View file

@ -24,7 +24,7 @@ my $ppn = 1;
my @csvrow;
my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op);
my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1");
my @options = ("", "", "", "--fwd-mpirun-port -mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1", "--fwd-mpirun-port -mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1");
my @starterlist = qw(mpirun orterun srun aprun);
my @starteroptionlist = ("--novm",
"--hnp file:dvm_uri",

View file

@ -58,6 +58,7 @@
#include "orte/mca/plm/base/base.h"
#include "orte/mca/odls/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/base/base.h"
#if OPAL_ENABLE_FT_CR == 1
#include "orte/mca/snapc/base/base.h"
#include "orte/mca/sstore/base/base.h"
@ -116,6 +117,7 @@ int orte_ess_base_orted_setup(char **hosts)
char *param;
hwloc_obj_t obj;
unsigned i, j;
orte_topology_t *t;
opal_list_t transports;
/* my name is set, xfer it to the OPAL layer */
@ -333,13 +335,8 @@ int orte_ess_base_orted_setup(char **hosts)
/* create and store a node object where we are */
node = OBJ_NEW(orte_node_t);
node->name = strdup(orte_process_info.nodename);
node->index = opal_pointer_array_set_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid, node);
/* point our topology to the one detected locally */
node->topology = OBJ_NEW(orte_topology_t);
node->topology->sig = strdup(orte_topo_signature);
node->topology->topo = opal_hwloc_topology;
/* add it to the array of known ones */
opal_pointer_array_add(orte_node_topologies, node->topology);
node->index = ORTE_PROC_MY_NAME->vpid;
opal_pointer_array_set_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid, node);
/* create and store a proc object for us */
proc = OBJ_NEW(orte_proc_t);
@ -496,14 +493,40 @@ int orte_ess_base_orted_setup(char **hosts)
error = "orte_rtc_base_select";
goto error;
}
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rmaps_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_rmaps_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_rmaps_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmaps_base_find_available";
goto error;
}
/* if we are using static ports, then we need to setup
/* if a topology file was given, then the rmaps framework open
* will have reset our topology. Ensure we always get the right
* one by setting our node topology afterwards
*/
t = OBJ_NEW(orte_topology_t);
t->topo = opal_hwloc_topology;
/* generate the signature */
orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology);
t->sig = strdup(orte_topo_signature);
opal_pointer_array_add(orte_node_topologies, t);
node->topology = t;
if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO);
}
/* if we were given the host list, then we need to setup
* the daemon info so the RML can function properly
* without requiring a wireup stage. This must be done
* after we enable_comm as that function determines our
* own port, which we need in order to construct the nidmap
*/
if (orte_static_ports) {
if (NULL != hosts) {
/* extract the node info from the environment and
* build a nidmap from it - this will update the
* routing plan as well

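Note on the block above: the daemon now opens and selects the rmaps framework itself, and only afterwards creates its orte_topology_t, generates the signature, and attaches it to its node object, because (as the comment says) a topology file given to rmaps resets opal_hwloc_topology during the framework open. As a loose, standalone illustration of what a topology signature summarizes — assuming only plain hwloc >= 1.11; the field layout here is invented and far coarser than opal_hwloc_base_get_topo_signature:

    /* sketch_topo_sig.c - crude topology "signature" with plain hwloc.
     * Illustrative only; not the ORTE signature format. */
    #include <stdio.h>
    #include <hwloc.h>

    int main(void)
    {
        hwloc_topology_t topo;
        char sig[64];

        hwloc_topology_init(&topo);   /* create an empty topology object */
        hwloc_topology_load(topo);    /* discover the local hardware     */

        /* summarize packages/cores/PUs into one comparable string */
        snprintf(sig, sizeof(sig), "%dP:%dC:%dT",
                 hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PACKAGE),
                 hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE),
                 hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU));
        printf("topology signature: %s\n", sig);

        hwloc_topology_destroy(topo);
        return 0;
    }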
View file

@ -427,7 +427,8 @@ static int rte_init(void)
/* create and store a node object where we are */
node = OBJ_NEW(orte_node_t);
node->name = strdup(orte_process_info.nodename);
node->index = opal_pointer_array_set_item(orte_node_pool, 0, node);
node->index = ORTE_PROC_MY_NAME->vpid;
opal_pointer_array_set_item(orte_node_pool, 0, node);
/* create and store a proc object for us */
proc = OBJ_NEW(orte_proc_t);

View file

@ -545,6 +545,8 @@ static int pack_xcast(orte_grpcomm_signature_t *sig,
OBJ_DESTRUCT(&data);
}
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
"MSG SIZE: %lu", buffer->bytes_used));
return ORTE_SUCCESS;
}

View file

@ -67,6 +67,8 @@
#include "orte/mca/plm/base/base.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/mca/rmaps/base/base.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/schizo/schizo.h"
#include "orte/mca/state/state.h"
#include "orte/mca/filem/filem.h"
@ -74,6 +76,7 @@
#include "orte/util/context_fns.h"
#include "orte/util/name_fns.h"
#include "orte/util/regex.h"
#include "orte/util/session_dir.h"
#include "orte/util/proc_info.h"
#include "orte/util/nidmap.h"
@ -137,7 +140,7 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
}
/* if we are not using static ports, we need to send the wireup info */
if (!orte_static_ports) {
if (!orte_static_ports && !orte_fwd_mpirun_port) {
/* pack a flag indicating wiring info is provided */
flag = 1;
opal_dss.pack(buffer, &flag, 1, OPAL_INT8);
@ -234,11 +237,11 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
int rc;
orte_std_cntr_t cnt;
orte_job_t *jdata=NULL, *daemons;
int32_t n, j, k;
orte_proc_t *pptr, *dmn;
int32_t n, k, m;
opal_buffer_t *bptr;
orte_app_context_t *app;
orte_node_t *node;
orte_proc_t *pptr, *dmn;
orte_app_context_t *app;
bool newmap = false;
int8_t flag;
@ -246,6 +249,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
"%s odls:constructing child list",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* set a default response */
*job = ORTE_JOBID_INVALID;
/* get the daemon job object */
daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
@ -286,24 +290,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
if (NULL == orte_get_job_data_object(jdata->jobid)) {
/* nope - add it */
opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
/* connect each proc to its node object */
for (j=0; j < jdata->procs->size; j++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) {
continue;
}
if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, pptr->parent))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto REPORT_ERROR;
}
OBJ_RETAIN(dmn->node);
pptr->node = dmn->node;
/* add proc to node - note that num_procs for the
* node was already correctly unpacked, so don't
* increment it here */
OBJ_RETAIN(pptr);
opal_pointer_array_add(dmn->node->procs, pptr);
}
} else {
/* yep - so we can drop this copy */
jdata->jobid = ORTE_JOBID_INVALID;
@ -351,6 +337,19 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
rc = ORTE_ERR_NOT_FOUND;
goto REPORT_ERROR;
}
} else {
opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
}
/* ensure the map object is present */
if (NULL == jdata->map) {
jdata->map = OBJ_NEW(orte_job_map_t);
newmap = true;
}
if (orte_no_vm) {
/* if we are operating novm, then mpirun will have sent us
* the complete array of procs - process it */
for (n=0; n < jdata->procs->size; n++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, n))) {
continue;
@ -359,6 +358,40 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
/* not ready for use yet */
continue;
}
opal_output_verbose(5, orte_odls_base_framework.framework_output,
"%s GETTING DAEMON FOR PROC %s WITH PARENT %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&pptr->name),
ORTE_VPID_PRINT(pptr->parent));
if (ORTE_VPID_INVALID == pptr->parent) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
rc = ORTE_ERR_BAD_PARAM;
goto REPORT_ERROR;
}
/* connect the proc to its node object */
if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, pptr->parent))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto REPORT_ERROR;
}
OBJ_RETAIN(dmn->node);
pptr->node = dmn->node;
/* add proc to node - note that num_procs for the
* node was already correctly unpacked, so don't
* increment it here */
OBJ_RETAIN(pptr);
opal_pointer_array_add(dmn->node->procs, pptr);
/* add the node to the map, if not already there */
if (!ORTE_FLAG_TEST(dmn->node, ORTE_NODE_FLAG_MAPPED)) {
OBJ_RETAIN(dmn->node);
ORTE_FLAG_SET(dmn->node, ORTE_NODE_FLAG_MAPPED);
opal_pointer_array_add(jdata->map->nodes, dmn->node);
if (newmap) {
jdata->map->num_nodes++;
}
}
/* see if it belongs to us */
if (pptr->parent == ORTE_PROC_MY_NAME->vpid) {
/* is this child on our current list of children */
@ -385,15 +418,59 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE);
}
}
goto COMPLETE;
} else {
opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
/* create the map - will already have been done for the novm case */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_map_job(jdata))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* find our local procs */
for (n=0; n < jdata->map->nodes->size; n++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) {
continue;
}
if (node->index != (int)ORTE_PROC_MY_NAME->vpid) {
continue;
}
for (m=0; m < node->procs->size; m++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, m))) {
continue;
}
if (!ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_LOCAL)) {
/* not on the local list */
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s adding proc %s to my local list",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&pptr->name)));
/* keep tabs of the number of local procs */
jdata->num_local_procs++;
/* add this proc to our child list */
OBJ_RETAIN(pptr);
ORTE_FLAG_SET(pptr, ORTE_PROC_FLAG_LOCAL);
opal_pointer_array_add(orte_local_children, pptr);
/* mark that this app_context is being used on this node */
app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx);
ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE);
}
}
}
/* compute and save bindings of local children */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
/* ensure the map object is present */
if (NULL == jdata->map) {
jdata->map = OBJ_NEW(orte_job_map_t);
newmap = true;
/* reset any node map flags we used so the next job will start clean */
for (n=0; n < jdata->map->nodes->size; n++) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) {
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
}
}
/* if we wanted to see the map, now is the time to display it */
if (jdata->map->display_map) {
orte_rmaps_base_display_map(jdata);
}
/* if we have a file map, then we need to load it */
@ -405,89 +482,18 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
}
}
/* check the procs */
for (n=0; n < jdata->procs->size; n++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, n))) {
continue;
}
if (ORTE_PROC_STATE_UNDEF == pptr->state) {
/* not ready for use yet */
continue;
}
opal_output_verbose(5, orte_odls_base_framework.framework_output,
"%s GETTING DAEMON FOR PROC %s WITH PARENT %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&pptr->name),
ORTE_VPID_PRINT(pptr->parent));
if (ORTE_VPID_INVALID == pptr->parent) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
rc = ORTE_ERR_BAD_PARAM;
goto REPORT_ERROR;
}
/* connect the proc to its node object */
if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, pptr->parent))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto REPORT_ERROR;
}
OBJ_RETAIN(dmn->node);
pptr->node = dmn->node;
/* add proc to node - note that num_procs for the
* node was already correctly unpacked, so don't
* increment it here */
OBJ_RETAIN(pptr);
opal_pointer_array_add(dmn->node->procs, pptr);
/* add the node to the map, if not already there */
if (!ORTE_FLAG_TEST(dmn->node, ORTE_NODE_FLAG_MAPPED)) {
OBJ_RETAIN(dmn->node);
ORTE_FLAG_SET(dmn->node, ORTE_NODE_FLAG_MAPPED);
opal_pointer_array_add(jdata->map->nodes, dmn->node);
if (newmap) {
jdata->map->num_nodes++;
}
}
/* see if it belongs to us */
if (pptr->parent == ORTE_PROC_MY_NAME->vpid) {
/* is this child on our current list of children */
if (!ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_LOCAL)) {
/* not on the local list */
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s adding proc %s to my local list",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&pptr->name)));
/* keep tabs of the number of local procs */
jdata->num_local_procs++;
/* add this proc to our child list */
OBJ_RETAIN(pptr);
ORTE_FLAG_SET(pptr, ORTE_PROC_FLAG_LOCAL);
opal_pointer_array_add(orte_local_children, pptr);
}
/* if the job is in restart mode, the child must not barrier when launched */
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
orte_set_attribute(&pptr->attributes, ORTE_PROC_NOBARRIER, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);
}
/* mark that this app_context is being used on this node */
app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx);
ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE);
}
}
/* reset the node map flags we used so the next job will start clean */
for (n=0; n < jdata->map->nodes->size; n++) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) {
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
}
}
COMPLETE:
/* register this job with the PMIx server - need to wait until after we
* have computed the #local_procs before calling the function */
if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata, false))) {
ORTE_ERROR_LOG(rc);
goto REPORT_ERROR;
}
/* to save memory, purge the job map of all procs other than
* our own - for daemons, this will completely release the
* proc structures. For the HNP, the proc structs will
* remain in the orte_job_t array */
return ORTE_SUCCESS;
REPORT_ERROR:
@ -636,6 +642,10 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
orte_odls_base_fork_local_proc_fn_t fork_local = caddy->fork_local;
bool index_argv;
opal_output_verbose(5, orte_odls_base_framework.framework_output,
"%s local:launch",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* establish our baseline working directory - we will be potentially
* bouncing around as we execute various apps, but we will always return
* to this place as our default directory
@ -804,7 +814,6 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) {
continue;
}
/* does this child belong to this app? */
if (j != (int)child->app_idx) {
continue;

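The odls changes above are the core of the alternative mapping method: in the normal (VM) case each daemon now calls orte_rmaps_base_map_job() itself, walks the resulting map, and keeps only the procs sitting on the node whose index equals its own vpid, instead of mpirun packing and sending every proc entry. A minimal standalone sketch of that idea, with invented sizes and a plain round-robin placement rather than any real rmaps module:

    /* sketch_local_map.c - every daemon computes the identical global map,
     * then extracts only its own local procs. All values are hypothetical. */
    #include <stdio.h>

    #define NNODES 4    /* daemons; vpid doubles as node index */
    #define NPROCS 10   /* ranks in the job                    */

    int main(void)
    {
        int my_vpid = 2;            /* pretend we are daemon 2 */
        int node_of[NPROCS];

        /* deterministic round-robin map - identical on every daemon */
        for (int rank = 0; rank < NPROCS; rank++) {
            node_of[rank] = rank % NNODES;
        }

        /* keep only the procs that landed on our node */
        printf("daemon %d hosts ranks:", my_vpid);
        for (int rank = 0; rank < NPROCS; rank++) {
            if (node_of[rank] == my_vpid) {
                printf(" %d", rank);
            }
        }
        printf("\n");
        return 0;
    }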
View file

@ -325,13 +325,12 @@ static int tcp_component_register(void)
} else {
mca_oob_tcp_component.tcp6_static_ports = NULL;
}
if (NULL == mca_oob_tcp_component.tcp_static_ports &&
NULL == mca_oob_tcp_component.tcp6_static_ports) {
orte_static_ports = false;
} else {
#endif // OPAL_ENABLE_IPV6
if (NULL != mca_oob_tcp_component.tcp_static_ports ||
NULL != mca_oob_tcp_component.tcp6_static_ports) {
orte_static_ports = true;
}
#endif // OPAL_ENABLE_IPV6
dyn_port_string = NULL;
(void)mca_base_component_var_register(component, "dynamic_ipv4_ports",

View file

@ -385,7 +385,7 @@ static int create_listen(void)
conn = OBJ_NEW(mca_oob_tcp_listener_t);
conn->sd = sd;
conn->port = ntohs(((struct sockaddr_in*) &inaddr)->sin_port);
if (orte_static_ports && 0 == orte_process_info.my_port) {
if (0 == orte_process_info.my_port) {
/* save the first one */
orte_process_info.my_port = conn->port;
}

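With the orte_static_ports guard removed above, the first port the oob/tcp listener binds is always recorded in orte_process_info.my_port; that is the value --fwd-mpirun-port later hands to the daemons. As a generic reminder of how a bound port is read back from the kernel (plain POSIX sockets; not how oob/tcp actually walks its configured port ranges):

    /* sketch_port.c - bind an ephemeral listener and read back the port
     * the kernel assigned, analogous to saving conn->port above. */
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    int main(void)
    {
        int sd = socket(AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in addr;
        socklen_t len = sizeof(addr);

        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_ANY);
        addr.sin_port = 0;                       /* let the kernel choose */

        if (sd < 0 ||
            bind(sd, (struct sockaddr*)&addr, sizeof(addr)) < 0 ||
            listen(sd, 8) < 0 ||
            getsockname(sd, (struct sockaddr*)&addr, &len) < 0) {
            perror("listener setup");
            return 1;
        }
        printf("listening on port %d\n", ntohs(addr.sin_port));
        close(sd);
        return 0;
    }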
View file

@ -202,6 +202,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
orte_std_cntr_t nnode;
orte_job_t *daemons;
orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
char *ltmp;
/* if we are launching debugger daemons, then just go
* do it - no new daemons will be launched
@ -350,16 +351,13 @@ static void launch_daemons(int fd, short args, void *cbdata)
/* add the daemon command (as specified by user) */
orte_plm_base_setup_orted_cmd(&argc, &argv);
/* if we have static ports, we need to ensure that mpirun is
/* ensure that mpirun is
* on the list. Since alps won't be launching a daemon on it,
* it won't have been placed on the list, so create a new
* version here that includes it */
if (orte_static_ports) {
char *ltmp;
asprintf(&ltmp, "%s,%s", orte_process_info.nodename, nodelist_flat);
free(nodelist_flat);
nodelist_flat = ltmp;
}
asprintf(&ltmp, "%s,%s", orte_process_info.nodename, nodelist_flat);
free(nodelist_flat);
nodelist_flat = ltmp;
/* Add basic orted command line options, including debug flags */
orte_plm_base_orted_append_basic_args(&argc, &argv,

View file

@ -207,7 +207,7 @@ static void files_ready(int status, void *cbdata)
if (ORTE_SUCCESS != status) {
ORTE_FORCED_TERMINATE(status);
} else {
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SYSTEM_PREP);
}
}
@ -395,22 +395,6 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata)
/* convenience */
jdata = caddy->jdata;
/* quick sanity check - is the stdin target within range
* of the job?
*/
if (ORTE_VPID_WILDCARD != jdata->stdin_target &&
ORTE_VPID_INVALID != jdata->stdin_target &&
jdata->num_procs <= jdata->stdin_target) {
/* this request cannot be met */
orte_show_help("help-plm-base.txt", "stdin-target-out-of-range", true,
ORTE_VPID_PRINT(jdata->stdin_target),
ORTE_VPID_PRINT(jdata->num_procs));
orte_never_launched = true;
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
OBJ_RELEASE(caddy);
return;
}
/* If this job is being started by me, then there is nothing
* further we need to do as any user directives (e.g., to tie
* off IO to /dev/null) will have been included in the launch
@ -426,19 +410,6 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata)
* about stdin */
}
#if OPAL_ENABLE_FT_CR == 1
/*
* Notify the Global SnapC component regarding new job (even if it was restarted)
*/
{
int rc;
if( ORTE_SUCCESS != (rc = orte_snapc.setup_job(jdata->jobid) ) ) {
/* Silent Failure :/ JJH */
ORTE_ERROR_LOG(rc);
}
}
#endif
/* if coprocessors were detected, now is the time to
* identify who is attached to what host - this info
* will be shipped to the daemons in the nidmap. Someday,
@ -1457,24 +1428,31 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
opal_argv_append(argc, argv, rml_uri);
free(rml_uri);
/* if we have static ports, pass the node list */
if (orte_static_ports) {
param = NULL;
if (NULL != nodes) {
/* convert the nodes to a regex */
if (ORTE_SUCCESS != (rc = orte_regex_create(nodes, &param))) {
ORTE_ERROR_LOG(rc);
return rc;
}
} else if (NULL != orte_node_regex) {
param = strdup(orte_node_regex);
}
if (NULL != param) {
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(argc, argv, "orte_node_regex");
opal_argv_append(argc, argv, param);
free(param);
/* pass the node list if one was given*/
param = NULL;
if (NULL != nodes) {
/* convert the nodes to a regex */
if (ORTE_SUCCESS != (rc = orte_regex_create(nodes, &param))) {
ORTE_ERROR_LOG(rc);
return rc;
}
} else if (NULL != orte_node_regex) {
param = strdup(orte_node_regex);
}
if (NULL != param) {
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(argc, argv, "orte_node_regex");
opal_argv_append(argc, argv, param);
free(param);
}
/* if requested, pass our port */
if (orte_fwd_mpirun_port) {
asprintf(&param, "%d", orte_process_info.my_port);
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(argc, argv, "oob_tcp_static_ipv4_ports");
opal_argv_append(argc, argv, param);
free(param);
}
/* if output-filename was specified, pass that along */

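The new block above is how the port actually gets forwarded: when orte_fwd_mpirun_port is set, mpirun appends its own listening port to every orted command line as the oob_tcp_static_ipv4_ports MCA parameter (OPAL_MCA_CMD_LINE_ID normally expands to the familiar mca prefix), so all daemons end up listening on the same, known port. A standalone sketch of that argv construction using plain libc instead of opal_argv_append(); the helper below is hypothetical:

    /* sketch_fwd_port.c - build "-mca oob_tcp_static_ipv4_ports <port>"
     * style arguments for a daemon command line. Illustrative only. */
    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static void argv_append(char ***argv, int *argc, const char *arg)
    {
        *argv = realloc(*argv, (*argc + 2) * sizeof(char *));
        (*argv)[(*argc)++] = strdup(arg);
        (*argv)[*argc] = NULL;
    }

    int main(void)
    {
        char **argv = NULL, *param = NULL;
        int argc = 0;
        int my_port = 34567;     /* stand-in for orte_process_info.my_port */

        argv_append(&argv, &argc, "orted");
        asprintf(&param, "%d", my_port);
        argv_append(&argv, &argc, "-mca");
        argv_append(&argv, &argc, "oob_tcp_static_ipv4_ports");
        argv_append(&argv, &argc, param);
        free(param);

        for (int i = 0; i < argc; i++) {
            printf("%s ", argv[i]);
        }
        printf("\n");
        return 0;
    }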
View file

@ -341,6 +341,7 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender,
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, vpid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
goto CLEANUP;
}
/* NEVER update the proc state before activating the state machine - let
* the state cbfunc update it as it may need to compare this
@ -374,14 +375,14 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender,
count=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &job, &count, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
goto DEPART;
goto CLEANUP;
}
name.jobid = job;
/* get the job object */
if (NULL == (jdata = orte_get_job_data_object(job))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto DEPART;
goto CLEANUP;
}
count=1;
while (ORTE_SUCCESS == opal_dss.unpack(buffer, &vpid, &count, ORTE_VPID)) {
@ -397,12 +398,7 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender,
break;
}
CLEANUP:
if (ORTE_SUCCESS != rc) {
goto DEPART;
}
DEPART:
CLEANUP:
/* see if an error occurred - if so, wakeup the HNP so we can exit */
if (ORTE_PROC_IS_HNP && ORTE_SUCCESS != rc) {
jdata = NULL;

View file

@ -1191,25 +1191,20 @@ static void launch_daemons(int fd, short args, void *cbdata)
orte_routed.get_routing_list(rtmod, &coll);
}
if (orte_static_ports) {
/* create a list of all nodes involved so we can pass it along */
char **nodelist = NULL;
orte_node_t *n2;
for (nnode=0; nnode < map->nodes->size; nnode++) {
if (NULL != (n2 = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) {
opal_argv_append_nosize(&nodelist, n2->name);
}
/* create a list of all nodes involved so we can pass it along */
char **nodelist = NULL;
orte_node_t *n2;
for (nnode=0; nnode < map->nodes->size; nnode++) {
if (NULL != (n2 = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) {
opal_argv_append_nosize(&nodelist, n2->name);
}
/* we need mpirun to be the first node on this list */
if (NULL == nodelist ||
0 != strcmp(nodelist[0], orte_process_info.nodename)) {
opal_argv_prepend_nosize(&nodelist, orte_process_info.nodename);
}
nlistflat = opal_argv_join(nodelist, ',');
opal_argv_free(nodelist);
} else {
nlistflat = NULL;
}
/* we need mpirun to be the first node on this list */
if (0 != strcmp(nodelist[0], orte_process_info.nodename)) {
opal_argv_prepend_nosize(&nodelist, orte_process_info.nodename);
}
nlistflat = opal_argv_join(nodelist, ',');
opal_argv_free(nodelist);
/* setup the launch */
if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, node->name, &node_name_index1,

View file

@ -118,7 +118,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
/* check for topology */
if (use_local_topology) {
/* use our topology */
topo = opal_hwloc_topology;
t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0);
} else if (NULL != files) {
if (0 != hwloc_topology_init(&topo)) {
orte_show_help("help-ras-simulator.txt",
@ -257,17 +257,17 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
if (NULL == max_slot_cnt || NULL == max_slot_cnt[n]) {
node->slots_max = 0;
} else {
obj = hwloc_get_root_obj(topo);
node->slots_max = opal_hwloc_base_get_npus(topo, obj);
obj = hwloc_get_root_obj(t->topo);
node->slots_max = opal_hwloc_base_get_npus(t->topo, obj);
}
if (NULL == slot_cnt || NULL == slot_cnt[n]) {
node->slots = 0;
} else {
obj = hwloc_get_root_obj(topo);
node->slots = opal_hwloc_base_get_npus(topo, obj);
obj = hwloc_get_root_obj(t->topo);
node->slots = opal_hwloc_base_get_npus(t->topo, obj);
}
node->topology = OBJ_NEW(orte_topology_t);
node->topology->topo = topo;
OBJ_RETAIN(t);
node->topology = t;
opal_output_verbose(1, orte_ras_base_framework.framework_output,
"Created Node <%10s> [%3d : %3d]",
node->name, node->slots, node->slots_max);

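In the simulator change above, every fabricated node now retains and points at the single topology already stored in orte_node_topologies rather than wrapping opal_hwloc_topology in a fresh orte_topology_t per node. A minimal hand-rolled sketch of that shared-object, retain/release pattern (not the OPAL class system):

    /* sketch_refcount.c - several node records share one topology object
     * via reference counting, echoing OBJ_RETAIN(t) above. */
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { int refcount; const char *sig; } topology_t;
    typedef struct { topology_t *topo; } node_t;

    static topology_t *topo_new(const char *sig)
    {
        topology_t *t = malloc(sizeof(*t));
        t->refcount = 1;
        t->sig = sig;
        return t;
    }
    static void topo_retain(topology_t *t) { t->refcount++; }
    static void topo_release(topology_t *t)
    {
        if (0 == --t->refcount) {
            printf("freeing topology %s\n", t->sig);
            free(t);
        }
    }

    int main(void)
    {
        topology_t *shared = topo_new("2P:16C:32T");
        node_t nodes[3];

        for (int i = 0; i < 3; i++) {   /* each node shares the instance */
            topo_retain(shared);
            nodes[i].topo = shared;
        }
        for (int i = 0; i < 3; i++) {
            topo_release(nodes[i].topo);
        }
        topo_release(shared);           /* drop the creator's reference */
        return 0;
    }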
View file

@ -12,7 +12,7 @@
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -99,7 +99,7 @@ OBJ_CLASS_DECLARATION(orte_rmaps_base_selected_module_t);
/*
* Map a job
*/
ORTE_DECLSPEC void orte_rmaps_base_map_job(int fd, short args, void *cbdata);
ORTE_DECLSPEC int orte_rmaps_base_map_job(orte_job_t *jdata);
/**
* Utility routines to get/set vpid mapping for the job
@ -126,6 +126,8 @@ ORTE_DECLSPEC int orte_rmaps_base_set_ranking_policy(orte_ranking_policy_t *poli
orte_mapping_policy_t mapping,
char *spec);
ORTE_DECLSPEC void orte_rmaps_base_display_map(orte_job_t *jdata);
END_C_DECLS
#endif

View file

@ -414,6 +414,9 @@ static int bind_in_place(orte_job_t *jdata,
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
continue;
}
if (!orte_no_vm && (int)ORTE_PROC_MY_NAME->vpid != node->index) {
continue;
}
if (!orte_do_not_launch) {
/* if we don't want to launch, then we are just testing the system,
* so ignore questions about support capabilities
@ -606,6 +609,9 @@ static int bind_to_cpuset(orte_job_t *jdata)
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
continue;
}
if (!orte_no_vm && (int)ORTE_PROC_MY_NAME->vpid != node->index) {
continue;
}
if (!orte_do_not_launch) {
/* if we don't want to launch, then we are just testing the system,
* so ignore questions about support capabilities
@ -835,13 +841,16 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
* basis because different nodes could potentially have different
* topologies, with different relative depths for the two levels
*/
execute:
execute:
/* initialize */
for (i=0; i < jdata->map->nodes->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
continue;
}
if (!orte_no_vm && (int)ORTE_PROC_MY_NAME->vpid != i) {
continue;
}
if (!orte_do_not_launch) {
/* if we don't want to launch, then we are just testing the system,
* so ignore questions about support capabilities

View file

@ -42,31 +42,18 @@
#include "orte/mca/rmaps/base/rmaps_private.h"
/*
* Function for selecting one component from all those that are
* available.
*/
void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
int orte_rmaps_base_map_job(orte_job_t *jdata)
{
orte_job_t *jdata;
orte_node_t *node;
int rc, i, ppx;
bool did_map, given, pernode;
orte_rmaps_base_selected_module_t *mod;
orte_job_t *parent;
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
orte_vpid_t nprocs;
orte_app_context_t *app;
/* convenience */
jdata = caddy->jdata;
jdata->state = ORTE_JOB_STATE_MAP;
/* NOTE: NO PROXY COMPONENT REQUIRED - REMOTE PROCS ARE NOT
* ALLOWED TO CALL RMAPS INDEPENDENTLY. ONLY THE PLM CAN
* DO SO, AND ALL PLM COMMANDS ARE RELAYED TO HNP
*/
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
@ -128,10 +115,8 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING))) {
/* inform the user of the error */
orte_show_help("help-orte-rmaps-base.txt", "num-procs-not-specified", true);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
OBJ_RELEASE(caddy);
OPAL_LIST_DESTRUCT(&nodes);
return;
return ORTE_ERR_BAD_PARAM;
}
}
nprocs += slots;
@ -350,9 +335,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
int i;
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
OBJ_RELEASE(caddy);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
return;
return ORTE_ERR_NOT_FOUND;
}
t0 = node->topology;
for (i=1; i < orte_node_pool->size; i++) {
@ -385,9 +368,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
*/
if (ORTE_ERR_TAKE_NEXT_OPTION != rc) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
OBJ_RELEASE(caddy);
return;
return rc;
}
}
if (did_map && ORTE_ERR_RESOURCE_BUSY == rc) {
@ -395,9 +376,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
* for launch as all the resources were busy
*/
orte_show_help("help-orte-rmaps-base.txt", "cannot-launch", true);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_CANNOT_LAUNCH);
OBJ_RELEASE(caddy);
return;
return rc;
}
/* if we get here without doing the map, or with zero procs in
@ -407,9 +386,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
orte_show_help("help-orte-rmaps-base.txt", "failed-map", true,
did_map ? "mapped" : "unmapped",
jdata->num_procs, jdata->map->num_nodes);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
OBJ_RELEASE(caddy);
return;
return ORTE_ERR_INVALID_NUM_PROCS;
}
/* if any node is oversubscribed, then check to see if a binding
@ -425,17 +402,15 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
/* compute and save local ranks */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
OBJ_RELEASE(caddy);
return;
return rc;
}
/* compute and save bindings */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
OBJ_RELEASE(caddy);
return;
if (orte_no_vm) {
/* compute and save bindings */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
/* set the offset so shared memory components can potentially
@ -452,104 +427,100 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
}
}
/* if we wanted to display the map, now is the time to do it - ignore
* daemon job
*/
if (jdata->map->display_map) {
char *output=NULL;
int i, j, cnt;
orte_node_t *node;
orte_proc_t *proc;
char tmp1[1024];
hwloc_obj_t bd=NULL;;
opal_hwloc_locality_t locality;
orte_proc_t *p0;
char *p0bitmap, *procbitmap;
return ORTE_SUCCESS;
}
if (orte_display_diffable_output) {
/* intended solely to test mapping methods, this output
* can become quite long when testing at scale. Rather
* than enduring all the malloc/free's required to
* create an arbitrary-length string, custom-generate
* the output a line at a time here
*/
/* display just the procs in a diffable format */
opal_output(orte_clean_output, "<map>\n");
fflush(stderr);
/* loop through nodes */
cnt = 0;
for (i=0; i < jdata->map->nodes->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
continue;
}
opal_output(orte_clean_output, "\t<host num=%d>", cnt);
fflush(stderr);
cnt++;
for (j=0; j < node->procs->size; j++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
continue;
}
memset(tmp1, 0, 1024);
orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND, (void**)&bd, OPAL_PTR);
if (NULL == bd) {
(void)strncpy(tmp1, "UNBOUND", strlen("UNBOUND"));
} else {
if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(tmp1, sizeof(tmp1), node->topology->topo, bd->cpuset)) {
(void)strncpy(tmp1, "UNBOUND", strlen("UNBOUND"));
}
}
opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu binding=%s>",
ORTE_VPID_PRINT(proc->name.vpid), (long)proc->app_idx,
(unsigned long)proc->local_rank,
(unsigned long)proc->node_rank, tmp1);
}
opal_output(orte_clean_output, "\t</host>");
fflush(stderr);
void orte_rmaps_base_display_map(orte_job_t *jdata)
{
/* ignore daemon job */
char *output=NULL;
int i, j, cnt;
orte_node_t *node;
orte_proc_t *proc;
char tmp1[1024];
hwloc_obj_t bd=NULL;;
opal_hwloc_locality_t locality;
orte_proc_t *p0;
char *p0bitmap, *procbitmap;
if (orte_display_diffable_output) {
/* intended solely to test mapping methods, this output
* can become quite long when testing at scale. Rather
* than enduring all the malloc/free's required to
* create an arbitrary-length string, custom-generate
* the output a line at a time here
*/
/* display just the procs in a diffable format */
opal_output(orte_clean_output, "<map>\n");
fflush(stderr);
/* loop through nodes */
cnt = 0;
for (i=0; i < jdata->map->nodes->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
continue;
}
/* test locality - for the first node, print the locality of each proc relative to the first one */
node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, 0);
p0 = (orte_proc_t*)opal_pointer_array_get_item(node->procs, 0);
p0bitmap = NULL;
orte_get_attribute(&p0->attributes, ORTE_PROC_CPU_BITMAP, (void**)&p0bitmap, OPAL_STRING);
opal_output(orte_clean_output, "\t<locality>");
for (j=1; j < node->procs->size; j++) {
opal_output(orte_clean_output, "\t<host num=%d>", cnt);
fflush(stderr);
cnt++;
for (j=0; j < node->procs->size; j++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
continue;
}
procbitmap = NULL;
orte_get_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, (void**)&procbitmap, OPAL_STRING);
locality = opal_hwloc_base_get_relative_locality(node->topology->topo,
p0bitmap,
procbitmap);
opal_output(orte_clean_output, "\t\t<rank=%s rank=%s locality=%s>",
ORTE_VPID_PRINT(p0->name.vpid),
ORTE_VPID_PRINT(proc->name.vpid),
opal_hwloc_base_print_locality(locality));
memset(tmp1, 0, 1024);
orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND, (void**)&bd, OPAL_PTR);
if (NULL == bd) {
(void)strncpy(tmp1, "UNBOUND", strlen("UNBOUND"));
} else {
if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(tmp1, sizeof(tmp1), node->topology->topo, bd->cpuset)) {
(void)strncpy(tmp1, "UNBOUND", strlen("UNBOUND"));
}
}
opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu binding=%s>",
ORTE_VPID_PRINT(proc->name.vpid), (long)proc->app_idx,
(unsigned long)proc->local_rank,
(unsigned long)proc->node_rank, tmp1);
}
opal_output(orte_clean_output, "\t</locality>\n</map>");
opal_output(orte_clean_output, "\t</host>");
fflush(stderr);
if (NULL != p0bitmap) {
free(p0bitmap);
}
if (NULL != procbitmap) {
free(procbitmap);
}
} else {
opal_output(orte_clean_output, " Data for JOB %s offset %s", ORTE_JOBID_PRINT(jdata->jobid), ORTE_VPID_PRINT(jdata->offset));
opal_dss.print(&output, NULL, jdata->map, ORTE_JOB_MAP);
if (orte_xml_output) {
fprintf(orte_xml_fp, "%s\n", output);
fflush(orte_xml_fp);
} else {
opal_output(orte_clean_output, "%s", output);
}
free(output);
}
}
/* set the job state to the next position */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE);
/* cleanup */
OBJ_RELEASE(caddy);
/* test locality - for the first node, print the locality of each proc relative to the first one */
node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, 0);
p0 = (orte_proc_t*)opal_pointer_array_get_item(node->procs, 0);
p0bitmap = NULL;
orte_get_attribute(&p0->attributes, ORTE_PROC_CPU_BITMAP, (void**)&p0bitmap, OPAL_STRING);
opal_output(orte_clean_output, "\t<locality>");
for (j=1; j < node->procs->size; j++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
continue;
}
procbitmap = NULL;
orte_get_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, (void**)&procbitmap, OPAL_STRING);
locality = opal_hwloc_base_get_relative_locality(node->topology->topo,
p0bitmap,
procbitmap);
opal_output(orte_clean_output, "\t\t<rank=%s rank=%s locality=%s>",
ORTE_VPID_PRINT(p0->name.vpid),
ORTE_VPID_PRINT(proc->name.vpid),
opal_hwloc_base_print_locality(locality));
}
opal_output(orte_clean_output, "\t</locality>\n</map>");
fflush(stderr);
if (NULL != p0bitmap) {
free(p0bitmap);
}
if (NULL != procbitmap) {
free(procbitmap);
}
} else {
opal_output(orte_clean_output, " Data for JOB %s offset %s", ORTE_JOBID_PRINT(jdata->jobid), ORTE_VPID_PRINT(jdata->offset));
opal_dss.print(&output, NULL, jdata->map, ORTE_JOB_MAP);
if (orte_xml_output) {
fprintf(orte_xml_fp, "%s\n", output);
fflush(orte_xml_fp);
} else {
opal_output(orte_clean_output, "%s", output);
}
free(output);
}
}

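The rewrite above changes orte_rmaps_base_map_job() from a state-machine event callback into a synchronous function that returns a status code, with the display logic split out into orte_rmaps_base_display_map(); this is what lets the odls code on each daemon call the mapper directly. In this PR the MAP/MAP_COMPLETE states are simply removed from the launch sequences; the generic sketch below (dummy types, nothing from ORTE) only contrasts the two calling conventions:

    /* sketch_sync_vs_event.c - a synchronous core function plus a thin
     * event-style wrapper. All names here are invented. */
    #include <stdio.h>

    typedef struct { const char *name; } job_t;

    /* synchronous core: report errors to the caller, fire no events */
    static int map_job(job_t *jdata)
    {
        if (NULL == jdata) {
            return -1;
        }
        printf("mapping job %s\n", jdata->name);
        return 0;
    }

    /* wrapper for callers still driven by a state machine */
    static void map_job_event(int fd, short args, void *cbdata)
    {
        job_t *jdata = (job_t *)cbdata;
        (void)fd; (void)args;
        if (0 != map_job(jdata)) {
            printf("activate state: MAP_FAILED\n");
            return;
        }
        printf("activate state: MAP_COMPLETE\n");
    }

    int main(void)
    {
        job_t job = { "job-1" };
        map_job_event(-1, 0, &job);            /* old calling convention */
        return (0 == map_job(&job)) ? 0 : 1;   /* new direct call        */
    }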
View file

@ -12,6 +12,7 @@
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -61,7 +62,7 @@ BEGIN_C_DECLS
/* mapping event - the event one activates to schedule mapping
* of procs to nodes for pending jobs
*/
*/
ORTE_DECLSPEC extern opal_event_t orte_mapping_event;
/**
@ -71,16 +72,16 @@ ORTE_DECLSPEC extern opal_event_t orte_mapping_event;
typedef int (*orte_rmaps_base_module_map_fn_t)(orte_job_t *jdata);
/*
* rmaps module version 1.3.0
* rmaps module version 3.0.0
*/
struct orte_rmaps_base_module_1_3_0_t {
struct orte_rmaps_base_module_3_0_0_t {
/** Mapping function pointer */
orte_rmaps_base_module_map_fn_t map_job;
orte_rmaps_base_module_map_fn_t map_job;
};
/** Convenience typedef */
typedef struct orte_rmaps_base_module_1_3_0_t orte_rmaps_base_module_1_3_0_t;
typedef struct orte_rmaps_base_module_3_0_0_t orte_rmaps_base_module_3_0_0_t;
/** Convenience typedef */
typedef orte_rmaps_base_module_1_3_0_t orte_rmaps_base_module_t;
typedef orte_rmaps_base_module_3_0_0_t orte_rmaps_base_module_t;
/*
@ -88,18 +89,18 @@ typedef orte_rmaps_base_module_1_3_0_t orte_rmaps_base_module_t;
*/
/**
* rmaps component version 1.3.0
* rmaps component version 3.0.0
*/
struct orte_rmaps_base_component_2_0_0_t {
struct orte_rmaps_base_component_3_0_0_t {
/** Base MCA structure */
mca_base_component_t base_version;
/** Base MCA data */
mca_base_component_data_t base_data;
};
/** Convenience typedef */
typedef struct orte_rmaps_base_component_2_0_0_t orte_rmaps_base_component_2_0_0_t;
typedef struct orte_rmaps_base_component_3_0_0_t orte_rmaps_base_component_3_0_0_t;
/** Convenience typedef */
typedef orte_rmaps_base_component_2_0_0_t orte_rmaps_base_component_t;
typedef orte_rmaps_base_component_3_0_0_t orte_rmaps_base_component_t;
END_C_DECLS

View file

@ -10,6 +10,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2017 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -20,8 +21,10 @@
#
[orte-rmaps-rr:alloc-error]
There are not enough slots available in the system to satisfy the %d slots
that were requested by the application:
%s
that were requested:
application: %s
host: %s
Either request fewer slots for your application, or make more slots available
for use.

View file

@ -59,7 +59,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
if (num_slots < (int)app->num_procs) {
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
true, app->num_procs, app->app);
true, app->num_procs, app->app, orte_process_info.nodename);
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
return ORTE_ERR_SILENT;
}
@ -192,13 +192,13 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
* via hostfile/dash-host */
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
true, app->num_procs, app->app);
true, app->num_procs, app->app, orte_process_info.nodename);
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
return ORTE_ERR_SILENT;
} else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
/* if we were explicitly told not to oversubscribe, then don't */
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
true, app->num_procs, app->app);
true, app->num_procs, app->app, orte_process_info.nodename);
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
return ORTE_ERR_SILENT;
}
@ -237,7 +237,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
if (num_slots < (int)app->num_procs) {
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
true, app->num_procs, app->app);
true, app->num_procs, app->app, orte_process_info.nodename);
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
return ORTE_ERR_SILENT;
}
@ -377,13 +377,13 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
* via hostfile/dash-host */
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
true, app->num_procs, app->app);
true, app->num_procs, app->app, orte_process_info.nodename);
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
return ORTE_ERR_SILENT;
} else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
/* if we were explicitly told not to oversubscribe, then don't */
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
true, app->num_procs, app->app);
true, app->num_procs, app->app, orte_process_info.nodename);
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
return ORTE_ERR_SILENT;
}
@ -491,7 +491,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
if (num_slots < app->num_procs) {
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
true, app->num_procs, app->app);
true, app->num_procs, app->app, orte_process_info.nodename);
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
return ORTE_ERR_SILENT;
}
@ -599,13 +599,13 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
* via hostfile/dash-host */
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
true, app->num_procs, app->app);
true, app->num_procs, app->app, orte_process_info.nodename);
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
return ORTE_ERR_SILENT;
} else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
/* if we were explicitly told not to oversubscribe, then don't */
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
true, app->num_procs, app->app);
true, app->num_procs, app->app, orte_process_info.nodename);
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
return ORTE_ERR_SILENT;
}
@ -652,7 +652,7 @@ static int byobj_span(orte_job_t *jdata,
if (num_slots < (int)app->num_procs) {
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
true, app->num_procs, app->app);
true, app->num_procs, app->app, orte_process_info.nodename);
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
return ORTE_ERR_SILENT;
}

View file

@ -168,6 +168,12 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata)
OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes",
ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len));
/* if this message is just to warmup the connection, then drop it */
if (ORTE_RML_TAG_WARMUP_CONNECTION == msg->tag) {
OBJ_RELEASE(msg);
return;
}
/* see if we have a waiting recv for this message */
OPAL_LIST_FOREACH(post, &orte_rml_base.posted_recvs, orte_rml_posted_recv_t) {
/* since names could include wildcards, must use

View file

@ -172,6 +172,9 @@ BEGIN_C_DECLS
/* topology report */
#define ORTE_RML_TAG_TOPOLOGY_REPORT 62
/* warmup connection - simply establishes the connection */
#define ORTE_RML_TAG_WARMUP_CONNECTION 63
#define ORTE_RML_TAG_MAX 100

View file

@ -428,7 +428,7 @@ static opal_cmd_line_init_t cmd_line_init[] = {
&orte_cmd_options.disable_recovery, OPAL_CMD_LINE_TYPE_BOOL,
"Disable recovery (resets all recovery options to off)" },
{ "state_novm_select", '\0', "novm", "novm", 0,
{ "orte_no_vm", '\0', "novm", "novm", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Execute without creating an allocation-spanning virtual machine (only start daemons on nodes hosting application procs)" },
@ -449,6 +449,11 @@ static opal_cmd_line_init_t cmd_line_init[] = {
&orte_cmd_options.terminate_dvm, OPAL_CMD_LINE_TYPE_BOOL,
"Terminate the DVM" },
/* fwd mpirun port */
{ "orte_fwd_mpirun_port", '\0', "fwd-mpirun-port", "fwd-mpirun-port", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Forward mpirun port to compute node daemons so all will use it" },
/* End of list */
{ NULL, '\0', NULL, NULL, 0,
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }

View file

@ -80,8 +80,6 @@ static orte_job_state_t launch_states[] = {
ORTE_JOB_STATE_DAEMONS_LAUNCHED,
ORTE_JOB_STATE_DAEMONS_REPORTED,
ORTE_JOB_STATE_VM_READY,
ORTE_JOB_STATE_MAP,
ORTE_JOB_STATE_MAP_COMPLETE,
ORTE_JOB_STATE_SYSTEM_PREP,
ORTE_JOB_STATE_LAUNCH_APPS,
ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE,
@ -100,8 +98,6 @@ static orte_state_cbfunc_t launch_callbacks[] = {
orte_plm_base_daemons_launched,
orte_plm_base_daemons_reported,
vm_ready,
orte_rmaps_base_map_job,
orte_plm_base_mapping_complete,
orte_plm_base_complete_setup,
orte_plm_base_launch_apps,
orte_state_base_local_launch_complete,
@ -213,8 +209,9 @@ static void files_ready(int status, void *cbdata)
if (ORTE_SUCCESS != status) {
ORTE_FORCED_TERMINATE(status);
return;
} else {
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SYSTEM_PREP);
}
}
@ -259,7 +256,7 @@ static void vm_ready(int fd, short args, void *cbdata)
OBJ_RELEASE(buf);
return;
}
/* flag that daemons were launchd so we will update the nidmap */
/* flag that daemons were launched so we will update the nidmap */
flag = 1;
opal_dss.pack(buf, &flag, 1, OPAL_INT8);
/* construct a nodemap with everything in it */
@ -269,33 +266,38 @@ static void vm_ready(int fd, short args, void *cbdata)
return;
}
/* pack a flag indicating wiring info is provided */
flag = 1;
opal_dss.pack(buf, &flag, 1, OPAL_INT8);
/* get wireup info for daemons per the selected routing module */
wireup = OBJ_NEW(opal_buffer_t);
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, wireup))) {
ORTE_ERROR_LOG(rc);
if (!orte_static_ports && !orte_fwd_mpirun_port) {
/* pack a flag indicating wiring info is provided */
flag = 1;
opal_dss.pack(buf, &flag, 1, OPAL_INT8);
/* get wireup info for daemons per the selected routing module */
wireup = OBJ_NEW(opal_buffer_t);
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, wireup))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(wireup);
OBJ_RELEASE(buf);
return;
}
/* put it in a byte object for xmission */
opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes);
/* pack the byte object - zero-byte objects are fine */
bo.size = numbytes;
boptr = &bo;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(wireup);
OBJ_RELEASE(buf);
return;
}
/* release the data since it has now been copied into our buffer */
if (NULL != bo.bytes) {
free(bo.bytes);
}
OBJ_RELEASE(wireup);
OBJ_RELEASE(buf);
return;
} else {
flag = 0;
opal_dss.pack(buf, &flag, 1, OPAL_INT8);
}
/* put it in a byte object for xmission */
opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes);
/* pack the byte object - zero-byte objects are fine */
bo.size = numbytes;
boptr = &bo;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(wireup);
OBJ_RELEASE(buf);
return;
}
/* release the data since it has now been copied into our buffer */
if (NULL != bo.bytes) {
free(bo.bytes);
}
OBJ_RELEASE(wireup);
/* goes to all daemons */
sig = OBJ_NEW(orte_grpcomm_signature_t);

View file

@ -73,8 +73,6 @@ static orte_job_state_t launch_states[] = {
ORTE_JOB_STATE_DAEMONS_LAUNCHED,
ORTE_JOB_STATE_DAEMONS_REPORTED,
ORTE_JOB_STATE_VM_READY,
ORTE_JOB_STATE_MAP,
ORTE_JOB_STATE_MAP_COMPLETE,
ORTE_JOB_STATE_SYSTEM_PREP,
ORTE_JOB_STATE_LAUNCH_APPS,
ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE,
@ -93,8 +91,6 @@ static orte_state_cbfunc_t launch_callbacks[] = {
orte_plm_base_daemons_launched,
orte_plm_base_daemons_reported,
orte_plm_base_vm_ready,
orte_rmaps_base_map_job,
orte_plm_base_mapping_complete,
orte_plm_base_complete_setup,
orte_plm_base_launch_apps,
orte_state_base_local_launch_complete,

View file

@ -61,7 +61,6 @@ orte_state_base_module_t orte_state_novm_module = {
};
static void allocation_complete(int fd, short args, void *cbdata);
static void map_complete(int fd, short args, void *cbdata);
static void vm_ready(int fd, short args, void *cbdata);
/* defined state machine sequence for no VM - individual
@ -75,8 +74,6 @@ static orte_job_state_t launch_states[] = {
ORTE_JOB_STATE_DAEMONS_LAUNCHED,
ORTE_JOB_STATE_DAEMONS_REPORTED,
ORTE_JOB_STATE_VM_READY,
ORTE_JOB_STATE_MAP,
ORTE_JOB_STATE_MAP_COMPLETE,
ORTE_JOB_STATE_SYSTEM_PREP,
ORTE_JOB_STATE_LAUNCH_APPS,
ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE,
@ -96,8 +93,6 @@ static orte_state_cbfunc_t launch_callbacks[] = {
orte_plm_base_daemons_launched,
orte_plm_base_daemons_reported,
vm_ready,
orte_rmaps_base_map_job,
map_complete,
orte_plm_base_complete_setup,
orte_plm_base_launch_apps,
orte_state_base_local_launch_complete,
@ -200,7 +195,7 @@ static void allocation_complete(int fd, short args, void *cbdata)
orte_job_t *daemons;
orte_topology_t *t;
orte_node_t *node;
int i;
int i, rc;
jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
@ -213,7 +208,6 @@ static void allocation_complete(int fd, short args, void *cbdata)
/* mark that we are not using a VM */
orte_set_attribute(&daemons->attributes, ORTE_JOB_NO_VM, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
/* ensure that all nodes point to our topology - we
* cannot support hetero nodes with this state machine
*/
@ -224,28 +218,38 @@ static void allocation_complete(int fd, short args, void *cbdata)
}
node->topology = t;
}
if (!orte_managed_allocation) {
if (NULL != orte_set_slots &&
0 != strncmp(orte_set_slots, "none", strlen(orte_set_slots))) {
for (i=0; i < orte_node_pool->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
continue;
}
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
"%s plm:base:setting slots for node %s by %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, orte_set_slots));
orte_plm_base_set_slots(node);
}
}
}
}
/* move to the map stage */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
/* perform the map */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_map_job(jdata))) {
ORTE_ERROR_LOG(rc);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
goto done;
}
/* after we map, we are ready to launch the daemons */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
done:
/* cleanup */
OBJ_RELEASE(state);
}
/* after we map, we are ready to launch the daemons */
static void map_complete(int fd, short args, void *cbdata)
{
orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
orte_job_t *jdata = state->jdata;
jdata->state = ORTE_JOB_STATE_MAP_COMPLETE;
/* move to the map stage */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
/* cleanup */
OBJ_RELEASE(state);
}
static void vm_ready(int fd, short args, void *cbdata)
{

View file

@ -3,6 +3,7 @@
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -13,6 +14,7 @@
#include "orte_config.h"
#include "opal/util/output.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/state/state.h"
#include "orte/mca/state/base/base.h"
#include "state_novm.h"
@ -26,7 +28,6 @@ const char *orte_state_novm_component_version_string =
/*
* Local functionality
*/
static int state_novm_register (void);
static int state_novm_open(void);
static int state_novm_close(void);
static int state_novm_component_query(mca_base_module_t **module, int *priority);
@ -50,8 +51,7 @@ orte_state_base_component_t mca_state_novm_component =
/* Component open and close functions */
.mca_open_component = state_novm_open,
.mca_close_component = state_novm_close,
.mca_query_component = state_novm_component_query,
.mca_register_component_params = state_novm_register
.mca_query_component = state_novm_component_query
},
.base_data = {
/* The component is checkpoint ready */
@ -59,23 +59,6 @@ orte_state_base_component_t mca_state_novm_component =
},
};
static bool select_me = false;
static int state_novm_register (void)
{
int ret;
select_me = false;
ret = mca_base_component_var_register (&mca_state_novm_component.base_version,
"select", "Use this component",
MCA_BASE_VAR_TYPE_BOOL, NULL,
0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_ALL_EQ,
&select_me);
return (0 > ret) ? ret : ORTE_SUCCESS;
}
static int state_novm_open(void)
{
return ORTE_SUCCESS;
@ -88,7 +71,7 @@ static int state_novm_close(void)
static int state_novm_component_query(mca_base_module_t **module, int *priority)
{
if (ORTE_PROC_IS_HNP && select_me) {
if (ORTE_PROC_IS_HNP && orte_no_vm) {
/* set our priority high so we'll be selected if user desires */
*priority = 1000;
*module = (mca_base_module_t *)&orte_state_novm_module;

View file

@ -23,6 +23,7 @@
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/routed/routed.h"
#include "orte/util/session_dir.h"
@ -256,6 +257,9 @@ static void track_procs(int fd, short argc, void *cbdata)
int rc, i, j;
orte_plm_cmd_flag_t cmd;
char *rtmod;
orte_std_cntr_t index;
orte_job_map_t *map;
orte_node_t *node;
OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output,
"%s state:orted:track_procs called for proc %s state %s",
@ -426,18 +430,6 @@ static void track_procs(int fd, short argc, void *cbdata)
if (pptr->name.jobid == jdata->jobid) {
/* clear the entry in the local children */
opal_pointer_array_set_item(orte_local_children, i, NULL);
/* find it in the node->procs array */
for (j=0; j < pptr->node->procs->size; j++) {
if (NULL == (pdata = (orte_proc_t*)opal_pointer_array_get_item(pptr->node->procs, j))) {
continue;
}
if (pdata == pptr) {
/* remove it */
opal_pointer_array_set_item(pptr->node->procs, j, NULL);
OBJ_RELEASE(pdata); // maintain accounting
break;
}
}
OBJ_RELEASE(pptr); // maintain accounting
}
}
@ -451,6 +443,47 @@ static void track_procs(int fd, short argc, void *cbdata)
opal_pmix.server_deregister_nspace(jdata->jobid, NULL, NULL);
}
/* release the resources */
if (NULL != jdata->map) {
map = jdata->map;
for (index = 0; index < map->nodes->size; index++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, index))) {
continue;
}
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
"%s state:orted releasing procs from node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
node->name));
for (i = 0; i < node->procs->size; i++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
continue;
}
if (pptr->name.jobid != jdata->jobid) {
/* skip procs from another job */
continue;
}
node->slots_inuse--;
node->num_procs--;
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
"%s state:orted releasing proc %s from node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&pptr->name), node->name));
/* set the entry in the node array to NULL */
opal_pointer_array_set_item(node->procs, i, NULL);
/* release the proc once for the map entry */
OBJ_RELEASE(pptr);
}
/* set the node location to NULL */
opal_pointer_array_set_item(map->nodes, index, NULL);
/* maintain accounting */
OBJ_RELEASE(node);
/* flag that the node is no longer in a map */
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
}
OBJ_RELEASE(map);
jdata->map = NULL;
}
/* cleanup the job info */
opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, NULL);
OBJ_RELEASE(jdata);

View file

@ -111,6 +111,11 @@
static opal_event_t *pipe_handler;
static void shutdown_callback(int fd, short flags, void *arg);
static void pipe_closed(int fd, short flags, void *arg);
static void rollup(int status, orte_process_name_t* sender,
opal_buffer_t *buffer,
orte_rml_tag_t tag, void *cbdata);
static opal_buffer_t *bucket;
static int ncollected = 0;
static char *orte_parent_uri;
@ -228,6 +233,7 @@ int orte_daemon(int argc, char *argv[])
memset(&orted_globals, 0, sizeof(orted_globals));
/* initialize the singleton died pipe to an illegal value so we can detect it was set */
orted_globals.singleton_died_pipe = -1;
bucket = OBJ_NEW(opal_buffer_t);
/* setup to check common command line options that just report and die */
cmd_line = OBJ_NEW(opal_cmd_line_t);
@ -694,6 +700,31 @@ int orte_daemon(int argc, char *argv[])
* for it
*/
if (!ORTE_PROC_IS_HNP) {
orte_process_name_t target;
target.jobid = ORTE_PROC_MY_NAME->jobid;
if (orte_fwd_mpirun_port) {
/* setup the rollup callback */
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ORTED_CALLBACK,
ORTE_RML_PERSISTENT, rollup, NULL);
target.vpid = ORTE_PROC_MY_NAME->vpid;
/* since we will be waiting for any children to send us
* their rollup info before sending to our parent, save
* a little time in the launch phase by "warming up" the
* connection to our parent while we wait for our children */
buffer = OBJ_NEW(opal_buffer_t); // zero-byte message
if (0 > (ret = orte_rml.send_buffer_nb(orte_coll_conduit,
ORTE_PROC_MY_PARENT, buffer,
ORTE_RML_TAG_WARMUP_CONNECTION,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(buffer);
goto DONE;
}
} else {
target.vpid = 0;
}
/* send the information to the orted report-back point - this function
* will process the data, but also counts the number of
* orteds that reported back so the launch procedure can continue.
@ -767,9 +798,9 @@ int orte_daemon(int argc, char *argv[])
}
}
/* send to the HNP's callback - will be routed if routes are available */
/* send it to the designated target */
if (0 > (ret = orte_rml.send_buffer_nb(orte_coll_conduit,
ORTE_PROC_MY_HNP, buffer,
&target, buffer,
ORTE_RML_TAG_ORTED_CALLBACK,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
@ -898,3 +929,30 @@ static void shutdown_callback(int fd, short flags, void *arg)
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
exit(ORTE_ERROR_DEFAULT_EXIT_CODE);
}
static void rollup(int status, orte_process_name_t* sender,
opal_buffer_t *buffer,
orte_rml_tag_t tag, void *cbdata)
{
int nreqd;
char *rtmod;
int ret;
/* xfer the contents of the rollup to our bucket */
opal_dss.copy_payload(bucket, buffer);
ncollected++;
/* get the number of children, and include ourselves */
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
nreqd = orte_routed.num_routes(rtmod) + 1;
if (nreqd == ncollected) {
/* relay this on to our parent */
if (0 > (ret = orte_rml.send_buffer_nb(orte_coll_conduit,
ORTE_PROC_MY_PARENT, bucket,
ORTE_RML_TAG_ORTED_CALLBACK,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(bucket);
}
}
}
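
The rollup path only forwards once every direct child in the routing tree, plus the daemon itself, has reported. A rough standalone illustration of that counting pattern (plain C, hypothetical names, not ORTE's RML/routed API):

/* Counting sketch only - hypothetical types, not ORTE's RML/routed code. */
#include <stdio.h>
#include <string.h>

#define MAX_PAYLOAD 256

typedef struct {
    char payload[MAX_PAYLOAD];   /* stands in for the opal_buffer_t "bucket" */
    int collected;               /* contributions received so far */
    int num_children;            /* direct children in the routing tree */
} rollup_t;

/* returns 1 when the bucket is complete and should be relayed to the parent */
static int add_contribution(rollup_t *r, const char *msg)
{
    strncat(r->payload, msg, MAX_PAYLOAD - strlen(r->payload) - 1);
    r->collected++;
    /* required count = number of direct children + ourselves */
    return r->collected == r->num_children + 1;
}

int main(void)
{
    rollup_t r = { "", 0, 2 };          /* a daemon with two direct children */

    add_contribution(&r, "[self]");     /* our own callback data */
    add_contribution(&r, "[child-1]");
    if (add_contribution(&r, "[child-2]")) {
        printf("relay to parent: %s\n", r.payload);
    }
    return 0;
}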


@ -12,7 +12,7 @@
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -82,6 +82,13 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the flags */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(jobs[i]->flags)), 1, ORTE_JOB_FLAGS_T))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the personality */
count = opal_argv_count(jobs[i]->personality);
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &count, 1, OPAL_INT32))) {
@ -126,8 +133,8 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
ORTE_ERROR_LOG(rc);
return rc;
}
/* and the procs, if we have them */
if (0 < jobs[i]->num_procs) {
if (orte_no_vm && 0 < jobs[i]->num_procs) {
for (j=0; j < jobs[i]->procs->size; j++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jobs[i]->procs, j))) {
continue;
@ -163,6 +170,7 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
j=0;
} else {
/* pack a one to indicate a map is there */
j = 1;
}
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)&j, 1, ORTE_STD_CNTR))) {
@ -191,13 +199,6 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
return rc;
}
/* pack the flags */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(jobs[i]->flags)), 1, ORTE_JOB_FLAGS_T))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the attributes that need to be sent */
count = 0;
OPAL_LIST_FOREACH(kv, &jobs[i]->attributes, orte_attribute_t) {


@ -187,13 +187,12 @@ int orte_dt_print_job(char **output, char *prefix, orte_job_t *src, opal_data_ty
}
tmp2 = opal_argv_join(src->personality, ',');
asprintf(&tmp, "\n%sData for job: %s\tPersonality: %s\tRecovery: %s(%s)\n%s\tNum apps: %ld\tMPI allowed: %s\tStdin target: %s\tState: %s\tAbort: %s", pfx2,
asprintf(&tmp, "\n%sData for job: %s\tPersonality: %s\tRecovery: %s(%s)\n%s\tNum apps: %ld\tStdin target: %s\tState: %s\tAbort: %s", pfx2,
ORTE_JOBID_PRINT(src->jobid), tmp2,
(ORTE_FLAG_TEST(src, ORTE_JOB_FLAG_RECOVERABLE)) ? "ENABLED" : "DISABLED",
(orte_get_attribute(&src->attributes, ORTE_JOB_RECOVER_DEFINED, NULL, OPAL_BOOL)) ? "DEFINED" : "DEFAULT",
pfx2,
(long)src->num_apps,
(ORTE_FLAG_TEST(src, ORTE_JOB_FLAG_GANG_LAUNCHED)) ? "YES" : "NO", ORTE_VPID_PRINT(src->stdin_target),
(long)src->num_apps, ORTE_VPID_PRINT(src->stdin_target),
orte_job_state_to_str(src->state), (ORTE_FLAG_TEST(src, ORTE_JOB_FLAG_ABORTED)) ? "True" : "False");
free(tmp2);
asprintf(&pfx, "%s\t", pfx2);


@ -87,6 +87,14 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the flags */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
(&(jobs[i]->flags)), &n, ORTE_JOB_FLAGS_T))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the personality */
n=1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count, &n, OPAL_INT32))) {
@ -138,8 +146,8 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc);
return rc;
}
/* and the procs, if provided */
if (0 < jobs[i]->num_procs) {
if (orte_no_vm && 0 < jobs[i]->num_procs) {
orte_proc_t *proc;
for (j=0; j < jobs[i]->num_procs; j++) {
n = 1;
@ -197,14 +205,6 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
return rc;
}
/* unpack the flags */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
(&(jobs[i]->flags)), &n, ORTE_JOB_FLAGS_T))) {
ORTE_ERROR_LOG(rc);
return rc;
}
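Since DSS buffers are just byte streams, the unpack order must mirror the pack order exactly; that is why moving the job flags earlier in orte_dt_pack_job is paired with the same move here in orte_dt_unpack_job. A generic sketch of the ordering requirement (plain C with memcpy, not opal_dss):

/* Generic sketch with memcpy, not opal_dss - shows why the field order
 * on the unpack side must match the pack side byte for byte. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
    uint8_t buf[64];
    size_t off = 0;

    uint16_t flags = 0x0004;      /* packed first ...        */
    int32_t num_procs = 8;        /* ... then the proc count */
    memcpy(buf + off, &flags, sizeof(flags));          off += sizeof(flags);
    memcpy(buf + off, &num_procs, sizeof(num_procs));  off += sizeof(num_procs);

    /* unpack in the same order; swapping these reads would corrupt both */
    uint16_t uflags;
    int32_t unum;
    off = 0;
    memcpy(&uflags, buf + off, sizeof(uflags));  off += sizeof(uflags);
    memcpy(&unum, buf + off, sizeof(unum));      off += sizeof(unum);

    printf("flags=0x%04x num_procs=%d\n", (unsigned)uflags, (int)unum);
    return 0;
}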
/* unpack the attributes */
n=1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count,


@ -76,11 +76,13 @@ char *orte_mgmt_transport = NULL;
char *orte_coll_transport = NULL;
int orte_mgmt_conduit = -1;
int orte_coll_conduit = -1;
bool orte_no_vm = false;
/* ORTE OOB port flags */
bool orte_static_ports = false;
char *orte_oob_static_ports = NULL;
bool orte_standalone_operation = false;
bool orte_fwd_mpirun_port = false;
bool orte_keep_fqdn_hostnames = false;
bool orte_have_fqdn_allocation = false;
@ -648,7 +650,6 @@ static void orte_job_construct(orte_job_t* job)
job->num_local_procs = 0;
job->flags = 0;
ORTE_FLAG_SET(job, ORTE_JOB_FLAG_GANG_LAUNCHED);
ORTE_FLAG_SET(job, ORTE_JOB_FLAG_FORWARD_OUTPUT);
OBJ_CONSTRUCT(&job->attributes, opal_list_t);


@ -456,11 +456,13 @@ ORTE_DECLSPEC extern char *orte_basename;
ORTE_DECLSPEC extern bool orte_coprocessors_detected;
ORTE_DECLSPEC extern opal_hash_table_t *orte_coprocessors;
ORTE_DECLSPEC extern char *orte_topo_signature;
ORTE_DECLSPEC extern bool orte_no_vm;
/* ORTE OOB port flags */
ORTE_DECLSPEC extern bool orte_static_ports;
ORTE_DECLSPEC extern char *orte_oob_static_ports;
ORTE_DECLSPEC extern bool orte_standalone_operation;
ORTE_DECLSPEC extern bool orte_fwd_mpirun_port;
/* nodename flags */
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;


@ -689,6 +689,15 @@ int orte_register_params(void)
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_stat_history_size);
orte_no_vm = false;
id = mca_base_var_register ("orte", "orte", NULL, "no_vm",
"Do not build the VM at start to detect topologies",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_no_vm);
/* register a synonym for old name */
mca_base_var_register_synonym (id, "orte", "state", "novm", "select", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
orte_max_vm_size = -1;
(void) mca_base_var_register ("orte", "orte", NULL, "max_vm_size",
"Maximum size of virtual machine - used to subdivide allocation",
@ -773,5 +782,11 @@ int orte_register_params(void)
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_stack_trace_wait_timeout);
(void) mca_base_var_register ("orte", "orte", NULL, "fwd_mpirun_port",
"Forward the port used by mpirun so all daemons will use it",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_fwd_mpirun_port);
return ORTE_SUCCESS;
}


@ -78,7 +78,6 @@ typedef uint8_t orte_node_flags_t;
typedef uint16_t orte_job_flags_t;
#define ORTE_JOB_FLAGS_T OPAL_UINT16
#define ORTE_JOB_FLAG_UPDATED 0x0001 // job has been updated and needs to be included in the pidmap message
#define ORTE_JOB_FLAG_GANG_LAUNCHED 0x0002 // MPI is allowed on this job - i.e., all members of the job were simultaneously launched
#define ORTE_JOB_FLAG_RESTARTED 0x0004 // some procs in this job are being restarted
#define ORTE_JOB_FLAG_ABORTED 0x0008 // did this job abort?
#define ORTE_JOB_FLAG_DEBUGGER_DAEMON 0x0010 // job is launching debugger daemons


@ -177,23 +177,27 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
char *node;
char prefix[ORTE_MAX_NODE_PREFIX];
int i, j, n, len, startnum, nodenum, numdigits;
bool found, fullname;
bool found, fullname, test;
char *suffix, *sfx;
orte_regex_node_t *ndreg;
orte_regex_range_t *range, *rng, *idrng;
opal_list_t nodeids, nodenms, dvpids;
orte_regex_range_t *range, *rng, *slt, *tp, *flg;
opal_list_t nodenms, dvpids, slots, topos, flags;
opal_list_item_t *item, *itm2;
char **regexargs = NULL, *tmp, *tmp2;
orte_node_t *nptr;
int rc;
/* setup the list of results */
OBJ_CONSTRUCT(&nodeids, opal_list_t);
OBJ_CONSTRUCT(&nodenms, opal_list_t);
OBJ_CONSTRUCT(&dvpids, opal_list_t);
OBJ_CONSTRUCT(&slots, opal_list_t);
OBJ_CONSTRUCT(&topos, opal_list_t);
OBJ_CONSTRUCT(&flags, opal_list_t);
rng = NULL;
idrng = NULL;
slt = NULL;
tp = NULL;
flg = NULL;
for (n=0; n < orte_node_pool->size; n++) {
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
continue;
@ -202,26 +206,6 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
if (NULL == nptr->daemon) {
continue;
}
/* deal with the nodeids - these are just the index into
* the node array. We pass these to ensure coherence across
* the virtual machine */
if (NULL == idrng) {
/* just starting */
idrng = OBJ_NEW(orte_regex_range_t);
idrng->start = n;
idrng->cnt = 1;
opal_list_append(&nodeids, &idrng->super);
} else {
/* is this the next in line */
if (n == (idrng->start + idrng->cnt)) {
idrng->cnt++;
} else {
/* need to start another range */
idrng = OBJ_NEW(orte_regex_range_t);
idrng->start = n;
opal_list_append(&nodeids, &idrng->super);
}
}
/* deal with the daemon vpid - see if it is next in the
* current range */
if (NULL == rng) {
@ -236,9 +220,87 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
rng->cnt++;
} else {
/* need to start another range */
rng = OBJ_NEW(orte_regex_range_t);
rng->start = nptr->daemon->name.vpid;
opal_list_append(&dvpids, &rng->super);
rng = OBJ_NEW(orte_regex_range_t);
rng->start = nptr->daemon->name.vpid;
rng->cnt = 1;
opal_list_append(&dvpids, &rng->super);
}
}
/* check the #slots */
if (NULL == slt) {
/* just starting */
slt = OBJ_NEW(orte_regex_range_t);
slt->start = nptr->daemon->name.vpid;
slt->slots = nptr->slots;
slt->cnt = 1;
opal_list_append(&slots, &slt->super);
} else {
/* is this the next in line */
if (nptr->slots == slt->slots) {
slt->cnt++;
} else {
/* need to start another range */
slt = OBJ_NEW(orte_regex_range_t);
slt->start = nptr->daemon->name.vpid;
slt->slots = nptr->slots;
slt->cnt = 1;
opal_list_append(&slots, &slt->super);
}
}
/* check the topologies */
if (NULL == tp) {
if (NULL != nptr->topology) {
/* just starting */
tp = OBJ_NEW(orte_regex_range_t);
tp->start = nptr->daemon->name.vpid;
tp->t = nptr->topology;
tp->cnt = 1;
opal_list_append(&topos, &tp->super);
}
} else {
if (NULL != nptr->topology) {
/* is this the next in line */
if (tp->t == nptr->topology) {
tp->cnt++;
} else {
/* need to start another range */
tp = OBJ_NEW(orte_regex_range_t);
tp->start = nptr->daemon->name.vpid;
tp->t = nptr->topology;
tp->cnt = 1;
opal_list_append(&topos, &tp->super);
}
}
}
/* check the flags */
test = ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN);
if (NULL == flg) {
/* just starting */
flg = OBJ_NEW(orte_regex_range_t);
flg->start = nptr->daemon->name.vpid;
if (test) {
flg->slots = 1;
} else {
flg->slots = 0;
}
flg->cnt = 1;
opal_list_append(&flags, &flg->super);
} else {
/* is this the next in line */
if ((test && 1 == flg->slots) ||
(!test && 0 == flg->slots)) {
flg->cnt++;
} else {
/* need to start another range */
flg = OBJ_NEW(orte_regex_range_t);
flg->start = nptr->daemon->name.vpid;
if (test) {
flg->slots = 1;
} else {
flg->slots = 0;
}
flg->cnt = 1;
opal_list_append(&flags, &flg->super);
}
}
node = nptr->name;
@ -422,41 +484,7 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OPAL_LIST_DESTRUCT(&dvpids);
return rc;
}
if (NULL != tmp) {
free(tmp);
}
/* do the same for the indices */
tmp = NULL;
while (NULL != (item = opal_list_remove_first(&nodeids))) {
rng = (orte_regex_range_t*)item;
if (1 < rng->cnt) {
if (NULL == tmp) {
asprintf(&tmp, "%d-%d", rng->start, rng->start + rng->cnt - 1);
} else {
asprintf(&tmp2, "%s,%d-%d", tmp, rng->start, rng->start + rng->cnt - 1);
free(tmp);
tmp = tmp2;
}
} else {
if (NULL == tmp) {
asprintf(&tmp, "%d", rng->start);
} else {
asprintf(&tmp2, "%s,%d", tmp, rng->start);
free(tmp);
tmp = tmp2;
}
}
OBJ_RELEASE(rng);
}
OPAL_LIST_DESTRUCT(&nodeids);
/* pack the string */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OPAL_LIST_DESTRUCT(&dvpids);
OPAL_LIST_DESTRUCT(&slots);
return rc;
}
if (NULL != tmp) {
@ -491,27 +519,171 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
/* pack the string */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OPAL_LIST_DESTRUCT(&dvpids);
OPAL_LIST_DESTRUCT(&slots);
return rc;
}
if (NULL != tmp) {
free(tmp);
}
/* do the same to pass #slots on each node */
tmp = NULL;
while (NULL != (item = opal_list_remove_first(&slots))) {
rng = (orte_regex_range_t*)item;
if (1 < rng->cnt) {
if (NULL == tmp) {
asprintf(&tmp, "%d-%d[%d]", rng->start, rng->start + rng->cnt - 1, rng->slots);
} else {
asprintf(&tmp2, "%s,%d-%d[%d]", tmp, rng->start, rng->start + rng->cnt - 1, rng->slots);
free(tmp);
tmp = tmp2;
}
} else {
if (NULL == tmp) {
asprintf(&tmp, "%d[%d]", rng->start, rng->slots);
} else {
asprintf(&tmp2, "%s,%d[%d]", tmp, rng->start, rng->slots);
free(tmp);
tmp = tmp2;
}
}
OBJ_RELEASE(rng);
}
OPAL_LIST_DESTRUCT(&slots);
/* pack the string */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (NULL != tmp) {
free(tmp);
}
/* do the same to pass the flags for each node */
tmp = NULL;
while (NULL != (item = opal_list_remove_first(&flags))) {
rng = (orte_regex_range_t*)item;
if (1 < rng->cnt) {
if (NULL == tmp) {
asprintf(&tmp, "%d-%d[%x]", rng->start, rng->start + rng->cnt - 1, rng->slots);
} else {
asprintf(&tmp2, "%s,%d-%d[%x]", tmp, rng->start, rng->start + rng->cnt - 1, rng->slots);
free(tmp);
tmp = tmp2;
}
} else {
if (NULL == tmp) {
asprintf(&tmp, "%d[%x]", rng->start, rng->slots);
} else {
asprintf(&tmp2, "%s,%d[%x]", tmp, rng->start, rng->slots);
free(tmp);
tmp = tmp2;
}
}
OBJ_RELEASE(rng);
}
OPAL_LIST_DESTRUCT(&flags);
/* pack the string */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (NULL != tmp) {
free(tmp);
}
/* handle the topologies - as the most common case by far
* is to have homogeneous topologies, we only send them
* if something is different */
tmp = NULL;
if (1 < opal_list_get_size(&topos)) {
opal_buffer_t bucket, *bptr;
OBJ_CONSTRUCT(&bucket, opal_buffer_t);
while (NULL != (item = opal_list_remove_first(&topos))) {
rng = (orte_regex_range_t*)item;
if (1 < rng->cnt) {
if (NULL == tmp) {
asprintf(&tmp, "%d-%d", rng->start, rng->start + rng->cnt - 1);
} else {
asprintf(&tmp2, "%s,%d-%d", tmp, rng->start, rng->start + rng->cnt - 1);
free(tmp);
tmp = tmp2;
}
} else {
if (NULL == tmp) {
asprintf(&tmp, "%d", rng->start);
} else {
asprintf(&tmp2, "%s,%d", tmp, rng->start);
free(tmp);
tmp = tmp2;
}
}
/* pack this topology string */
if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->sig, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(rng);
OPAL_LIST_DESTRUCT(&topos);
OBJ_DESTRUCT(&bucket);
free(tmp);
return rc;
}
/* pack the topology itself */
if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->topo, 1, OPAL_HWLOC_TOPO))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(rng);
OPAL_LIST_DESTRUCT(&topos);
OBJ_DESTRUCT(&bucket);
free(tmp);
return rc;
}
OBJ_RELEASE(rng);
}
OPAL_LIST_DESTRUCT(&topos);
/* pack the string */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&bucket);
free(tmp);
return rc;
}
free(tmp);
/* now pack the topologies */
bptr = &bucket;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &bptr, 1, OPAL_BUFFER))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&bucket);
return rc;
}
OBJ_DESTRUCT(&bucket);
} else {
/* need to pack the NULL just to terminate the region */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
return ORTE_SUCCESS;
}
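The slots and flags are sent as a run-length encoding over daemon vpids, one "start[-end][value]" element per range. A standalone sketch of that encoding (hypothetical helper, not part of ORTE):

/* Hypothetical helper, not part of ORTE: run-length encodes a per-daemon
 * value array into the "start[-end][value]" comma list used above for the
 * slots and flags, e.g. {4,4,4,2} -> "0-2[4],3[2]". */
#include <stdio.h>

static void encode_ranges(const int *vals, int n, char *out, int len)
{
    int used = 0, start = 0;
    out[0] = '\0';
    for (int i = 1; i <= n && used < len; i++) {
        if (i == n || vals[i] != vals[start]) {
            if (start == i - 1) {
                used += snprintf(out + used, len - used, "%s%d[%d]",
                                 used ? "," : "", start, vals[start]);
            } else {
                used += snprintf(out + used, len - used, "%s%d-%d[%d]",
                                 used ? "," : "", start, i - 1, vals[start]);
            }
            start = i;
        }
    }
}

int main(void)
{
    int slots[] = { 4, 4, 4, 2 };
    char regex[128];

    encode_ranges(slots, 4, regex, (int)sizeof(regex));
    printf("%s\n", regex);   /* prints 0-2[4],3[2] */
    return 0;
}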
/* decode a nodemap for a daemon */
int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer)
{
int m, n, rc;
int n, nn, rc;
orte_node_t *node;
size_t k, num_nodes, endpt;
size_t k, endpt, start;
orte_job_t *daemons;
orte_proc_t *dptr;
char **nodes, *indices, *dvpids;
char **nodes=NULL, *dvpids=NULL, *slots=NULL, *topos=NULL, *flags=NULL;
char *ndnames, *rmndr, **tmp;
int *nodeids, *dids;
opal_list_t dids, slts, flgs;
opal_buffer_t *bptr=NULL;
orte_topology_t *t;
orte_regex_range_t *rng, *drng, *srng, *frng;
/* unpack the node regex */
n = 1;
@ -524,141 +696,226 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer)
return ORTE_SUCCESS;
}
/* unpack the index regex */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &indices, &n, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
free(ndnames);
return rc;
}
/* this is not allowed to be NULL */
if (NULL == indices) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
free(ndnames);
return ORTE_ERR_BAD_PARAM;
}
OBJ_CONSTRUCT(&dids, opal_list_t);
OBJ_CONSTRUCT(&slts, opal_list_t);
OBJ_CONSTRUCT(&flgs, opal_list_t);
/* unpack the daemon vpid regex */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &dvpids, &n, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
free(ndnames);
free(indices);
return rc;
goto cleanup;
}
/* this is not allowed to be NULL */
if (NULL == dvpids) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
free(ndnames);
free(indices);
return ORTE_ERR_BAD_PARAM;
rc = ORTE_ERR_BAD_PARAM;
goto cleanup;
}
/* unpack the slots regex */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &slots, &n, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* this is not allowed to be NULL */
if (NULL == slots) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
rc = ORTE_ERR_BAD_PARAM;
goto cleanup;
}
/* unpack the flags regex */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &flags, &n, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* this is not allowed to be NULL */
if (NULL == flags) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
rc = ORTE_ERR_BAD_PARAM;
goto cleanup;
}
/* unpack the topos regex - this may not have been
* provided (e.g., for a homogeneous machine) */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &topos, &n, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (NULL != topos) {
/* need to unpack the topologies */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &n, OPAL_BUFFER))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
}
/* if we are the HNP, then we just discard these strings as we already
* have a complete picture - but we needed to unpack them in order to
* maintain sync in the unpacking order */
if (ORTE_PROC_IS_HNP) {
free(ndnames);
free(indices);
free(dvpids);
return ORTE_SUCCESS;
rc = ORTE_SUCCESS;
goto cleanup;
}
/* decompress the regex */
nodes = NULL;
if (ORTE_SUCCESS != (rc = orte_regex_extract_node_names(ndnames, &nodes))) {
free(ndnames);
free(indices);
free(dvpids);
return rc;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
free(ndnames);
if (NULL == nodes) {
/* should not happen */
free(indices);
free(dvpids);
return ORTE_ERR_NOT_FOUND;
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto cleanup;
}
/* decompress the index ranges */
num_nodes = opal_argv_count(nodes);
nodeids = (int*)malloc(num_nodes * sizeof(int));
tmp = opal_argv_split(indices, ',');
k = 0;
for (n=0; NULL != tmp[n]; n++) {
/* convert the number - since it might be a range,
* save the remainder pointer */
nodeids[k++] = strtoul(tmp[n], &rmndr, 10);
if (NULL != rmndr) {
/* it must be a range - find the endpoint */
++rmndr;
m = nodeids[k-1] + 1;
endpt = strtoul(rmndr, NULL, 10);
while (k <= endpt && k < num_nodes) {
nodeids[k++] = m++;
}
--k; // step back to compensate for later increment
}
++k;
}
opal_argv_free(tmp);
free(indices);
/* decompress the vpids */
dids = (int*)malloc(num_nodes * sizeof(int));
tmp = opal_argv_split(dvpids, ',');
k = 0;
for (n=0; NULL != tmp[n]; n++) {
rng = OBJ_NEW(orte_regex_range_t);
opal_list_append(&dids, &rng->super);
/* convert the number - since it might be a range,
* save the remainder pointer */
dids[k++] = strtoul(tmp[n], &rmndr, 10);
if (NULL != rmndr) {
rng->start = strtoul(tmp[n], &rmndr, 10);
if (NULL == rmndr || 0 == strlen(rmndr)) {
rng->endpt = rng->start;
} else {
/* it must be a range - find the endpoint */
++rmndr;
endpt = strtoul(rmndr, NULL, 10);
m = dids[k-1] + 1;
while (k <= endpt && k < num_nodes) {
dids[k++] = m++;
}
--k; // step back to compensate for later increment
rng->endpt = strtoul(rmndr, NULL, 10);
}
++k;
}
opal_argv_free(tmp);
free(dvpids);
/* decompress the slots */
tmp = opal_argv_split(slots, ',');
for (n=0; NULL != tmp[n]; n++) {
rng = OBJ_NEW(orte_regex_range_t);
opal_list_append(&slts, &rng->super);
/* find the '[' as that delimits the value */
rmndr = strchr(tmp[n], '[');
if (NULL == rmndr) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
rc = ORTE_ERR_BAD_PARAM;
opal_argv_free(tmp);
goto cleanup;
}
*rmndr = '\0';
++rmndr;
/* convert that number as this is the number of
* slots for this range */
rng->slots = strtoul(rmndr, NULL, 10);
/* convert the starting pt - since it might be a range,
* save the remainder pointer */
rng->start = strtoul(tmp[n], &rmndr, 10);
if (NULL == rmndr || 0 == strlen(rmndr)) {
rng->endpt = rng->start;
} else {
/* it must be a range - find the endpoint */
++rmndr;
rng->endpt = strtoul(rmndr, NULL, 10);
}
}
opal_argv_free(tmp);
/* decompress the flags */
tmp = opal_argv_split(flags, ',');
for (n=0; NULL != tmp[n]; n++) {
rng = OBJ_NEW(orte_regex_range_t);
opal_list_append(&flgs, &rng->super);
/* find the '[' as that delimits the value */
rmndr = strchr(tmp[n], '[');
if (NULL == rmndr) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
opal_argv_free(tmp);
rc = ORTE_ERR_BAD_PARAM;
goto cleanup;
}
*rmndr = '\0';
++rmndr;
/* check the value - it is just one character */
if ('1' == *rmndr) {
rng->slots = 1;
} else {
rng->slots = 0;
}
/* convert the starting pt - since it might be a range,
* save the remainder pointer */
rng->start = strtoul(tmp[n], &rmndr, 10);
if (NULL == rmndr || 0 == strlen(rmndr)) {
rng->endpt = rng->start;
} else {
/* it must be a range - find the endpoint */
++rmndr;
rng->endpt = strtoul(rmndr, NULL, 10);
}
}
opal_argv_free(tmp);
free(flags);
/* get the daemon job object */
daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
/* since this is a complete picture of all the nodes, reset the
* counters and regenerate them */
orte_process_info.num_daemons = 0;
orte_process_info.num_nodes = 0;
daemons->num_procs = 0;
/* update the node array */
drng = (orte_regex_range_t*)opal_list_get_first(&dids);
srng = (orte_regex_range_t*)opal_list_get_first(&slts);
frng = (orte_regex_range_t*)opal_list_get_first(&flgs);
for (n=0; NULL != nodes[n]; n++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeids[n]))) {
/* the daemon vpids for these nodes are in the dids range list, so
* use those to look up the nodes */
nn = drng->start + n;
if (nn == drng->endpt) {
drng = (orte_regex_range_t*)opal_list_get_next(&drng->super);
}
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nn))) {
node = OBJ_NEW(orte_node_t);
node->name = nodes[n];
node->index = nodeids[n];
opal_pointer_array_set_item(orte_node_pool, node->index, node);
} else if (NULL != node->daemon) {
OBJ_RELEASE(node->daemon);
node->daemon = NULL;
node->index = nn;
opal_pointer_array_set_item(orte_node_pool, nn, node);
}
/* set the number of slots */
node->slots = srng->slots;
if (srng->endpt == nn) {
srng = (orte_regex_range_t*)opal_list_get_next(&srng->super);
}
/* set the flags */
if (0 == frng->slots) {
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
} else {
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
if (frng->endpt == nn) {
frng = (orte_regex_range_t*)opal_list_get_next(&frng->super);
}
++orte_process_info.num_nodes;
if (NULL == (dptr = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, dids[n]))) {
/* if this is me, just ignore the rest as we are all setup */
if (nn == (int)ORTE_PROC_MY_NAME->vpid) {
continue;
}
if (NULL != node->daemon) {
OBJ_RELEASE(node->daemon);
node->daemon = NULL;
}
if (NULL == (dptr = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, nn))) {
/* create a daemon object for this node */
dptr = OBJ_NEW(orte_proc_t);
dptr->name.jobid = ORTE_PROC_MY_NAME->jobid;
dptr->name.vpid = dids[n];
dptr->name.vpid = nn;
ORTE_FLAG_SET(dptr, ORTE_PROC_FLAG_ALIVE); // assume the daemon is alive until discovered otherwise
opal_pointer_array_set_item(daemons->procs, dids[n], dptr);
opal_pointer_array_set_item(daemons->procs, nn, dptr);
++daemons->num_procs;
} else if (NULL != dptr->node) {
OBJ_RELEASE(dptr->node);
dptr->node = NULL;
}
++daemons->num_procs;
/* link the node to the daemon */
OBJ_RETAIN(dptr);
node->daemon = dptr;
@ -670,8 +927,83 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer)
* all the node names themselves. Instead, we just free the
* array of string pointers, leaving the strings alone */
free(nodes);
free(nodeids);
free(dids);
/* if no topology info was passed, then everyone shares our topology */
if (NULL == bptr) {
orte_topology_t *t;
/* our topology is first in the array */
t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0);
for (n=0; n < orte_node_pool->size; n++) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
if (NULL == node->topology) {
OBJ_RETAIN(t);
node->topology = t;
}
}
}
} else {
char *sig;
hwloc_topology_t topo;
/* decompress the topology regex */
tmp = opal_argv_split(topos, ',');
/* there must be a topology definition for each range */
for (nn=0; NULL != tmp[nn]; nn++) {
/* unpack the signature */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &sig, &n, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
opal_argv_free(tmp);
OBJ_RELEASE(bptr);
goto cleanup;
}
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &topo, &n, OPAL_HWLOC_TOPO))) {
ORTE_ERROR_LOG(rc);
opal_argv_free(tmp);
OBJ_RELEASE(bptr);
free(sig);
goto cleanup;
}
/* see if we already have this topology - could be an update */
for (n=0; n < orte_node_topologies->size; n++) {
if (NULL == (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) {
continue;
}
if (0 == strcmp(t->sig, sig)) {
/* found a match */
free(sig);
opal_hwloc_base_free_topology(topo);
sig = NULL;
break;
}
}
if (NULL != sig) {
/* new topology - record it */
t = OBJ_NEW(orte_topology_t);
t->sig = sig;
t->topo = topo;
/* track it so later updates can match the signature */
opal_pointer_array_add(orte_node_topologies, t);
}
/* point each of the nodes in the regex to this topology */
start = strtoul(tmp[nn], &rmndr, 10);
if (NULL == rmndr || 0 == strlen(rmndr)) {
endpt = start;
} else {
/* it must be a range - find the endpoint */
++rmndr;
endpt = strtoul(rmndr, NULL, 10);
}
for (k=start; k <= endpt; k++) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, k))) {
if (NULL == node->topology) {
OBJ_RETAIN(t);
node->topology = t;
}
}
}
}
OBJ_RELEASE(bptr);
opal_argv_free(tmp);
}
/* update num procs */
if (orte_process_info.num_procs != daemons->num_procs) {
@ -700,5 +1032,9 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer)
}
}
return ORTE_SUCCESS;
cleanup:
OPAL_LIST_DESTRUCT(&dids);
OPAL_LIST_DESTRUCT(&slts);
OPAL_LIST_DESTRUCT(&flgs);
return rc;
}
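The decode side splits those same "start[-end][value]" strings on commas and recovers (start, endpt, value) triples, which is what the slots and flags loops above do. A minimal standalone parse sketch (hypothetical, not ORTE code):

/* Hypothetical standalone parse, not ORTE code: splits one comma-separated
 * "start[-end][value]" list back into (start, endpt, value) triples, the
 * same shape the slots/flags decode loops above recover. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    char input[] = "0-2[4],3[2]";

    for (char *tok = strtok(input, ","); NULL != tok; tok = strtok(NULL, ",")) {
        /* the '[' delimits the value for this range */
        char *bracket = strchr(tok, '[');
        if (NULL == bracket) {
            fprintf(stderr, "malformed range: %s\n", tok);
            return 1;
        }
        *bracket = '\0';
        int value = (int)strtoul(bracket + 1, NULL, 10);

        /* the range itself is either a single vpid or "start-end" */
        char *rmndr = NULL;
        int start = (int)strtoul(tok, &rmndr, 10);
        int endpt = ('-' == *rmndr) ? (int)strtoul(rmndr + 1, NULL, 10) : start;

        printf("vpids %d..%d -> value %d\n", start, endpt, value);
    }
    return 0;
}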


@ -51,6 +51,10 @@ ORTE_DECLSPEC int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer);
ORTE_DECLSPEC int orte_util_build_daemon_nidmap(char **nodes);
ORTE_DECLSPEC int orte_util_encode_topologies(opal_buffer_t *buffer);
ORTE_DECLSPEC int orte_util_decode_topologies(opal_buffer_t *buffer);
END_C_DECLS
#endif


@ -428,7 +428,6 @@ int orte_regex_extract_node_names(char *regexp, char ***names)
return ret;
}
/*
* Parse one or more ranges in a set
*
@ -589,148 +588,7 @@ static int regex_parse_node_range(char *base, char *range, int num_digits, char
return ORTE_SUCCESS;
}
/* Compute the #procs on each node given a regex of the form
* "#procs(x#nodes),#procs(x#nodes)". In other words, an
* expression of "4(x30)" will be interpreted to mean four
* procs on each of the next 30 nodes.
*/
int orte_regex_extract_ppn(int num_nodes, char *regexp, int **ppn)
{
int *tmp;
char *begptr, *endptr, *orig;
int i, j, count, reps;
/* init null answer */
*ppn = NULL;
tmp = (int *) malloc(sizeof(int) * num_nodes);
if (NULL == tmp) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
memset(tmp, 0, sizeof(int) * num_nodes);
orig = begptr = strdup(regexp);
if (NULL == begptr) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
free(tmp);
return ORTE_ERR_OUT_OF_RESOURCE;
}
j = 0;
while (begptr) {
count = strtol(begptr, &endptr, 10);
if ((endptr[0] == '(') && (endptr[1] == 'x')) {
reps = strtol((endptr+2), &endptr, 10);
if (endptr[0] == ')') {
endptr++;
}
} else {
reps = 1;
}
for (i = 0; i < reps && j < num_nodes; i++) {
tmp[j++] = count;
}
if (*endptr == ',') {
begptr = endptr + 1;
} else if (*endptr == '\0' || j >= num_nodes) {
break;
} else {
orte_show_help("help-regex.txt", "regex:bad-ppn", true, regexp);
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
free(tmp);
free(orig);
return ORTE_ERR_BAD_PARAM;
}
}
free(orig);
/* return values */
*ppn = tmp;
return ORTE_SUCCESS;
}
int orte_regex_extract_name_range(char *regexp, char ***names)
{
char *tmp, *b, *b2, **rngs, *t, *pre, *post;
int i;
char c;
/* protect input */
tmp = strdup(regexp);
/* look for bracket */
if (NULL == (b = strchr(tmp, '['))) {
/* just one value */
opal_argv_append_nosize(names, regexp);
free(tmp);
return ORTE_SUCCESS;
}
if (b == tmp) {
/* bracket at very beginning */
pre = NULL;
} else {
*b = '\0';
pre = tmp;
}
/* step over the bracket */
b++;
/* look for closing bracket */
if (NULL == (b2 = strrchr(tmp, ']'))) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
free(tmp);
return ORTE_ERR_BAD_PARAM;
}
*b2 = '\0';
b2++;
if ('\0' == *b2) {
/* bracket was at end */
post = NULL;
} else {
post = b2;
}
/* split on commas */
rngs = opal_argv_split(b, ',');
for (i=0; NULL != rngs[i]; i++) {
/* look for a range */
if (NULL == strchr(rngs[i], '-')) {
/* just one value */
if (NULL == pre && NULL == post) {
t = strdup(rngs[i]);
} else if (NULL == pre) {
asprintf(&t, "%s%s", rngs[i], post);
} else if (NULL == post) {
asprintf(&t, "%s%s", pre, rngs[i]);
} else {
asprintf(&t, "%s%s%s", pre, rngs[i], post);
}
opal_argv_append_nosize(names, t);
free(t);
} else {
/* has to be <char>-<char> */
for (c=rngs[i][0]; c <= rngs[i][2]; c++) {
if (NULL == pre && NULL == post) {
asprintf(&t, "%c", c);
} else if (NULL == pre) {
asprintf(&t, "%c%s", c, post);
} else if (NULL == post) {
asprintf(&t, "%s%c", pre, c);
} else {
asprintf(&t, "%s%c%s", pre, c, post);
}
opal_argv_append_nosize(names, t);
free(t);
}
}
}
opal_argv_free(rngs);
free(tmp);
return ORTE_SUCCESS;
}
/***** CLASS INSTANTIATIONS ****/
static void range_construct(orte_regex_range_t *ptr)
{
@ -768,4 +626,3 @@ OBJ_CLASS_INSTANCE(orte_regex_node_t,
opal_list_item_t,
orte_regex_node_construct,
orte_regex_node_destruct);


@ -37,7 +37,10 @@ BEGIN_C_DECLS
typedef struct {
opal_list_item_t super;
int start;
int endpt;
int cnt;
int slots;
orte_topology_t *t;
} orte_regex_range_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_regex_range_t);
@ -58,9 +61,5 @@ ORTE_DECLSPEC int orte_regex_create(char *nodes, char **regexp);
ORTE_DECLSPEC int orte_regex_extract_node_names(char *regexp, char ***names);
ORTE_DECLSPEC int orte_regex_extract_ppn(int num_nodes, char *regexp, int **ppn);
ORTE_DECLSPEC int orte_regex_extract_name_range(char *regexp, char ***names);
END_C_DECLS
#endif