Коммит
3b29b78a19
@ -108,7 +108,6 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np",
|
||||
true, jdata->num_apps, NULL);
|
||||
rc = ORTE_ERR_SILENT;
|
||||
opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__);
|
||||
goto error;
|
||||
}
|
||||
|
||||
@ -119,7 +118,6 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
|
||||
jdata->map->mapping, initial_map, false))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__);
|
||||
goto error;
|
||||
}
|
||||
/* flag that all subsequent requests should not reset the node->mapped flag */
|
||||
@ -238,12 +236,10 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
true, "mapping",
|
||||
orte_rmaps_base_print_mapping(jdata->map->mapping));
|
||||
rc = ORTE_ERR_SILENT;
|
||||
opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__);
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__);
|
||||
goto error;
|
||||
}
|
||||
|
||||
@ -253,7 +249,6 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -275,7 +270,6 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
error:
|
||||
opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__);
|
||||
while(NULL != (item = opal_list_remove_first(&node_list))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
@ -287,4 +281,3 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
/* Module structure exported under the name orte_rmaps_round_robin_module;
 * the only member initialized here is the orte_rmaps_rr_map entry point. */
orte_rmaps_base_module_t orte_rmaps_round_robin_module = {
|
||||
orte_rmaps_rr_map
|
||||
};
|
||||
|
||||
|
@ -493,7 +493,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
|
||||
true, app->num_procs, app->app, orte_process_info.nodename);
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
@ -511,7 +510,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
if (NULL == node->topology || NULL == node->topology->topo) {
|
||||
orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
|
||||
true, node->name);
|
||||
opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
start = 0;
|
||||
@ -550,7 +548,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
/* add this node to the map, if reqd */
|
||||
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
|
||||
if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
|
||||
opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__);
|
||||
ORTE_ERROR_LOG(idx);
|
||||
return idx;
|
||||
}
|
||||
@ -569,18 +566,15 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
/* get the hwloc object */
|
||||
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, (i+start) % nobjs, OPAL_HWLOC_AVAILABLE))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
if (orte_rmaps_base.cpus_per_rank > (int)opal_hwloc_base_get_npus(node->topology->topo, obj)) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true,
|
||||
orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology->topo, obj),
|
||||
orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
|
||||
opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
|
||||
opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
nprocs_mapped++;
|
||||
@ -607,14 +601,12 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
|
||||
true, app->num_procs, app->app, orte_process_info.nodename);
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__);
|
||||
return ORTE_ERR_SILENT;
|
||||
} else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
|
||||
/* if we were explicitly told not to oversubscribe, then don't */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
|
||||
true, app->num_procs, app->app, orte_process_info.nodename);
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
@ -629,7 +621,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
|
||||
if (nprocs_mapped < app->num_procs) {
|
||||
/* usually means there were no objects of the requested type */
|
||||
opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
*
|
||||
@ -48,6 +48,7 @@
|
||||
#include "orte/mca/schizo/base/base.h"
|
||||
#include "orte/util/listener.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/nidmap.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/error_strings.h"
|
||||
#include "orte/orted/pmix/pmix_server.h"
|
||||
@ -216,6 +217,7 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
|
||||
if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
|
||||
/* let the pmix server register params */
|
||||
pmix_server_register_params();
|
||||
orte_util_nidmap_init();
|
||||
OPAL_TIMING_ENV_NEXT(tmng, "pmix_server_register_params");
|
||||
}
|
||||
|
||||
|
@ -74,6 +74,27 @@
|
||||
|
||||
#include "orte/util/nidmap.h"
|
||||
|
||||
/* Storage for the "nidmap_verbose" MCA variable registered in
 * orte_util_nidmap_init(); -1 until a value is supplied. */
static int orte_nidmap_verbosity = -1;
|
||||
/* Handle returned by opal_output_open() in orte_util_nidmap_init(); passed to
 * the opal_output_verbose() calls in this file. -1 until init has run. */
static int orte_nidmap_output = -1;
|
||||
|
||||
/*
 * Set up debug output for the nidmap utilities.
 *
 * Registers the integer MCA variable "nidmap_verbose" (backed by
 * orte_nidmap_verbosity), opens an output stream with opal_output_open()
 * and stores its handle in orte_nidmap_output, then applies the verbosity
 * to that stream when it is greater than zero.
 *
 * Takes no arguments and returns nothing; the result of the variable
 * registration is explicitly discarded.
 *
 * NOTE(review): the handle returned by opal_output_open() is not checked,
 * and each call opens another stream -- confirm this runs only once.
 */
void orte_util_nidmap_init(void)
|
||||
{
|
||||
/* start from the "unset" default before the variable is registered */
orte_nidmap_verbosity = -1;
|
||||
/* the (void) cast drops the return value of the registration call */
(void) mca_base_var_register ("orte", "orte", NULL, "nidmap_verbose",
|
||||
"Verbosity level for ORTE debug messages in the nidmap utilities",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
|
||||
&orte_nidmap_verbosity);
|
||||
|
||||
/* set default output; the handle is kept for the verbose messages in this file */
|
||||
orte_nidmap_output = opal_output_open(NULL);
|
||||
|
||||
/* open up the verbose output for debugging: only a positive verbosity
 * is applied to the stream */
|
||||
if (0 < orte_nidmap_verbosity) {
|
||||
opal_output_set_verbosity(orte_nidmap_output, orte_nidmap_verbosity);
|
||||
}
|
||||
}
|
||||
|
||||
int orte_util_build_daemon_nidmap(void)
|
||||
{
|
||||
int i;
|
||||
@ -585,6 +606,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
|
||||
OBJ_RELEASE(rng);
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&slots);
|
||||
opal_output_verbose(1, orte_nidmap_output,
|
||||
"%s SLOT ASSIGNMENTS: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
|
||||
/* pack the string */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -610,6 +634,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
|
||||
OPAL_LIST_DESTRUCT(&flags);
|
||||
|
||||
/* pack the string */
|
||||
opal_output_verbose(1, orte_nidmap_output,
|
||||
"%s FLAG ASSIGNMENTS: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
@ -652,6 +679,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
|
||||
}
|
||||
if (NULL == rng->t) {
|
||||
/* need to account for NULL topology */
|
||||
opal_output_verbose(1, orte_nidmap_output,
|
||||
"%s PACKING NULL TOPOLOGY",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
tmp2 = NULL;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &tmp2, 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -662,6 +692,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
|
||||
return rc;
|
||||
}
|
||||
} else {
|
||||
opal_output_verbose(1, orte_nidmap_output,
|
||||
"%s PACKING TOPOLOGY: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rng->t->sig);
|
||||
/* pack this topology string */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->sig, 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -685,6 +718,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&topos);
|
||||
/* pack the string */
|
||||
opal_output_verbose(1, orte_nidmap_output,
|
||||
"%s TOPOLOGY ASSIGNMENTS: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&bucket);
|
||||
@ -1011,6 +1047,9 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer)
|
||||
if (NULL == bptr) {
|
||||
/* our topology is first in the array */
|
||||
t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0);
|
||||
opal_output_verbose(1, orte_nidmap_output,
|
||||
"%s ASSIGNING ALL TOPOLOGIES TO: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), t2->sig);
|
||||
for (n=0; n < orte_node_pool->size; n++) {
|
||||
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
|
||||
if (NULL == node->topology) {
|
||||
@ -1077,6 +1116,10 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer)
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n+offset))) {
|
||||
continue;
|
||||
}
|
||||
opal_output_verbose(1, orte_nidmap_output,
|
||||
"%s ASSIGNING NODE %s WITH TOPO: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
node->name, t2->sig);
|
||||
if (NULL == node->topology) {
|
||||
OBJ_RETAIN(t2);
|
||||
node->topology = t2;
|
||||
|
@ -44,6 +44,8 @@ BEGIN_C_DECLS
|
||||
#define ORTE_NON_CONTIG_NODE_CMD 0x02
|
||||
|
||||
|
||||
/**
 * Initialize the debug output used by the nidmap utilities.
 *
 * Registers the "nidmap_verbose" MCA variable and opens the output stream
 * that the nidmap verbose messages are written to. Takes no arguments and
 * returns nothing.
 */
ORTE_DECLSPEC void orte_util_nidmap_init(void);
|
||||
|
||||
ORTE_DECLSPEC int orte_util_nidmap_create(char **regex);
|
||||
ORTE_DECLSPEC int orte_util_nidmap_parse(char *regex);
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user