Correct mapping errors
Since we now support the dynamic addition of hosts to the orte_node_pool, there is no longer any reason to require advance specification of all possible nodes. Instead, use a precedence method to initially allocate only those hosts that were specified on the command line:

* rankfile, if given, as that will specify the nodes
* -host, aggregated across all app_contexts
* -hostfile, aggregated across all app_contexts
* default hostfile
* assign local node

Fix slots_inuse accounting so that the nodes are correctly reset upon error termination - e.g., when oversubscribed without permission.

Ensure we accurately track the user's specified desires for oversubscribe and no-use-local when dynamically spawning jobs.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
(cherry picked from commit c9b3e68ce596a68a2ed2fbf73f211b3334b0a6a8)
Этот коммит содержится в:
родитель
4f13dbc15e
Коммит
cb221b6f6f
@ -209,9 +209,9 @@ static void job_errors(int fd, short args, void *cbdata)
|
||||
ORTE_JOBID_PRINT(jdata->jobid),
|
||||
ORTE_NAME_PRINT(&jdata->originator)));
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&jdata->originator, answer,
|
||||
ORTE_RML_TAG_LAUNCH_RESP,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
&jdata->originator, answer,
|
||||
ORTE_RML_TAG_LAUNCH_RESP,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(answer);
|
||||
}
|
||||
|
@ -235,43 +235,9 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
|
||||
"%s ras:base:allocate nothing found in module - proceeding to hostfile",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* nothing was found, or no active module was alive. Our next
|
||||
* option is to look for a hostfile and assign our global
|
||||
* pool from there.
|
||||
*
|
||||
* Individual hostfile names, if given, are included
|
||||
* in the app_contexts for this job. We therefore need to
|
||||
* retrieve the app_contexts for the job, and then cycle
|
||||
* through them to see if anything is there. The parser will
|
||||
* add the nodes found in each hostfile to our list - i.e.,
|
||||
* the resulting list contains the UNION of all nodes specified
|
||||
* in hostfiles from across all app_contexts
|
||||
*
|
||||
* We then continue to add any hosts provided by dash-host and
|
||||
* the default hostfile, if we have it. We will then filter out
|
||||
* all the non-desired hosts (i.e., those not specified by
|
||||
* -host and/or -hostfile) when we start the mapping process
|
||||
*
|
||||
* Note that any relative node syntax found in the hostfiles will
|
||||
* generate an error in this scenario, so only non-relative syntax
|
||||
* can be present
|
||||
*/
|
||||
if (NULL != orte_default_hostfile) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
|
||||
"%s ras:base:allocate parsing default hostfile %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_default_hostfile));
|
||||
|
||||
/* a default hostfile was provided - parse it */
|
||||
if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
|
||||
orte_default_hostfile))) {
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* nothing was found, or no active module was alive. We first see
|
||||
* if we were given a rankfile - if so, use it as the hosts will be
|
||||
* taken from the mapping */
|
||||
if (NULL != orte_rankfile) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
|
||||
"%s ras:base:allocate parsing rankfile %s",
|
||||
@ -287,85 +253,8 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
|
||||
return;
|
||||
}
|
||||
}
|
||||
for (i=0; i < jdata->apps->size; i++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
|
||||
continue;
|
||||
}
|
||||
if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
|
||||
"%s ras:base:allocate adding hostfile %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
|
||||
|
||||
/* hostfile was specified - parse it and add it to the list */
|
||||
if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, hosts))) {
|
||||
free(hosts);
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
/* set an error event */
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
return;
|
||||
}
|
||||
free(hosts);
|
||||
} else if (!orte_soft_locations &&
|
||||
orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) {
|
||||
/* if we are using soft locations, then any dash-host would
|
||||
* just include desired nodes and not required. We don't want
|
||||
* to pick them up here as this would mean the request was
|
||||
* always satisfied - instead, we want to allow the request
|
||||
* to fail later on and use whatever nodes are actually
|
||||
* available
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
|
||||
"%s ras:base:allocate adding dash_hosts",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, true))) {
|
||||
free(hosts);
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
return;
|
||||
}
|
||||
free(hosts);
|
||||
}
|
||||
}
|
||||
|
||||
/* if something was found in the hostfile(s), we use that as our global
|
||||
* pool - set it and we are done
|
||||
*/
|
||||
if (!opal_list_is_empty(&nodes)) {
|
||||
/* store the results in the global resource pool - this removes the
|
||||
* list items
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
return;
|
||||
}
|
||||
/* cleanup */
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
goto DISPLAY;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
|
||||
"%s ras:base:allocate nothing found in hostfiles - checking for rankfile",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* Our next option is to look for a rankfile - if one was provided, we
|
||||
* will use its nodes to create a default allocation pool
|
||||
*/
|
||||
if (NULL != orte_rankfile) {
|
||||
/* check the rankfile for node information */
|
||||
if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
|
||||
orte_rankfile))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
return ;
|
||||
}
|
||||
}
|
||||
/* if something was found in rankfile, we use that as our global
|
||||
/* if something was found in the rankfile, we use that as our global
|
||||
* pool - set it and we are done
|
||||
*/
|
||||
if (!opal_list_is_empty(&nodes)) {
|
||||
@ -387,9 +276,146 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
|
||||
goto DISPLAY;
|
||||
}
|
||||
|
||||
/* if a dash-host has been provided, aggregate across all the
|
||||
* app_contexts. Any hosts the user wants to add via comm_spawn
|
||||
* can be done so using the add_host option */
|
||||
for (i=0; i < jdata->apps->size; i++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
|
||||
continue;
|
||||
}
|
||||
if (!orte_soft_locations &&
|
||||
orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) {
|
||||
/* if we are using soft locations, then any dash-host would
|
||||
* just include desired nodes and not required. We don't want
|
||||
* to pick them up here as this would mean the request was
|
||||
* always satisfied - instead, we want to allow the request
|
||||
* to fail later on and use whatever nodes are actually
|
||||
* available
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
|
||||
"%s ras:base:allocate adding dash_hosts",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, true))) {
|
||||
free(hosts);
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
return;
|
||||
}
|
||||
free(hosts);
|
||||
}
|
||||
}
|
||||
|
||||
/* if something was found in the dash-host(s), we use that as our global
|
||||
* pool - set it and we are done
|
||||
*/
|
||||
if (!opal_list_is_empty(&nodes)) {
|
||||
/* store the results in the global resource pool - this removes the
|
||||
* list items
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
return;
|
||||
}
|
||||
/* cleanup */
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
goto DISPLAY;
|
||||
}
|
||||
|
||||
/* Our next option is to look for a hostfile and assign our global
|
||||
* pool from there.
|
||||
*
|
||||
* Individual hostfile names, if given, are included
|
||||
* in the app_contexts for this job. We therefore need to
|
||||
* retrieve the app_contexts for the job, and then cycle
|
||||
* through them to see if anything is there. The parser will
|
||||
* add the nodes found in each hostfile to our list - i.e.,
|
||||
* the resulting list contains the UNION of all nodes specified
|
||||
* in hostfiles from across all app_contexts
|
||||
*
|
||||
* Note that any relative node syntax found in the hostfiles will
|
||||
* generate an error in this scenario, so only non-relative syntax
|
||||
* can be present
|
||||
*/
|
||||
for (i=0; i < jdata->apps->size; i++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
|
||||
continue;
|
||||
}
|
||||
if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
|
||||
"%s ras:base:allocate adding hostfile %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
|
||||
|
||||
/* hostfile was specified - parse it and add it to the list */
|
||||
if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, hosts))) {
|
||||
free(hosts);
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
/* set an error event */
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
return;
|
||||
}
|
||||
free(hosts);
|
||||
}
|
||||
}
|
||||
|
||||
/* if something was found in the hostfiles(s), we use that as our global
|
||||
* pool - set it and we are done
|
||||
*/
|
||||
if (!opal_list_is_empty(&nodes)) {
|
||||
/* store the results in the global resource pool - this removes the
|
||||
* list items
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
return;
|
||||
}
|
||||
/* cleanup */
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
goto DISPLAY;
|
||||
}
|
||||
|
||||
/* if nothing was found so far, then look for a default hostfile */
|
||||
if (NULL != orte_default_hostfile) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
|
||||
"%s ras:base:allocate parsing default hostfile %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_default_hostfile));
|
||||
|
||||
/* a default hostfile was provided - parse it */
|
||||
if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
|
||||
orte_default_hostfile))) {
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* if something was found in the default hostfile, we use that as our global
|
||||
* pool - set it and we are done
|
||||
*/
|
||||
if (!opal_list_is_empty(&nodes)) {
|
||||
/* store the results in the global resource pool - this removes the
|
||||
* list items
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
return;
|
||||
}
|
||||
/* cleanup */
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
goto DISPLAY;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
|
||||
"%s ras:base:allocate nothing found in rankfile - inserting current node",
|
||||
"%s ras:base:allocate nothing found in hostfiles - inserting current node",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
addlocal:
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -190,19 +190,23 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
||||
}
|
||||
}
|
||||
/* check for oversubscribe directives */
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
} else {
|
||||
/* pass along the directive */
|
||||
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
} else {
|
||||
ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
/* pass along the directive */
|
||||
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
} else {
|
||||
ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
}
|
||||
}
|
||||
}
|
||||
/* check for no-use-local directive */
|
||||
if (ORTE_MAPPING_NO_USE_LOCAL & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL);
|
||||
if (!(ORTE_MAPPING_LOCAL_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
||||
if (ORTE_MAPPING_NO_USE_LOCAL & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL);
|
||||
}
|
||||
}
|
||||
/* ditto for rank policy */
|
||||
if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) {
|
||||
|
@ -190,49 +190,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
||||
return rc;
|
||||
}
|
||||
free(hosts);
|
||||
} else if (NULL != orte_rankfile) {
|
||||
/* use the rankfile, if provided */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
|
||||
"%s using rankfile %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_rankfile));
|
||||
if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
|
||||
orte_rankfile))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if (0 == opal_list_get_size(&nodes)) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
|
||||
"%s nothing found in given rankfile",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
} else if (NULL != orte_default_hostfile) {
|
||||
/* fall back to the default hostfile, if provided */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
|
||||
"%s using default hostfile %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_default_hostfile));
|
||||
if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
|
||||
orte_default_hostfile))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* this is a special case - we always install a default
|
||||
* hostfile, but it is empty. If the user didn't remove it
|
||||
* or put something into it, then we will have pursued that
|
||||
* option and found nothing. This isn't an error, we just need
|
||||
* to add all the known nodes
|
||||
*/
|
||||
if (0 == opal_list_get_size(&nodes)) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
|
||||
"%s nothing in default hostfile - using known nodes",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
goto addknown;
|
||||
}
|
||||
} else {
|
||||
/* if nothing else was available, then use all known nodes, which
|
||||
/* if nothing else was specified by the app, then use all known nodes, which
|
||||
* will include ourselves
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
|
||||
@ -585,9 +544,7 @@ orte_proc_t* orte_rmaps_base_setup_proc(orte_job_t *jdata,
|
||||
* available slots - otherwise, it does */
|
||||
if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
|
||||
node->num_procs++;
|
||||
if (node->slots_inuse < node->slots) {
|
||||
++node->slots_inuse;
|
||||
}
|
||||
++node->slots_inuse;
|
||||
}
|
||||
if (0 > (rc = opal_pointer_array_add(node->procs, (void*)proc))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -357,7 +357,7 @@ static int ppr_mapper(orte_job_t *jdata)
|
||||
/* if we weren't given a directive either way, then we will error out
|
||||
* as the #slots were specifically given, either by the host RM or
|
||||
* via hostfile/dash-host */
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
|
||||
true, app->num_procs, app->app);
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -89,7 +89,9 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_job_map_t);
|
||||
#define ORTE_MAPPING_SUBSCRIBE_GIVEN 0x0400
|
||||
#define ORTE_MAPPING_SPAN 0x0800
|
||||
/* an error flag */
|
||||
#define ORTE_MAPPING_CONFLICTED 0x2000
|
||||
#define ORTE_MAPPING_CONFLICTED 0x1000
|
||||
/* directives given */
|
||||
#define ORTE_MAPPING_LOCAL_GIVEN 0x2000
|
||||
#define ORTE_MAPPING_GIVEN 0x4000
|
||||
/* mapping a debugger job */
|
||||
#define ORTE_MAPPING_DEBUGGER 0x8000
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -184,7 +184,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
|
||||
/* if we weren't given a directive either way, then we will error out
|
||||
* as the #slots were specifically given, either by the host RM or
|
||||
* via hostfile/dash-host */
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
|
||||
true, app->num_procs, app->app, orte_process_info.nodename);
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
@ -366,7 +366,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
|
||||
/* if we weren't given a directive either way, then we will error out
|
||||
* as the #slots were specifically given, either by the host RM or
|
||||
* via hostfile/dash-host */
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
|
||||
true, app->num_procs, app->app, orte_process_info.nodename);
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
@ -584,7 +584,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
/* if we weren't given a directive either way, then we will error out
|
||||
* as the #slots were specifically given, either by the host RM or
|
||||
* via hostfile/dash-host */
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
|
||||
true, app->num_procs, app->app, orte_process_info.nodename);
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016 IBM Corporation. All rights reserved.
|
||||
@ -418,7 +418,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
|
||||
/* if we weren't given a directive either way, then we will error out
|
||||
* as the #slots were specifically given, either by the host RM or
|
||||
* via hostfile/dash-host */
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
|
||||
true, app->num_procs, app->app);
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
@ -333,8 +333,13 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor,
|
||||
/*** NO USE LOCAL ***/
|
||||
} else if (0 == strcmp(info->key, OPAL_PMIX_NO_PROCS_ON_HEAD)) {
|
||||
OPAL_CHECK_BOOL(info, flag);
|
||||
orte_set_attribute(&jdata->attributes, ORTE_MAPPING_NO_USE_LOCAL,
|
||||
ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL);
|
||||
if (flag) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL);
|
||||
} else {
|
||||
ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL);
|
||||
}
|
||||
/* mark that the user specified it */
|
||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_LOCAL_GIVEN);
|
||||
|
||||
/*** OVERSUBSCRIBE ***/
|
||||
} else if (0 == strcmp(info->key, OPAL_PMIX_NO_OVERSUBSCRIBE)) {
|
||||
@ -344,6 +349,8 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor,
|
||||
} else {
|
||||
ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
}
|
||||
/* mark that the user specified it */
|
||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
|
||||
|
||||
/*** REPORT BINDINGS ***/
|
||||
} else if (0 == strcmp(info->key, OPAL_PMIX_REPORT_BINDINGS)) {
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user