Bring the modified/updated pernode and npernode behaviors over from the openrte repository. This change enables npernode to pay attention to the total #procs to be launched, and cleans up the bynode vs. byslot mapping directives when in pernode and npernode modes.
This commit was SVN r13191.
Этот коммит содержится в:
родитель
4ef4cbb5ad
Коммит
455e4ada9a
@ -55,6 +55,8 @@ extern "C" {
|
||||
opal_list_t rmaps_opened;
|
||||
/** Sorted list of available components (highest priority first) */
|
||||
opal_list_t rmaps_available;
|
||||
/* user specified mapping policy? */
|
||||
bool user_specified_policy;
|
||||
/* map by node or not */
|
||||
bool bynode;
|
||||
/** whether or not we allow oversubscription of nodes */
|
||||
|
@ -113,24 +113,26 @@ int orte_rmaps_base_map_job(orte_jobid_t job, opal_list_t *incoming_attributes)
|
||||
}
|
||||
|
||||
|
||||
/* check the mapping policy */
|
||||
if (orte_rmaps_base.bynode) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_MAP_POLICY,
|
||||
ORTE_STRING, "bynode",
|
||||
ORTE_RMGR_ATTR_NO_OVERRIDE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEANUP;
|
||||
/* check the mapping policy - set it IFF the user specified it in the environment */
|
||||
if (orte_rmaps_base.user_specified_policy) {
|
||||
if (orte_rmaps_base.bynode) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_MAP_POLICY,
|
||||
ORTE_STRING, "bynode",
|
||||
ORTE_RMGR_ATTR_NO_OVERRIDE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEANUP;
|
||||
}
|
||||
} else {
|
||||
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_MAP_POLICY,
|
||||
ORTE_STRING, "byslot",
|
||||
ORTE_RMGR_ATTR_NO_OVERRIDE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEANUP;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_MAP_POLICY,
|
||||
ORTE_STRING, "byslot",
|
||||
ORTE_RMGR_ATTR_NO_OVERRIDE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEANUP;
|
||||
}
|
||||
}
|
||||
|
||||
/* check pernode - add it if it was set by the environment. Note that this
|
||||
/* check pernode - add it if it was set by the environment. Note that this
|
||||
* attribute only cares if it exists - its value is irrelevant and hence
|
||||
* not provided
|
||||
*/
|
||||
|
@ -87,13 +87,18 @@ int orte_rmaps_base_open(void)
|
||||
|
||||
param = mca_base_param_reg_string_name("rmaps", "base_schedule_policy",
|
||||
"Scheduling Policy for RMAPS. [slot | node]",
|
||||
false, false, "slot", &policy);
|
||||
false, false, "unspec", &policy);
|
||||
|
||||
opal_output(orte_rmaps_base.rmaps_output, "Scheduling policy: %s", policy);
|
||||
|
||||
if (0 == strcmp(policy, "node")) {
|
||||
orte_rmaps_base.bynode = true;
|
||||
if (0 == strcmp(policy, "unspec")) {
|
||||
orte_rmaps_base.user_specified_policy = false;
|
||||
orte_rmaps_base.bynode = false; /* default to byslot */
|
||||
} else if (0 == strcmp(policy, "node")) {
|
||||
orte_rmaps_base.user_specified_policy = true;
|
||||
orte_rmaps_base.bynode = true;
|
||||
} else {
|
||||
orte_rmaps_base.user_specified_policy = true;
|
||||
orte_rmaps_base.bynode = false;
|
||||
}
|
||||
|
||||
|
@ -36,3 +36,12 @@ There are not enough nodes in your allocation to satisfy your request to launch
|
||||
%d processes on a per-node basis - only %d nodes were available.
|
||||
|
||||
Either request fewer processes, or obtain a larger allocation.
|
||||
[orte-rmaps-rr:n-per-node-and-too-many-procs]
|
||||
There are not enough nodes in your allocation to satisfy your request to launch
|
||||
%d processes on a %d per-node basis - only %d nodes with a total of %d slots were available.
|
||||
|
||||
Either request fewer processes, or obtain a larger allocation.
|
||||
[orte-rmaps-rr:n-per-node-and-not-enough-slots]
|
||||
There are not enough slots on the nodes in your allocation to satisfy your request to launch on a %d process-per-node basis - only %d slots/node were available.
|
||||
|
||||
Either request fewer processes/node, or obtain a larger allocation.
|
||||
|
@ -219,6 +219,13 @@ static int map_app_by_slot(
|
||||
num_slots_to_take = node->node_slots - node->node_slots_inuse;
|
||||
}
|
||||
|
||||
/* check if we are in npernode mode - if so, then set the num_slots_to_take
|
||||
* to the num_per_node
|
||||
*/
|
||||
if (mca_rmaps_round_robin_component.n_per_node) {
|
||||
num_slots_to_take = num_per_node;
|
||||
}
|
||||
|
||||
for( i = 0; i < num_slots_to_take; ++i) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, app->idx,
|
||||
nodes, max_used_nodes,
|
||||
@ -269,25 +276,19 @@ static int orte_rmaps_rr_process_attrs(opal_list_t *attributes)
|
||||
char *policy;
|
||||
orte_attribute_t *attr;
|
||||
orte_std_cntr_t *scptr;
|
||||
bool policy_override;
|
||||
|
||||
mca_rmaps_round_robin_component.bynode = false; /* set default mapping policy */
|
||||
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_MAP_POLICY))) {
|
||||
/* they specified a mapping policy - extract its name */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&policy, attr->value, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if (0 == strcmp(policy, "bynode")) {
|
||||
mca_rmaps_round_robin_component.bynode = true;
|
||||
}
|
||||
}
|
||||
|
||||
mca_rmaps_round_robin_component.bynode = false; /* set default mapping policy to byslot*/
|
||||
policy_override = false;
|
||||
|
||||
mca_rmaps_round_robin_component.per_node = false;
|
||||
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_PERNODE))) {
|
||||
/* was provided - set boolean accordingly */
|
||||
mca_rmaps_round_robin_component.per_node = true;
|
||||
/* indicate that we are going to map this job bynode */
|
||||
mca_rmaps_round_robin_component.bynode = true;
|
||||
/* indicate that this is to *be* the policy no matter what */
|
||||
policy_override = true;
|
||||
}
|
||||
|
||||
mca_rmaps_round_robin_component.n_per_node = false;
|
||||
@ -300,8 +301,28 @@ static int orte_rmaps_rr_process_attrs(opal_list_t *attributes)
|
||||
return rc;
|
||||
}
|
||||
num_per_node = *scptr;
|
||||
/* indicate that we are going to map this job bynode */
|
||||
mca_rmaps_round_robin_component.bynode = true;
|
||||
/* default to byslot mapping */
|
||||
mca_rmaps_round_robin_component.bynode = false;
|
||||
}
|
||||
|
||||
/* define the mapping policy. This *must* come after we process the pernode
|
||||
* options since those set a default mapping policy - we want to be able
|
||||
* to override that setting if requested
|
||||
*
|
||||
* NOTE: we don't do this step if the policy_override has been set!
|
||||
*/
|
||||
if (!policy_override &&
|
||||
NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_MAP_POLICY))) {
|
||||
/* they specified a mapping policy - extract its name */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&policy, attr->value, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if (0 == strcmp(policy, "bynode")) {
|
||||
mca_rmaps_round_robin_component.bynode = true;
|
||||
} else {
|
||||
mca_rmaps_round_robin_component.bynode = false;
|
||||
}
|
||||
}
|
||||
|
||||
mca_rmaps_round_robin_component.no_use_local = false;
|
||||
@ -344,6 +365,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes)
|
||||
bool modify_app_context = false;
|
||||
char *sptr;
|
||||
orte_attribute_t *attr;
|
||||
orte_std_cntr_t slots_per_node;
|
||||
|
||||
OPAL_TRACE(1);
|
||||
|
||||
@ -492,9 +514,31 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes)
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
} else if (mca_rmaps_round_robin_component.n_per_node) {
|
||||
/* set the num_procs to equal the specified num/node * the number of nodes */
|
||||
app->num_procs = num_per_node * num_nodes;
|
||||
modify_app_context = true;
|
||||
/* first, let's check to see if there are enough slots/node to
|
||||
* meet the request - error out if not
|
||||
*/
|
||||
slots_per_node = num_slots / num_nodes;
|
||||
if (num_per_node > slots_per_node) {
|
||||
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:n-per-node-and-not-enough-slots",
|
||||
true, num_per_node, slots_per_node, NULL);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* there are three use-cases that we need to deal with:
|
||||
* (a) if -np was not provided, then we just use the n/node * #nodes
|
||||
* (b) if -np was provided AND #procs > (n/node * #nodes), then error out
|
||||
* (c) if -np was provided AND #procs <= (n/node * #nodes), then launch
|
||||
* the specified #procs n/node. In this case, we just
|
||||
* leave app->num_procs alone
|
||||
*/
|
||||
if (0 == app->num_procs) {
|
||||
/* set the num_procs to equal the specified num/node * the number of nodes */
|
||||
app->num_procs = num_per_node * num_nodes;
|
||||
modify_app_context = true;
|
||||
} else if (app->num_procs > (num_per_node * num_nodes)) {
|
||||
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:n-per-node-and-too-many-procs",
|
||||
true, app->num_procs, num_per_node, num_nodes, num_slots, NULL);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
} else if (0 == app->num_procs) {
|
||||
/** set the num_procs to equal the number of slots on these mapped nodes - if
|
||||
user has specified "-bynode", then set it to the number of nodes
|
||||
|
@ -412,8 +412,10 @@ int orterun(int argc, char *argv[])
|
||||
OBJ_DESTRUCT(&attributes);
|
||||
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
/* JMS show_help */
|
||||
opal_output(0, "%s: spawn failed with errno=%d\n", orterun_basename, rc);
|
||||
/* JMS show_help unless it is ERR_SILENT */
|
||||
if (ORTE_ERR_SILENT != rc) {
|
||||
opal_output(0, "%s: spawn failed with errno=%d\n", orterun_basename, rc);
|
||||
}
|
||||
} else {
|
||||
|
||||
if (orterun_globals.do_not_launch) {
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user