1
1

Based on Tetsuya's patch, with some changes: correct the map-by node case where multiple cpus/rank are requested and the result is a non-integer match with the number of slots. Also correct the tests for whether a binding policy was given so that they use the proper macro.

Refs trac:4296

This commit was SVN r30857.

The following Trac tickets were found above:
  Ticket 4296 --> https://svn.open-mpi.org/trac/ompi/ticket/4296
Этот коммит содержится в:
Ralph Castain 2014-02-26 18:12:23 +00:00
родитель 4572bd58e5
Коммит 61a21e4f31
4 изменённых файлов: 72 добавлений и 70 удалений

Просмотреть файл

@ -496,7 +496,7 @@ static int do_child(orte_app_context_t* context,
msg = "failed to convert bitmap list to hwloc bitmap"; msg = "failed to convert bitmap list to hwloc bitmap";
} }
if (OPAL_BINDING_REQUIRED(jobdat->map->binding) && if (OPAL_BINDING_REQUIRED(jobdat->map->binding) &&
(OPAL_BIND_GIVEN & jobdat->map->binding)) { OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) {
/* If binding is required and a binding directive was explicitly /* If binding is required and a binding directive was explicitly
* given (i.e., we are not binding due to a default policy), * given (i.e., we are not binding due to a default policy),
* send an error up the pipe (which exits -- it doesn't return). * send an error up the pipe (which exits -- it doesn't return).
@ -517,7 +517,7 @@ static int do_child(orte_app_context_t* context,
/* bind as specified */ /* bind as specified */
rc = hwloc_set_cpubind(opal_hwloc_topology, cpuset, 0); rc = hwloc_set_cpubind(opal_hwloc_topology, cpuset, 0);
/* if we got an error and this wasn't a default binding policy, then report it */ /* if we got an error and this wasn't a default binding policy, then report it */
if (rc < 0 && (OPAL_BIND_GIVEN & jobdat->map->binding)) { if (rc < 0 && OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) {
char *tmp = NULL; char *tmp = NULL;
if (errno == ENOSYS) { if (errno == ENOSYS) {
msg = "hwloc indicates cpu binding not supported"; msg = "hwloc indicates cpu binding not supported";
@ -579,7 +579,7 @@ static int do_child(orte_app_context_t* context,
* anything unless the user actually specified the binding policy * anything unless the user actually specified the binding policy
*/ */
rc = opal_hwloc_base_set_process_membind_policy(); rc = opal_hwloc_base_set_process_membind_policy();
if (ORTE_SUCCESS != rc && (OPAL_BIND_GIVEN & jobdat->map->binding)) { if (ORTE_SUCCESS != rc && OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) {
if (errno == ENOSYS) { if (errno == ENOSYS) {
msg = "hwloc indicates memory binding not supported"; msg = "hwloc indicates memory binding not supported";
} else if (errno == EXDEV) { } else if (errno == EXDEV) {

Просмотреть файл

@ -179,7 +179,7 @@ static int bind_upwards(orte_job_t *jdata,
*/ */
if (ncpus < data->num_bound && if (ncpus < data->num_bound &&
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) && !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) &&
(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
opal_hwloc_base_print_binding(map->binding), node->name, opal_hwloc_base_print_binding(map->binding), node->name,
data->num_bound, ncpus); data->num_bound, ncpus);
@ -294,7 +294,7 @@ static int bind_downwards(orte_job_t *jdata,
*/ */
if (ncpus < data->num_bound && if (ncpus < data->num_bound &&
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) { !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
if (OPAL_BIND_GIVEN & opal_hwloc_binding_policy) { if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
opal_hwloc_base_print_binding(map->binding), node->name, opal_hwloc_base_print_binding(map->binding), node->name,
data->num_bound, ncpus); data->num_bound, ncpus);
@ -382,8 +382,8 @@ static int bind_in_place(orte_job_t *jdata,
*/ */
if (!support->cpubind->set_thisproc_cpubind && if (!support->cpubind->set_thisproc_cpubind &&
!support->cpubind->set_thisthread_cpubind) { !support->cpubind->set_thisthread_cpubind) {
if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy) || if (!OPAL_BINDING_REQUIRED(map->binding) ||
!(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { !OPAL_BINDING_POLICY_IS_SET(map->binding)) {
/* we are not required to bind, so ignore this */ /* we are not required to bind, so ignore this */
continue; continue;
} }
@ -400,7 +400,7 @@ static int bind_in_place(orte_job_t *jdata,
*/ */
if (!support->membind->set_thisproc_membind && if (!support->membind->set_thisproc_membind &&
!support->membind->set_thisthread_membind && !support->membind->set_thisthread_membind &&
(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { OPAL_BINDING_POLICY_IS_SET(map->binding)) {
if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
membind_warned = true; membind_warned = true;
@ -416,7 +416,7 @@ static int bind_in_place(orte_job_t *jdata,
* computing a binding due to our default policy, and no cores are found * computing a binding due to our default policy, and no cores are found
* on this node, just silently skip it - we will not bind * on this node, just silently skip it - we will not bind
*/ */
if (!(OPAL_BIND_GIVEN & opal_hwloc_binding_policy) && if (!OPAL_BINDING_POLICY_IS_SET(map->binding) &&
HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) { HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output, opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"Unable to bind-to core by default on node %s as no cores detected", "Unable to bind-to core by default on node %s as no cores detected",
@ -466,7 +466,7 @@ static int bind_in_place(orte_job_t *jdata,
*/ */
if (ncpus < data->num_bound && if (ncpus < data->num_bound &&
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) && !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) &&
(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
opal_hwloc_base_print_binding(map->binding), node->name, opal_hwloc_base_print_binding(map->binding), node->name,
data->num_bound, ncpus); data->num_bound, ncpus);
@ -763,8 +763,8 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
*/ */
if (!support->cpubind->set_thisproc_cpubind && if (!support->cpubind->set_thisproc_cpubind &&
!support->cpubind->set_thisthread_cpubind) { !support->cpubind->set_thisthread_cpubind) {
if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy) || if (!OPAL_BINDING_REQUIRED(jdata->map->binding) ||
!(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { !OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
/* we are not required to bind, so ignore this */ /* we are not required to bind, so ignore this */
continue; continue;
} }
@ -782,7 +782,7 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
*/ */
if (!support->membind->set_thisproc_membind && if (!support->membind->set_thisproc_membind &&
!support->membind->set_thisthread_membind && !support->membind->set_thisthread_membind &&
(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) { OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
membind_warned = true; membind_warned = true;
@ -799,7 +799,7 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
* computing a binding due to our default policy, and no cores are found * computing a binding due to our default policy, and no cores are found
* on this node, just silently skip it - we will not bind * on this node, just silently skip it - we will not bind
*/ */
if (!(OPAL_BIND_GIVEN & opal_hwloc_binding_policy) && if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) &&
HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) { HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output, opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"Unable to bind-to core by default on node %s as no cores detected", "Unable to bind-to core by default on node %s as no cores detected",

Просмотреть файл

@ -393,13 +393,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
* bind to those cpus - any other binding policy is an * bind to those cpus - any other binding policy is an
* error * error
*/ */
if (!(OPAL_BIND_GIVEN & OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy))) { if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
if (opal_hwloc_use_hwthreads_as_cpus) {
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
} else {
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
}
} else {
if (opal_hwloc_use_hwthreads_as_cpus) { if (opal_hwloc_use_hwthreads_as_cpus) {
if (OPAL_BIND_TO_HWTHREAD != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { if (OPAL_BIND_TO_HWTHREAD != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
orte_show_help("help-orte-rmaps-base.txt", "mismatch-binding", true, orte_show_help("help-orte-rmaps-base.txt", "mismatch-binding", true,
@ -415,15 +409,21 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
"bind-to core"); "bind-to core");
return ORTE_ERR_SILENT; return ORTE_ERR_SILENT;
} }
} else {
if (opal_hwloc_use_hwthreads_as_cpus) {
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
} else {
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
}
} }
#endif #endif
} }
if (orte_rmaps_base_pernode) { if (orte_rmaps_base_pernode) {
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true, orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
"--pernode, -pernode", "--map-by node:PPR=1", "--pernode, -pernode", "--map-by ppr:1:node",
"rmaps_base_pernode, rmaps_ppr_pernode", "rmaps_base_pernode, rmaps_ppr_pernode",
"rmaps_base_mapping_policy=node:PPR=1"); "rmaps_base_mapping_policy=ppr:1:node");
/* there is no way to resolve this conflict, so if something else was /* there is no way to resolve this conflict, so if something else was
* given, we have no choice but to error out * given, we have no choice but to error out
*/ */
@ -441,9 +441,9 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
if (0 < orte_rmaps_base_n_pernode) { if (0 < orte_rmaps_base_n_pernode) {
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true, orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
"--npernode, -npernode", "--map-by node:PPR=N", "--npernode, -npernode", "--map-by ppr:N:node",
"rmaps_base_n_pernode, rmaps_ppr_n_pernode", "rmaps_base_n_pernode, rmaps_ppr_n_pernode",
"rmaps_base_mapping_policy=node:PPR=N"); "rmaps_base_mapping_policy=ppr:N:node");
/* there is no way to resolve this conflict, so if something else was /* there is no way to resolve this conflict, so if something else was
* given, we have no choice but to error out * given, we have no choice but to error out
*/ */
@ -461,9 +461,9 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
if (0 < orte_rmaps_base_n_persocket) { if (0 < orte_rmaps_base_n_persocket) {
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true, orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
"--npersocket, -npersocket", "--map-by socket:PPR=N", "--npersocket, -npersocket", "--map-by ppr:N:socket",
"rmaps_base_n_persocket, rmaps_ppr_n_persocket", "rmaps_base_n_persocket, rmaps_ppr_n_persocket",
"rmaps_base_mapping_policy=socket:PPR=N"); "rmaps_base_mapping_policy=ppr:N:socket");
/* there is no way to resolve this conflict, so if something else was /* there is no way to resolve this conflict, so if something else was
* given, we have no choice but to error out * given, we have no choice but to error out
*/ */

Просмотреть файл

@ -253,6 +253,8 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
/* compute how many extra procs to put on each node */ /* compute how many extra procs to put on each node */
balance = (float)(((int)app->num_procs - nprocs_mapped) - (navg * nnodes)) / (float)nnodes; balance = (float)(((int)app->num_procs - nprocs_mapped) - (navg * nnodes)) / (float)nnodes;
extra_procs_to_assign = (int)balance; extra_procs_to_assign = (int)balance;
nxtra_nodes = 0;
add_one = false;
if (0 < (balance - (float)extra_procs_to_assign)) { if (0 < (balance - (float)extra_procs_to_assign)) {
/* compute how many nodes need an extra proc */ /* compute how many nodes need an extra proc */
nxtra_nodes = ((int)app->num_procs - nprocs_mapped) - ((navg + extra_procs_to_assign) * nnodes); nxtra_nodes = ((int)app->num_procs - nprocs_mapped) - ((navg + extra_procs_to_assign) * nnodes);
@ -289,18 +291,21 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
OBJ_RETAIN(node); /* maintain accounting on object */ OBJ_RETAIN(node); /* maintain accounting on object */
++(jdata->map->num_nodes); ++(jdata->map->num_nodes);
} }
/* compute the number of procs to go on this node */
if (add_one) {
if (0 == nxtra_nodes) {
--extra_procs_to_assign;
add_one = false;
} else {
--nxtra_nodes;
}
}
if (oversubscribed) { if (oversubscribed) {
/* compute the number of procs to go on this node */
if (add_one) {
if (0 == nxtra_nodes) {
--extra_procs_to_assign;
add_one = false;
} else {
--nxtra_nodes;
}
}
/* everybody just takes their share */ /* everybody just takes their share */
num_procs_to_assign = navg + extra_procs_to_assign; num_procs_to_assign = navg + extra_procs_to_assign;
} else if (node->slots <= node->slots_inuse) {
/* since we are not oversubscribed, ignore this node */
continue;
} else { } else {
/* if we are not oversubscribed, then there are enough /* if we are not oversubscribed, then there are enough
* slots to handle all the procs. However, not every * slots to handle all the procs. However, not every
@ -308,44 +313,38 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
* have to track how many procs to "shift" elsewhere * have to track how many procs to "shift" elsewhere
* to make up the difference * to make up the difference
*/ */
if (node->slots <= node->slots_inuse) {
/* if there are no extras to take, then we can /* compute the number of procs to go on this node */
* ignore this node if (add_one) {
*/ if (0 == nxtra_nodes) {
num_procs_to_assign = 0; --extra_procs_to_assign;
/* update how many we are lagging behind */ add_one = false;
lag += navg + extra_procs_to_assign;
} else {
/* add in the extras */
lag += extra_procs_to_assign;
/* if slots < avg (adjusted for cpus/proc), then take all */
if ((node->slots - node->slots_inuse) < (navg * orte_rmaps_base.cpus_per_rank)) {
num_procs_to_assign = (node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank;
/* update how many we are lagging behind */
lag += navg - num_procs_to_assign;
OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output,
"%s NODE %s LAGGING %d AVG %d ASSIGN %d EXTRA %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name,
lag, navg, num_procs_to_assign, extra_procs_to_assign));
} else { } else {
/* take the avg plus as much of the "lag" as we can */ --nxtra_nodes;
delta = 0;
if (0 < lag) {
delta = ((node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank) - navg;
if (lag < delta) {
delta = lag;
}
lag -= delta;
}
num_procs_to_assign = navg + delta;
OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output,
"%s NODE %s DELTA %d LAGGING %d AVG %d ASSIGN %d EXTRA %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, delta,
lag, navg, num_procs_to_assign, extra_procs_to_assign));
} }
} }
nnodes++; // track how many nodes remain available /* add in the extras */
lag += extra_procs_to_assign;
/* if slots < avg (adjusted for cpus/proc), then we can't put anything here */
if ((node->slots - node->slots_inuse) < (navg * orte_rmaps_base.cpus_per_rank)) {
continue;
}
/* take the avg plus as much of the "lag" as we can */
delta = 0;
if (0 < lag) {
delta = ((node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank) - navg;
if (lag < delta) {
delta = lag;
}
lag -= delta;
}
num_procs_to_assign = navg + delta;
OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output,
"%s NODE %s DELTA %d LAGGING %d AVG %d ASSIGN %d EXTRA %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, delta,
lag, navg, num_procs_to_assign, extra_procs_to_assign));
} }
nnodes++; // track how many nodes remain available
OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output,
"%s NODE %s ASSIGNING %d", "%s NODE %s ASSIGNING %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name,
@ -386,6 +385,9 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
obj = hwloc_get_root_obj(node->topology); obj = hwloc_get_root_obj(node->topology);
} }
#endif #endif
OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output,
"%s ADDING PROC TO NODE %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name));
if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) { if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }