Based on Tetsuya's patch, with some changes: correct the map-by node case where multiple cpus/rank are requested and this results in a non-integer match with the number of slots. Also correct the tests for whether a binding policy was given so that they use the proper macro.
Refs trac:4296. This commit was SVN r30857. The following Trac tickets were found above: Ticket 4296 --> https://svn.open-mpi.org/trac/ompi/ticket/4296
Этот коммит содержится в:
родитель
4572bd58e5
Коммит
61a21e4f31
@ -496,7 +496,7 @@ static int do_child(orte_app_context_t* context,
|
||||
msg = "failed to convert bitmap list to hwloc bitmap";
|
||||
}
|
||||
if (OPAL_BINDING_REQUIRED(jobdat->map->binding) &&
|
||||
(OPAL_BIND_GIVEN & jobdat->map->binding)) {
|
||||
OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) {
|
||||
/* If binding is required and a binding directive was explicitly
|
||||
* given (i.e., we are not binding due to a default policy),
|
||||
* send an error up the pipe (which exits -- it doesn't return).
|
||||
@ -517,7 +517,7 @@ static int do_child(orte_app_context_t* context,
|
||||
/* bind as specified */
|
||||
rc = hwloc_set_cpubind(opal_hwloc_topology, cpuset, 0);
|
||||
/* if we got an error and this wasn't a default binding policy, then report it */
|
||||
if (rc < 0 && (OPAL_BIND_GIVEN & jobdat->map->binding)) {
|
||||
if (rc < 0 && OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) {
|
||||
char *tmp = NULL;
|
||||
if (errno == ENOSYS) {
|
||||
msg = "hwloc indicates cpu binding not supported";
|
||||
@ -579,7 +579,7 @@ static int do_child(orte_app_context_t* context,
|
||||
* anything unless the user actually specified the binding policy
|
||||
*/
|
||||
rc = opal_hwloc_base_set_process_membind_policy();
|
||||
if (ORTE_SUCCESS != rc && (OPAL_BIND_GIVEN & jobdat->map->binding)) {
|
||||
if (ORTE_SUCCESS != rc && OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) {
|
||||
if (errno == ENOSYS) {
|
||||
msg = "hwloc indicates memory binding not supported";
|
||||
} else if (errno == EXDEV) {
|
||||
|
@ -179,7 +179,7 @@ static int bind_upwards(orte_job_t *jdata,
|
||||
*/
|
||||
if (ncpus < data->num_bound &&
|
||||
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) &&
|
||||
(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) {
|
||||
OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
|
||||
opal_hwloc_base_print_binding(map->binding), node->name,
|
||||
data->num_bound, ncpus);
|
||||
@ -294,7 +294,7 @@ static int bind_downwards(orte_job_t *jdata,
|
||||
*/
|
||||
if (ncpus < data->num_bound &&
|
||||
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
|
||||
if (OPAL_BIND_GIVEN & opal_hwloc_binding_policy) {
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
|
||||
opal_hwloc_base_print_binding(map->binding), node->name,
|
||||
data->num_bound, ncpus);
|
||||
@ -382,8 +382,8 @@ static int bind_in_place(orte_job_t *jdata,
|
||||
*/
|
||||
if (!support->cpubind->set_thisproc_cpubind &&
|
||||
!support->cpubind->set_thisthread_cpubind) {
|
||||
if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy) ||
|
||||
!(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) {
|
||||
if (!OPAL_BINDING_REQUIRED(map->binding) ||
|
||||
!OPAL_BINDING_POLICY_IS_SET(map->binding)) {
|
||||
/* we are not required to bind, so ignore this */
|
||||
continue;
|
||||
}
|
||||
@ -400,7 +400,7 @@ static int bind_in_place(orte_job_t *jdata,
|
||||
*/
|
||||
if (!support->membind->set_thisproc_membind &&
|
||||
!support->membind->set_thisthread_membind &&
|
||||
(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) {
|
||||
OPAL_BINDING_POLICY_IS_SET(map->binding)) {
|
||||
if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
|
||||
membind_warned = true;
|
||||
@ -416,7 +416,7 @@ static int bind_in_place(orte_job_t *jdata,
|
||||
* computing a binding due to our default policy, and no cores are found
|
||||
* on this node, just silently skip it - we will not bind
|
||||
*/
|
||||
if (!(OPAL_BIND_GIVEN & opal_hwloc_binding_policy) &&
|
||||
if (!OPAL_BINDING_POLICY_IS_SET(map->binding) &&
|
||||
HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"Unable to bind-to core by default on node %s as no cores detected",
|
||||
@ -466,7 +466,7 @@ static int bind_in_place(orte_job_t *jdata,
|
||||
*/
|
||||
if (ncpus < data->num_bound &&
|
||||
!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) &&
|
||||
(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) {
|
||||
OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
|
||||
opal_hwloc_base_print_binding(map->binding), node->name,
|
||||
data->num_bound, ncpus);
|
||||
@ -763,8 +763,8 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
|
||||
*/
|
||||
if (!support->cpubind->set_thisproc_cpubind &&
|
||||
!support->cpubind->set_thisthread_cpubind) {
|
||||
if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy) ||
|
||||
!(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) {
|
||||
if (!OPAL_BINDING_REQUIRED(jdata->map->binding) ||
|
||||
!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
/* we are not required to bind, so ignore this */
|
||||
continue;
|
||||
}
|
||||
@ -782,7 +782,7 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
|
||||
*/
|
||||
if (!support->membind->set_thisproc_membind &&
|
||||
!support->membind->set_thisthread_membind &&
|
||||
(OPAL_BIND_GIVEN & opal_hwloc_binding_policy)) {
|
||||
OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
|
||||
membind_warned = true;
|
||||
@ -799,7 +799,7 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
|
||||
* computing a binding due to our default policy, and no cores are found
|
||||
* on this node, just silently skip it - we will not bind
|
||||
*/
|
||||
if (!(OPAL_BIND_GIVEN & opal_hwloc_binding_policy) &&
|
||||
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) &&
|
||||
HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"Unable to bind-to core by default on node %s as no cores detected",
|
||||
|
@ -393,13 +393,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
* bind to those cpus - any other binding policy is an
|
||||
* error
|
||||
*/
|
||||
if (!(OPAL_BIND_GIVEN & OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy))) {
|
||||
if (opal_hwloc_use_hwthreads_as_cpus) {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
|
||||
} else {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
|
||||
}
|
||||
} else {
|
||||
if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
|
||||
if (opal_hwloc_use_hwthreads_as_cpus) {
|
||||
if (OPAL_BIND_TO_HWTHREAD != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "mismatch-binding", true,
|
||||
@ -415,15 +409,21 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
"bind-to core");
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
} else {
|
||||
if (opal_hwloc_use_hwthreads_as_cpus) {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
|
||||
} else {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (orte_rmaps_base_pernode) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||
"--pernode, -pernode", "--map-by node:PPR=1",
|
||||
"--pernode, -pernode", "--map-by ppr:1:node",
|
||||
"rmaps_base_pernode, rmaps_ppr_pernode",
|
||||
"rmaps_base_mapping_policy=node:PPR=1");
|
||||
"rmaps_base_mapping_policy=ppr:1:node");
|
||||
/* there is no way to resolve this conflict, so if something else was
|
||||
* given, we have no choice but to error out
|
||||
*/
|
||||
@ -441,9 +441,9 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
|
||||
if (0 < orte_rmaps_base_n_pernode) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||
"--npernode, -npernode", "--map-by node:PPR=N",
|
||||
"--npernode, -npernode", "--map-by ppr:N:node",
|
||||
"rmaps_base_n_pernode, rmaps_ppr_n_pernode",
|
||||
"rmaps_base_mapping_policy=node:PPR=N");
|
||||
"rmaps_base_mapping_policy=ppr:N:node");
|
||||
/* there is no way to resolve this conflict, so if something else was
|
||||
* given, we have no choice but to error out
|
||||
*/
|
||||
@ -461,9 +461,9 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
|
||||
if (0 < orte_rmaps_base_n_persocket) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||
"--npersocket, -npersocket", "--map-by socket:PPR=N",
|
||||
"--npersocket, -npersocket", "--map-by ppr:N:socket",
|
||||
"rmaps_base_n_persocket, rmaps_ppr_n_persocket",
|
||||
"rmaps_base_mapping_policy=socket:PPR=N");
|
||||
"rmaps_base_mapping_policy=ppr:N:socket");
|
||||
/* there is no way to resolve this conflict, so if something else was
|
||||
* given, we have no choice but to error out
|
||||
*/
|
||||
|
@ -253,6 +253,8 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
|
||||
/* compute how many extra procs to put on each node */
|
||||
balance = (float)(((int)app->num_procs - nprocs_mapped) - (navg * nnodes)) / (float)nnodes;
|
||||
extra_procs_to_assign = (int)balance;
|
||||
nxtra_nodes = 0;
|
||||
add_one = false;
|
||||
if (0 < (balance - (float)extra_procs_to_assign)) {
|
||||
/* compute how many nodes need an extra proc */
|
||||
nxtra_nodes = ((int)app->num_procs - nprocs_mapped) - ((navg + extra_procs_to_assign) * nnodes);
|
||||
@ -289,6 +291,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
|
||||
OBJ_RETAIN(node); /* maintain accounting on object */
|
||||
++(jdata->map->num_nodes);
|
||||
}
|
||||
if (oversubscribed) {
|
||||
/* compute the number of procs to go on this node */
|
||||
if (add_one) {
|
||||
if (0 == nxtra_nodes) {
|
||||
@ -298,9 +301,11 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
|
||||
--nxtra_nodes;
|
||||
}
|
||||
}
|
||||
if (oversubscribed) {
|
||||
/* everybody just takes their share */
|
||||
num_procs_to_assign = navg + extra_procs_to_assign;
|
||||
} else if (node->slots <= node->slots_inuse) {
|
||||
/* since we are not oversubcribed, ignore this node */
|
||||
continue;
|
||||
} else {
|
||||
/* if we are not oversubscribed, then there are enough
|
||||
* slots to handle all the procs. However, not every
|
||||
@ -308,26 +313,22 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
|
||||
* have to track how many procs to "shift" elsewhere
|
||||
* to make up the difference
|
||||
*/
|
||||
if (node->slots <= node->slots_inuse) {
|
||||
/* if there are no extras to take, then we can
|
||||
* ignore this node
|
||||
*/
|
||||
num_procs_to_assign = 0;
|
||||
/* update how many we are lagging behind */
|
||||
lag += navg + extra_procs_to_assign;
|
||||
|
||||
/* compute the number of procs to go on this node */
|
||||
if (add_one) {
|
||||
if (0 == nxtra_nodes) {
|
||||
--extra_procs_to_assign;
|
||||
add_one = false;
|
||||
} else {
|
||||
--nxtra_nodes;
|
||||
}
|
||||
}
|
||||
/* add in the extras */
|
||||
lag += extra_procs_to_assign;
|
||||
/* if slots < avg (adjusted for cpus/proc), then take all */
|
||||
/* if slots < avg (adjusted for cpus/proc), then we can't put anything here */
|
||||
if ((node->slots - node->slots_inuse) < (navg * orte_rmaps_base.cpus_per_rank)) {
|
||||
num_procs_to_assign = (node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank;
|
||||
/* update how many we are lagging behind */
|
||||
lag += navg - num_procs_to_assign;
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output,
|
||||
"%s NODE %s LAGGING %d AVG %d ASSIGN %d EXTRA %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name,
|
||||
lag, navg, num_procs_to_assign, extra_procs_to_assign));
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
/* take the avg plus as much of the "lag" as we can */
|
||||
delta = 0;
|
||||
if (0 < lag) {
|
||||
@ -343,9 +344,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, delta,
|
||||
lag, navg, num_procs_to_assign, extra_procs_to_assign));
|
||||
}
|
||||
}
|
||||
nnodes++; // track how many nodes remain available
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output,
|
||||
"%s NODE %s ASSIGNING %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name,
|
||||
@ -386,6 +385,9 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
|
||||
obj = hwloc_get_root_obj(node->topology);
|
||||
}
|
||||
#endif
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output,
|
||||
"%s ADDING PROC TO NODE %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name));
|
||||
if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user