Support -cpu-set by binding processes to the specified CPUs when no other binding directive is given. This allows users to subdivide nodes among multiple parallel mpirun invocations.
This commit was SVN r26186.
Этот коммит содержится в:
родитель
8f8dde166e
Коммит
ce0caf7567
@ -256,6 +256,16 @@ int opal_hwloc_base_open(void)
|
||||
mca_base_param_reg_string_name("hwloc", "base_cpu_set",
|
||||
"Comma-separated list of ranges specifying logical cpus allocated to this job [default: none]",
|
||||
false, false, NULL, &opal_hwloc_base_cpu_set);
|
||||
if (NULL != opal_hwloc_base_cpu_set) {
|
||||
if (!OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
|
||||
/* it is okay if a binding policy was already given - just ensure that
|
||||
* we do bind to the given cpus if provided, otherwise this would be
|
||||
* ignored if someone didn't also specify a binding policy
|
||||
*/
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CPUSET);
|
||||
opal_hwloc_binding_policy |= OPAL_BIND_GIVEN;
|
||||
}
|
||||
}
|
||||
|
||||
/* to support tools such as ompi_info, add the components
|
||||
* to a list
|
||||
|
@ -79,43 +79,48 @@ static int bind_upwards(orte_job_t *jdata,
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
|
||||
continue;
|
||||
}
|
||||
support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);
|
||||
/* check if topology supports cpubind - have to be careful here
|
||||
* as Linux doesn't currently support thread-level binding. This
|
||||
* may change in the future, though, and it isn't clear how hwloc
|
||||
* interprets the current behavior. So check both flags to be sure.
|
||||
*/
|
||||
if (!support->cpubind->set_thisproc_cpubind &&
|
||||
!support->cpubind->set_thisthread_cpubind) {
|
||||
if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) {
|
||||
/* we are not required to bind, so ignore this */
|
||||
continue;
|
||||
}
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
|
||||
if (NULL != nbound) {
|
||||
free(nbound);
|
||||
}
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* check if topology supports membind - have to be careful here
|
||||
* as hwloc treats this differently than I (at least) would have
|
||||
* expected. Per hwloc, Linux memory binding is at the thread,
|
||||
* and not process, level. Thus, hwloc sets the "thisproc" flag
|
||||
* to "false" on all Linux systems, and uses the "thisthread" flag
|
||||
* to indicate binding capability
|
||||
*/
|
||||
if (!support->membind->set_thisproc_membind &&
|
||||
!support->membind->set_thisthread_membind) {
|
||||
if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
|
||||
membind_warned = true;
|
||||
} else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
|
||||
if (!orte_do_not_launch) {
|
||||
/* if we don't want to launch, then we are just testing the system,
|
||||
* so ignore questions about support capabilities
|
||||
*/
|
||||
support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);
|
||||
/* check if topology supports cpubind - have to be careful here
|
||||
* as Linux doesn't currently support thread-level binding. This
|
||||
* may change in the future, though, and it isn't clear how hwloc
|
||||
* interprets the current behavior. So check both flags to be sure.
|
||||
*/
|
||||
if (!support->cpubind->set_thisproc_cpubind &&
|
||||
!support->cpubind->set_thisthread_cpubind) {
|
||||
if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) {
|
||||
/* we are not required to bind, so ignore this */
|
||||
continue;
|
||||
}
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
|
||||
if (NULL != nbound) {
|
||||
free(nbound);
|
||||
}
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* check if topology supports membind - have to be careful here
|
||||
* as hwloc treats this differently than I (at least) would have
|
||||
* expected. Per hwloc, Linux memory binding is at the thread,
|
||||
* and not process, level. Thus, hwloc sets the "thisproc" flag
|
||||
* to "false" on all Linux systems, and uses the "thisthread" flag
|
||||
* to indicate binding capability
|
||||
*/
|
||||
if (!support->membind->set_thisproc_membind &&
|
||||
!support->membind->set_thisthread_membind) {
|
||||
if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
|
||||
membind_warned = true;
|
||||
} else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
|
||||
if (NULL != nbound) {
|
||||
free(nbound);
|
||||
}
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* get the number of objects of this type on this node */
|
||||
@ -246,43 +251,48 @@ static int bind_downwards(orte_job_t *jdata,
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
|
||||
continue;
|
||||
}
|
||||
support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);
|
||||
/* check if topology supports cpubind - have to be careful here
|
||||
* as Linux doesn't currently support thread-level binding. This
|
||||
* may change in the future, though, and it isn't clear how hwloc
|
||||
* interprets the current behavior. So check both flags to be sure.
|
||||
*/
|
||||
if (!support->cpubind->set_thisproc_cpubind &&
|
||||
!support->cpubind->set_thisthread_cpubind) {
|
||||
if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) {
|
||||
/* we are not required to bind, so ignore this */
|
||||
continue;
|
||||
}
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
|
||||
if (NULL != nbound) {
|
||||
free(nbound);
|
||||
}
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* check if topology supports membind - have to be careful here
|
||||
* as hwloc treats this differently than I (at least) would have
|
||||
* expected. Per hwloc, Linux memory binding is at the thread,
|
||||
* and not process, level. Thus, hwloc sets the "thisproc" flag
|
||||
* to "false" on all Linux systems, and uses the "thisthread" flag
|
||||
* to indicate binding capability
|
||||
*/
|
||||
if (!support->membind->set_thisproc_membind &&
|
||||
!support->membind->set_thisthread_membind) {
|
||||
if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
|
||||
membind_warned = true;
|
||||
} else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
|
||||
if (!orte_do_not_launch) {
|
||||
/* if we don't want to launch, then we are just testing the system,
|
||||
* so ignore questions about support capabilities
|
||||
*/
|
||||
support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);
|
||||
/* check if topology supports cpubind - have to be careful here
|
||||
* as Linux doesn't currently support thread-level binding. This
|
||||
* may change in the future, though, and it isn't clear how hwloc
|
||||
* interprets the current behavior. So check both flags to be sure.
|
||||
*/
|
||||
if (!support->cpubind->set_thisproc_cpubind &&
|
||||
!support->cpubind->set_thisthread_cpubind) {
|
||||
if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) {
|
||||
/* we are not required to bind, so ignore this */
|
||||
continue;
|
||||
}
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
|
||||
if (NULL != nbound) {
|
||||
free(nbound);
|
||||
}
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* check if topology supports membind - have to be careful here
|
||||
* as hwloc treats this differently than I (at least) would have
|
||||
* expected. Per hwloc, Linux memory binding is at the thread,
|
||||
* and not process, level. Thus, hwloc sets the "thisproc" flag
|
||||
* to "false" on all Linux systems, and uses the "thisthread" flag
|
||||
* to indicate binding capability
|
||||
*/
|
||||
if (!support->membind->set_thisproc_membind &&
|
||||
!support->membind->set_thisthread_membind) {
|
||||
if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
|
||||
membind_warned = true;
|
||||
} else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
|
||||
if (NULL != nbound) {
|
||||
free(nbound);
|
||||
}
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* get the number of objects of this type on this node */
|
||||
@ -394,43 +404,48 @@ static int bind_in_place(orte_job_t *jdata,
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
|
||||
continue;
|
||||
}
|
||||
support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);
|
||||
/* check if topology supports cpubind - have to be careful here
|
||||
* as Linux doesn't currently support thread-level binding. This
|
||||
* may change in the future, though, and it isn't clear how hwloc
|
||||
* interprets the current behavior. So check both flags to be sure.
|
||||
*/
|
||||
if (!support->cpubind->set_thisproc_cpubind &&
|
||||
!support->cpubind->set_thisthread_cpubind) {
|
||||
if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) {
|
||||
/* we are not required to bind, so ignore this */
|
||||
continue;
|
||||
}
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
|
||||
if (NULL != nbound) {
|
||||
free(nbound);
|
||||
}
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* check if topology supports membind - have to be careful here
|
||||
* as hwloc treats this differently than I (at least) would have
|
||||
* expected. Per hwloc, Linux memory binding is at the thread,
|
||||
* and not process, level. Thus, hwloc sets the "thisproc" flag
|
||||
* to "false" on all Linux systems, and uses the "thisthread" flag
|
||||
* to indicate binding capability
|
||||
*/
|
||||
if (!support->membind->set_thisproc_membind &&
|
||||
!support->membind->set_thisthread_membind) {
|
||||
if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
|
||||
membind_warned = true;
|
||||
} else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
|
||||
if (!orte_do_not_launch) {
|
||||
/* if we don't want to launch, then we are just testing the system,
|
||||
* so ignore questions about support capabilities
|
||||
*/
|
||||
support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);
|
||||
/* check if topology supports cpubind - have to be careful here
|
||||
* as Linux doesn't currently support thread-level binding. This
|
||||
* may change in the future, though, and it isn't clear how hwloc
|
||||
* interprets the current behavior. So check both flags to be sure.
|
||||
*/
|
||||
if (!support->cpubind->set_thisproc_cpubind &&
|
||||
!support->cpubind->set_thisthread_cpubind) {
|
||||
if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) {
|
||||
/* we are not required to bind, so ignore this */
|
||||
continue;
|
||||
}
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
|
||||
if (NULL != nbound) {
|
||||
free(nbound);
|
||||
}
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* check if topology supports membind - have to be careful here
|
||||
* as hwloc treats this differently than I (at least) would have
|
||||
* expected. Per hwloc, Linux memory binding is at the thread,
|
||||
* and not process, level. Thus, hwloc sets the "thisproc" flag
|
||||
* to "false" on all Linux systems, and uses the "thisthread" flag
|
||||
* to indicate binding capability
|
||||
*/
|
||||
if (!support->membind->set_thisproc_membind &&
|
||||
!support->membind->set_thisthread_membind) {
|
||||
if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
|
||||
membind_warned = true;
|
||||
} else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
|
||||
if (NULL != nbound) {
|
||||
free(nbound);
|
||||
}
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* get the number of objects of this type on this node */
|
||||
@ -510,13 +525,113 @@ static int bind_in_place(orte_job_t *jdata,
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Assign a cpu binding to every not-yet-bound process of the given job.
 *
 * For each node in jdata->map, the "available" bitmap stored in the
 * topology root object's userdata is rendered as a list-format string
 * and stored in proc->cpu_bitmap for every proc on that node that
 * belongs to this job and has no cpu_bitmap yet.
 *
 * Unless orte_do_not_launch is set, the node's topology is first checked
 * for cpubind and membind support:
 *   - no cpubind support: the node is skipped if binding is not required,
 *     otherwise help is shown and ORTE_ERR_SILENT is returned;
 *   - no membind support: either a one-time warning is shown or, when
 *     opal_hwloc_base_mbfa is OPAL_HWLOC_BASE_MBFA_ERROR, help is shown
 *     and ORTE_ERR_SILENT is returned.
 *
 * Returns ORTE_SUCCESS when all nodes were processed, ORTE_ERR_NOT_FOUND
 * when a node's root userdata or its "available" bitmap is missing, or
 * ORTE_ERR_SILENT as described above.
 *
 * NOTE(review): opal_hwloc_base_cpu_set is only referenced in the verbose
 * message here; the binding itself uses sum->available. Presumably that
 * bitmap was already restricted to the requested cpu set elsewhere -
 * confirm.
 */
static int bind_to_cpuset(orte_job_t *jdata)
|
||||
{
|
||||
/* bind each process to opal_hwloc_base_cpu_set */
|
||||
int i, j;
|
||||
orte_job_map_t *map;
|
||||
orte_node_t *node;
|
||||
orte_proc_t *proc;
|
||||
struct hwloc_topology_support *support;
|
||||
opal_hwloc_topo_data_t *sum;
|
||||
hwloc_obj_t root;
|
||||
|
||||
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
||||
"mca:rmaps: bind job %s to cpuset %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid),
|
||||
opal_hwloc_base_cpu_set);
|
||||
/* initialize */
|
||||
map = jdata->map;
|
||||
|
||||
/* walk every slot of the map's node array; NULL slots are skipped */
for (i=0; i < map->nodes->size; i++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
|
||||
continue;
|
||||
}
|
||||
if (!orte_do_not_launch) {
|
||||
/* if we don't want to launch, then we are just testing the system,
|
||||
* so ignore questions about support capabilities
|
||||
*/
|
||||
support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);
|
||||
/* check if topology supports cpubind - have to be careful here
|
||||
* as Linux doesn't currently support thread-level binding. This
|
||||
* may change in the future, though, and it isn't clear how hwloc
|
||||
* interprets the current behavior. So check both flags to be sure.
|
||||
*/
|
||||
if (!support->cpubind->set_thisproc_cpubind &&
|
||||
!support->cpubind->set_thisthread_cpubind) {
|
||||
if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) {
|
||||
/* we are not required to bind, so ignore this */
|
||||
continue;
|
||||
}
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* check if topology supports membind - have to be careful here
|
||||
* as hwloc treats this differently than I (at least) would have
|
||||
* expected. Per hwloc, Linux memory binding is at the thread,
|
||||
* and not process, level. Thus, hwloc sets the "thisproc" flag
|
||||
* to "false" on all Linux systems, and uses the "thisthread" flag
|
||||
* to indicate binding capability
|
||||
*/
|
||||
if (!support->membind->set_thisproc_membind &&
|
||||
!support->membind->set_thisthread_membind) {
|
||||
if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
|
||||
membind_warned = true;
|
||||
} else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* the summary data (including the "available" bitmap) is read from the root object's userdata */
root = hwloc_get_root_obj(node->topology);
|
||||
if (NULL == root->userdata) {
|
||||
/* something went wrong */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
sum = (opal_hwloc_topo_data_t*)root->userdata;
|
||||
if (NULL == sum->available) {
|
||||
/* another error */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
for (j=0; j < node->procs->size; j++) {
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
||||
continue;
|
||||
}
|
||||
/* ignore procs from other jobs */
|
||||
if (proc->name.jobid != jdata->jobid) {
|
||||
continue;
|
||||
}
|
||||
/* ignore procs that have already been bound - should
|
||||
* never happen, but safer
|
||||
*/
|
||||
if (NULL != proc->cpu_bitmap) {
|
||||
continue;
|
||||
}
|
||||
/* every proc of this job on the node gets the same string: the node's
 * "available" bitmap in list format.
 * NOTE(review): the return value of hwloc_bitmap_list_asprintf is not
 * checked here.
 */
hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, sum->available);
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
|
||||
{
|
||||
if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(jdata->map->binding)) {
|
||||
if (ORTE_MAPPING_BYUSER == ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||
/* user specified binding by rankfile - nothing for us to do */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(jdata->map->binding)) {
|
||||
int rc;
|
||||
/* cpuset was given - setup the bindings */
|
||||
if (ORTE_SUCCESS != (rc = bind_to_cpuset(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) ||
|
||||
OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(jdata->map->binding)) {
|
||||
/* no binding requested */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user