Provide two new opal paffinity utilities for printing a hex representation of the cpu set and parsing that string back into a cpu set on the other end. Also add a new MCA param for passing the cpu set applied to a process during launch down to that process so it can know what we attempted to do.
Both utilities and the new MCA param will be used by some new MPI extensions provided by Jeff so that users can easily query their binding situation. This commit was SVN r22998.
Этот коммит содержится в:
родитель
338920656f
Коммит
86228aee38
@ -216,6 +216,11 @@ OPAL_DECLSPEC int opal_paffinity_base_get_physical_socket_id(int logical_socket_
|
||||
*/
|
||||
OPAL_DECLSPEC int opal_paffinity_base_get_physical_core_id(int physical_socket_id, int logical_core_id);
|
||||
|
||||
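/* Print a char string representation of a cpu set: the bitmask elements
 * written in hex and separated by ':'. The returned string is malloc'ed
 * and must be freed by the caller; NULL is returned if no string could
 * be produced. */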
|
||||
OPAL_DECLSPEC char *opal_paffinity_base_print_binding(opal_paffinity_base_cpu_set_t cpumask);
|
||||
|
||||
/* Parse the binding string created by the above function back into a cpu set.
 * Note that cpumask is passed by value. */
|
||||
OPAL_DECLSPEC int opal_paffinity_base_parse_binding(char *binding, opal_paffinity_base_cpu_set_t cpumask);
|
||||
|
||||
/**
|
||||
* Indication of whether a component was successfully selected or
|
||||
@ -267,6 +272,9 @@ OPAL_DECLSPEC extern char *opal_paffinity_base_slot_list;
|
||||
*/
|
||||
OPAL_DECLSPEC extern bool opal_paffinity_base_bound;
|
||||
|
||||
/* String passed down from launcher that contains applied binding */
|
||||
OPAL_DECLSPEC extern char *opal_paffinity_base_applied_binding;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* OPAL_BASE_PAFFINITY_H */
|
||||
|
@ -46,7 +46,7 @@ opal_list_t opal_paffinity_base_components_opened;
|
||||
bool opal_paffinity_alone = false;
|
||||
char *opal_paffinity_base_slot_list;
|
||||
bool opal_paffinity_base_bound;
|
||||
|
||||
char *opal_paffinity_base_applied_binding;
|
||||
/*
|
||||
* Register some paffinity-wide MCA params
|
||||
*/
|
||||
@ -86,6 +86,11 @@ int opal_paffinity_base_register_params(void)
|
||||
false, &value);
|
||||
opal_paffinity_base_bound = OPAL_INT_TO_BOOL(value);
|
||||
|
||||
mca_base_param_reg_string_name("paffinity", "base_applied_binding",
|
||||
"Binding from launcher",
|
||||
true, false,
|
||||
NULL, &opal_paffinity_base_applied_binding);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -129,3 +129,79 @@ int opal_paffinity_base_get_physical_core_id(int physical_socket_id, int logical
|
||||
return opal_paffinity_base_module->paff_get_physical_core_id(physical_socket_id, logical_core_id);
|
||||
}
|
||||
|
||||
/**
 * Build a printable representation of a cpu set.
 *
 * Each bitmask element is written as a fixed-width, zero-padded
 * hexadecimal field (two hex digits per byte of
 * opal_paffinity_base_bitmask_t), and fields are separated by ':'.
 * Trailing elements that are all zero are omitted, so the string ends
 * with the last non-zero element and carries no trailing separator.
 *
 * @param cpumask  The cpu set to print.
 *
 * @return A malloc'ed, NUL-terminated string that the caller must
 *         free(), or NULL if the mask is empty or memory could not be
 *         allocated.
 */
char *opal_paffinity_base_print_binding(opal_paffinity_base_cpu_set_t cpumask)
{
    /* two hex digits are needed to print each byte of an element */
    const size_t width = 2 * sizeof(opal_paffinity_base_bitmask_t);
    /* every element takes its digits plus one separator; add the
     * terminating NUL */
    const size_t bufsize = OPAL_PAFFINITY_BITMASK_NUM_ELEMENTS * (width + 1) + 1;
    size_t i, used, pos;
    char *tmp;

    /* find the last non-zero entry; counting downward with "used - 1"
     * keeps the unsigned index from wrapping below zero */
    used = OPAL_PAFFINITY_BITMASK_NUM_ELEMENTS;
    while (0 < used && 0 == cpumask.bitmask[used - 1]) {
        used--;
    }
    if (0 == used) {
        /* there was nothing in the mask */
        return NULL;
    }

    tmp = (char*)malloc(bufsize);
    if (NULL == tmp) {
        return NULL;
    }

    pos = 0;
    for (i = 0; i < used; i++) {
        if (0 < i) {
            tmp[pos] = ':';
            pos++;
        }
        /* the bitmask type is unsigned long, so a zero-padded "lx"
         * conversion of "width" digits always emits exactly "width"
         * characters */
        snprintf(&tmp[pos], bufsize - pos, "%0*lx", (int)width, cpumask.bitmask[i]);
        pos += width;
    }
    tmp[pos] = '\0';

    return tmp;
}
|
||||
|
||||
/**
 * Parse a binding string (hexadecimal fields, one per bitmask element,
 * each followed by a single separator character) into a cpu set.
 *
 * Parsing is best-effort: it stops at the end of the string or once
 * OPAL_PAFFINITY_BITMASK_NUM_ELEMENTS fields have been read, and a field
 * without hex digits is stored as zero.
 *
 * NOTE(review): cpumask is received by value, so the parsed bits are
 * stored in this function's local copy only and are not visible to the
 * caller. The signature is kept unchanged here for compatibility with
 * the existing prototype; returning the result would require passing a
 * pointer and updating the prototype and callers together.
 *
 * @param binding  String to parse; may be NULL or empty, in which case
 *                 nothing is done.
 * @param cpumask  The cpu set to fill (see the note above).
 *
 * @return OPAL_SUCCESS in all cases.
 */
int opal_paffinity_base_parse_binding(char *binding, opal_paffinity_base_cpu_set_t cpumask)
{
    size_t i;
    char *tmp, *save;

    if (NULL == binding || '\0' == binding[0]) {
        return OPAL_SUCCESS;
    }

    OPAL_PAFFINITY_CPU_ZERO(cpumask);

    tmp = binding;
    for (i=0; i < OPAL_PAFFINITY_BITMASK_NUM_ELEMENTS; i++) {
        cpumask.bitmask[i] = strtoul(tmp, &save, 16);
        /* strtoul never sets the end pointer to NULL; the end of the
         * input is marked by the pointer resting on the terminating NUL */
        if ('\0' == *save) {
            /* end of the line */
            break;
        }
        /* step over the separator that follows this field */
        tmp = save + 1;
        if ('\0' == *tmp) {
            /* nothing after the separator */
            break;
        }
    }

    return OPAL_SUCCESS;
}
|
||||
|
@ -150,6 +150,12 @@ typedef unsigned long int opal_paffinity_base_bitmask_t;
|
||||
*/
|
||||
#define OPAL_PAFFINITY_BITMASK_NUM_ELEMENTS (OPAL_PAFFINITY_BITMASK_CPU_MAX / OPAL_PAFFINITY_BITMASK_T_NUM_BITS)
|
||||
|
||||
/**
|
||||
* \internal
|
||||
* How many bytes in a cpu set
|
||||
*/
|
||||
#define OPAL_PAFFINITY_CPU_SET_NUM_BYTES (OPAL_PAFFINITY_BITMASK_NUM_ELEMENTS * sizeof(opal_paffinity_base_bitmask_t))
|
||||
|
||||
/**
|
||||
* Public processor bitmask type
|
||||
*/
|
||||
|
@ -244,7 +244,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
orte_local_rank_t lrank;
|
||||
int target_socket, npersocket, logical_skt;
|
||||
int logical_cpu, phys_core, phys_cpu, ncpu;
|
||||
char *param;
|
||||
char *param, *tmp;
|
||||
|
||||
if (NULL != child) {
|
||||
/* should pull this information from MPIRUN instead of going with
|
||||
@ -505,9 +505,11 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
logical_cpu += jobdat->stride;
|
||||
}
|
||||
if (orte_report_bindings) {
|
||||
opal_output(0, "%s odls:default:fork binding child %s to socket %d cpus %04lx",
|
||||
tmp = opal_paffinity_base_print_binding(mask);
|
||||
opal_output(0, "%s odls:default:fork binding child %s to socket %d cpus %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name), target_socket, mask.bitmask[0]);
|
||||
ORTE_NAME_PRINT(child->name), target_socket, tmp);
|
||||
free(tmp);
|
||||
}
|
||||
} else {
|
||||
/* my starting core has to be offset by cpus_per_rank */
|
||||
@ -560,9 +562,11 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
logical_cpu += jobdat->stride;
|
||||
}
|
||||
if (orte_report_bindings) {
|
||||
opal_output(0, "%s odls:default:fork binding child %s to cpus %04lx",
|
||||
tmp = opal_paffinity_base_print_binding(mask);
|
||||
opal_output(0, "%s odls:default:fork binding child %s to cpus %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name), mask.bitmask[0]);
|
||||
ORTE_NAME_PRINT(child->name), tmp);
|
||||
free(tmp);
|
||||
}
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) {
|
||||
@ -769,9 +773,11 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
/* if this resulted in no binding, generate warning if not suppressed */
|
||||
ORTE_ODLS_WARN_NOT_BOUND(mask, 3);
|
||||
if (orte_report_bindings) {
|
||||
opal_output(0, "%s odls:default:fork binding child %s to socket %d cpus %04lx",
|
||||
tmp = opal_paffinity_base_print_binding(mask);
|
||||
opal_output(0, "%s odls:default:fork binding child %s to socket %d cpus %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name), target_socket, mask.bitmask[0]);
|
||||
ORTE_NAME_PRINT(child->name), target_socket, tmp);
|
||||
free(tmp);
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD(6);
|
||||
@ -821,6 +827,13 @@ LAUNCH_PROCS:
|
||||
param = mca_base_param_environ_variable("paffinity","base","bound");
|
||||
opal_setenv(param, "1", true, &environ_copy);
|
||||
free(param);
|
||||
/* and provide a char representation of what we did */
|
||||
tmp = opal_paffinity_base_print_binding(mask);
|
||||
if (NULL != tmp) {
|
||||
param = mca_base_param_environ_variable("paffinity","base","applied_binding");
|
||||
opal_setenv(param, tmp, true, &environ_copy);
|
||||
free(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
/* close all file descriptors w/ exception of
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user