Better support automated tests for map, rank, and bind options
This commit is contained in:
parent
459e15479f
Commit
e26e7ad736
@@ -409,9 +409,10 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
|||||||
*/
|
*/
|
||||||
if (jdata->map->display_map) {
|
if (jdata->map->display_map) {
|
||||||
char *output=NULL;
|
char *output=NULL;
|
||||||
int i, j;
|
int i, j, cnt;
|
||||||
orte_node_t *node;
|
orte_node_t *node;
|
||||||
orte_proc_t *proc;
|
orte_proc_t *proc;
|
||||||
|
char tmp1[1024];
|
||||||
|
|
||||||
if (orte_display_diffable_output) {
|
if (orte_display_diffable_output) {
|
||||||
/* intended solely to test mapping methods, this output
|
/* intended solely to test mapping methods, this output
|
||||||
@@ -421,40 +422,37 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
|||||||
* the output a line at a time here
|
* the output a line at a time here
|
||||||
*/
|
*/
|
||||||
/* display just the procs in a diffable format */
|
/* display just the procs in a diffable format */
|
||||||
opal_output(orte_clean_output, "<map>\n\t<jobid=%s>\n\t<offset=%s>",
|
opal_output(orte_clean_output, "<map>\n");
|
||||||
ORTE_JOBID_PRINT(jdata->jobid), ORTE_VPID_PRINT(jdata->offset));
|
|
||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
/* loop through nodes */
|
/* loop through nodes */
|
||||||
|
cnt = 0;
|
||||||
for (i=0; i < jdata->map->nodes->size; i++) {
|
for (i=0; i < jdata->map->nodes->size; i++) {
|
||||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
|
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
opal_output(orte_clean_output, "\t<host name=%s>", (NULL == node->name) ? "UNKNOWN" : node->name);
|
opal_output(orte_clean_output, "\t<host num=%d>", cnt);
|
||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
|
cnt++;
|
||||||
for (j=0; j < node->procs->size; j++) {
|
for (j=0; j < node->procs->size; j++) {
|
||||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
{
|
{
|
||||||
char locale[64];
|
hwloc_obj_t bd=NULL;;
|
||||||
hwloc_obj_t loc;
|
|
||||||
char *cpu_bitmap;
|
|
||||||
|
|
||||||
loc = NULL;
|
orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND, (void**)&bd, OPAL_PTR);
|
||||||
if (orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&loc, OPAL_PTR)) {
|
if (NULL == bd) {
|
||||||
hwloc_bitmap_list_snprintf(locale, 64, loc->cpuset);
|
(void)strncpy(tmp1, "UNBOUND", strlen("UNBOUND"));
|
||||||
|
} else {
|
||||||
|
if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(tmp1, sizeof(tmp1), node->topology, bd->cpuset)) {
|
||||||
|
(void)strncpy(tmp1, "UNBOUND", strlen("UNBOUND"));
|
||||||
}
|
}
|
||||||
cpu_bitmap = NULL;
|
}
|
||||||
orte_get_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, (void**)&cpu_bitmap, OPAL_STRING);
|
opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu binding=%s>",
|
||||||
opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu locale=%s binding=%s>",
|
|
||||||
ORTE_VPID_PRINT(proc->name.vpid), (long)proc->app_idx,
|
ORTE_VPID_PRINT(proc->name.vpid), (long)proc->app_idx,
|
||||||
(unsigned long)proc->local_rank,
|
(unsigned long)proc->local_rank,
|
||||||
(unsigned long)proc->node_rank, locale,
|
(unsigned long)proc->node_rank, tmp1);
|
||||||
(NULL == cpu_bitmap) ? "NULL" : cpu_bitmap);
|
|
||||||
if (NULL != cpu_bitmap) {
|
|
||||||
free(cpu_bitmap);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu>",
|
opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu>",
|
||||||
@@ -467,6 +465,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
|||||||
opal_output(orte_clean_output, "\t</host>");
|
opal_output(orte_clean_output, "\t</host>");
|
||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
{
|
{
|
||||||
opal_hwloc_locality_t locality;
|
opal_hwloc_locality_t locality;
|
||||||
|
@@ -477,8 +477,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
|||||||
complete:
|
complete:
|
||||||
/* remove all nodes that are already at max usage, and
|
/* remove all nodes that are already at max usage, and
|
||||||
* compute the total number of allocated slots while
|
* compute the total number of allocated slots while
|
||||||
* we do so
|
* we do so */
|
||||||
*/
|
|
||||||
num_slots = 0;
|
num_slots = 0;
|
||||||
item = opal_list_get_first(allocated_nodes);
|
item = opal_list_get_first(allocated_nodes);
|
||||||
while (item != opal_list_get_end(allocated_nodes)) {
|
while (item != opal_list_get_end(allocated_nodes)) {
|
||||||
@@ -541,6 +540,24 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* - if a max number was given, only take that many */
|
||||||
|
if (0 < orte_max_vm_size &&
|
||||||
|
orte_max_vm_size < (int)opal_list_get_size(allocated_nodes)) {
|
||||||
|
item = opal_list_get_first(allocated_nodes);
|
||||||
|
num_slots = 0;
|
||||||
|
for (i=0; i < orte_max_vm_size; i++) {
|
||||||
|
node = (orte_node_t*)item;
|
||||||
|
num_slots += node->slots - node->slots_inuse;
|
||||||
|
item = opal_list_get_next(item);
|
||||||
|
}
|
||||||
|
while (item != opal_list_get_end(allocated_nodes)) {
|
||||||
|
next = opal_list_get_next(item);
|
||||||
|
opal_list_remove_item(allocated_nodes, item);
|
||||||
|
OBJ_RELEASE(item); /* "un-retain" it */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* pass back the total number of available slots */
|
||||||
*total_num_slots = num_slots;
|
*total_num_slots = num_slots;
|
||||||
|
|
||||||
if (4 < opal_output_get_verbosity(orte_rmaps_base_framework.framework_output)) {
|
if (4 < opal_output_get_verbosity(orte_rmaps_base_framework.framework_output)) {
|
||||||
|
@@ -474,7 +474,7 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_
|
|||||||
hwloc_cpuset_t mycpus;
|
hwloc_cpuset_t mycpus;
|
||||||
char tmp1[1024], tmp2[1024];
|
char tmp1[1024], tmp2[1024];
|
||||||
char *str=NULL, *cpu_bitmap=NULL;
|
char *str=NULL, *cpu_bitmap=NULL;
|
||||||
;
|
|
||||||
if (orte_get_attribute(&src->attributes, ORTE_PROC_CPU_BITMAP, (void**)&cpu_bitmap, OPAL_STRING) &&
|
if (orte_get_attribute(&src->attributes, ORTE_PROC_CPU_BITMAP, (void**)&cpu_bitmap, OPAL_STRING) &&
|
||||||
NULL != src->node->topology) {
|
NULL != src->node->topology) {
|
||||||
mycpus = hwloc_bitmap_alloc();
|
mycpus = hwloc_bitmap_alloc();
|
||||||
|
Loading…
x
Reference in new issue
Block a user