1
1

Fix the --bynode option so that each subsequent job resumes mapping where the previous job left off.

Add a "map_bynode" info key that determines whether the job started by comm_spawn* is mapped by node or by slot. By default, the job is mapped according to the default policy set when the parent job was started.

cmr:v1.5.1

This commit was SVN r22564.
Этот коммит содержится в:
Josh Hursey 2010-02-05 15:37:49 +00:00
родитель 1c096aece9
Коммит a3583b8f57
5 изменённых файлов: 53 добавлений и 8 удалений

Просмотреть файл

@ -35,6 +35,9 @@
#include "orte/mca/plm/plm.h" #include "orte/mca/plm/plm.h"
#include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h" #include "orte/mca/rml/rml_types.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/mca/rmaps/base/base.h"
#include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/routed/routed.h" #include "orte/mca/routed/routed.h"
#include "orte/util/name_fns.h" #include "orte/util/name_fns.h"
@ -506,6 +509,7 @@ static int spawn(int count, char **array_of_commands,
orte_job_t *jdata; orte_job_t *jdata;
orte_app_context_t *app; orte_app_context_t *app;
bool local_spawn, non_mpi; bool local_spawn, non_mpi;
bool local_bynode = false;
/* parse the info object */ /* parse the info object */
/* check potentially for: /* check potentially for:
@ -665,6 +669,32 @@ static int spawn(int count, char **array_of_commands,
jdata->controls |= ORTE_JOB_CONTROL_LOCAL_SLAVE; jdata->controls |= ORTE_JOB_CONTROL_LOCAL_SLAVE;
} }
/* check for 'map_bynode' */
ompi_info_get_bool(array_of_info[i], "map_bynode", &local_bynode, &flag);
if ( flag ) {
jdata->map = OBJ_NEW(orte_job_map_t);
if (NULL == jdata->map) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* load it with the system defaults */
jdata->map->policy = orte_default_mapping_policy;
jdata->map->npernode = orte_rmaps_base.npernode;
jdata->map->nperboard = orte_rmaps_base.nperboard;
jdata->map->npersocket = orte_rmaps_base.npersocket;
jdata->map->cpus_per_rank = orte_rmaps_base.cpus_per_rank;
jdata->map->stride = orte_rmaps_base.stride;
jdata->map->oversubscribe = orte_rmaps_base.oversubscribe;
jdata->map->display_map = orte_rmaps_base.display_map;
if( local_bynode ) {
jdata->map->policy = ORTE_MAPPING_BYNODE;
}
else {
jdata->map->policy = ORTE_MAPPING_BYSLOT;
}
}
/* check for 'preload_binary' */ /* check for 'preload_binary' */
ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag); ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag);
if ( flag ) { if ( flag ) {

Просмотреть файл

@ -152,6 +152,10 @@ ompi_param char * Pass an OMPI MCA parameter to the chil
If that parameter already exists in the If that parameter already exists in the
environment, the value will be overwritten environment, the value will be overwritten
by the provided value. by the provided value.
map_bynode bool If set to true, the processes are mapped bynode.
If set to false, the processes are mapped byslot.
By default, mapping is determined by the default
mapping policy set when the parent job was started.
.fi .fi
\fIbool\fP info keys are actually strings but are evaluated as \fIbool\fP info keys are actually strings but are evaluated as

Просмотреть файл

@ -163,6 +163,10 @@ ompi_param char * Pass an OMPI MCA parameter to the chil
If that parameter already exists in the If that parameter already exists in the
environment, the value will be overwritten environment, the value will be overwritten
by the provided value. by the provided value.
map_bynode bool If set to true, the processes are mapped bynode.
If set to false, the processes are mapped byslot.
By default, mapping is determined by the default
mapping policy set when the parent job was started.
.fi .fi
.sp .sp

Просмотреть файл

@ -232,6 +232,7 @@ static void process_msg(int fd, short event, void *data)
goto ANSWER_LAUNCH; goto ANSWER_LAUNCH;
} }
if( NULL == parent->bookmark ) {
/* find the sender's node in the job map */ /* find the sender's node in the job map */
if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(parent->procs, msgpkt->sender.vpid))) { if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(parent->procs, msgpkt->sender.vpid))) {
/* set the bookmark so the child starts from that place - this means /* set the bookmark so the child starts from that place - this means
@ -241,6 +242,9 @@ static void process_msg(int fd, short event, void *data)
*/ */
jdata->bookmark = proc->node; jdata->bookmark = proc->node;
} }
} else {
jdata->bookmark = parent->bookmark;
}
/* launch it */ /* launch it */
if (ORTE_SUCCESS != (rc = orte_plm.spawn(jdata))) { if (ORTE_SUCCESS != (rc = orte_plm.spawn(jdata))) {

Просмотреть файл

@ -342,5 +342,8 @@ int orte_rmaps_base_map_bynode(orte_job_t *jdata, orte_app_context_t *app,
cur_node_item = next; cur_node_item = next;
} }
/* save the bookmark */
jdata->bookmark = (orte_node_t*)cur_node_item;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }