Fix the --bynode option so that each subsequent job resumes mapping where the previous job left off.
Add a "map_bynode" info key that determines whether a job started by comm_spawn* is mapped by node or by slot. By default, the job is mapped according to the default policy set when the parent job was started. cmr:v1.5.1. This commit was SVN r22564.
Этот коммит содержится в:
родитель
1c096aece9
Коммит
a3583b8f57
@ -35,6 +35,9 @@
|
||||
#include "orte/mca/plm/plm.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/rmaps/rmaps.h"
|
||||
#include "orte/mca/rmaps/rmaps_types.h"
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
#include "orte/mca/routed/routed.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
@ -506,6 +509,7 @@ static int spawn(int count, char **array_of_commands,
|
||||
orte_job_t *jdata;
|
||||
orte_app_context_t *app;
|
||||
bool local_spawn, non_mpi;
|
||||
bool local_bynode = false;
|
||||
|
||||
/* parse the info object */
|
||||
/* check potentially for:
|
||||
@ -665,6 +669,32 @@ static int spawn(int count, char **array_of_commands,
|
||||
jdata->controls |= ORTE_JOB_CONTROL_LOCAL_SLAVE;
|
||||
}
|
||||
|
||||
/* check for 'map_bynode' */
|
||||
ompi_info_get_bool(array_of_info[i], "map_bynode", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
/* load it with the system defaults */
|
||||
jdata->map->policy = orte_default_mapping_policy;
|
||||
jdata->map->npernode = orte_rmaps_base.npernode;
|
||||
jdata->map->nperboard = orte_rmaps_base.nperboard;
|
||||
jdata->map->npersocket = orte_rmaps_base.npersocket;
|
||||
jdata->map->cpus_per_rank = orte_rmaps_base.cpus_per_rank;
|
||||
jdata->map->stride = orte_rmaps_base.stride;
|
||||
jdata->map->oversubscribe = orte_rmaps_base.oversubscribe;
|
||||
jdata->map->display_map = orte_rmaps_base.display_map;
|
||||
|
||||
if( local_bynode ) {
|
||||
jdata->map->policy = ORTE_MAPPING_BYNODE;
|
||||
}
|
||||
else {
|
||||
jdata->map->policy = ORTE_MAPPING_BYSLOT;
|
||||
}
|
||||
}
|
||||
|
||||
/* check for 'preload_binary' */
|
||||
ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag);
|
||||
if ( flag ) {
|
||||
|
@ -152,6 +152,10 @@ ompi_param char * Pass an OMPI MCA parameter to the chil
|
||||
If that parameter already exists in the
|
||||
environment, the value will be overwritten
|
||||
by the provided value.
|
||||
map_bynode bool If set to true, the processes are mapped by node.
|
||||
If set to false, the processes are mapped by slot.
|
||||
By default, mapping is determined by the default
|
||||
mapping policy set when the parent job was started.
|
||||
.fi
|
||||
|
||||
\fIbool\fP info keys are actually strings but are evaluated as
|
||||
|
@ -163,6 +163,10 @@ ompi_param char * Pass an OMPI MCA parameter to the chil
|
||||
If that parameter already exists in the
|
||||
environment, the value will be overwritten
|
||||
by the provided value.
|
||||
map_bynode bool If set to true, the processes are mapped by node.
|
||||
If set to false, the processes are mapped by slot.
|
||||
By default, mapping is determined by the default
|
||||
mapping policy set when the parent job was started.
|
||||
.fi
|
||||
|
||||
.sp
|
||||
|
@ -232,6 +232,7 @@ static void process_msg(int fd, short event, void *data)
|
||||
goto ANSWER_LAUNCH;
|
||||
}
|
||||
|
||||
if( NULL == parent->bookmark ) {
|
||||
/* find the sender's node in the job map */
|
||||
if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(parent->procs, msgpkt->sender.vpid))) {
|
||||
/* set the bookmark so the child starts from that place - this means
|
||||
@ -241,6 +242,9 @@ static void process_msg(int fd, short event, void *data)
|
||||
*/
|
||||
jdata->bookmark = proc->node;
|
||||
}
|
||||
} else {
|
||||
jdata->bookmark = parent->bookmark;
|
||||
}
|
||||
|
||||
/* launch it */
|
||||
if (ORTE_SUCCESS != (rc = orte_plm.spawn(jdata))) {
|
||||
|
@ -342,5 +342,8 @@ int orte_rmaps_base_map_bynode(orte_job_t *jdata, orte_app_context_t *app,
|
||||
cur_node_item = next;
|
||||
}
|
||||
|
||||
/* save the bookmark */
|
||||
jdata->bookmark = (orte_node_t*)cur_node_item;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user