Fix the --bynode option so that each subsequent job resumes mapping where the previous job left off.
Add a 'map_bynode' info key that determines whether the job started by comm_spawn* is mapped by node or by slot. If the key is not set, the job is mapped according to the default policy set when the parent job was started. cmr:v1.5.1. This commit was SVN r22564.
Этот коммит содержится в:
родитель
1c096aece9
Коммит
a3583b8f57
@ -35,6 +35,9 @@
|
|||||||
#include "orte/mca/plm/plm.h"
|
#include "orte/mca/plm/plm.h"
|
||||||
#include "orte/mca/rml/rml.h"
|
#include "orte/mca/rml/rml.h"
|
||||||
#include "orte/mca/rml/rml_types.h"
|
#include "orte/mca/rml/rml_types.h"
|
||||||
|
#include "orte/mca/rmaps/rmaps.h"
|
||||||
|
#include "orte/mca/rmaps/rmaps_types.h"
|
||||||
|
#include "orte/mca/rmaps/base/base.h"
|
||||||
#include "orte/mca/rml/base/rml_contact.h"
|
#include "orte/mca/rml/base/rml_contact.h"
|
||||||
#include "orte/mca/routed/routed.h"
|
#include "orte/mca/routed/routed.h"
|
||||||
#include "orte/util/name_fns.h"
|
#include "orte/util/name_fns.h"
|
||||||
@ -506,6 +509,7 @@ static int spawn(int count, char **array_of_commands,
|
|||||||
orte_job_t *jdata;
|
orte_job_t *jdata;
|
||||||
orte_app_context_t *app;
|
orte_app_context_t *app;
|
||||||
bool local_spawn, non_mpi;
|
bool local_spawn, non_mpi;
|
||||||
|
bool local_bynode = false;
|
||||||
|
|
||||||
/* parse the info object */
|
/* parse the info object */
|
||||||
/* check potentially for:
|
/* check potentially for:
|
||||||
@ -665,6 +669,32 @@ static int spawn(int count, char **array_of_commands,
|
|||||||
jdata->controls |= ORTE_JOB_CONTROL_LOCAL_SLAVE;
|
jdata->controls |= ORTE_JOB_CONTROL_LOCAL_SLAVE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* check for 'map_bynode' */
|
||||||
|
ompi_info_get_bool(array_of_info[i], "map_bynode", &local_bynode, &flag);
|
||||||
|
if ( flag ) {
|
||||||
|
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||||
|
if (NULL == jdata->map) {
|
||||||
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
|
/* load it with the system defaults */
|
||||||
|
jdata->map->policy = orte_default_mapping_policy;
|
||||||
|
jdata->map->npernode = orte_rmaps_base.npernode;
|
||||||
|
jdata->map->nperboard = orte_rmaps_base.nperboard;
|
||||||
|
jdata->map->npersocket = orte_rmaps_base.npersocket;
|
||||||
|
jdata->map->cpus_per_rank = orte_rmaps_base.cpus_per_rank;
|
||||||
|
jdata->map->stride = orte_rmaps_base.stride;
|
||||||
|
jdata->map->oversubscribe = orte_rmaps_base.oversubscribe;
|
||||||
|
jdata->map->display_map = orte_rmaps_base.display_map;
|
||||||
|
|
||||||
|
if( local_bynode ) {
|
||||||
|
jdata->map->policy = ORTE_MAPPING_BYNODE;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
jdata->map->policy = ORTE_MAPPING_BYSLOT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* check for 'preload_binary' */
|
/* check for 'preload_binary' */
|
||||||
ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag);
|
ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag);
|
||||||
if ( flag ) {
|
if ( flag ) {
|
||||||
|
@ -152,6 +152,10 @@ ompi_param char * Pass an OMPI MCA parameter to the chil
|
|||||||
If that parameter already exists in the
|
If that parameter already exists in the
|
||||||
environment, the value will be overwritten
|
environment, the value will be overwritten
|
||||||
by the provided value.
|
by the provided value.
|
||||||
|
map_bynode bool If set to true, the processes are mapped bynode.
|
||||||
|
If set to false, the processes are mapped byslot.
|
||||||
|
By default, mapping is determined by the default
|
||||||
|
mapping policy set when the job was started.
|
||||||
.fi
|
.fi
|
||||||
|
|
||||||
\fIbool\fP info keys are actually strings but are evaluated as
|
\fIbool\fP info keys are actually strings but are evaluated as
|
||||||
|
@ -163,6 +163,10 @@ ompi_param char * Pass an OMPI MCA parameter to the chil
|
|||||||
If that parameter already exists in the
|
If that parameter already exists in the
|
||||||
environment, the value will be overwritten
|
environment, the value will be overwritten
|
||||||
by the provided value.
|
by the provided value.
|
||||||
|
map_bynode bool If set to true, the processes are mapped bynode.
|
||||||
|
If set to false, the processes are mapped byslot.
|
||||||
|
By default, mapping is determined by the default
|
||||||
|
mapping policy set when the job was started.
|
||||||
.fi
|
.fi
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
|
@ -232,6 +232,7 @@ static void process_msg(int fd, short event, void *data)
|
|||||||
goto ANSWER_LAUNCH;
|
goto ANSWER_LAUNCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( NULL == parent->bookmark ) {
|
||||||
/* find the sender's node in the job map */
|
/* find the sender's node in the job map */
|
||||||
if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(parent->procs, msgpkt->sender.vpid))) {
|
if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(parent->procs, msgpkt->sender.vpid))) {
|
||||||
/* set the bookmark so the child starts from that place - this means
|
/* set the bookmark so the child starts from that place - this means
|
||||||
@ -241,6 +242,9 @@ static void process_msg(int fd, short event, void *data)
|
|||||||
*/
|
*/
|
||||||
jdata->bookmark = proc->node;
|
jdata->bookmark = proc->node;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
jdata->bookmark = parent->bookmark;
|
||||||
|
}
|
||||||
|
|
||||||
/* launch it */
|
/* launch it */
|
||||||
if (ORTE_SUCCESS != (rc = orte_plm.spawn(jdata))) {
|
if (ORTE_SUCCESS != (rc = orte_plm.spawn(jdata))) {
|
||||||
|
@ -342,5 +342,8 @@ int orte_rmaps_base_map_bynode(orte_job_t *jdata, orte_app_context_t *app,
|
|||||||
cur_node_item = next;
|
cur_node_item = next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* save the bookmark */
|
||||||
|
jdata->bookmark = (orte_node_t*)cur_node_item;
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user