Correctly add support for the "env" MPI_Info key during comm_spawn, update the "map-by", "rank-by", and "bind-to" Info key behaviors to match the new mapping/ranking/binding system, and update all docs and comments to match.
Fix comm_spawn on a single host: with the new default mapping scheme, we were incorrectly computing the number of procs to put on the node. Refs trac:4003. This commit was SVN r30033. The following Trac tickets were found above: Ticket 4003 --> https://svn.open-mpi.org/trac/ompi/ticket/4003
Этот коммит содержится в:
родитель
4cd1958deb
Коммит
31248c0985
@ -40,6 +40,7 @@
|
||||
#include "opal/util/opal_getcwd.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/mca/db/db.h"
|
||||
#include "opal/mca/hwloc/base/base.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/grpcomm/base/base.h"
|
||||
@ -673,18 +674,18 @@ static int spawn(int count, const char *array_of_commands[],
|
||||
int have_wdir=0;
|
||||
int flag=0;
|
||||
char cwd[OPAL_PATH_MAX];
|
||||
char host[OPAL_PATH_MAX]; /*** should define OMPI_HOST_MAX ***/
|
||||
char prefix[OPAL_PATH_MAX];
|
||||
char stdin_target[OPAL_PATH_MAX];
|
||||
char params[OPAL_PATH_MAX];
|
||||
char mapper[OPAL_PATH_MAX];
|
||||
char host[OPAL_MAX_INFO_VAL]; /*** should define OMPI_HOST_MAX ***/
|
||||
char prefix[OPAL_MAX_INFO_VAL];
|
||||
char stdin_target[OPAL_MAX_INFO_VAL];
|
||||
char params[OPAL_MAX_INFO_VAL];
|
||||
char mapper[OPAL_MAX_INFO_VAL];
|
||||
int npernode;
|
||||
char slot_list[OPAL_PATH_MAX];
|
||||
char slot_list[OPAL_MAX_INFO_VAL];
|
||||
|
||||
orte_job_t *jdata;
|
||||
orte_app_context_t *app;
|
||||
bool local_spawn, non_mpi;
|
||||
bool local_bynode = false;
|
||||
char **envars;
|
||||
|
||||
/* parse the info object */
|
||||
/* check potentially for:
|
||||
@ -697,7 +698,9 @@ static int spawn(int count, const char *array_of_commands[],
|
||||
- "add-hostfile": add the hosts in the hostfile to the
|
||||
known list of available resources and spawn
|
||||
these procs on them
|
||||
- "prefix": the path to the root of the directory tree where ompi
|
||||
- "env": a newline-delimited list of envar values to be
|
||||
placed into the app's environment (of form "foo=bar")
|
||||
- "ompi_prefix": the path to the root of the directory tree where ompi
|
||||
executables and libraries can be found on all nodes
|
||||
used to spawn these procs
|
||||
- "arch": desired architecture
|
||||
@ -705,6 +708,19 @@ static int spawn(int count, const char *array_of_commands[],
|
||||
- "path": list of directories where to look for the executable
|
||||
- "file": filename, where additional information is provided.
|
||||
- "soft": see page 92 of MPI-2.
|
||||
- "mapper": indicate the mapper to be used for the job
|
||||
- "display_map": display the map of the spawned job
|
||||
- "npernode": number of procs/node to spawn
|
||||
- "pernode": spawn one proc/node
|
||||
- "ppr": spawn specified number of procs per specified object
|
||||
- "map_by": specify object by which the procs should be mapped
|
||||
- "rank_by": specify object by which the procs should be ranked
|
||||
- "bind_to": specify object to which the procs should be bound
|
||||
- "ompi_preload_binary": move binaries to nodes prior to execution
|
||||
- "ompi_preload_files": move specified files to nodes prior to execution
|
||||
- "ompi_non_mpi": spawned job will not call MPI_Init
|
||||
- "ompi_param": list of MCA params to be in the spawned job's environment
|
||||
- "env": newline (\n) delimited list of envar values to be passed to spawned procs
|
||||
*/
|
||||
|
||||
/* setup the job object */
|
||||
@ -815,9 +831,14 @@ static int spawn(int count, const char *array_of_commands[],
|
||||
app->add_host = opal_argv_split(host, ',');
|
||||
}
|
||||
|
||||
/* check for env */
|
||||
ompi_info_get (array_of_info[i], "env", sizeof(host)-1, host, &flag);
|
||||
if ( flag ) {
|
||||
opal_argv_append_nosize(&app->env, host);
|
||||
envars = opal_argv_split(host, '\n');
|
||||
for (j=0; NULL != envars[j]; j++) {
|
||||
opal_argv_append_nosize(&app->env, envars[j]);
|
||||
}
|
||||
opal_argv_free(envars);
|
||||
}
|
||||
|
||||
/* 'path', 'arch', 'file', 'soft' -- to be implemented */
|
||||
@ -878,9 +899,10 @@ static int spawn(int count, const char *array_of_commands[],
|
||||
}
|
||||
}
|
||||
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
/* not allowed to provide multiple mapping policies */
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->mapping |= ORTE_MAPPING_PPR;
|
||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_PPR);
|
||||
asprintf(&(jdata->map->ppr), "%d:n", npernode);
|
||||
}
|
||||
ompi_info_get (array_of_info[i], "pernode", sizeof(slot_list) - 1, slot_list, &flag);
|
||||
@ -893,9 +915,10 @@ static int spawn(int count, const char *array_of_commands[],
|
||||
}
|
||||
}
|
||||
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
/* not allowed to provide multiple mapping policies */
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->mapping |= ORTE_MAPPING_PPR;
|
||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_PPR);
|
||||
jdata->map->ppr = strdup("1:n");
|
||||
}
|
||||
ompi_info_get (array_of_info[i], "ppr", sizeof(slot_list) - 1, slot_list, &flag);
|
||||
@ -908,14 +931,15 @@ static int spawn(int count, const char *array_of_commands[],
|
||||
}
|
||||
}
|
||||
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
/* not allowed to provide multiple mapping policies */
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->mapping |= ORTE_MAPPING_PPR;
|
||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_PPR);
|
||||
jdata->map->ppr = strdup(slot_list);
|
||||
}
|
||||
|
||||
/* check for 'map_byxxx' */
|
||||
ompi_info_get_bool(array_of_info[i], "map_by_node", &local_bynode, &flag);
|
||||
/* check for 'map_by' */
|
||||
ompi_info_get(array_of_info[i], "map_by", sizeof(slot_list) - 1, slot_list, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
@ -925,256 +949,17 @@ static int spawn(int count, const char *array_of_commands[],
|
||||
}
|
||||
}
|
||||
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
/* not allowed to provide multiple mapping policies */
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->mapping |= ORTE_MAPPING_BYNODE;
|
||||
}
|
||||
#if OPAL_HAVE_HWLOC
|
||||
ompi_info_get_bool(array_of_info[i], "map_by_board", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping,
|
||||
NULL, slot_list))) {
|
||||
return rc;
|
||||
}
|
||||
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->mapping |= ORTE_MAPPING_BYBOARD;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "map_by_numa", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->mapping |= ORTE_MAPPING_BYNUMA;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "map_by_socket", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->mapping |= ORTE_MAPPING_BYSOCKET;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "map_by_l3cache", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->mapping |= ORTE_MAPPING_BYL3CACHE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "map_by_l2cache", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->mapping |= ORTE_MAPPING_BYL2CACHE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "map_by_l1cache", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->mapping |= ORTE_MAPPING_BYL1CACHE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "map_by_core", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->mapping |= ORTE_MAPPING_BYCORE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "map_by_hwthread", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->mapping |= ORTE_MAPPING_BYHWTHREAD;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* check for 'rank_byxxx' */
|
||||
ompi_info_get_bool(array_of_info[i], "rank_by_node", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (0 != jdata->map->ranking) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->ranking = ORTE_RANK_BY_NODE;
|
||||
}
|
||||
#if OPAL_HAVE_HWLOC
|
||||
ompi_info_get_bool(array_of_info[i], "rank_by_board", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (0 != jdata->map->ranking) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->ranking = ORTE_RANK_BY_BOARD;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "rank_by_numa", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (0 != jdata->map->ranking) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->ranking = ORTE_RANK_BY_NUMA;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "rank_by_socket", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (0 != jdata->map->ranking) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->ranking = ORTE_RANK_BY_SOCKET;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "rank_by_l3cache", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (0 != jdata->map->ranking) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->ranking = ORTE_RANK_BY_L3CACHE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "rank_by_l2cache", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (0 != jdata->map->ranking) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->ranking = ORTE_RANK_BY_L2CACHE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "rank_by_l1cache", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (0 != jdata->map->ranking) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->ranking = ORTE_RANK_BY_L1CACHE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "rank_by_core", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (0 != jdata->map->ranking) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->ranking = ORTE_RANK_BY_CORE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "rank_by_hwthread", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (0 != jdata->map->ranking) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->ranking = ORTE_RANK_BY_HWTHREAD;
|
||||
}
|
||||
|
||||
/* check for 'bind_toxxx' */
|
||||
ompi_info_get_bool(array_of_info[i], "bind_if_supported", &local_bynode, &flag);
|
||||
/* check for 'rank_by' */
|
||||
ompi_info_get(array_of_info[i], "rank_by", sizeof(slot_list) - 1, slot_list, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
@ -1183,20 +968,19 @@ static int spawn(int count, const char *array_of_commands[],
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
jdata->map->binding |= OPAL_BIND_IF_SUPPORTED;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "bind_overload_allowed", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
if (ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) {
|
||||
/* not allowed to provide multiple ranking policies */
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_ranking_policy(&jdata->map->ranking,
|
||||
jdata->map->mapping, slot_list))) {
|
||||
return rc;
|
||||
}
|
||||
jdata->map->binding |= OPAL_BIND_ALLOW_OVERLOAD;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "bind_to_none", &local_bynode, &flag);
|
||||
|
||||
#if OPAL_HAVE_HWLOC
|
||||
/* check for 'bind_to' */
|
||||
ompi_info_get(array_of_info[i], "bind_to", sizeof(slot_list) - 1, slot_list, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
@ -1206,121 +990,12 @@ static int spawn(int count, const char *array_of_commands[],
|
||||
}
|
||||
}
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
/* not allowed to provide multiple binding policies */
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->binding |= OPAL_BIND_TO_NONE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "bind_to_board", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&jdata->map->binding, slot_list))) {
|
||||
return rc;
|
||||
}
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->binding |= OPAL_BIND_TO_BOARD;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "bind_to_numa", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->binding |= OPAL_BIND_TO_NUMA;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "bind_to_socket", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->binding |= OPAL_BIND_TO_SOCKET;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "bind_to_l3cache", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->binding |= OPAL_BIND_TO_L3CACHE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "bind_to_l2cache", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->binding |= OPAL_BIND_TO_L2CACHE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "bind_to_l1cache", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->binding |= OPAL_BIND_TO_L1CACHE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "bind_to_core", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->binding |= OPAL_BIND_TO_CORE;
|
||||
}
|
||||
ompi_info_get_bool(array_of_info[i], "bind_to_hwthread", &local_bynode, &flag);
|
||||
if ( flag ) {
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == jdata->map) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
jdata->map->binding |= OPAL_BIND_TO_HWTHREAD;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -146,19 +146,10 @@ wdir char * Directory where the executable is
|
||||
\fIompi_preload_files_dest_dir\fP.
|
||||
ompi_prefix char * Same as the --prefix command line
|
||||
argument to mpirun.
|
||||
ompi_local_slave bool If set to true, launch the specified
|
||||
process as a local \fIslave\fP to the
|
||||
calling process. The new process will
|
||||
only be known to the caller, and will
|
||||
only be able to communicate with the
|
||||
caller.
|
||||
ompi_preload_binary bool If set to true, pre-position the
|
||||
specified executable onto the remote
|
||||
host. A destination directory must
|
||||
also be provided.
|
||||
ompi_preload_files_dest_dir
|
||||
char * Target directory where pre-positioned
|
||||
files are to be placed.
|
||||
ompi_preload_files char * A comma-separated list of files that
|
||||
are to be pre-positioned in addition
|
||||
to the executable. Note that this
|
||||
@ -166,12 +157,8 @@ ompi_preload_files char * A comma-separated list of files that
|
||||
\fIompi_preload_binary\fP - files can
|
||||
be moved to the target even if an
|
||||
executable is not moved.
|
||||
ompi_preload_files_src_dir
|
||||
char * Source directory where files and
|
||||
executables that are to be
|
||||
pre-positioned can be found. If not
|
||||
specified, the current working
|
||||
directory will be used.
|
||||
ompi_stdin_target char* Comma-delimited list of ranks to
|
||||
receive stdin when forwarded.
|
||||
ompi_non_mpi bool If set to true, launching a non-MPI
|
||||
application; the returned communicator
|
||||
will be MPI_COMM_NULL. Failure to set
|
||||
@ -183,12 +170,25 @@ ompi_param char * Pass an OMPI MCA parameter to the
|
||||
exists in the environment, the value
|
||||
will be overwritten by the provided
|
||||
value.
|
||||
map_bynode bool If set to true, the processes are
|
||||
mapped bynode. If set to false, the
|
||||
processes are mapped byslot. By
|
||||
default, mapping is determined by the
|
||||
default mapping policy set when the
|
||||
job was started.
|
||||
mapper char* Mapper to be used for this job
|
||||
map_by char* Mapping directive indicating how
|
||||
processes are to be mapped (slot,
|
||||
node, socket, etc.).
|
||||
rank_by char * Ranking directive indicating how
|
||||
processes are to be ranked (slot,
|
||||
node, socket, etc.).
|
||||
bind_to char * Binding directive indicating how
|
||||
processes are to be bound (hwthread,
|
||||
core, socket, numa, etc.).
|
||||
path char* List of directories to search for
|
||||
the executable
|
||||
npernode char* Number of processes to spawn on
|
||||
each node of the allocation
|
||||
pernode bool Equivalent to npernode of 1
|
||||
ppr char* Spawn specified number of processes
|
||||
on each object of the identified type
|
||||
env char* Newline-delimited list of envars to
|
||||
be passed to the spawned procs
|
||||
.fi
|
||||
|
||||
\fIbool\fP info keys are actually strings but are evaluated as
|
||||
|
@ -150,19 +150,10 @@ wdir char * Directory where the executable is
|
||||
\fIompi_preload_files_dest_dir\fP.
|
||||
ompi_prefix char * Same as the --prefix command line
|
||||
argument to mpirun.
|
||||
ompi_local_slave bool If set to true, launch the specified
|
||||
process as a local \fIslave\fP to the
|
||||
calling process. The new process will
|
||||
only be known to the caller, and will
|
||||
only be able to communicate with the
|
||||
caller.
|
||||
ompi_preload_binary bool If set to true, pre-position the
|
||||
specified executable onto the remote
|
||||
host. A destination directory must
|
||||
also be provided.
|
||||
ompi_preload_files_dest_dir
|
||||
char * Target directory where pre-positioned
|
||||
files are to be placed.
|
||||
ompi_preload_files char * A comma-separated list of files that
|
||||
are to be pre-positioned in addition
|
||||
to the executable. Note that this
|
||||
@ -170,12 +161,8 @@ ompi_preload_files char * A comma-separated list of files that
|
||||
\fIompi_preload_binary\fP - files can
|
||||
be moved to the target even if an
|
||||
executable is not moved.
|
||||
ompi_preload_files_src_dir
|
||||
char * Source directory where files and
|
||||
executables that are to be
|
||||
pre-positioned can be found. If not
|
||||
specified, the current working
|
||||
directory will be used.
|
||||
ompi_stdin_target char* Comma-delimited list of ranks to
|
||||
receive stdin when forwarded.
|
||||
ompi_non_mpi bool If set to true, launching a non-MPI
|
||||
application; the returned communicator
|
||||
will be MPI_COMM_NULL. Failure to set
|
||||
@ -187,12 +174,25 @@ ompi_param char * Pass an OMPI MCA parameter to the
|
||||
exists in the environment, the value
|
||||
will be overwritten by the provided
|
||||
value.
|
||||
map_bynode bool If set to true, the processes are
|
||||
mapped bynode. If set to false, the
|
||||
processes are mapped byslot. By
|
||||
default, mapping is determined by the
|
||||
default mapping policy set when the
|
||||
job was started.
|
||||
mapper char* Mapper to be used for this job
|
||||
map_by char* Mapping directive indicating how
|
||||
processes are to be mapped (slot,
|
||||
node, socket, etc.).
|
||||
rank_by char * Ranking directive indicating how
|
||||
processes are to be ranked (slot,
|
||||
node, socket, etc.).
|
||||
bind_to char * Binding directive indicating how
|
||||
processes are to be bound (hwthread,
|
||||
core, socket, numa, etc.).
|
||||
path char* List of directories to search for
|
||||
the executable
|
||||
npernode char* Number of processes to spawn on
|
||||
each node of the allocation
|
||||
pernode bool Equivalent to npernode of 1
|
||||
ppr char* Spawn specified number of processes
|
||||
on each object of the identified type
|
||||
env char* Newline-delimited list of envars to
|
||||
be passed to the spawned procs
|
||||
.fi
|
||||
|
||||
.sp
|
||||
|
@ -86,6 +86,8 @@ OPAL_DECLSPEC extern char *opal_hwloc_base_topo_file;
|
||||
OPAL_DECLSPEC opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t topo,
|
||||
char *cpuset1, char *cpuset2);
|
||||
|
||||
/**
 * Parse a binding specification string and store the resulting policy.
 *
 * A NULL spec selects bind-to-core with the OPAL_BIND_GIVEN flag cleared
 * (i.e. recorded as a default rather than a user request). A spec that
 * begins with "none" (case-insensitive) selects OPAL_BIND_TO_NONE. Any
 * other spec is read as "<object>[:<qualifier>[,<qualifier>...]]", where
 * <object> is one of hwthread, core, l1cache, l2cache, l3cache, socket,
 * numa or board, and <qualifier> is "if-supported" or "overload-allowed"
 * (all matched case-insensitively).
 *
 * @param policy  Output; written only when the spec parses successfully.
 * @param spec    Binding specification string, or NULL for the default.
 *
 * @return OPAL_SUCCESS on success, OPAL_ERR_BAD_PARAM when the object or
 *         a qualifier is not recognized.
 */
OPAL_DECLSPEC int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec);
|
||||
|
||||
/**
|
||||
* Loads opal_hwloc_my_cpuset (global variable in
|
||||
* opal/mca/hwloc/hwloc.h) for this process. opal_hwloc_my_cpuset
|
||||
|
@ -183,55 +183,12 @@ static int opal_hwloc_base_open(mca_base_open_flag_t flags)
|
||||
|
||||
#if OPAL_HAVE_HWLOC
|
||||
{
|
||||
int i, value;
|
||||
int rc;
|
||||
opal_data_type_t tmp;
|
||||
char **tmpvals, **quals;
|
||||
|
||||
/* binding specification */
|
||||
if (NULL == opal_hwloc_base_binding_policy) {
|
||||
/* default to bind-to core, and that no binding policy was specified */
|
||||
opal_hwloc_binding_policy = OPAL_BIND_TO_CORE;
|
||||
} else if (0 == strncasecmp(opal_hwloc_base_binding_policy, "none", strlen("none"))) {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_NONE);
|
||||
} else {
|
||||
tmpvals = opal_argv_split(opal_hwloc_base_binding_policy, ':');
|
||||
if (1 < opal_argv_count(tmpvals)) {
|
||||
quals = opal_argv_split(tmpvals[1], ',');
|
||||
for (i=0; NULL != quals[i]; i++) {
|
||||
if (0 == strcasecmp(quals[i], "if-supported")) {
|
||||
opal_hwloc_binding_policy |= OPAL_BIND_IF_SUPPORTED;
|
||||
} else if (0 == strcasecmp(quals[i], "overload-allowed")) {
|
||||
opal_hwloc_binding_policy |= OPAL_BIND_ALLOW_OVERLOAD;
|
||||
} else {
|
||||
/* unknown option */
|
||||
opal_output(0, "Unknown qualifier to orte_process_binding: %s", opal_hwloc_base_binding_policy);
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
}
|
||||
opal_argv_free(quals);
|
||||
}
|
||||
if (0 == strcasecmp(tmpvals[0], "hwthread")) {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
|
||||
} else if (0 == strcasecmp(tmpvals[0], "core")) {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
|
||||
} else if (0 == strcasecmp(tmpvals[0], "l1cache")) {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_L1CACHE);
|
||||
} else if (0 == strcasecmp(tmpvals[0], "l2cache")) {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_L2CACHE);
|
||||
} else if (0 == strcasecmp(tmpvals[0], "l3cache")) {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_L3CACHE);
|
||||
} else if (0 == strcasecmp(tmpvals[0], "socket")) {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_SOCKET);
|
||||
} else if (0 == strcasecmp(tmpvals[0], "numa")) {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_NUMA);
|
||||
} else if (0 == strcasecmp(tmpvals[0], "board")) {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_BOARD);
|
||||
} else {
|
||||
opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, "binding", opal_hwloc_base_binding_policy);
|
||||
opal_argv_free(tmpvals);
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
opal_argv_free(tmpvals);
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&opal_hwloc_binding_policy,
|
||||
opal_hwloc_base_binding_policy))) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (opal_hwloc_base_bind_to_core) {
|
||||
@ -290,14 +247,14 @@ static int opal_hwloc_base_open(mca_base_open_flag_t flags)
|
||||
|
||||
/* declare the hwloc data types */
|
||||
tmp = OPAL_HWLOC_TOPO;
|
||||
if (OPAL_SUCCESS != (value = opal_dss.register_type(opal_hwloc_pack,
|
||||
opal_hwloc_unpack,
|
||||
(opal_dss_copy_fn_t)opal_hwloc_copy,
|
||||
(opal_dss_compare_fn_t)opal_hwloc_compare,
|
||||
(opal_dss_print_fn_t)opal_hwloc_print,
|
||||
OPAL_DSS_STRUCTURED,
|
||||
"OPAL_HWLOC_TOPO", &tmp))) {
|
||||
return value;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_hwloc_pack,
|
||||
opal_hwloc_unpack,
|
||||
(opal_dss_copy_fn_t)opal_hwloc_copy,
|
||||
(opal_dss_compare_fn_t)opal_hwloc_compare,
|
||||
(opal_dss_print_fn_t)opal_hwloc_print,
|
||||
OPAL_DSS_STRUCTURED,
|
||||
"OPAL_HWLOC_TOPO", &tmp))) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -530,4 +487,65 @@ OBJ_CLASS_INSTANCE(orte_rmaps_numa_node_t,
|
||||
opal_list_item_t,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
/**
 * Parse a binding specification string and store the resulting policy.
 *
 * A NULL spec selects bind-to-core with OPAL_BIND_GIVEN cleared, so the
 * result is recorded as a default rather than a user-supplied policy.
 * A spec beginning with "none" (case-insensitive) selects
 * OPAL_BIND_TO_NONE. Any other spec has the form
 * "<object>[:<qualifier>[,<qualifier>...]]" where <object> is one of
 * hwthread, core, l1cache, l2cache, l3cache, socket, numa or board and
 * <qualifier> is "if-supported" or "overload-allowed".
 *
 * @param policy  Output; written only when parsing succeeds.
 * @param spec    Binding specification string, or NULL for the default.
 *
 * @return OPAL_SUCCESS on success; OPAL_ERR_BAD_PARAM if the object or a
 *         qualifier is not recognized, or if the spec has no object field.
 */
int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec)
{
    int i;
    int rc = OPAL_SUCCESS;
    opal_binding_policy_t tmp = 0;
    char **tmpvals = NULL;
    char **quals = NULL;

    if (NULL == spec) {
        /* default to bind-to core, and that no binding policy was specified */
        OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CORE);
        tmp &= ~OPAL_BIND_GIVEN;
    } else if (0 == strncasecmp(spec, "none", strlen("none"))) {
        OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NONE);
    } else {
        tmpvals = opal_argv_split(spec, ':');
        /* NOTE(review): guard in case the split yields no object field
         * (e.g. spec is "" or ":") - presumably returns NULL/empty; confirm */
        if (NULL == tmpvals || NULL == tmpvals[0]) {
            opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, "binding", spec);
            rc = OPAL_ERR_BAD_PARAM;
            goto cleanup;
        }

        /* qualifiers follow the ':' and are applied before the object so
         * that setting the object policy sees them already in place */
        if (1 < opal_argv_count(tmpvals)) {
            quals = opal_argv_split(tmpvals[1], ',');
            for (i = 0; NULL != quals && NULL != quals[i]; i++) {
                if (0 == strcasecmp(quals[i], "if-supported")) {
                    tmp |= OPAL_BIND_IF_SUPPORTED;
                } else if (0 == strcasecmp(quals[i], "overload-allowed")) {
                    tmp |= OPAL_BIND_ALLOW_OVERLOAD;
                } else {
                    /* unknown option - release both argv arrays on the way out */
                    opal_output(0, "Unknown qualifier to orte_process_binding: %s", spec);
                    rc = OPAL_ERR_BAD_PARAM;
                    goto cleanup;
                }
            }
        }

        if (0 == strcasecmp(tmpvals[0], "hwthread")) {
            OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_HWTHREAD);
        } else if (0 == strcasecmp(tmpvals[0], "core")) {
            OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CORE);
        } else if (0 == strcasecmp(tmpvals[0], "l1cache")) {
            OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L1CACHE);
        } else if (0 == strcasecmp(tmpvals[0], "l2cache")) {
            OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L2CACHE);
        } else if (0 == strcasecmp(tmpvals[0], "l3cache")) {
            OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L3CACHE);
        } else if (0 == strcasecmp(tmpvals[0], "socket")) {
            OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_SOCKET);
        } else if (0 == strcasecmp(tmpvals[0], "numa")) {
            OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NUMA);
        } else if (0 == strcasecmp(tmpvals[0], "board")) {
            OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_BOARD);
        } else {
            opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, "binding", spec);
            rc = OPAL_ERR_BAD_PARAM;
            goto cleanup;
        }
    }

    /* only publish the result once the whole spec has been accepted */
    *policy = tmp;

cleanup:
    if (NULL != quals) {
        opal_argv_free(quals);
    }
    if (NULL != tmpvals) {
        opal_argv_free(tmpvals);
    }
    return rc;
}
|
||||
#endif
|
||||
|
@ -122,6 +122,12 @@ ORTE_DECLSPEC int orte_rmaps_base_filter_nodes(orte_app_context_t *app,
|
||||
opal_list_t *nodes,
|
||||
bool remove);
|
||||
|
||||
/* Parse a mapping specification string ("<policy>[:<modifiers>]") into
 * *policy. *device is reset to NULL and, for the "dist" policy, receives
 * a strdup'd device name. A NULL spec selects the built-in default.
 * Returns ORTE_SUCCESS, or ORTE_ERR_SILENT after reporting a bad spec.
 */
ORTE_DECLSPEC int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
|
||||
char **device, char *spec);
|
||||
/* Parse a ranking specification string ("<policy>[:span|fill]") into
 * *policy. When spec is NULL the ranking is derived from the supplied
 * mapping policy. Returns ORTE_SUCCESS, or ORTE_ERR_SILENT after
 * reporting a bad spec.
 */
ORTE_DECLSPEC int orte_rmaps_base_set_ranking_policy(orte_ranking_policy_t *policy,
|
||||
orte_mapping_policy_t mapping,
|
||||
char *spec);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -233,11 +233,7 @@ static int orte_rmaps_base_close(void)
|
||||
*/
|
||||
static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
{
|
||||
int i, rc;
|
||||
orte_mapping_policy_t tmp=0;
|
||||
orte_ranking_policy_t rtmp=0;
|
||||
char **ck, **ck2;
|
||||
size_t len;
|
||||
int rc;
|
||||
|
||||
/* init the globals */
|
||||
OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t);
|
||||
@ -260,187 +256,16 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
}
|
||||
#endif
|
||||
|
||||
if (NULL == rmaps_base_mapping_policy) {
|
||||
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SPAN);
|
||||
ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
||||
} else {
|
||||
ck = opal_argv_split(rmaps_base_mapping_policy, ':');
|
||||
if (2 < opal_argv_count(ck)) {
|
||||
/* incorrect format */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", rmaps_base_mapping_policy);
|
||||
opal_argv_free(ck);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
if (2 == opal_argv_count(ck)) {
|
||||
/* if the policy is "dist", then we set the policy to that value
|
||||
* and save the second argument as the device
|
||||
*/
|
||||
#if OPAL_HAVE_HWLOC
|
||||
if (0 == strncasecmp(ck[0], "dist", strlen(ck[0]))) {
|
||||
tmp = ORTE_MAPPING_BYDIST;
|
||||
ck2 = opal_argv_split(ck[1], ',');
|
||||
if (ck2[0] != NULL) {
|
||||
orte_rmaps_base.device = strdup(ck2[0]);
|
||||
for (i=1; NULL != ck2[i]; i++) {
|
||||
if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) {
|
||||
orte_rmaps_base.mapping |= ORTE_MAPPING_SPAN;
|
||||
}
|
||||
}
|
||||
}
|
||||
opal_argv_free(ck2);
|
||||
goto setpolicy;
|
||||
}
|
||||
#endif
|
||||
ck2 = opal_argv_split(ck[1], ',');
|
||||
for (i=0; NULL != ck2[i]; i++) {
|
||||
if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SPAN);
|
||||
} else if (0 == strncasecmp(ck2[i], "oversubscribe", strlen(ck2[i]))) {
|
||||
if (ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
/* error - cannot redefine the default mapping policy */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
|
||||
"oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
|
||||
} else if (0 == strncasecmp(ck2[i], "nooversubscribe", strlen(ck2[i]))) {
|
||||
if (ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
/* error - cannot redefine the default mapping policy */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
|
||||
"nooversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
|
||||
} else {
|
||||
/* unrecognized modifier */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, "mapping", ck2[i]);
|
||||
opal_argv_free(ck);
|
||||
opal_argv_free(ck2);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
opal_argv_free(ck2);
|
||||
}
|
||||
len = strlen(ck[0]);
|
||||
if (0 == strncasecmp(ck[0], "slot", len)) {
|
||||
tmp = ORTE_MAPPING_BYSLOT;
|
||||
} else if (0 == strncasecmp(ck[0], "node", len)) {
|
||||
tmp = ORTE_MAPPING_BYNODE;
|
||||
#if OPAL_HAVE_HWLOC
|
||||
} else if (0 == strncasecmp(ck[0], "core", len)) {
|
||||
tmp = ORTE_MAPPING_BYCORE;
|
||||
} else if (0 == strncasecmp(ck[0], "l1cache", len)) {
|
||||
tmp = ORTE_MAPPING_BYL1CACHE;
|
||||
} else if (0 == strncasecmp(ck[0], "l2cache", len)) {
|
||||
tmp = ORTE_MAPPING_BYL2CACHE;
|
||||
} else if (0 == strncasecmp(ck[0], "l3cache", len)) {
|
||||
tmp = ORTE_MAPPING_BYL3CACHE;
|
||||
} else if (0 == strncasecmp(ck[0], "socket", len)) {
|
||||
tmp = ORTE_MAPPING_BYSOCKET;
|
||||
} else if (0 == strncasecmp(ck[0], "numa", len)) {
|
||||
tmp = ORTE_MAPPING_BYNUMA;
|
||||
} else if (0 == strncasecmp(ck[0], "board", len)) {
|
||||
tmp = ORTE_MAPPING_BYBOARD;
|
||||
} else if (0 == strncasecmp(ck[0], "hwthread", len)) {
|
||||
tmp = ORTE_MAPPING_BYHWTHREAD;
|
||||
/* if we are mapping processes to individual hwthreads, then
|
||||
* we need to treat those hwthreads as separate cpus
|
||||
*/
|
||||
opal_hwloc_use_hwthreads_as_cpus = true;
|
||||
#endif
|
||||
} else {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", rmaps_base_mapping_policy);
|
||||
opal_argv_free(ck);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
#if OPAL_HAVE_HWLOC
|
||||
setpolicy:
|
||||
#endif
|
||||
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, tmp);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
||||
opal_argv_free(ck);
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
|
||||
&orte_rmaps_base.device,
|
||||
rmaps_base_mapping_policy))) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (NULL == rmaps_base_ranking_policy) {
|
||||
/* check for map-by object directives - we set the
|
||||
* ranking to match if one was given
|
||||
*/
|
||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
if (ORTE_MAPPING_BYCORE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_CORE);
|
||||
} else if (ORTE_MAPPING_BYNODE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NODE);
|
||||
} else if (ORTE_MAPPING_BYL1CACHE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_L1CACHE);
|
||||
} else if (ORTE_MAPPING_BYL2CACHE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_L2CACHE);
|
||||
} else if (ORTE_MAPPING_BYL3CACHE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_L3CACHE);
|
||||
} else if (ORTE_MAPPING_BYSOCKET & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SOCKET);
|
||||
} else if (ORTE_MAPPING_BYNUMA & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NUMA);
|
||||
} else if (ORTE_MAPPING_BYBOARD & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_BOARD);
|
||||
} else if (ORTE_MAPPING_BYHWTHREAD & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_HWTHREAD);
|
||||
}
|
||||
} else {
|
||||
/* if no map-by was given, default to by-slot */
|
||||
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SLOT);
|
||||
}
|
||||
} else {
|
||||
ck = opal_argv_split(rmaps_base_ranking_policy, ':');
|
||||
if (2 < opal_argv_count(ck)) {
|
||||
/* incorrect format */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "ranking", rmaps_base_ranking_policy);
|
||||
opal_argv_free(ck);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
if (2 == opal_argv_count(ck)) {
|
||||
if (0 == strncasecmp(ck[1], "span", strlen(ck[1]))) {
|
||||
orte_rmaps_base.ranking |= ORTE_RANKING_SPAN;
|
||||
} else if (0 == strncasecmp(ck[1], "fill", strlen(ck[1]))) {
|
||||
orte_rmaps_base.ranking |= ORTE_RANKING_FILL;
|
||||
} else {
|
||||
/* unrecognized modifier */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, "ranking", ck[1]);
|
||||
opal_argv_free(ck);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
len = strlen(ck[0]);
|
||||
if (0 == strncasecmp(ck[0], "slot", len)) {
|
||||
rtmp = ORTE_RANK_BY_SLOT;
|
||||
} else if (0 == strncasecmp(ck[0], "node", len)) {
|
||||
rtmp = ORTE_RANK_BY_NODE;
|
||||
#if OPAL_HAVE_HWLOC
|
||||
} else if (0 == strncasecmp(ck[0], "hwthread", len)) {
|
||||
rtmp = ORTE_RANK_BY_HWTHREAD;
|
||||
} else if (0 == strncasecmp(ck[0], "core", len)) {
|
||||
rtmp = ORTE_RANK_BY_CORE;
|
||||
} else if (0 == strncasecmp(ck[0], "l1cache", len)) {
|
||||
rtmp = ORTE_RANK_BY_L1CACHE;
|
||||
} else if (0 == strncasecmp(ck[0], "l2cache", len)) {
|
||||
rtmp = ORTE_RANK_BY_L2CACHE;
|
||||
} else if (0 == strncasecmp(ck[0], "l3cache", len)) {
|
||||
rtmp = ORTE_RANK_BY_L3CACHE;
|
||||
} else if (0 == strncasecmp(ck[0], "socket", len)) {
|
||||
rtmp = ORTE_RANK_BY_SOCKET;
|
||||
} else if (0 == strncasecmp(ck[0], "numa", len)) {
|
||||
rtmp = ORTE_RANK_BY_NUMA;
|
||||
} else if (0 == strncasecmp(ck[0], "board", len)) {
|
||||
rtmp = ORTE_RANK_BY_BOARD;
|
||||
#endif
|
||||
} else {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "ranking", rmaps_base_ranking_policy);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, rtmp);
|
||||
ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_ranking_policy(&orte_rmaps_base.ranking,
|
||||
orte_rmaps_base.mapping,
|
||||
rmaps_base_ranking_policy))) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (rmaps_base_byslot) {
|
||||
@ -553,3 +378,208 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, rmaps, "ORTE Mapping Subsystem",
|
||||
OBJ_CLASS_INSTANCE(orte_rmaps_base_selected_module_t,
|
||||
opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
|
||||
/**
 * Translate a mapping specification into a mapping policy value.
 *
 * The specification has the form "<policy>[:<modifier>[,<modifier>...]]".
 * Policy names may be abbreviated: only the characters the caller
 * supplied are compared against each known name. The special form
 * "dist:<device>[,span]" (hwloc builds only) selects distance-based
 * mapping and records the device name.
 *
 * @param policy  [out] receives the computed policy and directives; it is
 *                left untouched when an error is returned
 * @param device  [out] always reset to NULL on entry; receives a strdup'd
 *                device name when the "dist" policy is given (the caller
 *                owns and must free it)
 * @param spec    specification string, or NULL to select the default
 *                (by-socket with the SPAN directive, GIVEN not set)
 *
 * @return ORTE_SUCCESS on success, ORTE_ERR_SILENT after a help message
 *         has been shown for a malformed specification, or
 *         ORTE_ERR_OUT_OF_RESOURCE if the device name cannot be copied.
 *
 * Side effect: selecting "hwthread" sets opal_hwloc_use_hwthreads_as_cpus.
 */
int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
                                       char **device, char *spec)
{
    char **ck = NULL;
    char **ck2 = NULL;
    orte_mapping_policy_t tmp = 0;
    int i;
    int rc = ORTE_SUCCESS;
    int dist_given = 0;
    size_t len;

    /* set defaults */
    *device = NULL;

    if (NULL == spec) {
        ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
        ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SPAN);
        ORTE_UNSET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
        *policy = tmp;
        return ORTE_SUCCESS;
    }

    ck = opal_argv_split(spec, ':');
    if (NULL == ck || NULL == ck[0] || 2 < opal_argv_count(ck)) {
        /* incorrect format - report the string we were actually given,
         * which is not necessarily the MCA parameter value
         */
        orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", spec);
        rc = ORTE_ERR_SILENT;
        goto cleanup;
    }

    if (2 == opal_argv_count(ck)) {
#if OPAL_HAVE_HWLOC
        /* if the policy is "dist", then we set the policy to that value
         * and save the second argument as the device
         */
        if (0 == strncasecmp(ck[0], "dist", strlen(ck[0]))) {
            ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYDIST);
            ck2 = opal_argv_split(ck[1], ',');
            if (NULL != ck2 && NULL != ck2[0]) {
                *device = strdup(ck2[0]);
                if (NULL == *device) {
                    rc = ORTE_ERR_OUT_OF_RESOURCE;
                    goto cleanup;
                }
                for (i = 1; NULL != ck2[i]; i++) {
                    if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) {
                        ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SPAN);
                    }
                }
            }
            dist_given = 1;
        }
#endif
        if (!dist_given) {
            ck2 = opal_argv_split(ck[1], ',');
            for (i = 0; NULL != ck2 && NULL != ck2[i]; i++) {
                if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) {
                    ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SPAN);
                } else if (0 == strncasecmp(ck2[i], "oversubscribe", strlen(ck2[i]))) {
                    ORTE_UNSET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
                    ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SUBSCRIBE_GIVEN);
                } else if (0 == strncasecmp(ck2[i], "nooversubscribe", strlen(ck2[i]))) {
                    ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
                    ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SUBSCRIBE_GIVEN);
                } else {
                    /* unrecognized modifier */
                    orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, "mapping", ck2[i]);
                    rc = ORTE_ERR_SILENT;
                    goto cleanup;
                }
            }
        }
    }

    if (!dist_given) {
        len = strlen(ck[0]);
        if (0 == strncasecmp(ck[0], "slot", len)) {
            ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT);
        } else if (0 == strncasecmp(ck[0], "node", len)) {
            ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE);
#if OPAL_HAVE_HWLOC
        } else if (0 == strncasecmp(ck[0], "core", len)) {
            ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE);
        } else if (0 == strncasecmp(ck[0], "l1cache", len)) {
            ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL1CACHE);
        } else if (0 == strncasecmp(ck[0], "l2cache", len)) {
            ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL2CACHE);
        } else if (0 == strncasecmp(ck[0], "l3cache", len)) {
            ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL3CACHE);
        } else if (0 == strncasecmp(ck[0], "socket", len)) {
            ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
        } else if (0 == strncasecmp(ck[0], "numa", len)) {
            ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNUMA);
        } else if (0 == strncasecmp(ck[0], "board", len)) {
            ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYBOARD);
        } else if (0 == strncasecmp(ck[0], "hwthread", len)) {
            ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYHWTHREAD);
            /* if we are mapping processes to individual hwthreads, then
             * we need to treat those hwthreads as separate cpus
             */
            opal_hwloc_use_hwthreads_as_cpus = true;
#endif
        } else {
            orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", spec);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
    }

    /* an explicit specification was parsed successfully (including the
     * "dist" form), so flag the policy as user-given
     */
    ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
    *policy = tmp;

cleanup:
    /* single release point so no path leaks or double-frees the
     * split arrays
     */
    opal_argv_free(ck2);
    opal_argv_free(ck);
    return rc;
}
|
||||
|
||||
int orte_rmaps_base_set_ranking_policy(orte_ranking_policy_t *policy,
|
||||
orte_mapping_policy_t mapping,
|
||||
char *spec)
|
||||
{
|
||||
orte_ranking_policy_t tmp;
|
||||
char **ck;
|
||||
size_t len;
|
||||
|
||||
/* set default */
|
||||
tmp = 0;
|
||||
|
||||
if (NULL == spec) {
|
||||
/* check for map-by object directives - we set the
|
||||
* ranking to match if one was given
|
||||
*/
|
||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(mapping)) {
|
||||
if (ORTE_MAPPING_BYCORE & ORTE_GET_MAPPING_POLICY(mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_CORE);
|
||||
} else if (ORTE_MAPPING_BYNODE & ORTE_GET_MAPPING_POLICY(mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_NODE);
|
||||
} else if (ORTE_MAPPING_BYL1CACHE & ORTE_GET_MAPPING_POLICY(mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_L1CACHE);
|
||||
} else if (ORTE_MAPPING_BYL2CACHE & ORTE_GET_MAPPING_POLICY(mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_L2CACHE);
|
||||
} else if (ORTE_MAPPING_BYL3CACHE & ORTE_GET_MAPPING_POLICY(mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_L3CACHE);
|
||||
} else if (ORTE_MAPPING_BYSOCKET & ORTE_GET_MAPPING_POLICY(mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_SOCKET);
|
||||
} else if (ORTE_MAPPING_BYNUMA & ORTE_GET_MAPPING_POLICY(mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_NUMA);
|
||||
} else if (ORTE_MAPPING_BYBOARD & ORTE_GET_MAPPING_POLICY(mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_BOARD);
|
||||
} else if (ORTE_MAPPING_BYHWTHREAD & ORTE_GET_MAPPING_POLICY(mapping)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_HWTHREAD);
|
||||
}
|
||||
} else {
|
||||
/* if no map-by was given, default to by-slot */
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_SLOT);
|
||||
}
|
||||
} else {
|
||||
ck = opal_argv_split(spec, ':');
|
||||
if (2 < opal_argv_count(ck)) {
|
||||
/* incorrect format */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "ranking", policy);
|
||||
opal_argv_free(ck);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
if (2 == opal_argv_count(ck)) {
|
||||
if (0 == strncasecmp(ck[1], "span", strlen(ck[1]))) {
|
||||
ORTE_SET_RANKING_DIRECTIVE(tmp, ORTE_RANKING_SPAN);
|
||||
} else if (0 == strncasecmp(ck[1], "fill", strlen(ck[1]))) {
|
||||
ORTE_SET_RANKING_DIRECTIVE(tmp, ORTE_RANKING_FILL);
|
||||
} else {
|
||||
/* unrecognized modifier */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, "ranking", ck[1]);
|
||||
opal_argv_free(ck);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
len = strlen(ck[0]);
|
||||
if (0 == strncasecmp(ck[0], "slot", len)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_SLOT);
|
||||
} else if (0 == strncasecmp(ck[0], "node", len)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_NODE);
|
||||
#if OPAL_HAVE_HWLOC
|
||||
} else if (0 == strncasecmp(ck[0], "hwthread", len)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_HWTHREAD);
|
||||
} else if (0 == strncasecmp(ck[0], "core", len)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_CORE);
|
||||
} else if (0 == strncasecmp(ck[0], "l1cache", len)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_L1CACHE);
|
||||
} else if (0 == strncasecmp(ck[0], "l2cache", len)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_L2CACHE);
|
||||
} else if (0 == strncasecmp(ck[0], "l3cache", len)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_L3CACHE);
|
||||
} else if (0 == strncasecmp(ck[0], "socket", len)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_SOCKET);
|
||||
} else if (0 == strncasecmp(ck[0], "numa", len)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_NUMA);
|
||||
} else if (0 == strncasecmp(ck[0], "board", len)) {
|
||||
ORTE_SET_RANKING_POLICY(tmp, ORTE_RANK_BY_BOARD);
|
||||
#endif
|
||||
} else {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "ranking", rmaps_base_ranking_policy);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
opal_argv_free(ck);
|
||||
ORTE_SET_RANKING_DIRECTIVE(tmp, ORTE_RANKING_GIVEN);
|
||||
}
|
||||
|
||||
*policy = tmp;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -100,12 +100,18 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
||||
"mca:rmaps: nprocs %s",
|
||||
ORTE_VPID_PRINT(nprocs));
|
||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps mapping given - using default");
|
||||
map->mapping = orte_rmaps_base.mapping;
|
||||
} else {
|
||||
/* default based on number of procs */
|
||||
if (nprocs <= 2) {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps mapping not given - using byslot");
|
||||
ORTE_SET_MAPPING_POLICY(map->mapping, ORTE_MAPPING_BYSLOT);
|
||||
} else {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps mapping not given - using bysocket");
|
||||
ORTE_SET_MAPPING_POLICY(map->mapping, ORTE_MAPPING_BYSOCKET);
|
||||
}
|
||||
}
|
||||
@ -141,8 +147,12 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
||||
if (!ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||
/* default based on number of procs */
|
||||
if (nprocs <= 2) {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps mapping not set by user - using byslot");
|
||||
ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
|
||||
} else {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps mapping not set by user - using bysocket");
|
||||
ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSOCKET);
|
||||
}
|
||||
}
|
||||
|
@ -493,9 +493,17 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
||||
} else {
|
||||
if (node->slots > node->slots_inuse) {
|
||||
/* add the available slots */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
|
||||
"%s node %s has %d slots available",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
node->name, node->slots));
|
||||
num_slots += node->slots - node->slots_inuse;
|
||||
} else {
|
||||
/* always allocate at least one */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
|
||||
"%s node %s has %d slots %d used",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
node->name, node->slots, node->slots_inuse));
|
||||
num_slots++;
|
||||
}
|
||||
}
|
||||
|
@ -472,17 +472,22 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* compute how many extra procs to put on each node */
|
||||
balance = (float)(((jdata->num_procs + app->num_procs)*orte_rmaps_base.cpus_per_rank) - num_slots) / (float)opal_list_get_size(node_list);
|
||||
extra_procs_to_assign = (int)balance;
|
||||
if (0 < (balance - (float)extra_procs_to_assign)) {
|
||||
/* compute how many nodes need an extra proc */
|
||||
nxtra_nodes = ((jdata->num_procs + app->num_procs)*orte_rmaps_base.cpus_per_rank) - num_slots - (extra_procs_to_assign * opal_list_get_size(node_list));
|
||||
/* add one so that we add an extra proc to the first nodes
|
||||
* until all procs are mapped
|
||||
*/
|
||||
extra_procs_to_assign++;
|
||||
/* flag that we added one */
|
||||
add_one = true;
|
||||
if (1 == opal_list_get_size(node_list)) {
|
||||
/* if there is only one node, then they all have to go on it */
|
||||
extra_procs_to_assign = app->num_procs;
|
||||
} else {
|
||||
balance = (float)(((jdata->num_procs + app->num_procs)*orte_rmaps_base.cpus_per_rank) - num_slots) / (float)opal_list_get_size(node_list);
|
||||
extra_procs_to_assign = (int)balance;
|
||||
if (0 < (balance - (float)extra_procs_to_assign)) {
|
||||
/* compute how many nodes need an extra proc */
|
||||
nxtra_nodes = ((jdata->num_procs + app->num_procs)*orte_rmaps_base.cpus_per_rank) - num_slots - (extra_procs_to_assign * opal_list_get_size(node_list));
|
||||
/* add one so that we add an extra proc to the first nodes
|
||||
* until all procs are mapped
|
||||
*/
|
||||
extra_procs_to_assign++;
|
||||
/* flag that we added one */
|
||||
add_one = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -12,11 +12,11 @@ int main(int argc, char* argv[])
|
||||
char hostname[512];
|
||||
pid_t pid;
|
||||
|
||||
pid = getpid();
|
||||
printf("[pid %ld] starting up!\n", (long)pid);
|
||||
pid = getpid();
|
||||
printf("[pid %ld] starting up!\n", (long)pid);
|
||||
MPI_Init(NULL, NULL);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
printf("%d completed MPI_Init\n", rank);
|
||||
printf("%d completed MPI_Init\n", rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &size);
|
||||
MPI_Comm_get_parent(&parent);
|
||||
/* If we get COMM_NULL back, then we're the parent */
|
||||
@ -24,7 +24,7 @@ printf("%d completed MPI_Init\n", rank);
|
||||
pid = getpid();
|
||||
printf("Parent [pid %ld] about to spawn!\n", (long)pid);
|
||||
if (MPI_SUCCESS != (rc = MPI_Comm_spawn(argv[0], MPI_ARGV_NULL, 3, MPI_INFO_NULL,
|
||||
0, MPI_COMM_WORLD, &child, MPI_ERRCODES_IGNORE))) {
|
||||
0, MPI_COMM_WORLD, &child, MPI_ERRCODES_IGNORE))) {
|
||||
printf("Child failed to spawn\n");
|
||||
return rc;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user