diff --git a/opal/dss/dss.h b/opal/dss/dss.h index 35e3589577..a9f4deedf8 100644 --- a/opal/dss/dss.h +++ b/opal/dss/dss.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,6 +33,16 @@ BEGIN_C_DECLS +/* Provide a macro for determining the bool value of an opal_value_t */ +#define OPAL_CHECK_BOOL(v, p) \ + do { \ + if (OPAL_UNDEF == (v)->type) { \ + (p) = true; \ + } else { \ + (p) = (v)->data.flag; \ + } \ + } while(0) + /* A non-API function for something that happens in a number * of places throughout the code base - loading a value into * an opal_value_t structure diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index de86c4ae49..0ad5780efc 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -344,6 +344,21 @@ typedef uint32_t pmix_rank_t; // job - i.e., not part of the "comm_world" of the job #define PMIX_SET_SESSION_CWD "pmix.ssncwd" // (bool) set the application's current working directory to // the session working directory assigned by the RM +#define PMIX_TAG_OUTPUT "pmix.tagout" // (bool) tag application output with the ID of the source +#define PMIX_TIMESTAMP_OUTPUT "pmix.tsout" // (bool) timestamp output from applications +#define PMIX_MERGE_STDERR_STDOUT "pmix.mergeerrout" // (bool) merge stdout and stderr streams from application procs +#define PMIX_OUTPUT_TO_FILE "pmix.outfile" // (char*) output application output to given file +#define PMIX_INDEX_ARGV "pmix.indxargv" // (bool) mark the argv with the rank of the proc +#define PMIX_CPUS_PER_PROC "pmix.cpuperproc" // (uint32_t) #cpus to assign to each rank +#define 
PMIX_NO_PROCS_ON_HEAD "pmix.nolocal" // (bool) do not place procs on the head node +#define PMIX_NO_OVERSUBSCRIBE "pmix.noover" // (bool) do not oversubscribe the cpus +#define PMIX_REPORT_BINDINGS "pmix.repbind" // (bool) report bindings of the individual procs +#define PMIX_CPU_LIST "pmix.cpulist" // (char*) list of cpus to use for this job +#define PMIX_JOB_RECOVERABLE "pmix.recover" // (bool) application supports recoverable operations +#define PMIX_JOB_CONTINUOUS "pmix.continuous" // (bool) application is continuous, all failed procs should + // be immediately restarted +#define PMIX_MAX_RESTARTS "pmix.maxrestarts" // (uint32_t) max number of times to restart a job + /* query attributes */ #define PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) request a comma-delimited list of active nspaces diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c index 35b32d2ada..0a612af151 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c @@ -471,7 +471,7 @@ static pmix_status_t parse_uri_file(char *filename, * user isn't authorized to access it - or it may just * not exist yet! 
Check for existence */ if (0 != access(filename, R_OK)) { - if (ENOENT == errno) { + if (ENOENT == errno && 0 < mca_ptl_tcp_component.wait_to_connect) { /* the file does not exist, so give it * a little time to see if the server * is still starting up */ @@ -979,6 +979,7 @@ static pmix_status_t df_search(char *dirname, char *prefix, } newdir = pmix_os_path(false, dirname, dir_entry->d_name, NULL); if (-1 == stat(newdir, &buf)) { + free(newdir); continue; } /* if it is a directory, down search */ diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_south.c b/opal/mca/pmix/pmix2x/pmix2x_server_south.c index 6ec1f259f6..db76b13dee 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_south.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_south.c @@ -343,10 +343,12 @@ void pmix2x_server_deregister_nspace(opal_jobid_t jobid, if (jptr->jobid == jobid) { /* found it - tell the server to deregister */ OPAL_PMIX_CONSTRUCT_LOCK(&lock); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); PMIx_server_deregister_nspace(jptr->nspace, lkcbfunc, (void*)&lock); OPAL_PMIX_WAIT_THREAD(&lock); OPAL_PMIX_DESTRUCT_LOCK(&lock); /* now get rid of it from our list */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); opal_list_remove_item(&mca_pmix_pmix2x_component.jobids, &jptr->super); OBJ_RELEASE(jptr); break; diff --git a/opal/mca/pmix/pmix_types.h b/opal/mca/pmix/pmix_types.h index e1104dc9dd..4b2ebfc478 100644 --- a/opal/mca/pmix/pmix_types.h +++ b/opal/mca/pmix/pmix_types.h @@ -259,6 +259,21 @@ BEGIN_C_DECLS // job - i.e., not part of the "comm_world" of the job #define OPAL_PMIX_SET_SESSION_CWD "pmix.ssncwd" // (bool) set the application's current working directory to // the session working directory assigned by the RM +#define OPAL_PMIX_TAG_OUTPUT "pmix.tagout" // (bool) tag application output with the ID of the source +#define OPAL_PMIX_TIMESTAMP_OUTPUT "pmix.tsout" // (bool) timestamp output from applications +#define OPAL_PMIX_MERGE_STDERR_STDOUT "pmix.mergeerrout" // (bool) merge stdout and 
stderr streams from application procs +#define OPAL_PMIX_OUTPUT_TO_FILE "pmix.outfile" // (char*) output application output to given file +#define OPAL_PMIX_INDEX_ARGV "pmix.indxargv" // (bool) mark the argv with the rank of the proc +#define OPAL_PMIX_CPUS_PER_PROC "pmix.cpuperproc" // (uint32_t) #cpus to assign to each rank +#define OPAL_PMIX_NO_PROCS_ON_HEAD "pmix.nolocal" // (bool) do not place procs on the head node +#define OPAL_PMIX_NO_OVERSUBSCRIBE "pmix.noover" // (bool) do not oversubscribe the cpus +#define OPAL_PMIX_REPORT_BINDINGS "pmix.repbind" // (bool) report bindings of the individual procs +#define OPAL_PMIX_CPU_LIST "pmix.cpulist" // (char*) list of cpus to use for this job +#define OPAL_PMIX_JOB_RECOVERABLE "pmix.recover" // (bool) application supports recoverable operations +#define OPAL_PMIX_JOB_CONTINUOUS "pmix.continuous" // (bool) application is continuous, all failed procs should + // be immediately restarted +#define OPAL_PMIX_MAX_RESTARTS "pmix.maxrestarts" // (uint32_t) max number of times to restart a job + /* query attributes */ #define OPAL_PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) request a comma-delimited list of active nspaces @@ -282,6 +297,7 @@ BEGIN_C_DECLS #define OPAL_PMIX_TIME_REMAINING "pmix.time.remaining" // (char*) query number of seconds (uint32_t) remaining in allocation // for the specified nspace + /* log attributes */ #define OPAL_PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr #define OPAL_PMIX_LOG_STDOUT "pmix.log.stdout" // (char*) log string to stdout diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 67a7ef3ad6..08f9efb767 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -96,7 +96,7 @@ static int opal_err2str(int errnum, const char **errmsg) { const char *retval; -opal_output(0, "OPAL ERR2STR %d", errnum); + switch (errnum) { case OPAL_SUCCESS: retval = "Success"; diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c 
b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c index e4799856a2..ef1b44da91 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -145,6 +145,8 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata, /* add this node to the map - do it only once */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); + OBJ_RETAIN(node); + opal_pointer_array_add(jdata->map->nodes, node); ++(jdata->map->num_nodes); } if (add_one) { @@ -284,6 +286,8 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata, /* add this node to the map, but only do so once */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); + OBJ_RETAIN(node); + opal_pointer_array_add(jdata->map->nodes, node); ++(jdata->map->num_nodes); } if (oversubscribed) { @@ -532,6 +536,8 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, /* add this node to the map, if reqd */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); + OBJ_RETAIN(node); + opal_pointer_array_add(jdata->map->nodes, node); ++(jdata->map->num_nodes); } nmapped = 0; @@ -678,6 +684,8 @@ static int byobj_span(orte_job_t *jdata, /* add this node to the map, if reqd */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); + OBJ_RETAIN(node); + opal_pointer_array_add(jdata->map->nodes, node); ++(jdata->map->num_nodes); } /* get the number of objects of this type on this node */ diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index 6271281567..d04d81ec40 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -502,11 +502,6 @@ static opal_cmd_line_init_t cmd_line_init[] = { "Create a persistent distributed virtual machine (DVM)", OPAL_CMD_LINE_OTYPE_DVM }, - /* tell the dvm to terminate */ - { NULL, '\0', "terminate", "terminate", 0, - &orte_cmd_options.terminate_dvm, 
OPAL_CMD_LINE_TYPE_BOOL, - "Terminate the DVM", OPAL_CMD_LINE_OTYPE_DVM }, - /* fwd mpirun port */ { "orte_fwd_mpirun_port", '\0', "fwd-mpirun-port", "fwd-mpirun-port", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c index ad6111a263..3462df57bd 100644 --- a/orte/mca/state/dvm/state_dvm.c +++ b/orte/mca/state/dvm/state_dvm.c @@ -450,7 +450,7 @@ static void check_complete(int fd, short args, void *cbdata) * we call the errmgr so that any attempt to restart the job will * avoid doing so in the exact same place as the current job */ - if (NULL != jdata->map && jdata->state == ORTE_JOB_STATE_TERMINATED) { + if (NULL != jdata->map) { map = jdata->map; for (index = 0; index < map->nodes->size; index++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, index))) { diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index 19f7f690bf..6e4c725cd7 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -820,7 +820,6 @@ int orte_submit_job(char *argv[], int *index, orte_set_attribute(&jdata->attributes, ORTE_JOB_MERGE_STDERR_STDOUT, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); } - /* check what user wants us to do with stdin */ if (NULL != orte_cmd_options.stdin_target) { if (0 == strcmp(orte_cmd_options.stdin_target, "all")) { @@ -902,7 +901,7 @@ int orte_submit_job(char *argv[], int *index, ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL); } if (orte_cmd_options.no_oversubscribe) { - ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); + ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); } if (orte_cmd_options.oversubscribe) { ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index 0e29e23ef8..a5701cd685 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ 
b/orte/orted/pmix/pmix_server_dyn.c @@ -42,6 +42,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmaps/base/base.h" +#include "orte/mca/state/state.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" #include "orte/util/threads.h" @@ -59,6 +60,7 @@ void pmix_server_launch_resp(int status, orte_process_name_t* sender, int rc, room; int32_t ret, cnt; orte_jobid_t jobid; + orte_job_t *jdata; /* unpack the status */ cnt = 1; @@ -93,6 +95,11 @@ void pmix_server_launch_resp(int status, orte_process_name_t* sender, if (NULL != req->spcbfunc) { req->spcbfunc(ret, jobid, req->cbdata); } + /* if we failed to launch, then ensure we cleanup */ + if (ORTE_SUCCESS != ret) { + jdata = orte_get_job_data_object(jobid); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); + } /* cleanup */ OBJ_RELEASE(req); } @@ -164,8 +171,9 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, opal_pmix_app_t *papp; opal_value_t *info, *next; opal_list_t *cache; - int rc; + int rc, i; char cwd[OPAL_PATH_MAX]; + bool flag; opal_output_verbose(2, orte_pmix_server_globals.output, "%s spawn called from proc %s", @@ -176,108 +184,6 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, jdata = OBJ_NEW(orte_job_t); jdata->map = OBJ_NEW(orte_job_map_t); - /* transfer the job info across */ - OPAL_LIST_FOREACH_SAFE(info, next, job_info, opal_value_t) { - if (0 == strcmp(info->key, OPAL_PMIX_PERSONALITY)) { - jdata->personality = opal_argv_split(info->data.string, ','); - } else if (0 == strcmp(info->key, OPAL_PMIX_MAPPER)) { - jdata->map->req_mapper = strdup(info->data.string); - } else if (0 == strcmp(info->key, OPAL_PMIX_DISPLAY_MAP)) { - jdata->map->display_map = true; - } else if (0 == strcmp(info->key, OPAL_PMIX_PPR)) { - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", - true, "mapping", info->data.string, - 
orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); - return ORTE_ERR_BAD_PARAM; - } - ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_PPR); - jdata->map->ppr = strdup(info->data.string); - } else if (0 == strcmp(info->key, OPAL_PMIX_MAPBY)) { - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", - true, "mapping", info->data.string, - orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); - return ORTE_ERR_BAD_PARAM; - } - rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping, - NULL, info->data.string); - if (ORTE_SUCCESS != rc) { - return rc; - } - } else if (0 == strcmp(info->key, OPAL_PMIX_RANKBY)) { - if (ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) { - /* not allowed to provide multiple ranking policies */ - orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", - true, "ranking", info->data.string, - orte_rmaps_base_print_ranking(orte_rmaps_base.ranking)); - return ORTE_ERR_BAD_PARAM; - } - rc = orte_rmaps_base_set_ranking_policy(&jdata->map->ranking, - jdata->map->mapping, - info->data.string); - if (ORTE_SUCCESS != rc) { - return rc; - } - } else if (0 == strcmp(info->key, OPAL_PMIX_BINDTO)) { - if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { - /* not allowed to provide multiple mapping policies */ - orte_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, - info->data.string, - opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); - return ORTE_ERR_BAD_PARAM; - } - rc = opal_hwloc_base_set_binding_policy(&jdata->map->binding, - info->data.string); - if (ORTE_SUCCESS != rc) { - return rc; - } - } else if (0 == strcmp(info->key, OPAL_PMIX_NON_PMI)) { - orte_set_attribute(&jdata->attributes, ORTE_JOB_NON_ORTE_JOB, - ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - } else if (0 == strcmp(info->key, OPAL_PMIX_REQUESTOR_IS_TOOL)) { - orte_set_attribute(&jdata->attributes, 
ORTE_JOB_DVM_JOB, - ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - /* request that IO be forwarded to the requesting tool */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_FWDIO_TO_TOOL, - ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - } else if (0 == strcmp(info->key, OPAL_PMIX_STDIN_TGT)) { - if (0 == strcmp(info->data.string, "all")) { - jdata->stdin_target = ORTE_VPID_WILDCARD; - } else if (0 == strcmp(info->data.string, "none")) { - jdata->stdin_target = ORTE_VPID_INVALID; - } else { - jdata->stdin_target = strtoul(info->data.string, NULL, 10); - } - } else if (0 == strcmp(info->key, OPAL_PMIX_NOTIFY_COMPLETION)) { - if (OPAL_UNDEF == info->type || info->data.flag) { - orte_set_attribute(&jdata->attributes, ORTE_JOB_NOTIFY_COMPLETION, - ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - } - } else if (0 == strcmp(info->key, OPAL_PMIX_DEBUG_STOP_ON_EXEC)) { - /* we don't know how to do this */ - return ORTE_ERR_NOT_SUPPORTED; - } else { - /* cache for inclusion with job info at registration */ - cache = NULL; - opal_list_remove_item(job_info, &info->super); - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE, (void**)&cache, OPAL_PTR) && - NULL != cache) { - opal_list_append(cache, &info->super); - } else { - cache = OBJ_NEW(opal_list_t); - opal_list_append(cache, &info->super); - orte_set_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE, ORTE_ATTR_LOCAL, (void*)cache, OPAL_PTR); - } - } - } - /* if the job is missing a personality setting, add it */ - if (NULL == jdata->personality) { - opal_argv_append_nosize(&jdata->personality, "ompi"); - } - /* transfer the apps across */ OPAL_LIST_FOREACH(papp, apps, opal_pmix_app_t) { app = OBJ_NEW(orte_app_context_t); @@ -334,8 +240,9 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, app->cwd = opal_os_path(false, cwd, info->data.string, NULL); } } else if (0 == strcmp(info->key, OPAL_PMIX_PRELOAD_BIN)) { + OPAL_CHECK_BOOL(info, flag); orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, - ORTE_ATTR_GLOBAL, NULL, 
OPAL_BOOL); + ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL); } else if (0 == strcmp(info->key, OPAL_PMIX_PRELOAD_FILES)) { orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_FILES, ORTE_ATTR_GLOBAL, info->data.string, OPAL_STRING); @@ -347,9 +254,223 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, } } + /* transfer the job info across */ + OPAL_LIST_FOREACH_SAFE(info, next, job_info, opal_value_t) { + /*** PERSONALITY ***/ + if (0 == strcmp(info->key, OPAL_PMIX_PERSONALITY)) { + jdata->personality = opal_argv_split(info->data.string, ','); + + /*** REQUESTED MAPPER ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_MAPPER)) { + jdata->map->req_mapper = strdup(info->data.string); + + /*** DISPLAY MAP ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_DISPLAY_MAP)) { + OPAL_CHECK_BOOL(info, jdata->map->display_map); + + /*** PPR (PROCS-PER-RESOURCE) ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_PPR)) { + if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { + /* not allowed to provide multiple mapping policies */ + orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", + true, "mapping", info->data.string, + orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); + return ORTE_ERR_BAD_PARAM; + } + ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_PPR); + jdata->map->ppr = strdup(info->data.string); + + /*** MAP-BY ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_MAPBY)) { + if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { + /* not allowed to provide multiple mapping policies */ + orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", + true, "mapping", info->data.string, + orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); + return ORTE_ERR_BAD_PARAM; + } + rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping, + NULL, info->data.string); + if (ORTE_SUCCESS != rc) { + return rc; + } + + /*** RANK-BY ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_RANKBY)) { + if 
(ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) { + /* not allowed to provide multiple ranking policies */ + orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", + true, "ranking", info->data.string, + orte_rmaps_base_print_ranking(orte_rmaps_base.ranking)); + return ORTE_ERR_BAD_PARAM; + } + rc = orte_rmaps_base_set_ranking_policy(&jdata->map->ranking, + jdata->map->mapping, + info->data.string); + if (ORTE_SUCCESS != rc) { + return rc; + } + + /*** BIND-TO ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_BINDTO)) { + if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { + /* not allowed to provide multiple mapping policies */ + orte_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, + info->data.string, + opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); + return ORTE_ERR_BAD_PARAM; + } + rc = opal_hwloc_base_set_binding_policy(&jdata->map->binding, + info->data.string); + if (ORTE_SUCCESS != rc) { + return rc; + } + + /*** CPUS/RANK ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_CPUS_PER_PROC)) { + jdata->map->cpus_per_rank = info->data.uint32; + + /*** NO USE LOCAL ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_NO_PROCS_ON_HEAD)) { + OPAL_CHECK_BOOL(info, flag); /* NO_USE_LOCAL is a mapping directive, not a job attribute key */ + if (flag) { ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL); } + else { ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL); } + + /*** OVERSUBSCRIBE ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_NO_OVERSUBSCRIBE)) { + OPAL_CHECK_BOOL(info, flag); + if (flag) { + ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); + } else { + ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); + } + + /*** REPORT BINDINGS ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_REPORT_BINDINGS)) { + OPAL_CHECK_BOOL(info, flag); + orte_set_attribute(&jdata->attributes, ORTE_JOB_REPORT_BINDINGS, + ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL); + + /*** CPU LIST ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_CPU_LIST)) { + 
orte_set_attribute(&jdata->attributes, ORTE_JOB_CPU_LIST, + ORTE_ATTR_GLOBAL, info->data.string, OPAL_STRING); /* value is a char*, so store it as a string */ + + /*** RECOVERABLE ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_JOB_RECOVERABLE)) { + OPAL_CHECK_BOOL(info, flag); + if (flag) { + ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_RECOVERABLE); + } else { + ORTE_FLAG_UNSET(jdata, ORTE_JOB_FLAG_RECOVERABLE); + } + + /*** MAX RESTARTS ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_MAX_RESTARTS)) { + for (i=0; i < jdata->apps->size; i++) { + if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { + continue; + } + orte_set_attribute(&app->attributes, ORTE_APP_MAX_RESTARTS, + ORTE_ATTR_GLOBAL, &info->data.uint32, OPAL_INT32); + } + + /*** CONTINUOUS OPERATION ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_JOB_CONTINUOUS)) { + OPAL_CHECK_BOOL(info, flag); + orte_set_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, + ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL); + + /*** NON-PMI JOB ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_NON_PMI)) { + OPAL_CHECK_BOOL(info, flag); + orte_set_attribute(&jdata->attributes, ORTE_JOB_NON_ORTE_JOB, + ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL); + + /*** SPAWN REQUESTOR IS TOOL ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_REQUESTOR_IS_TOOL)) { + OPAL_CHECK_BOOL(info, flag); + orte_set_attribute(&jdata->attributes, ORTE_JOB_DVM_JOB, + ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL); + if (flag) { + /* request that IO be forwarded to the requesting tool */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_FWDIO_TO_TOOL, + ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL); + } + + /*** NOTIFY UPON JOB COMPLETION ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_NOTIFY_COMPLETION)) { + OPAL_CHECK_BOOL(info, flag); + orte_set_attribute(&jdata->attributes, ORTE_JOB_NOTIFY_COMPLETION, + ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL); + + /*** STOP ON EXEC FOR DEBUGGER ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_DEBUG_STOP_ON_EXEC)) { + /* we don't know how to do this */ + return 
ORTE_ERR_NOT_SUPPORTED; + + /*** TAG STDOUT ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_TAG_OUTPUT)) { + OPAL_CHECK_BOOL(info, flag); + orte_set_attribute(&jdata->attributes, ORTE_JOB_TAG_OUTPUT, + ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL); + + /*** TIMESTAMP OUTPUT ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_TIMESTAMP_OUTPUT)) { + OPAL_CHECK_BOOL(info, flag); + orte_set_attribute(&jdata->attributes, ORTE_JOB_TIMESTAMP_OUTPUT, + ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL); + + /*** OUTPUT TO FILES ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_OUTPUT_TO_FILE)) { + orte_set_attribute(&jdata->attributes, ORTE_JOB_OUTPUT_TO_FILE, + ORTE_ATTR_GLOBAL, info->data.string, OPAL_STRING); + + /*** MERGE STDERR TO STDOUT ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_MERGE_STDERR_STDOUT)) { + OPAL_CHECK_BOOL(info, flag); + orte_set_attribute(&jdata->attributes, ORTE_JOB_MERGE_STDERR_STDOUT, + ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL); + + /*** STDIN TARGET ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_STDIN_TGT)) { + if (0 == strcmp(info->data.string, "all")) { + jdata->stdin_target = ORTE_VPID_WILDCARD; + } else if (0 == strcmp(info->data.string, "none")) { + jdata->stdin_target = ORTE_VPID_INVALID; + } else { + jdata->stdin_target = strtoul(info->data.string, NULL, 10); + } + + /*** INDEX ARGV ***/ + } else if (0 == strcmp(info->key, OPAL_PMIX_INDEX_ARGV)) { + OPAL_CHECK_BOOL(info, flag); + orte_set_attribute(&jdata->attributes, ORTE_JOB_INDEX_ARGV, + ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL); + + /*** DEFAULT - CACHE FOR INCLUSION WITH JOB INFO ***/ + } else { + /* cache for inclusion with job info at registration */ + cache = NULL; + opal_list_remove_item(job_info, &info->super); + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE, (void**)&cache, OPAL_PTR) && + NULL != cache) { + opal_list_append(cache, &info->super); + } else { + cache = OBJ_NEW(opal_list_t); + opal_list_append(cache, &info->super); + orte_set_attribute(&jdata->attributes, 
ORTE_JOB_INFO_CACHE, ORTE_ATTR_LOCAL, (void*)cache, OPAL_PTR); + } + } + } + /* if the job is missing a personality setting, add it */ + if (NULL == jdata->personality) { + opal_argv_append_nosize(&jdata->personality, "ompi"); + } + /* indicate the requestor so bookmarks can be correctly set */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_LAUNCH_PROXY, ORTE_ATTR_GLOBAL, - requestor, OPAL_NAME); + orte_set_attribute(&jdata->attributes, ORTE_JOB_LAUNCH_PROXY, + ORTE_ATTR_GLOBAL, requestor, OPAL_NAME); /* setup a spawn tracker so we know who to call back when this is done * and thread-shift the entire thing so it can be safely added to diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index 94e2cd8a0f..39850edf17 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -785,10 +785,12 @@ static void _toolconn(int sd, short args, void *cbdata) OBJ_RETAIN(node); opal_pointer_array_add(jdata->map->nodes, node); jdata->map->num_nodes++; - /* and it obviously is on the node */ + /* and it obviously is on the node - note that + * we do _not_ increment the #procs on the node + * as the tool doesn't count against the slot + * allocation */ OBJ_RETAIN(proc); opal_pointer_array_add(node->procs, proc); - node->num_procs++; /* set the trivial */ proc->local_rank = 0; proc->node_rank = 0; diff --git a/orte/tools/prun/prun.c b/orte/tools/prun/prun.c index b8b940da5d..0183848433 100644 --- a/orte/tools/prun/prun.c +++ b/orte/tools/prun/prun.c @@ -82,70 +82,20 @@ #include "orte/runtime/runtime.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/schizo/base/base.h" #include "orte/mca/state/state.h" +#include "orte/orted/orted_submit.h" /* ensure I can behave like a daemon */ #include "prun.h" -/** - * Global struct for caching orte command line options. 
- */ -struct orte_cmd_options_t { - char *help; - bool version; - bool verbose; - char *report_pid; - char *report_uri; - bool terminate; - bool debugger; - int num_procs; - char *appfile; - char *wdir; - bool set_cwd_to_session_dir; - char *path; - char *preload_files; - bool sleep; - char *stdin_target; - char *prefix; - char *path_to_mpirun; - bool disable_recovery; - bool preload_binaries; - bool index_argv; - bool run_as_root; - char *personality; - bool create_dvm; +static struct { bool terminate_dvm; - bool nolocal; - bool no_oversubscribe; - bool oversubscribe; - int cpus_per_proc; - bool pernode; - int npernode; - bool use_hwthreads_as_cpus; - int npersocket; - char *mapping_policy; - char *ranking_policy; - char *binding_policy; - bool report_bindings; - char *cpu_list; - bool debug; - bool tag_output; - bool timestamp_output; - char *output_filename; - bool merge; - bool continuous; - char *hnp; - bool staged_exec; - int timeout; - bool report_state_on_timeout; - bool get_stack_traces; - int pid; - bool system_server_only; bool system_server_first; -}; -typedef struct orte_cmd_options_t orte_cmd_options_t; -static orte_cmd_options_t orte_cmd_options = {0}; -static opal_cmd_line_t *orte_cmd_line = NULL; + bool system_server_only; + int pid; +} myoptions; + static opal_list_t job_info; static volatile bool active = false; @@ -158,335 +108,24 @@ static void set_classpath_jar_file(opal_pmix_app_t *app, int index, char *jarfil static opal_cmd_line_init_t cmd_line_init[] = { - /* Various "obvious" options */ - { NULL, 'h', NULL, "help", 1, - &orte_cmd_options.help, OPAL_CMD_LINE_TYPE_STRING, - "This help message", OPAL_CMD_LINE_OTYPE_GENERAL }, - { NULL, 'V', NULL, "version", 0, - &orte_cmd_options.version, OPAL_CMD_LINE_TYPE_BOOL, - "Print version and exit", OPAL_CMD_LINE_OTYPE_GENERAL }, - { "orte_execute_quiet", 'q', NULL, "quiet", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Suppress helpful messages", OPAL_CMD_LINE_OTYPE_GENERAL }, - - /* exit status reporting 
*/ - { "orte_report_child_jobs_separately", '\0', "report-child-jobs-separately", "report-child-jobs-separately", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Return the exit status of the primary job only", OPAL_CMD_LINE_OTYPE_OUTPUT }, - - /* select XML output */ - { "orte_xml_output", '\0', "xml", "xml", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Provide all output in XML format", OPAL_CMD_LINE_OTYPE_OUTPUT }, - { "orte_xml_file", '\0', "xml-file", "xml-file", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide all output in XML format to the specified file", OPAL_CMD_LINE_OTYPE_OUTPUT }, - - /* tag output */ - { "orte_tag_output", '\0', "tag-output", "tag-output", 0, - &orte_cmd_options.tag_output, OPAL_CMD_LINE_TYPE_BOOL, - "Tag all output with [job,rank]", OPAL_CMD_LINE_OTYPE_OUTPUT }, - { "orte_timestamp_output", '\0', "timestamp-output", "timestamp-output", 0, - &orte_cmd_options.timestamp_output, OPAL_CMD_LINE_TYPE_BOOL, - "Timestamp all application process output", OPAL_CMD_LINE_OTYPE_OUTPUT }, - { "orte_output_filename", '\0', "output-filename", "output-filename", 1, - &orte_cmd_options.output_filename, OPAL_CMD_LINE_TYPE_STRING, - "Redirect output from application processes into filename/job/rank/std[out,err,diag]", - OPAL_CMD_LINE_OTYPE_OUTPUT }, - { NULL, '\0', "merge-stderr-to-stdout", "merge-stderr-to-stdout", 0, - &orte_cmd_options.merge, OPAL_CMD_LINE_TYPE_BOOL, - "Merge stderr to stdout for each process", OPAL_CMD_LINE_OTYPE_OUTPUT }, - { "orte_xterm", '\0', "xterm", "xterm", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Create a new xterm window and display output from the specified ranks there", - OPAL_CMD_LINE_OTYPE_OUTPUT }, - - /* select stdin option */ - { NULL, '\0', "stdin", "stdin", 1, - &orte_cmd_options.stdin_target, OPAL_CMD_LINE_TYPE_STRING, - "Specify procs to receive stdin [rank, all, none] (default: 0, indicating rank 0)", - OPAL_CMD_LINE_OTYPE_INPUT }, - - /* request that argv[0] be indexed */ - { NULL, '\0', "index-argv-by-rank", 
"index-argv-by-rank", 0, - &orte_cmd_options.index_argv, OPAL_CMD_LINE_TYPE_BOOL, - "Uniquely index argv[0] for each process using its rank", - OPAL_CMD_LINE_OTYPE_INPUT }, - - /* Specify the launch agent to be used */ - { "orte_launch_agent", '\0', "launch-agent", "launch-agent", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Command used to start processes on remote nodes (default: orted)", - OPAL_CMD_LINE_OTYPE_LAUNCH }, - - /* Preload the binary on the remote machine */ - { NULL, 's', NULL, "preload-binary", 0, - &orte_cmd_options.preload_binaries, OPAL_CMD_LINE_TYPE_BOOL, - "Preload the binary on the remote machine before starting the remote process.", - OPAL_CMD_LINE_OTYPE_LAUNCH }, - - /* Preload files on the remote machine */ - { NULL, '\0', NULL, "preload-files", 1, - &orte_cmd_options.preload_files, OPAL_CMD_LINE_TYPE_STRING, - "Preload the comma separated list of files to the remote machines current working directory before starting the remote process.", - OPAL_CMD_LINE_OTYPE_LAUNCH }, - - /* Use an appfile */ - { NULL, '\0', NULL, "app", 1, - &orte_cmd_options.appfile, OPAL_CMD_LINE_TYPE_STRING, - "Provide an appfile; ignore all other command line options", - OPAL_CMD_LINE_OTYPE_LAUNCH }, - - /* Number of processes; -c, -n, --n, -np, and --np are all - synonyms */ - { NULL, 'c', "np", "np", 1, - &orte_cmd_options.num_procs, OPAL_CMD_LINE_TYPE_INT, - "Number of processes to run", OPAL_CMD_LINE_OTYPE_GENERAL }, - { NULL, '\0', "n", "n", 1, - &orte_cmd_options.num_procs, OPAL_CMD_LINE_TYPE_INT, - "Number of processes to run", OPAL_CMD_LINE_OTYPE_GENERAL }, - - /* Set a hostfile */ - { NULL, '\0', "hostfile", "hostfile", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a hostfile", OPAL_CMD_LINE_OTYPE_LAUNCH }, - { NULL, '\0', "machinefile", "machinefile", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a hostfile", OPAL_CMD_LINE_OTYPE_LAUNCH }, - { "orte_default_hostfile", '\0', "default-hostfile", "default-hostfile", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - 
"Provide a default hostfile", OPAL_CMD_LINE_OTYPE_LAUNCH }, - { "opal_if_do_not_resolve", '\0', "do-not-resolve", "do-not-resolve", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Do not attempt to resolve interfaces", OPAL_CMD_LINE_OTYPE_DEVEL }, - - { "orte_rankfile", '\0', "rf", "rankfile", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a rankfile file", OPAL_CMD_LINE_OTYPE_MAPPING }, - - /* Export environment variables; potentially used multiple times, - so it does not make sense to set into a variable */ - { NULL, 'x', NULL, NULL, 1, - NULL, OPAL_CMD_LINE_TYPE_NULL, - "Export an environment variable, optionally specifying a value (e.g., \"-x foo\" exports the environment variable foo and takes its value from the current environment; \"-x foo=bar\" exports the environment variable name foo and sets its value to \"bar\" in the started processes)", OPAL_CMD_LINE_OTYPE_LAUNCH }, - - /* Mapping controls */ - { "rmaps_base_display_map", '\0', "display-map", "display-map", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Display the process map just before launch", OPAL_CMD_LINE_OTYPE_DEBUG }, - { "rmaps_base_display_devel_map", '\0', "display-devel-map", "display-devel-map", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Display a detailed process map (mostly intended for developers) just before launch", - OPAL_CMD_LINE_OTYPE_DEVEL }, - { "rmaps_base_display_topo_with_map", '\0', "display-topo", "display-topo", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Display the topology as part of the process map (mostly intended for developers) just before launch", - OPAL_CMD_LINE_OTYPE_DEVEL }, - { "rmaps_base_display_diffable_map", '\0', "display-diffable-map", "display-diffable-map", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Display a diffable process map (mostly intended for developers) just before launch", - OPAL_CMD_LINE_OTYPE_DEVEL }, - { NULL, 'H', "host", "host", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "List of hosts to invoke processes on", - OPAL_CMD_LINE_OTYPE_MAPPING }, - { 
"rmaps_base_no_schedule_local", '\0', "nolocal", "nolocal", 0, - &orte_cmd_options.nolocal, OPAL_CMD_LINE_TYPE_BOOL, - "Do not run any MPI applications on the local node", - OPAL_CMD_LINE_OTYPE_MAPPING }, - { "rmaps_base_no_oversubscribe", '\0', "nooversubscribe", "nooversubscribe", 0, - &orte_cmd_options.no_oversubscribe, OPAL_CMD_LINE_TYPE_BOOL, - "Nodes are not to be oversubscribed, even if the system supports such operation", - OPAL_CMD_LINE_OTYPE_MAPPING }, - { "rmaps_base_oversubscribe", '\0', "oversubscribe", "oversubscribe", 0, - &orte_cmd_options.oversubscribe, OPAL_CMD_LINE_TYPE_BOOL, - "Nodes are allowed to be oversubscribed, even on a managed system, and overloading of processing elements", - OPAL_CMD_LINE_OTYPE_MAPPING }, - { "rmaps_base_cpus_per_rank", '\0', "cpus-per-proc", "cpus-per-proc", 1, - &orte_cmd_options.cpus_per_proc, OPAL_CMD_LINE_TYPE_INT, - "Number of cpus to use for each process [default=1]", - OPAL_CMD_LINE_OTYPE_MAPPING }, - { "rmaps_base_cpus_per_rank", '\0', "cpus-per-rank", "cpus-per-rank", 1, - &orte_cmd_options.cpus_per_proc, OPAL_CMD_LINE_TYPE_INT, - "Synonym for cpus-per-proc", OPAL_CMD_LINE_OTYPE_MAPPING }, - - /* backward compatiblity */ - { "rmaps_base_bycore", '\0', "bycore", "bycore", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Whether to map and rank processes round-robin by core", - OPAL_CMD_LINE_OTYPE_COMPAT }, - { "rmaps_base_bynode", '\0', "bynode", "bynode", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Whether to map and rank processes round-robin by node", - OPAL_CMD_LINE_OTYPE_COMPAT }, - { "rmaps_base_byslot", '\0', "byslot", "byslot", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Whether to map and rank processes round-robin by slot", - OPAL_CMD_LINE_OTYPE_COMPAT }, - - /* Nperxxx options that do not require topology and are always - * available - included for backwards compatibility - */ - { "rmaps_ppr_pernode", '\0', "pernode", "pernode", 0, - &orte_cmd_options.pernode, OPAL_CMD_LINE_TYPE_BOOL, - "Launch one process per 
available node", - OPAL_CMD_LINE_OTYPE_COMPAT }, - { "rmaps_ppr_n_pernode", '\0', "npernode", "npernode", 1, - &orte_cmd_options.npernode, OPAL_CMD_LINE_TYPE_INT, - "Launch n processes per node on all allocated nodes", - OPAL_CMD_LINE_OTYPE_COMPAT }, - { "rmaps_ppr_n_pernode", '\0', "N", NULL, 1, - &orte_cmd_options.npernode, OPAL_CMD_LINE_TYPE_INT, - "Launch n processes per node on all allocated nodes (synonym for 'map-by node')", - OPAL_CMD_LINE_OTYPE_MAPPING }, - - /* declare hardware threads as independent cpus */ - { "hwloc_base_use_hwthreads_as_cpus", '\0', "use-hwthread-cpus", "use-hwthread-cpus", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Use hardware threads as independent cpus", OPAL_CMD_LINE_OTYPE_MAPPING }, - - /* include npersocket for backwards compatibility */ - { "rmaps_ppr_n_persocket", '\0', "npersocket", "npersocket", 1, - &orte_cmd_options.npersocket, OPAL_CMD_LINE_TYPE_INT, - "Launch n processes per socket on all allocated nodes", - OPAL_CMD_LINE_OTYPE_COMPAT }, - - /* Mapping options */ - { "rmaps_base_mapping_policy", '\0', NULL, "map-by", 1, - &orte_cmd_options.mapping_policy, OPAL_CMD_LINE_TYPE_STRING, - "Mapping Policy [slot | hwthread | core | socket (default) | numa | board | node]", - OPAL_CMD_LINE_OTYPE_MAPPING }, - - /* Ranking options */ - { "rmaps_base_ranking_policy", '\0', NULL, "rank-by", 1, - &orte_cmd_options.ranking_policy, OPAL_CMD_LINE_TYPE_STRING, - "Ranking Policy [slot (default) | hwthread | core | socket | numa | board | node]", - OPAL_CMD_LINE_OTYPE_RANKING }, - - /* Binding options */ - { "hwloc_base_binding_policy", '\0', NULL, "bind-to", 1, - &orte_cmd_options.binding_policy, OPAL_CMD_LINE_TYPE_STRING, - "Policy for binding processes. Allowed values: none, hwthread, core, l1cache, l2cache, l3cache, socket, numa, board (\"none\" is the default when oversubscribed, \"core\" is the default when np<=2, and \"socket\" is the default when np>2). 
Allowed qualifiers: overload-allowed, if-supported", OPAL_CMD_LINE_OTYPE_BINDING }, - - /* backward compatiblity */ - { "hwloc_base_bind_to_core", '\0', "bind-to-core", "bind-to-core", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Bind processes to cores", OPAL_CMD_LINE_OTYPE_COMPAT }, - { "hwloc_base_bind_to_socket", '\0', "bind-to-socket", "bind-to-socket", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Bind processes to sockets", OPAL_CMD_LINE_OTYPE_COMPAT }, - - { "hwloc_base_report_bindings", '\0', "report-bindings", "report-bindings", 0, - &orte_cmd_options.report_bindings, OPAL_CMD_LINE_TYPE_BOOL, - "Whether to report process bindings to stderr", - OPAL_CMD_LINE_OTYPE_BINDING }, - - /* slot list option */ - { "hwloc_base_cpu_list", '\0', "cpu-list", "cpu-list", 1, - &orte_cmd_options.cpu_list, OPAL_CMD_LINE_TYPE_STRING, - "List of processor IDs to bind processes to [default=NULL]", - OPAL_CMD_LINE_OTYPE_BINDING }, - - /* generalized pattern mapping option */ - { "rmaps_ppr_pattern", '\0', NULL, "ppr", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Comma-separated list of number of processes on a given resource type [default: none]", - OPAL_CMD_LINE_OTYPE_MAPPING }, - - /* Allocation options */ - { "orte_display_alloc", '\0', "display-allocation", "display-allocation", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Display the allocation being used by this job", OPAL_CMD_LINE_OTYPE_DEBUG }, - { "orte_display_devel_alloc", '\0', "display-devel-allocation", "display-devel-allocation", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Display a detailed list (mostly intended for developers) of the allocation being used by this job", - OPAL_CMD_LINE_OTYPE_DEVEL }, - { "hwloc_base_cpu_set", '\0', "cpu-set", "cpu-set", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Comma-separated list of ranges specifying logical cpus allocated to this job [default: none]", - OPAL_CMD_LINE_OTYPE_DEBUG }, - - /* mpiexec-like arguments */ - { NULL, '\0', "wdir", "wdir", 1, - &orte_cmd_options.wdir, OPAL_CMD_LINE_TYPE_STRING, - 
"Set the working directory of the started processes", - OPAL_CMD_LINE_OTYPE_LAUNCH }, - { NULL, '\0', "wd", "wd", 1, - &orte_cmd_options.wdir, OPAL_CMD_LINE_TYPE_STRING, - "Synonym for --wdir", OPAL_CMD_LINE_OTYPE_LAUNCH }, - { NULL, '\0', "set-cwd-to-session-dir", "set-cwd-to-session-dir", 0, - &orte_cmd_options.set_cwd_to_session_dir, OPAL_CMD_LINE_TYPE_BOOL, - "Set the working directory of the started processes to their session directory", - OPAL_CMD_LINE_OTYPE_LAUNCH }, - { NULL, '\0', "path", "path", 1, - &orte_cmd_options.path, OPAL_CMD_LINE_TYPE_STRING, - "PATH to be used to look for executables to start processes", - OPAL_CMD_LINE_OTYPE_LAUNCH }, - - /* User-level debugger arguments */ - { NULL, '\0', "tv", "tv", 0, - &orte_cmd_options.debugger, OPAL_CMD_LINE_TYPE_BOOL, - "Deprecated backwards compatibility flag; synonym for \"--debug\"", - OPAL_CMD_LINE_OTYPE_DEBUG }, - { NULL, '\0', "debug", "debug", 0, - &orte_cmd_options.debugger, OPAL_CMD_LINE_TYPE_BOOL, - "Invoke the user-level debugger indicated by the orte_base_user_debugger MCA parameter", - OPAL_CMD_LINE_OTYPE_DEBUG }, - { "orte_base_user_debugger", '\0', "debugger", "debugger", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Sequence of debuggers to search for when \"--debug\" is used", - OPAL_CMD_LINE_OTYPE_DEBUG }, - { "orte_output_debugger_proctable", '\0', "output-proctable", "output-proctable", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Output the debugger proctable after launch", - OPAL_CMD_LINE_OTYPE_DEBUG }, - - { "orte_report_events", '\0', "report-events", "report-events", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Report events to a tool listening at the specified URI", OPAL_CMD_LINE_OTYPE_DEBUG }, - - { "orte_enable_recovery", '\0', "enable-recovery", "enable-recovery", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Enable recovery from process failure [Default = disabled]", - OPAL_CMD_LINE_OTYPE_UNSUPPORTED }, - - { "orte_max_restarts", '\0', "max-restarts", "max-restarts", 1, - NULL, 
OPAL_CMD_LINE_TYPE_INT, - "Max number of times to restart a failed process", - OPAL_CMD_LINE_OTYPE_UNSUPPORTED }, - - { NULL, '\0', "continuous", "continuous", 0, - &orte_cmd_options.continuous, OPAL_CMD_LINE_TYPE_BOOL, - "Job is to run until explicitly terminated", OPAL_CMD_LINE_OTYPE_DEBUG }, - - { NULL, '\0', "disable-recovery", "disable-recovery", 0, - &orte_cmd_options.disable_recovery, OPAL_CMD_LINE_TYPE_BOOL, - "Disable recovery (resets all recovery options to off)", - OPAL_CMD_LINE_OTYPE_UNSUPPORTED }, - - { NULL, '\0', "personality", "personality", 1, - &orte_cmd_options.personality, OPAL_CMD_LINE_TYPE_STRING, - "Comma-separated list of programming model, languages, and containers being used (default=\"ompi\")", - OPAL_CMD_LINE_OTYPE_LAUNCH }, - /* tell the dvm to terminate */ { NULL, '\0', "terminate", "terminate", 0, - &orte_cmd_options.terminate_dvm, OPAL_CMD_LINE_TYPE_BOOL, + &myoptions.terminate_dvm, OPAL_CMD_LINE_TYPE_BOOL, "Terminate the DVM", OPAL_CMD_LINE_OTYPE_DVM }, /* look first for a system server */ { NULL, '\0', "system-server-first", "system-server-first", 0, - &orte_cmd_options.system_server_first, OPAL_CMD_LINE_TYPE_BOOL, + &myoptions.system_server_first, OPAL_CMD_LINE_TYPE_BOOL, "First look for a system server and connect to it if found", OPAL_CMD_LINE_OTYPE_DVM }, /* connect only to a system server */ { NULL, '\0', "system-server-only", "system-server-only", 0, - &orte_cmd_options.system_server_only, OPAL_CMD_LINE_TYPE_BOOL, + &myoptions.system_server_only, OPAL_CMD_LINE_TYPE_BOOL, "Connect only to a system-level server", OPAL_CMD_LINE_OTYPE_DVM }, /* provide a connection PID */ { NULL, '\0', "pid", "pid", 1, - &orte_cmd_options.pid, OPAL_CMD_LINE_TYPE_INT, + &myoptions.pid, OPAL_CMD_LINE_TYPE_INT, "PID of the session-level daemon to which we should connect", OPAL_CMD_LINE_OTYPE_DVM }, @@ -556,6 +195,7 @@ int prun(int argc, char *argv[]) char *param; opal_pmix_lock_t lock; opal_list_t apps; + opal_pmix_app_t *app; opal_value_t *val; 
opal_list_t info; opal_jobid_t jobid; @@ -563,6 +203,7 @@ int prun(int argc, char *argv[]) /* init the globals */ memset(&orte_cmd_options, 0, sizeof(orte_cmd_options)); + memset(&myoptions, 0, sizeof(myoptions)); OBJ_CONSTRUCT(&job_info, opal_list_t); OBJ_CONSTRUCT(&apps, opal_list_t); @@ -597,12 +238,30 @@ int prun(int argc, char *argv[]) return rc; } + /* set our proc type for schizo selection */ + orte_process_info.proc_type = ORTE_PROC_TOOL; + + /* open the SCHIZO framework so we can setup the command line */ + if (ORTE_SUCCESS != (rc = mca_base_framework_open(&orte_schizo_base_framework, 0))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_schizo_base_select())) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* setup our cmd line */ orte_cmd_line = OBJ_NEW(opal_cmd_line_t); if (OPAL_SUCCESS != (rc = opal_cmd_line_add(orte_cmd_line, cmd_line_init))) { return rc; } + /* setup the rest of the cmd line only once */ + if (OPAL_SUCCESS != (rc = orte_schizo.define_cli(orte_cmd_line))) { + return rc; + } + /* now that options have been defined, finish setup */ mca_base_cmd_line_setup(orte_cmd_line); @@ -684,17 +343,16 @@ int prun(int argc, char *argv[]) exit(0); } - /* tell the ess/tool component that we want to connect only to a system-level - * PMIx server */ - if (orte_cmd_options.system_server_only) { + /* tell the ess/tool component how we want to connect */ + if (myoptions.system_server_only) { opal_setenv(OPAL_MCA_PREFIX"ess_tool_system_server_only", "1", true, &environ); } - if (orte_cmd_options.system_server_first) { + if (myoptions.system_server_first) { opal_setenv(OPAL_MCA_PREFIX"ess_tool_system_server_first", "1", true, &environ); } /* if they specified the DVM's pid, then pass it along */ - if (0 != orte_cmd_options.pid) { - asprintf(&param, "%d", orte_cmd_options.pid); + if (0 != myoptions.pid) { + asprintf(&param, "%d", myoptions.pid); opal_setenv(OPAL_MCA_PREFIX"ess_tool_server_pid", param, true, &environ); free(param); } @@ -706,7
+364,7 @@ int prun(int argc, char *argv[]) } /* if the user just wants us to terminate a DVM, then do so */ - if (orte_cmd_options.terminate_dvm) { + if (myoptions.terminate_dvm) { OBJ_CONSTRUCT(&info, opal_list_t); val = OBJ_NEW(opal_value_t); val->key = strdup(OPAL_PMIX_JOB_CTRL_TERMINATE); @@ -757,6 +415,200 @@ int prun(int argc, char *argv[]) OPAL_PMIX_DESTRUCT_LOCK(&lock); OPAL_LIST_DESTRUCT(&info); + /* we want to be notified upon job completion */ + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_NOTIFY_COMPLETION); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(&job_info, &val->super); + + /* see if they specified the personality */ + if (NULL != orte_cmd_options.personality) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_PERSONALITY); + val->type = OPAL_STRING; + val->data.string = strdup(orte_cmd_options.personality); + opal_list_append(&job_info, &val->super); + } + + /* check for stdout/err directives */ + /* if we were asked to tag output, mark it so */ + if (orte_cmd_options.tag_output) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_TAG_OUTPUT); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(&job_info, &val->super); + } + /* if we were asked to timestamp output, mark it so */ + if (orte_cmd_options.timestamp_output) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_TIMESTAMP_OUTPUT); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(&job_info, &val->super); + } + /* if we were asked to output to files, pass it along */ + if (NULL != orte_cmd_options.output_filename) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_OUTPUT_TO_FILE); + val->type = OPAL_STRING; + val->data.string = strdup(orte_cmd_options.output_filename); + opal_list_append(&job_info, &val->super); + } + /* if we were asked to merge stderr to stdout, mark it so */ + if (orte_cmd_options.merge) { + val = OBJ_NEW(opal_value_t); + val->key = 
strdup(OPAL_PMIX_MERGE_STDERR_STDOUT); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(&job_info, &val->super); + } + + /* check what user wants us to do with stdin */ + if (NULL != orte_cmd_options.stdin_target) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_STDIN_TGT); + val->type = OPAL_UINT32; + opal_list_append(&job_info, &val->super); + if (0 == strcmp(orte_cmd_options.stdin_target, "all")) { + val->data.uint32 = ORTE_VPID_WILDCARD; + } else if (0 == strcmp(orte_cmd_options.stdin_target, "none")) { + val->data.uint32 = ORTE_VPID_INVALID; + } else { + val->data.uint32 = strtoul(orte_cmd_options.stdin_target, NULL, 10); + } + } + + /* if we want the argv's indexed, indicate that */ + if (orte_cmd_options.index_argv) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_INDEX_ARGV); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(&job_info, &val->super); + } + + if (NULL != orte_cmd_options.mapping_policy) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_MAPBY); + val->type = OPAL_STRING; + val->data.string = strdup(orte_cmd_options.mapping_policy); + opal_list_append(&job_info, &val->super); + } else if (orte_cmd_options.pernode) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_PPR); + val->type = OPAL_STRING; + val->data.string = strdup("1:node"); + opal_list_append(&job_info, &val->super); + } else if (0 < orte_cmd_options.npernode) { + /* define the ppr */ + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_PPR); + val->type = OPAL_STRING; + (void)asprintf(&val->data.string, "%d:node", orte_cmd_options.npernode); + opal_list_append(&job_info, &val->super); + } else if (0 < orte_cmd_options.npersocket) { + /* define the ppr */ + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_PPR); + val->type = OPAL_STRING; + (void)asprintf(&val->data.string, "%d:socket", orte_cmd_options.npernode); + opal_list_append(&job_info, &val->super); + } + + /* if the 
user specified cpus/rank, set it */ + if (0 < orte_cmd_options.cpus_per_proc) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_CPUS_PER_PROC); + val->type = OPAL_UINT32; + val->data.uint32 = orte_cmd_options.cpus_per_proc; + opal_list_append(&job_info, &val->super); + } + + /* if the user specified a ranking policy, then set it */ + if (NULL != orte_cmd_options.ranking_policy) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_RANKBY); + val->type = OPAL_STRING; + val->data.string = strdup(orte_cmd_options.ranking_policy); + opal_list_append(&job_info, &val->super); + } + + /* if the user specified a binding policy, then set it */ + if (NULL != orte_cmd_options.binding_policy) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_BINDTO); + val->type = OPAL_STRING; + val->data.string = strdup(orte_cmd_options.binding_policy); + opal_list_append(&job_info, &val->super); + } + + /* if they asked for nolocal, mark it so */ + if (orte_cmd_options.nolocal) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_NO_PROCS_ON_HEAD); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(&job_info, &val->super); + } + if (orte_cmd_options.no_oversubscribe) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_NO_OVERSUBSCRIBE); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(&job_info, &val->super); + } + if (orte_cmd_options.oversubscribe) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_NO_OVERSUBSCRIBE); + val->type = OPAL_BOOL; + val->data.flag = false; + opal_list_append(&job_info, &val->super); + } + if (orte_cmd_options.report_bindings) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_REPORT_BINDINGS); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(&job_info, &val->super); + } + if (NULL != orte_cmd_options.cpu_list) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_CPU_LIST); + val->type = OPAL_STRING; + 
val->data.string = strdup(orte_cmd_options.cpu_list); + opal_list_append(&job_info, &val->super); + } + + /* mark if recovery was enabled on the cmd line */ + if (orte_enable_recovery) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_JOB_RECOVERABLE); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(&job_info, &val->super); + } + /* record the max restarts */ + if (0 < orte_max_restarts) { + OPAL_LIST_FOREACH(app, &apps, opal_pmix_app_t) { + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_MAX_RESTARTS); + val->type = OPAL_UINT32; + val->data.uint32 = orte_max_restarts; + opal_list_append(&app->info, &val->super); + } + } + /* if continuous operation was specified */ + if (orte_cmd_options.continuous) { + /* mark this job as continuously operating */ + val = OBJ_NEW(opal_value_t); + val->key = strdup(OPAL_PMIX_JOB_CONTINUOUS); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(&job_info, &val->super); + } + if (OPAL_SUCCESS != (rc = opal_pmix.spawn(&job_info, &apps, &jobid))) { opal_output(0, "Job failed to spawn: %s", opal_strerror(rc)); goto DONE; @@ -947,7 +799,7 @@ static int create_app(int argc, char* argv[], val->key = strdup(OPAL_PMIX_SET_SESSION_CWD); val->type = OPAL_BOOL; val->data.flag = true; - opal_list_append(&job_info, &val->super); + opal_list_append(&app->info, &val->super); } else { if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) { opal_show_help("help-orterun.txt", "orterun:init-failure", @@ -973,7 +825,7 @@ static int create_app(int argc, char* argv[], val->key = strdup(OPAL_PMIX_HOSTFILE); val->type = OPAL_STRING; val->data.string = value; - opal_list_append(&job_info, &val->super); + opal_list_append(&app->info, &val->super); found = true; } } @@ -988,7 +840,7 @@ static int create_app(int argc, char* argv[], val->key = strdup(OPAL_PMIX_HOSTFILE); val->type = OPAL_STRING; val->data.string = value; - opal_list_append(&job_info, &val->super); + opal_list_append(&app->info, 
&val->super); } } @@ -1004,7 +856,7 @@ static int create_app(int argc, char* argv[], val->key = strdup(OPAL_PMIX_HOST); val->type = OPAL_STRING; val->data.string = tval; - opal_list_append(&job_info, &val->super); + opal_list_append(&app->info, &val->super); } /* check for bozo error */ @@ -1028,12 +880,12 @@ static int create_app(int argc, char* argv[], val->key = strdup(OPAL_PMIX_SET_SESSION_CWD); val->type = OPAL_BOOL; val->data.flag = true; - opal_list_append(&job_info, &val->super); + opal_list_append(&app->info, &val->super); val = OBJ_NEW(opal_value_t); val->key = strdup(OPAL_PMIX_PRELOAD_BIN); val->type = OPAL_BOOL; val->data.flag = true; - opal_list_append(&job_info, &val->super); + opal_list_append(&app->info, &val->super); } } if (NULL != orte_cmd_options.preload_files) { @@ -1041,7 +893,7 @@ static int create_app(int argc, char* argv[], val->key = strdup(OPAL_PMIX_PRELOAD_FILES); val->type = OPAL_BOOL; val->data.flag = true; - opal_list_append(&job_info, &val->super); + opal_list_append(&app->info, &val->super); } /* Do not try to find argv[0] here -- the starter is responsible