1
1

Update tool support by adding MCA params to direct orteds to drop

session and/or system-level tool rendezvous files. Ensure PMIx is
enabled for tools

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-08-15 17:18:13 -07:00
родитель f7137ecf98
Коммит 65fb6070d9
5 изменённых файлов: 75 добавлений и 0 удалений

Просмотреть файл

@ -133,6 +133,8 @@ typedef uint32_t pmix_rank_t;
#define PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server
#define PMIX_SERVER_NSPACE "pmix.srv.nspace" // (char*) Name of the nspace to use for this server
#define PMIX_SERVER_RANK "pmix.srv.rank" // (pmix_rank_t) Rank of this server
#define PMIX_TOOL_NSPACE "pmix.tool.nspace" // (char*) Name of the nspace to use for this tool
#define PMIX_TOOL_RANK "pmix.tool.rank" // (uint32_t) Rank of this tool
/* identification attributes */

Просмотреть файл

@ -142,6 +142,29 @@ int orte_ess_base_tool_setup(void)
orte_process_info.super.proc_arch = opal_local_arch;
opal_proc_local_set(&orte_process_info.super);
/* setup the PMIx framework - ensure it skips all non-PMIx components,
* but do not override anything we were given */
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_pmix_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) {
ORTE_ERROR_LOG(ret);
error = "opal_pmix_base_select";
goto error;
}
/* initialize - the layer below has our name in opal_process_name_t
* and will pass it to PMIx to sync */
if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(NULL))) {
ORTE_ERROR_LOG(ret);
error = "opal_pmix.init";
goto error;
}
/* set the event base */
opal_pmix_base_set_evbase(orte_event_base);
/* open and setup the state machine */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
ORTE_ERROR_LOG(ret);

Просмотреть файл

@ -146,6 +146,22 @@ void pmix_server_register_params(void)
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
&orte_pmix_server_globals.wait_for_server);
/* whether or not to drop a session-level tool rendezvous point */
orte_pmix_server_globals.session_server = false;
(void) mca_base_var_register ("orte", "pmix", NULL, "session_server",
"Whether or not to drop a session-level tool rendezvous point",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
&orte_pmix_server_globals.session_server);
/* whether or not to drop a system-level tool rendezvous point */
orte_pmix_server_globals.system_server = false;
(void) mca_base_var_register ("orte", "pmix", NULL, "system_server",
"Whether or not to drop a system-level tool rendezvous point",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
&orte_pmix_server_globals.system_server);
}
static void eviction_cbfunc(struct opal_hotel_t *hotel,
@ -262,6 +278,25 @@ int pmix_server_init(void)
kv->type = OPAL_BOOL;
kv->data.flag = true;
opal_list_append(&info, &kv->super);
/* if requested, tell the server to drop a session-level
* PMIx connection point */
if (orte_pmix_server_globals.session_server) {
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_SERVER_TOOL_SUPPORT);
kv->type = OPAL_BOOL;
kv->data.flag = true;
opal_list_append(&info, &kv->super);
}
/* if requested, tell the server to drop a system-level
* PMIx connection point */
if (orte_pmix_server_globals.system_server) {
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_SERVER_SYSTEM_SUPPORT);
kv->type = OPAL_BOOL;
kv->data.flag = true;
opal_list_append(&info, &kv->super);
}
/* setup the local server */
if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server, &info))) {

Просмотреть файл

@ -263,6 +263,8 @@ typedef struct {
orte_process_name_t server;
opal_list_t notifications;
bool pubsub_init;
bool session_server;
bool system_server;
} pmix_server_globals_t;
extern pmix_server_globals_t orte_pmix_server_globals;

Просмотреть файл

@ -85,6 +85,19 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata, bool force)
uid = geteuid();
gid = getegid();
/* pass our nspace/rank */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_SERVER_NSPACE);
kv->data.string = strdup(ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
kv->type = OPAL_STRING;
opal_list_append(info, &kv->super);
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_SERVER_RANK);
kv->data.uint32 = ORTE_PROC_MY_NAME->vpid;
kv->type = OPAL_UINT32;
opal_list_append(info, &kv->super);
/* jobid */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_JOBID);