1
1

Update the tools support so it allows tools to access PMIx

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-07-25 11:19:29 -07:00
родитель 9d3dcc9f69
Коммит 0042c758f1
9 изменённых файлов: 280 добавлений и 104 удалений

Просмотреть файл

@ -693,6 +693,23 @@ typedef int (*opal_pmix_base_module_server_notify_event_fn_t)(int status,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/************************************************************
* TOOL APIs *
************************************************************/
/* Initialize the PMIx tool support
 *
 * When called, the library will check for the required connection
 * information of the local server and will establish the connection.
 * The connection info can be provided either in the environment or
 * in the list of attributes. If the information is not found, or the
 * server connection fails, then an appropriate error constant will
 * be returned.
 *
 * ilist - list of attributes to hand to the PMIx library. The pmix2x
 *         implementation iterates it as opal_value_t items and accepts
 *         NULL (or an empty list) to mean "no attributes".
 *
 * Returns an int status code; the pmix2x implementation returns
 * OPAL_SUCCESS on success.
 */
typedef int (*opal_pmix_base_module_tool_init_fn_t)(opal_list_t *ilist);
/* Finalize the PMIx tool support
 *
 * Takes no arguments and returns an int status code. The pmix2x
 * implementation returns the converted result of PMIx_tool_finalize().
 */
typedef int (*opal_pmix_base_module_tool_fini_fn_t)(void);
/************************************************************
* UTILITY APIs *
************************************************************/
@ -837,6 +854,9 @@ typedef struct {
opal_pmix_base_module_server_setup_fork_fn_t server_setup_fork;
opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request;
opal_pmix_base_module_server_notify_event_fn_t server_notify_event;
/* tool APIs */
opal_pmix_base_module_tool_init_fn_t tool_init;
opal_pmix_base_module_tool_fini_fn_t tool_finalize;
/* Utility APIs */
opal_pmix_base_module_get_version_fn_t get_version;
opal_pmix_base_module_register_fn_t register_evhandler;

Просмотреть файл

@ -110,6 +110,9 @@ const opal_pmix_base_module_t opal_pmix_pmix2x_module = {
.server_setup_fork = pmix2x_server_setup_fork,
.server_dmodex_request = pmix2x_server_dmodex,
.server_notify_event = pmix2x_server_notify_event,
/* tool APIs */
.tool_init = pmix2x_tool_init,
.tool_finalize = pmix2x_tool_fini,
/* utility APIs */
.get_version = PMIx_Get_version,
.register_evhandler = register_handler,

Просмотреть файл

@ -250,6 +250,10 @@ OPAL_MODULE_DECLSPEC int pmix2x_resolve_peers(const char *nodename, opal_jobid_t
opal_list_t *procs);
OPAL_MODULE_DECLSPEC int pmix2x_resolve_nodes(opal_jobid_t jobid, char **nodelist);
/**** TOOL FUNCTIONS ****/
OPAL_MODULE_DECLSPEC int pmix2x_tool_init(opal_list_t *info);
OPAL_MODULE_DECLSPEC int pmix2x_tool_fini(void);
/**** COMMON FUNCTIONS ****/
OPAL_MODULE_DECLSPEC int pmix2x_store_local(const opal_process_name_t *proc,
opal_value_t *val);

Просмотреть файл

@ -34,6 +34,7 @@
#include "opal/mca/pmix/base/base.h"
#include "pmix2x.h"
#include "pmix.h"
#include "pmix_tool.h"
static pmix_proc_t my_proc;
static char *dbgvalue=NULL;
@ -97,7 +98,9 @@ int pmix2x_client_init(opal_list_t *ilist)
PMIX_INFO_FREE(pinfo, ninfo);
}
if (PMIX_SUCCESS != rc) {
return pmix2x_convert_rc(rc);
dbg = pmix2x_convert_rc(rc);
OPAL_ERROR_LOG(dbg);
return dbg;
}
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
@ -179,6 +182,120 @@ int pmix2x_client_finalize(void)
return pmix2x_convert_rc(rc);
}
/*
 * Initialize PMIx tool support for the pmix2x component.
 *
 * info - optional list of opal_value_t attributes; each entry is converted
 *        to a pmix_info_t and handed to PMIx_tool_init(). May be NULL or
 *        empty, in which case no attributes are passed.
 *
 * Returns OPAL_SUCCESS on success, or the OPAL equivalent (via
 * pmix2x_convert_rc) of the status returned by PMIx_tool_init() on failure.
 *
 * PMIx_tool_init() is invoked on every call; the one-time OPAL-side setup
 * (jobid tracking, process name, default event handler) is performed only
 * when opal_pmix_base.initialized transitions to 1.
 */
int pmix2x_tool_init(opal_list_t *info)
{
    pmix_info_t *pinfo;
    size_t ninfo, n;
    opal_pmix2x_jobid_trkr_t *job;
    opal_value_t *val;
    pmix_status_t rc;
    int ret;
    opal_process_name_t pname;
    opal_pmix2x_event_t *event;

    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
                        "PMIx_tool init");

    OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);

    /* convert the incoming list to info structs */
    if (NULL != info && 0 < (ninfo = opal_list_get_size(info))) {
        PMIX_INFO_CREATE(pinfo, ninfo);
        n = 0;
        OPAL_LIST_FOREACH(val, info, opal_value_t) {
            /* NOTE(review): copies at most PMIX_MAX_KEYLEN bytes; this
             * presumably relies on the key buffer being one byte larger
             * and zero-filled by PMIX_INFO_CREATE - confirm */
            (void)strncpy(pinfo[n].key, val->key, PMIX_MAX_KEYLEN);
            pmix2x_value_load(&pinfo[n].value, val);
            ++n;
        }
    } else {
        pinfo = NULL;
        ninfo = 0;
    }

    /* do not hold our lock across the call into the PMIx library */
    OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
    rc = PMIx_tool_init(&my_proc, pinfo, ninfo);
    if (NULL != pinfo) {
        PMIX_INFO_FREE(pinfo, ninfo);
    }
    if (PMIX_SUCCESS != rc) {
        ret = pmix2x_convert_rc(rc);
        OPAL_ERROR_LOG(ret);
        return ret;
    }

    OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
    ++opal_pmix_base.initialized;
    if (1 < opal_pmix_base.initialized) {
        /* the one-time setup below was already done by an earlier call */
        OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
        return OPAL_SUCCESS;
    }

    /* store our jobid and rank */
    if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) {
        /* if we were launched by the OMPI RTE, then
         * the jobid is in a special format - so get it */
        mca_pmix_pmix2x_component.native_launch = true;
        /* NOTE(review): the conversion result is not checked here */
        opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
    } else {
        /* we were launched by someone else, so make the
         * jobid just be the hash of the nspace */
        OPAL_HASH_JOBID(my_proc.nspace, pname.jobid);
    }

    /* insert this into our list of jobids - it will be the
     * first, and so we'll check it first */
    job = OBJ_NEW(opal_pmix2x_jobid_trkr_t);
    (void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN);
    job->jobid = pname.jobid;
    opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super);

    pname.vpid = pmix2x_convert_rank(my_proc.rank);
    opal_proc_set_name(&pname);

    /* release the thread in case the event handler fires when
     * registered */
    OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);

    /* register the default event handler */
    event = OBJ_NEW(opal_pmix2x_event_t);
    opal_list_append(&mca_pmix_pmix2x_component.events, &event->super);
    PMIX_INFO_CREATE(pinfo, 1);
    PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-DEFAULT", PMIX_STRING);
    /* pass the info array so the handler name actually reaches PMIx;
     * previously the array was built but NULL/0 was passed instead */
    PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, event);
    /* block until the registration callback signals the event's lock;
     * pinfo must stay valid until then, so free it only afterwards */
    OPAL_PMIX_WAIT_THREAD(&event->lock);
    PMIX_INFO_FREE(pinfo, 1);

    return OPAL_SUCCESS;
}
/*
 * Finalize PMIx tool support for the pmix2x component.
 *
 * Decrements opal_pmix_base.initialized; when the count reaches zero,
 * every handler on the component's event list is deregistered, removed
 * from the list and released. PMIx_tool_finalize() is then called
 * regardless of the count, and its status is returned converted to an
 * OPAL code.
 */
int pmix2x_tool_fini(void)
{
    opal_pmix2x_event_t *hdlr, *next_hdlr;

    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
                        "PMIx_tool finalize");

    OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);

    --opal_pmix_base.initialized;
    if (0 == opal_pmix_base.initialized) {
        /* last user is going away: deregister all event handlers */
        OPAL_LIST_FOREACH_SAFE(hdlr, next_hdlr, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) {
            /* tear down and rebuild the per-event lock so it can be
             * waited on again for the deregistration callback */
            OPAL_PMIX_DESTRUCT_LOCK(&hdlr->lock);
            OPAL_PMIX_CONSTRUCT_LOCK(&hdlr->lock);
            PMIx_Deregister_event_handler(hdlr->index, dereg_cbfunc, (void*)hdlr);
            OPAL_PMIX_WAIT_THREAD(&hdlr->lock);
            /* the handler is gone - drop it from our tracking list */
            opal_list_remove_item(&mca_pmix_pmix2x_component.events, &hdlr->super);
            OBJ_RELEASE(hdlr);
        }
    }

    OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);

    /* shut down the PMIx library side outside of our lock */
    return pmix2x_convert_rc(PMIx_tool_finalize());
}
int pmix2x_initialized(void)
{
int init;

Просмотреть файл

@ -65,7 +65,6 @@ BEGIN_C_DECLS
#define OPAL_PMIX_SERVER_NSPACE "pmix.srv.nspace" // (char*) Name of the nspace to use for this server
#define OPAL_PMIX_SERVER_RANK "pmix.srv.rank" // (uint32_t) Rank of this server
/* identification attributes */
#define OPAL_PMIX_USERID "pmix.euid" // (uint32_t) effective user id
#define OPAL_PMIX_GRPID "pmix.egid" // (uint32_t) effective group id

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Hochschule Esslingen. All rights reserved.
*
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
@ -35,6 +35,7 @@
#endif
#include "opal/mca/event/event.h"
#include "opal/mca/pmix/base/base.h"
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_cr.h"
#include "opal/util/arch.h"
@ -67,9 +68,75 @@ int orte_ess_base_tool_setup(void)
int ret;
char *error = NULL;
opal_list_t transports;
orte_jobid_t jobid;
orte_vpid_t vpid;
/* my name is set, xfer it to the OPAL layer */
orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
/* setup the PMIx framework - ensure it skips all non-PMIx components,
* but do not override anything we were given */
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_pmix_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) {
ORTE_ERROR_LOG(ret);
error = "opal_pmix_base_select";
goto error;
}
/* set the event base */
opal_pmix_base_set_evbase(orte_event_base);
/* initialize - PMIx may set our name here if we attach to
* a PMIx server */
if (NULL != opal_pmix.tool_init) {
if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(NULL))) {
ORTE_ERROR_LOG(ret);
error = "opal_pmix.init";
goto error;
}
ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;
} else {
/* if we connected to a PMIx server, then we were assigned
* a name that we should use. Otherwise, we have to define
* one here */
if (NULL != orte_ess_base_jobid &&
NULL != orte_ess_base_vpid) {
opal_output_verbose(2, orte_ess_base_framework.framework_output,
"ess:tool:obtaining name from environment");
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) {
return(ret);
}
ORTE_PROC_MY_NAME->jobid = jobid;
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) {
return(ret);
}
ORTE_PROC_MY_NAME->vpid = vpid;
} else {
/* If we are a tool with no name, then define it here */
uint16_t jobfam;
uint32_t hash32;
uint32_t bias;
opal_output_verbose(2, orte_ess_base_framework.framework_output,
"ess:tool:computing name");
/* hash the nodename */
OPAL_HASH_STR(orte_process_info.nodename, hash32);
bias = (uint32_t)orte_process_info.pid;
/* fold in the bias */
hash32 = hash32 ^ bias;
/* now compress to 16-bits */
jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32));
/* set the name */
ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16);
ORTE_PROC_MY_NAME->vpid = 0;
}
/* my name is set, xfer it to the OPAL layer */
orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
}
orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename);
orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
orte_process_info.super.proc_arch = opal_local_arch;
@ -131,7 +198,7 @@ int orte_ess_base_tool_setup(void)
goto error;
}
/* get a conduit for our use - we never route IO over fabric */
/* get a conduit for our use - we never route IO over fabric */
OBJ_CONSTRUCT(&transports, opal_list_t);
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
@ -243,5 +310,7 @@ int orte_ess_base_tool_finalize(void)
(void) mca_base_framework_close(&orte_rml_base_framework);
(void) mca_base_framework_close(&orte_errmgr_base_framework);
(void) mca_base_framework_close(&opal_pmix_base_framework);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -63,8 +63,6 @@ static int rte_init(void)
{
int ret;
char *error = NULL;
orte_jobid_t jobid;
orte_vpid_t vpid;
/* run the prolog */
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
@ -73,40 +71,6 @@ static int rte_init(void)
}
if (NULL != orte_ess_base_jobid &&
NULL != orte_ess_base_vpid) {
opal_output_verbose(2, orte_ess_base_framework.framework_output,
"ess:tool:obtaining name from environment");
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) {
return(ret);
}
ORTE_PROC_MY_NAME->jobid = jobid;
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) {
return(ret);
}
ORTE_PROC_MY_NAME->vpid = vpid;
} else {
/* If we are a tool with no name, then define it here */
uint16_t jobfam;
uint32_t hash32;
uint32_t bias;
opal_output_verbose(2, orte_ess_base_framework.framework_output,
"ess:tool:computing name");
/* hash the nodename */
OPAL_HASH_STR(orte_process_info.nodename, hash32);
bias = (uint32_t)orte_process_info.pid;
/* fold in the bias */
hash32 = hash32 ^ bias;
/* now compress to 16-bits */
jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32));
/* set the name */
ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16);
ORTE_PROC_MY_NAME->vpid = 0;
}
/* if requested, get an async event base - we use the
* opal_async one so we don't startup extra threads if
* not needed */
@ -115,7 +79,7 @@ static int rte_init(void)
progress_thread_running = true;
}
/* do the rest of the standard tool init */
/* do the standard tool init */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_tool_setup";
@ -174,4 +138,3 @@ static void rte_abort(int status, bool report)
/* Now just exit */
exit(status);
}

Просмотреть файл

@ -291,71 +291,70 @@ static void vm_ready(int fd, short args, void *cbdata)
return;
}
orte_node_info_communicated = true;
if (!orte_static_ports && !orte_fwd_mpirun_port) {
/* pack a flag indicating wiring info is provided */
flag = 1;
opal_dss.pack(buf, &flag, 1, OPAL_INT8);
/* get wireup info for daemons */
jptr = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
wireup = OBJ_NEW(opal_buffer_t);
for (v=0; v < jptr->procs->size; v++) {
if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, v))) {
continue;
}
val = NULL;
if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, NULL, NULL, &val)) || NULL == val) {
/* get wireup info for daemons */
jptr = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
wireup = OBJ_NEW(opal_buffer_t);
for (v=0; v < jptr->procs->size; v++) {
if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, v))) {
continue;
}
val = NULL;
if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, NULL, NULL, &val)) || NULL == val) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buf);
OBJ_RELEASE(wireup);
return;
} else {
/* pack the name of the daemon */
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buf);
OBJ_RELEASE(wireup);
return;
} else {
/* the data is returned as a list of key-value pairs in the opal_value_t */
if (OPAL_PTR != val->type) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
OBJ_RELEASE(buf);
OBJ_RELEASE(wireup);
return;
}
modex = (opal_list_t*)val->data.ptr;
numbytes = (int32_t)opal_list_get_size(modex);
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) {
}
/* the data is returned as a list of key-value pairs in the opal_value_t */
if (OPAL_PTR != val->type) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
OBJ_RELEASE(buf);
OBJ_RELEASE(wireup);
return;
}
modex = (opal_list_t*)val->data.ptr;
numbytes = (int32_t)opal_list_get_size(modex);
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buf);
OBJ_RELEASE(wireup);
return;
}
OPAL_LIST_FOREACH(kv, modex, opal_value_t) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buf);
OBJ_RELEASE(wireup);
return;
}
OPAL_LIST_FOREACH(kv, modex, opal_value_t) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buf);
OBJ_RELEASE(wireup);
return;
}
}
OPAL_LIST_RELEASE(modex);
OBJ_RELEASE(val);
}
OPAL_LIST_RELEASE(modex);
OBJ_RELEASE(val);
}
/* put it in a byte object for xmission */
opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes);
/* pack the byte object - zero-byte objects are fine */
bo.size = numbytes;
boptr = &bo;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(wireup);
OBJ_RELEASE(buf);
return;
}
/* release the data since it has now been copied into our buffer */
if (NULL != bo.bytes) {
free(bo.bytes);
}
OBJ_RELEASE(wireup);
} else {
flag = 0;
opal_dss.pack(buf, &flag, 1, OPAL_INT8);
}
/* put it in a byte object for xmission */
opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes);
/* pack the byte object - zero-byte objects are fine */
bo.size = numbytes;
boptr = &bo;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(wireup);
OBJ_RELEASE(buf);
return;
}
/* release the data since it has now been copied into our buffer */
if (NULL != bo.bytes) {
free(bo.bytes);
}
OBJ_RELEASE(wireup);
} else {
flag = 0;
opal_dss.pack(buf, &flag, 1, OPAL_INT8);

Просмотреть файл

@ -364,12 +364,15 @@ int orte_submit_init(int argc, char *argv[],
exit(0);
}
/* if they already set our proc type, then leave it alone */
if (ORTE_PROC_TYPE_NONE == orte_process_info.proc_type) {
/* set the flags - if they gave us a -hnp option, then
* we are a tool. If not, then we are an HNP */
if (NULL == orte_cmd_options.hnp) {
orte_process_info.proc_type = ORTE_PROC_HNP;
} else {
orte_process_info.proc_type = ORTE_PROC_TOOL;
* we are a tool. If not, then we are an HNP */
if (NULL == orte_cmd_options.hnp) {
orte_process_info.proc_type = ORTE_PROC_HNP;
} else {
orte_process_info.proc_type = ORTE_PROC_TOOL;
}
}
if (ORTE_PROC_IS_TOOL) {
if (0 == strncasecmp(orte_cmd_options.hnp, "file", strlen("file"))) {
@ -541,7 +544,6 @@ int orte_submit_init(int argc, char *argv[],
if (ORTE_PROC_IS_TOOL) {
opal_value_t val;
/* extract the name */
if (ORTE_SUCCESS != orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, ORTE_PROC_MY_HNP, NULL)) {
orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);