Remove the max_connections parameter from the radix component because it is confusing. Modify PMIx client init so that it simply returns the nspace/rank when called by a server; this allows the server to retrieve its assigned ID. Register the server's own nspace so that client-side operations can succeed.
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
6074c2a2a9
Коммит
64873487b4
@ -258,7 +258,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
|
||||
return PMIX_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
if (0 < pmix_globals.init_cntr) {
|
||||
if (0 < pmix_globals.init_cntr || PMIX_PROC_SERVER == pmix_globals.proc_type) {
|
||||
/* since we have been called before, the nspace and
|
||||
* rank should be known. So return them here if
|
||||
* requested */
|
||||
|
@ -71,6 +71,28 @@ static void errreg_cbfunc (pmix_status_t status,
|
||||
*active = false;
|
||||
}
|
||||
|
||||
/* Generic PMIx operation-completion callback.
 *
 * cbdata is treated as a pmix3x_opcaddy_t. If the caddy carries a
 * callback (op->opcbfunc), it is invoked with the PMIx status converted
 * by pmix3x_convert_rc() and the caddy's own cbdata. Afterwards:
 *   - if op->active is set, the raw PMIx status is stored in op->status
 *     and op->active is cleared (presumably a caller is waiting on that
 *     flag and will dispose of the caddy itself - confirm against callers);
 *   - otherwise the caddy is released here via OBJ_RELEASE.
 */
static void opcbfunc(pmix_status_t status, void *cbdata)
|
||||
{
|
||||
pmix3x_opcaddy_t *op = (pmix3x_opcaddy_t*)cbdata;
|
||||
|
||||
/* forward the result to the stored callback, if one was provided */
if (NULL != op->opcbfunc) {
|
||||
op->opcbfunc(pmix3x_convert_rc(status), op->cbdata);
|
||||
}
|
||||
/* record the status and clear the flag, or release the caddy ourselves */
if (op->active) {
|
||||
op->status = status;
|
||||
op->active = false;
|
||||
} else {
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
}
|
||||
|
||||
/* Minimal completion callback: cbdata is treated as a pointer to a
 * volatile bool flag, which is cleared to signal that the operation
 * has finished. The status argument is not examined, so a failed
 * operation is indistinguishable from a successful one here.
 */
static void op2cbfunc(pmix_status_t status, void *cbdata)
|
||||
{
|
||||
volatile bool *active = (volatile bool*)cbdata;
|
||||
|
||||
/* clear the caller-supplied completion flag */
*active = false;
|
||||
}
|
||||
|
||||
int pmix3x_server_init(opal_pmix_server_module_t *module,
|
||||
opal_list_t *info)
|
||||
{
|
||||
@ -123,6 +145,12 @@ int pmix3x_server_init(opal_pmix_server_module_t *module,
|
||||
PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix3x_event_hdlr, errreg_cbfunc, (void*)&active);
|
||||
PMIX_WAIT_FOR_COMPLETION(active);
|
||||
|
||||
/* as we might want to use some client-side functions, be sure
|
||||
* to register our own nspace */
|
||||
active = true;
|
||||
PMIx_server_register_nspace(job->nspace, 1, NULL, 0, op2cbfunc, (void*)&active);
|
||||
PMIX_WAIT_FOR_COMPLETION(active);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -163,21 +191,6 @@ int pmix3x_server_gen_ppn(const char *input, char **ppn)
|
||||
return pmix3x_convert_rc(rc);
|
||||
}
|
||||
|
||||
/* Generic PMIx operation-completion callback.
 *
 * cbdata is treated as a pmix3x_opcaddy_t. If the caddy carries a
 * callback (op->opcbfunc), it is invoked with the PMIx status converted
 * by pmix3x_convert_rc() and the caddy's own cbdata. Afterwards:
 *   - if op->active is set, the raw PMIx status is stored in op->status
 *     and op->active is cleared (presumably a caller is waiting on that
 *     flag and will dispose of the caddy itself - confirm against callers);
 *   - otherwise the caddy is released here via OBJ_RELEASE.
 */
static void opcbfunc(pmix_status_t status, void *cbdata)
|
||||
{
|
||||
pmix3x_opcaddy_t *op = (pmix3x_opcaddy_t*)cbdata;
|
||||
|
||||
/* forward the result to the stored callback, if one was provided */
if (NULL != op->opcbfunc) {
|
||||
op->opcbfunc(pmix3x_convert_rc(status), op->cbdata);
|
||||
}
|
||||
/* record the status and clear the flag, or release the caddy ourselves */
if (op->active) {
|
||||
op->status = status;
|
||||
op->active = false;
|
||||
} else {
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
}
|
||||
|
||||
static void _reg_nspace(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix3x_threadshift_t *cd = (pmix3x_threadshift_t*)cbdata;
|
||||
|
@ -434,13 +434,6 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
orte_rml.set_contact_info(orte_process_info.my_hnp_uri);
|
||||
}
|
||||
|
||||
/* setup the PMIx server */
|
||||
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "pmix server init";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* select the errmgr */
|
||||
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
|
@ -276,10 +276,6 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
|
||||
ret = target;
|
||||
goto found;
|
||||
} else if (orte_process_info.num_procs < mca_routed_radix_component.max_connections) {
|
||||
/* if the job is small enough, send direct to the target's daemon */
|
||||
ret = &daemon;
|
||||
goto found;
|
||||
} else {
|
||||
/* search routing tree for next step to that daemon */
|
||||
for (item = opal_list_get_first(&my_children);
|
||||
|
@ -1,6 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -20,7 +21,6 @@ BEGIN_C_DECLS
|
||||
typedef struct {
|
||||
orte_routed_component_t super;
|
||||
int radix;
|
||||
orte_vpid_t max_connections;
|
||||
} orte_routed_radix_component_t;
|
||||
ORTE_MODULE_DECLSPEC extern orte_routed_radix_component_t mca_routed_radix_component;
|
||||
|
||||
|
@ -60,14 +60,6 @@ static int orte_routed_radix_component_register(void)
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_routed_radix_component.radix);
|
||||
|
||||
mca_routed_radix_component.max_connections = -1;
|
||||
(void) mca_base_component_var_register(c, "max_connections",
|
||||
"Send direct between daemons if the number of nodes is less than this number",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL,0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_routed_radix_component.max_connections);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user