Remove the max_connections parameter from the radix routed component, as its direct-send behavior is confusing. Modify PMIx client init so that it simply returns the nspace/rank when called by a server; this allows the server to retrieve its assigned ID. Register the server's own nspace so that client-side operations invoked by the server can succeed.
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
6074c2a2a9
Коммит
64873487b4
@ -258,7 +258,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
|
|||||||
return PMIX_ERR_BAD_PARAM;
|
return PMIX_ERR_BAD_PARAM;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (0 < pmix_globals.init_cntr) {
|
if (0 < pmix_globals.init_cntr || PMIX_PROC_SERVER == pmix_globals.proc_type) {
|
||||||
/* since we have been called before, the nspace and
|
/* since we have been called before, the nspace and
|
||||||
* rank should be known. So return them here if
|
* rank should be known. So return them here if
|
||||||
* requested */
|
* requested */
|
||||||
|
@ -71,6 +71,28 @@ static void errreg_cbfunc (pmix_status_t status,
|
|||||||
*active = false;
|
*active = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void opcbfunc(pmix_status_t status, void *cbdata)
|
||||||
|
{
|
||||||
|
pmix3x_opcaddy_t *op = (pmix3x_opcaddy_t*)cbdata;
|
||||||
|
|
||||||
|
if (NULL != op->opcbfunc) {
|
||||||
|
op->opcbfunc(pmix3x_convert_rc(status), op->cbdata);
|
||||||
|
}
|
||||||
|
if (op->active) {
|
||||||
|
op->status = status;
|
||||||
|
op->active = false;
|
||||||
|
} else {
|
||||||
|
OBJ_RELEASE(op);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void op2cbfunc(pmix_status_t status, void *cbdata)
|
||||||
|
{
|
||||||
|
volatile bool *active = (volatile bool*)cbdata;
|
||||||
|
|
||||||
|
*active = false;
|
||||||
|
}
|
||||||
|
|
||||||
int pmix3x_server_init(opal_pmix_server_module_t *module,
|
int pmix3x_server_init(opal_pmix_server_module_t *module,
|
||||||
opal_list_t *info)
|
opal_list_t *info)
|
||||||
{
|
{
|
||||||
@ -123,6 +145,12 @@ int pmix3x_server_init(opal_pmix_server_module_t *module,
|
|||||||
PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix3x_event_hdlr, errreg_cbfunc, (void*)&active);
|
PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix3x_event_hdlr, errreg_cbfunc, (void*)&active);
|
||||||
PMIX_WAIT_FOR_COMPLETION(active);
|
PMIX_WAIT_FOR_COMPLETION(active);
|
||||||
|
|
||||||
|
/* as we might want to use some client-side functions, be sure
|
||||||
|
* to register our own nspace */
|
||||||
|
active = true;
|
||||||
|
PMIx_server_register_nspace(job->nspace, 1, NULL, 0, op2cbfunc, (void*)&active);
|
||||||
|
PMIX_WAIT_FOR_COMPLETION(active);
|
||||||
|
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -163,21 +191,6 @@ int pmix3x_server_gen_ppn(const char *input, char **ppn)
|
|||||||
return pmix3x_convert_rc(rc);
|
return pmix3x_convert_rc(rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void opcbfunc(pmix_status_t status, void *cbdata)
|
|
||||||
{
|
|
||||||
pmix3x_opcaddy_t *op = (pmix3x_opcaddy_t*)cbdata;
|
|
||||||
|
|
||||||
if (NULL != op->opcbfunc) {
|
|
||||||
op->opcbfunc(pmix3x_convert_rc(status), op->cbdata);
|
|
||||||
}
|
|
||||||
if (op->active) {
|
|
||||||
op->status = status;
|
|
||||||
op->active = false;
|
|
||||||
} else {
|
|
||||||
OBJ_RELEASE(op);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void _reg_nspace(int sd, short args, void *cbdata)
|
static void _reg_nspace(int sd, short args, void *cbdata)
|
||||||
{
|
{
|
||||||
pmix3x_threadshift_t *cd = (pmix3x_threadshift_t*)cbdata;
|
pmix3x_threadshift_t *cd = (pmix3x_threadshift_t*)cbdata;
|
||||||
|
@ -434,13 +434,6 @@ int orte_ess_base_orted_setup(char **hosts)
|
|||||||
orte_rml.set_contact_info(orte_process_info.my_hnp_uri);
|
orte_rml.set_contact_info(orte_process_info.my_hnp_uri);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* setup the PMIx server */
|
|
||||||
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
|
|
||||||
ORTE_ERROR_LOG(ret);
|
|
||||||
error = "pmix server init";
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* select the errmgr */
|
/* select the errmgr */
|
||||||
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
|
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
|
@ -276,10 +276,6 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
|||||||
if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
|
if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
|
||||||
ret = target;
|
ret = target;
|
||||||
goto found;
|
goto found;
|
||||||
} else if (orte_process_info.num_procs < mca_routed_radix_component.max_connections) {
|
|
||||||
/* if the job is small enough, send direct to the target's daemon */
|
|
||||||
ret = &daemon;
|
|
||||||
goto found;
|
|
||||||
} else {
|
} else {
|
||||||
/* search routing tree for next step to that daemon */
|
/* search routing tree for next step to that daemon */
|
||||||
for (item = opal_list_get_first(&my_children);
|
for (item = opal_list_get_first(&my_children);
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -20,7 +21,6 @@ BEGIN_C_DECLS
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
orte_routed_component_t super;
|
orte_routed_component_t super;
|
||||||
int radix;
|
int radix;
|
||||||
orte_vpid_t max_connections;
|
|
||||||
} orte_routed_radix_component_t;
|
} orte_routed_radix_component_t;
|
||||||
ORTE_MODULE_DECLSPEC extern orte_routed_radix_component_t mca_routed_radix_component;
|
ORTE_MODULE_DECLSPEC extern orte_routed_radix_component_t mca_routed_radix_component;
|
||||||
|
|
||||||
|
@ -60,14 +60,6 @@ static int orte_routed_radix_component_register(void)
|
|||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
&mca_routed_radix_component.radix);
|
&mca_routed_radix_component.radix);
|
||||||
|
|
||||||
mca_routed_radix_component.max_connections = -1;
|
|
||||||
(void) mca_base_component_var_register(c, "max_connections",
|
|
||||||
"Send direct between daemons if the number of nodes is less than this number",
|
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL,0, 0,
|
|
||||||
OPAL_INFO_LVL_9,
|
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
|
||||||
&mca_routed_radix_component.max_connections);
|
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user