1
1

Remove the max_connections parameter from the radix component, as it is confusing. Modify PMIx client init so that it simply returns the nspace/rank if called by a server; this allows the server to retrieve its assigned ID. Register the server's nspace so that client-side operations can succeed.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2016-11-01 10:14:02 -07:00
родитель 6074c2a2a9
Коммит 64873487b4
6 изменённых файлов: 30 добавлений и 36 удалений

Просмотреть файл

@ -258,7 +258,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
return PMIX_ERR_BAD_PARAM; return PMIX_ERR_BAD_PARAM;
} }
if (0 < pmix_globals.init_cntr) { if (0 < pmix_globals.init_cntr || PMIX_PROC_SERVER == pmix_globals.proc_type) {
/* since we have been called before, the nspace and /* since we have been called before, the nspace and
* rank should be known. So return them here if * rank should be known. So return them here if
* requested */ * requested */

Просмотреть файл

@ -71,6 +71,28 @@ static void errreg_cbfunc (pmix_status_t status,
*active = false; *active = false;
} }
/*
 * Completion callback for an operation tracked by a pmix3x_opcaddy_t.
 *
 * status - PMIx status reported for the operation
 * cbdata - the pmix3x_opcaddy_t that was handed over when the
 *          operation was started
 *
 * If the caddy carries an opcbfunc, that function is called first with
 * the status passed through pmix3x_convert_rc() and the caddy's cbdata.
 * Afterwards the caddy is either marked complete or released, depending
 * on its 'active' flag.
 */
static void opcbfunc(pmix_status_t status, void *cbdata)
{
    pmix3x_opcaddy_t *caddy = (pmix3x_opcaddy_t *)cbdata;

    /* notify the registered callback, if there is one */
    if (NULL != caddy->opcbfunc) {
        caddy->opcbfunc(pmix3x_convert_rc(status), caddy->cbdata);
    }

    if (!caddy->active) {
        /* the flag is not set, so the caddy is released right here */
        OBJ_RELEASE(caddy);
        return;
    }

    /* NOTE(review): presumably someone is waiting on 'active' and will
     * read 'status' afterwards - record the status before clearing the
     * flag; confirm against the code that sets 'active' */
    caddy->status = status;
    caddy->active = false;
}
/*
 * Minimal completion callback: clears the boolean flag that cbdata
 * points to. The reported status is not examined.
 *
 * status - PMIx status of the finished operation (ignored)
 * cbdata - address of a volatile bool that is set to false
 */
static void op2cbfunc(pmix_status_t status, void *cbdata)
{
    (void)status;   /* deliberately unused */

    *(volatile bool *)cbdata = false;
}
int pmix3x_server_init(opal_pmix_server_module_t *module, int pmix3x_server_init(opal_pmix_server_module_t *module,
opal_list_t *info) opal_list_t *info)
{ {
@ -123,6 +145,12 @@ int pmix3x_server_init(opal_pmix_server_module_t *module,
PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix3x_event_hdlr, errreg_cbfunc, (void*)&active); PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix3x_event_hdlr, errreg_cbfunc, (void*)&active);
PMIX_WAIT_FOR_COMPLETION(active); PMIX_WAIT_FOR_COMPLETION(active);
/* as we might want to use some client-side functions, be sure
* to register our own nspace */
active = true;
PMIx_server_register_nspace(job->nspace, 1, NULL, 0, op2cbfunc, (void*)&active);
PMIX_WAIT_FOR_COMPLETION(active);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
@ -163,21 +191,6 @@ int pmix3x_server_gen_ppn(const char *input, char **ppn)
return pmix3x_convert_rc(rc); return pmix3x_convert_rc(rc);
} }
/*
 * Completion callback for an operation tracked by a pmix3x_opcaddy_t.
 *
 * status - PMIx status reported for the operation
 * cbdata - the pmix3x_opcaddy_t that was handed over when the
 *          operation was started
 *
 * If the caddy carries an opcbfunc, that function is called first with
 * the status passed through pmix3x_convert_rc() and the caddy's cbdata.
 * Afterwards the caddy is either marked complete or released, depending
 * on its 'active' flag.
 */
static void opcbfunc(pmix_status_t status, void *cbdata)
{
    pmix3x_opcaddy_t *caddy = (pmix3x_opcaddy_t *)cbdata;

    /* notify the registered callback, if there is one */
    if (NULL != caddy->opcbfunc) {
        caddy->opcbfunc(pmix3x_convert_rc(status), caddy->cbdata);
    }

    if (!caddy->active) {
        /* the flag is not set, so the caddy is released right here */
        OBJ_RELEASE(caddy);
        return;
    }

    /* NOTE(review): presumably someone is waiting on 'active' and will
     * read 'status' afterwards - record the status before clearing the
     * flag; confirm against the code that sets 'active' */
    caddy->status = status;
    caddy->active = false;
}
static void _reg_nspace(int sd, short args, void *cbdata) static void _reg_nspace(int sd, short args, void *cbdata)
{ {
pmix3x_threadshift_t *cd = (pmix3x_threadshift_t*)cbdata; pmix3x_threadshift_t *cd = (pmix3x_threadshift_t*)cbdata;

Просмотреть файл

@ -434,13 +434,6 @@ int orte_ess_base_orted_setup(char **hosts)
orte_rml.set_contact_info(orte_process_info.my_hnp_uri); orte_rml.set_contact_info(orte_process_info.my_hnp_uri);
} }
/* setup the PMIx server */
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
ORTE_ERROR_LOG(ret);
error = "pmix server init";
goto error;
}
/* select the errmgr */ /* select the errmgr */
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);

Просмотреть файл

@ -276,10 +276,6 @@ static orte_process_name_t get_route(orte_process_name_t *target)
if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) { if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
ret = target; ret = target;
goto found; goto found;
} else if (orte_process_info.num_procs < mca_routed_radix_component.max_connections) {
/* if the job is small enough, send direct to the target's daemon */
ret = &daemon;
goto found;
} else { } else {
/* search routing tree for next step to that daemon */ /* search routing tree for next step to that daemon */
for (item = opal_list_get_first(&my_children); for (item = opal_list_get_first(&my_children);

Просмотреть файл

@ -1,6 +1,7 @@
/* /*
* Copyright (c) 2007 Los Alamos National Security, LLC. * Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -20,7 +21,6 @@ BEGIN_C_DECLS
typedef struct { typedef struct {
orte_routed_component_t super; orte_routed_component_t super;
int radix; int radix;
orte_vpid_t max_connections;
} orte_routed_radix_component_t; } orte_routed_radix_component_t;
ORTE_MODULE_DECLSPEC extern orte_routed_radix_component_t mca_routed_radix_component; ORTE_MODULE_DECLSPEC extern orte_routed_radix_component_t mca_routed_radix_component;

Просмотреть файл

@ -60,14 +60,6 @@ static int orte_routed_radix_component_register(void)
MCA_BASE_VAR_SCOPE_READONLY, MCA_BASE_VAR_SCOPE_READONLY,
&mca_routed_radix_component.radix); &mca_routed_radix_component.radix);
mca_routed_radix_component.max_connections = -1;
(void) mca_base_component_var_register(c, "max_connections",
"Send direct between daemons if the number of nodes is less than this number",
MCA_BASE_VAR_TYPE_INT, NULL,0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_routed_radix_component.max_connections);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }