Clean up the SDS components and move some common code to the base.
Modify the seed and singleton SDS code so it returns the right local rank and num_local_procs. This commit was SVN r14707.
Этот коммит содержится в:
родитель
d9acc93efa
Коммит
3288ce0462
@ -26,4 +26,5 @@ libmca_sds_la_SOURCES += \
|
||||
base/sds_base_interface.c \
|
||||
base/sds_base_orted_contact.c \
|
||||
base/sds_base_universe.c \
|
||||
base/sds_base_get.c \
|
||||
base/sds_base_put.c
|
||||
|
@ -66,6 +66,7 @@ extern "C" {
|
||||
ORTE_DECLSPEC int orte_sds_base_basic_contact_universe(void);
|
||||
ORTE_DECLSPEC int orte_sds_base_seed_set_name(void);
|
||||
ORTE_DECLSPEC int orte_sds_base_contact_orted(char *orted_uri);
|
||||
ORTE_DECLSPEC int orte_sds_env_get(void);
|
||||
|
||||
/*
|
||||
* Put functions
|
||||
|
@ -121,6 +121,12 @@ int
|
||||
orte_sds_base_seed_set_name(void)
|
||||
{
|
||||
int id, flag, rc;
|
||||
|
||||
/* if we are a seed, then there can be only one proc */
|
||||
orte_process_info.num_procs = 1;
|
||||
orte_process_info.vpid_start = 0;
|
||||
orte_process_info.local_rank = 0;
|
||||
orte_process_info.num_local_procs = 1;
|
||||
|
||||
/* if we're a seed and we're not infrastructure, we're also a
|
||||
singleton. So set the singleton flag in that case */
|
||||
|
54
orte/mca/sds/env/sds_env_module.c
поставляемый
54
orte/mca/sds/env/sds_env_module.c
поставляемый
@ -46,13 +46,7 @@ orte_sds_env_set_name(void)
|
||||
{
|
||||
int rc;
|
||||
int id;
|
||||
int vpid_start;
|
||||
int num_procs;
|
||||
int local_rank;
|
||||
int num_local_procs;
|
||||
char* name_string = NULL;
|
||||
char *local_daemon_uri = NULL;
|
||||
|
||||
char* name_string = NULL;
|
||||
|
||||
id = mca_base_param_register_string("ns", "nds", "name", NULL, NULL);
|
||||
mca_base_param_lookup_string(id, &name_string);
|
||||
@ -119,48 +113,10 @@ orte_sds_env_set_name(void)
|
||||
}
|
||||
}
|
||||
|
||||
id = mca_base_param_register_int("ns", "nds", "vpid_start", NULL, -1);
|
||||
mca_base_param_lookup_int(id, &vpid_start);
|
||||
if (vpid_start < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
orte_process_info.vpid_start = (orte_vpid_t)vpid_start;
|
||||
|
||||
id = mca_base_param_register_int("ns", "nds", "num_procs", NULL, -1);
|
||||
mca_base_param_lookup_int(id, &num_procs);
|
||||
if (num_procs < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
orte_process_info.num_procs = (orte_std_cntr_t)num_procs;
|
||||
|
||||
/* it is okay for this param not to be found - for example, we don't bother
|
||||
* to set it for orteds - so just set it to an invalid value which indicates
|
||||
* it wasn't found if it isn't there
|
||||
*/
|
||||
id = mca_base_param_register_int("ns", "nds", "local_rank", NULL, ORTE_VPID_INVALID);
|
||||
mca_base_param_lookup_int(id, &local_rank);
|
||||
orte_process_info.local_rank = (orte_vpid_t)local_rank;
|
||||
|
||||
/* it is okay for this param not to be found - for example, we don't bother
|
||||
* to set it for orteds - so just set it to a value which indicates
|
||||
* it wasn't found if it isn't there
|
||||
*/
|
||||
id = mca_base_param_register_int("ns", "nds", "num_local_procs", NULL, 0);
|
||||
mca_base_param_lookup_int(id, &num_local_procs);
|
||||
orte_process_info.num_local_procs = (orte_std_cntr_t)num_local_procs;
|
||||
|
||||
id = mca_base_param_register_string("orte", "local_daemon", "uri", NULL, NULL);
|
||||
mca_base_param_lookup_string(id, &local_daemon_uri);
|
||||
if (NULL != local_daemon_uri) {
|
||||
/* if we are a daemon, then we won't have this param set, so allow
|
||||
* it not to be found
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_sds_base_contact_orted(local_daemon_uri))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return(rc);
|
||||
}
|
||||
/* get the non-name common environmental variables */
|
||||
if (ORTE_SUCCESS != (rc = orte_sds_env_get())) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -52,17 +52,18 @@ orte_sds_singleton_set_name(void)
|
||||
|
||||
orte_process_info.num_procs = 1;
|
||||
orte_process_info.vpid_start = vpid;
|
||||
/* since we are a singleton, then we must have a local_rank of 0
|
||||
* and only 1 local process
|
||||
*/
|
||||
orte_process_info.local_rank = 0;
|
||||
orte_process_info.num_local_procs = 1;
|
||||
|
||||
/* only set the singleton flag if we are NOT infrastructure,
|
||||
and it has not been previously set. */
|
||||
id = mca_base_param_find("orte", NULL, "infrastructure");
|
||||
mca_base_param_lookup_int(id, &flag);
|
||||
if (!flag) {
|
||||
orte_process_info.singleton = true;
|
||||
/* since we are a singleton, then we must have a local_rank of 0
|
||||
* and only 1 local process
|
||||
*/
|
||||
orte_process_info.local_rank = 0;
|
||||
orte_process_info.num_local_procs = 1;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -54,13 +54,8 @@ orte_sds_slurm_set_name(void)
|
||||
{
|
||||
int rc;
|
||||
int id;
|
||||
int vpid_start;
|
||||
int num_procs;
|
||||
int local_rank;
|
||||
int num_local_procs;
|
||||
char* name_string = NULL;
|
||||
int slurm_nodeid;
|
||||
char *local_daemon_uri = NULL;
|
||||
|
||||
/* start by getting our cellid, jobid, and vpid (which is the
|
||||
starting vpid for the list of daemons) */
|
||||
@ -137,50 +132,12 @@ orte_sds_slurm_set_name(void)
|
||||
}
|
||||
orte_system_info.nodename = get_slurm_nodename(slurm_nodeid);
|
||||
|
||||
id = mca_base_param_register_int("ns", "nds", "vpid_start", NULL, -1);
|
||||
mca_base_param_lookup_int(id, &vpid_start);
|
||||
if (vpid_start < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
/* get the non-name common environmental variables */
|
||||
if (ORTE_SUCCESS != (rc = orte_sds_env_get())) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
orte_process_info.vpid_start = (orte_vpid_t)vpid_start;
|
||||
|
||||
id = mca_base_param_register_int("ns", "nds", "num_procs", NULL, -1);
|
||||
mca_base_param_lookup_int(id, &num_procs);
|
||||
if (num_procs < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
orte_process_info.num_procs = (orte_std_cntr_t)num_procs;
|
||||
|
||||
/* it is okay for this param not to be found - for example, we don't bother
|
||||
* to set it for orteds - so just set it to an invalid value which indicates
|
||||
* it wasn't found if it isn't there
|
||||
*/
|
||||
id = mca_base_param_register_int("ns", "nds", "local_rank", NULL, ORTE_VPID_INVALID);
|
||||
mca_base_param_lookup_int(id, &local_rank);
|
||||
orte_process_info.local_rank = (orte_vpid_t)local_rank;
|
||||
|
||||
/* it is okay for this param not to be found - for example, we don't bother
|
||||
* to set it for orteds - so just set it to a value which indicates
|
||||
* it wasn't found if it isn't there
|
||||
*/
|
||||
id = mca_base_param_register_int("ns", "nds", "num_local_procs", NULL, 0);
|
||||
mca_base_param_lookup_int(id, &num_local_procs);
|
||||
orte_process_info.num_local_procs = (orte_std_cntr_t)num_local_procs;
|
||||
|
||||
id = mca_base_param_register_string("orte", "local_daemon", "uri", NULL, NULL);
|
||||
mca_base_param_lookup_string(id, &local_daemon_uri);
|
||||
if (NULL != local_daemon_uri) {
|
||||
/* if we are a daemon, then we won't have this param set, so allow
|
||||
* it not to be found
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_sds_base_contact_orted(local_daemon_uri))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return(rc);
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -65,12 +65,10 @@ int orte_sds_xcpu_set_name(void)
|
||||
|
||||
orte_cellid_t cellid;
|
||||
orte_jobid_t jobid;
|
||||
orte_vpid_t vpid;
|
||||
orte_vpid_t vpid_start;
|
||||
char* cellid_string;
|
||||
char* jobid_string;
|
||||
char* vpid_string;
|
||||
int num_procs, local_rank, num_local_procs;
|
||||
char *xcpu_rank_string;
|
||||
int xcpu_rank;
|
||||
int stride;
|
||||
@ -109,55 +107,12 @@ int orte_sds_xcpu_set_name(void)
|
||||
|
||||
xcpu_rank = (int)strtol(xcpu_rank_string+1, NULL, 10);
|
||||
|
||||
id = mca_base_param_register_string("ns", "nds", "vpid_start", NULL, NULL);
|
||||
mca_base_param_lookup_string(id, &vpid_string);
|
||||
if (NULL == vpid_string) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
rc = orte_ns.convert_string_to_vpid(&vpid_start, vpid_string);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
/* get the non-name common environmental variables */
|
||||
if (ORTE_SUCCESS != (rc = orte_sds_env_get())) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* compute our vpid */
|
||||
vpid = vpid_start + xcpu_rank - 1;
|
||||
|
||||
/* create our name */
|
||||
if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(
|
||||
&(orte_process_info.my_name),
|
||||
cellid,
|
||||
jobid,
|
||||
vpid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
id = mca_base_param_register_int("ns", "nds", "num_procs", NULL, -1);
|
||||
mca_base_param_lookup_int(id, &num_procs);
|
||||
if (num_procs < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
orte_process_info.num_procs = (orte_std_cntr_t)num_procs;
|
||||
|
||||
/* it is okay for this param not to be found - for example, we don't bother
|
||||
* to set it for orteds - so just set it to an invalid value which indicates
|
||||
* it wasn't found if it isn't there
|
||||
*/
|
||||
id = mca_base_param_register_int("ns", "nds", "local_rank", NULL, ORTE_VPID_INVALID);
|
||||
mca_base_param_lookup_int(id, &local_rank);
|
||||
orte_process_info.local_rank = (orte_vpid_t)local_rank;
|
||||
|
||||
/* it is okay for this param not to be found - for example, we don't bother
|
||||
* to set it for orteds - so just set it to a value which indicates
|
||||
* it wasn't found if it isn't there
|
||||
*/
|
||||
id = mca_base_param_register_int("ns", "nds", "num_local_procs", NULL, 0);
|
||||
mca_base_param_lookup_int(id, &num_local_procs);
|
||||
orte_process_info.num_local_procs = (orte_std_cntr_t)num_local_procs;
|
||||
|
||||
#if 0
|
||||
id = mca_base_param_register_string("ns", "nds", "global_vpid_start", NULL, NULL);
|
||||
mca_base_param_lookup_string(id, &vpid_string);
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user