From 801fffabff35cd5274539e7db2847488deee7765 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Thu, 26 Jul 2007 16:51:41 +0000 Subject: [PATCH] Don't assume things about the contact info string in the general case. There is no need for the IP address in most cases (filem being one dubious exception), so just publish and hand around the supposedly opaque contact info strings This commit was SVN r15638. --- orte/mca/filem/base/filem_base_fns.c | 31 +++++++++++++- orte/mca/rmaps/base/rmaps_base_registry_fns.c | 4 +- orte/mca/rmaps/base/rmaps_base_support_fns.c | 4 +- orte/mca/rml/base/rml_base_contact.c | 40 ++++--------------- orte/mca/schema/schema_types.h | 2 +- 5 files changed, 42 insertions(+), 39 deletions(-) diff --git a/orte/mca/filem/base/filem_base_fns.c b/orte/mca/filem/base/filem_base_fns.c index 0099c414ca..81e9e8f7a5 100644 --- a/orte/mca/filem/base/filem_base_fns.c +++ b/orte/mca/filem/base/filem_base_fns.c @@ -30,11 +30,13 @@ #include "opal/mca/base/base.h" #include "opal/util/output.h" +#include "opal/util/argv.h" #include "opal/mca/base/mca_base_param.h" #include "opal/util/os_dirpath.h" #include "orte/mca/gpr/gpr.h" #include "orte/mca/rml/rml.h" +#include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/filem/filem.h" #include "orte/mca/filem/base/base.h" @@ -157,7 +159,7 @@ int orte_filem_base_get_proc_node_name(orte_process_name_t *proc, char **machine /* if it is the root then we need a different key :/ */ if(proc->jobid == 0 && proc->vpid == 0) { - keys[0] = ORTE_PROC_RML_IP_ADDRESS_KEY; + keys[0] = ORTE_PROC_RML_CONTACT_KEY; } else { keys[0] = ORTE_NODE_NAME_KEY; @@ -220,6 +222,33 @@ int orte_filem_base_get_proc_node_name(orte_process_name_t *proc, char **machine } } } + + if (proc->jobid == 0 && proc->vpid == 0) { + /* we have contact info -- need an IP address. This assumes + that we're using the OOB RML, but that's probably a safe + enough assumption in here. 
*/ + + char *contact_info = *machine_name; + orte_process_name_t peer; + char **uris; + char *ip, *port; + + *machine_name = NULL; + ret = orte_rml_base_parse_uris(contact_info, &peer, &uris); + free(contact_info); + if (ORTE_SUCCESS != ret) { /* parse failed: report the error and do not touch uris */ + exit_status = ret; + goto cleanup; + } + + ip = strrchr(uris[0], '/') + 1; + port = strrchr(uris[0], ':'); + port[0] = '\0'; + + *machine_name = strdup(ip); + + opal_argv_free(uris); + } if (NULL == *machine_name ){ exit_status = ORTE_ERROR; diff --git a/orte/mca/rmaps/base/rmaps_base_registry_fns.c b/orte/mca/rmaps/base/rmaps_base_registry_fns.c index 32f8a98b9a..353052c009 100644 --- a/orte/mca/rmaps/base/rmaps_base_registry_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_registry_fns.c @@ -84,7 +84,7 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid) char* dkeys[] = { ORTE_PROC_NAME_KEY, ORTE_NODE_NAME_KEY, - ORTE_PROC_RML_IP_ADDRESS_KEY, + ORTE_PROC_RML_CONTACT_KEY, NULL }; bool daemon_exists; @@ -346,7 +346,7 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid) } continue; } - if (strcmp(keyval->key, ORTE_PROC_RML_IP_ADDRESS_KEY) == 0) { + if (strcmp(keyval->key, ORTE_PROC_RML_CONTACT_KEY) == 0) { /* we don't care about the value here - the existence of the key is * enough to indicate that this daemon must already exist, so flag it */ diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index 5de4cb5351..57dfe0caa1 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -630,7 +630,7 @@ int orte_rmaps_base_define_daemons(orte_job_map_t *map) char* dkeys[] = { ORTE_PROC_NAME_KEY, ORTE_NODE_NAME_KEY, - ORTE_PROC_RML_IP_ADDRESS_KEY, + ORTE_PROC_RML_CONTACT_KEY, NULL }; orte_gpr_value_t **dvalues=NULL, *value; @@ -681,7 +681,7 @@ int orte_rmaps_base_define_daemons(orte_job_map_t *map) } continue; } - if (strcmp(keyval->key, ORTE_PROC_RML_IP_ADDRESS_KEY) == 0) { + if
(strcmp(keyval->key, ORTE_PROC_RML_CONTACT_KEY) == 0) { /* we don't care about the value here - the existence of the key is * enough to indicate that this daemon must already exist, so flag it */ diff --git a/orte/mca/rml/base/rml_base_contact.c b/orte/mca/rml/base/rml_base_contact.c index 0541a803fa..d06b2723df 100644 --- a/orte/mca/rml/base/rml_base_contact.c +++ b/orte/mca/rml/base/rml_base_contact.c @@ -14,8 +14,6 @@ #include "orte/mca/gpr/gpr.h" #include "orte/mca/oob/oob_types.h" -#define ORTE_RML_BASE_CONTACT_KEY "rml-contact" - extern opal_list_t orte_rml_base_subscriptions; struct orte_rml_base_subscription_t { @@ -31,8 +29,7 @@ static int get_contact_info(orte_jobid_t job, char **tokens, orte_gpr_notify_dat { char *segment; char *keys[] = { - ORTE_RML_BASE_CONTACT_KEY, - ORTE_PROC_RML_IP_ADDRESS_KEY, + ORTE_PROC_RML_CONTACT_KEY, NULL }; orte_gpr_value_t **values; @@ -151,7 +148,7 @@ orte_rml_base_register_subscription(orte_jobid_t jobid, char *trigger) ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR | ORTE_GPR_STRIPPED, segment, NULL, /* look at all containers on this segment */ - ORTE_RML_BASE_CONTACT_KEY, + ORTE_PROC_RML_CONTACT_KEY, orte_rml_base_contact_info_notify, NULL))) { ORTE_ERROR_LOG(rc); free(sub_name); @@ -177,10 +174,10 @@ int orte_rml_base_register_contact_info(void) { orte_std_cntr_t i, num_tokens; - orte_data_value_t *values[2]; - char *tmp, *tmp2, *tmp3; + orte_data_value_t *values[1]; + char *tmp; char *segment, **tokens; - char *keys[] = { ORTE_RML_BASE_CONTACT_KEY, ORTE_PROC_RML_IP_ADDRESS_KEY}; + char *keys[] = { ORTE_PROC_RML_CONTACT_KEY }; int rc; /* setup to put our contact info on registry */ @@ -194,28 +191,6 @@ orte_rml_base_register_contact_info(void) values[0]->data = strdup(tmp); free(tmp); - /* setup the IP address for storage */ - tmp = orte_rml.get_contact_info(); - tmp2 = strrchr(tmp, '/') + 1; - tmp3 = strrchr(tmp, ':'); - if(NULL == tmp2 || NULL == tmp3) { - opal_output(0, "%s orte_rml_base_init: invalid address \'%s\' " - 
"returned for selected oob interfaces.\n", - ORTE_NAME_PRINT(orte_process_info.my_name), tmp); - ORTE_ERROR_LOG(ORTE_ERROR); - free(tmp); - return ORTE_ERROR; - } - *tmp3 = '\0'; - values[1] = OBJ_NEW(orte_data_value_t); - if (NULL == values[1]) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - values[1]->type = ORTE_STRING; - values[1]->data = strdup(tmp2); - free(tmp); - /* define the segment */ if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, ORTE_PROC_MY_NAME->jobid))) { ORTE_ERROR_LOG(rc); @@ -236,7 +211,7 @@ orte_rml_base_register_contact_info(void) /* put our contact info in registry */ if (ORTE_SUCCESS != (rc = orte_gpr.put_N(ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_XAND, - segment, tokens, 2, keys, values))) { + segment, tokens, 1, keys, values))) { ORTE_ERROR_LOG(rc); } @@ -247,7 +222,6 @@ orte_rml_base_register_contact_info(void) } if (NULL != tokens) free(tokens); OBJ_RELEASE(values[0]); - OBJ_RELEASE(values[1]); return rc; } @@ -273,7 +247,7 @@ orte_rml_base_contact_info_notify(orte_gpr_notify_data_t* data, /* check to make sure this is the requested key */ keyval = value->keyvals[j]; - if(strcmp(keyval->key, ORTE_RML_BASE_CONTACT_KEY) != 0) + if(strcmp(keyval->key, ORTE_PROC_RML_CONTACT_KEY) != 0) continue; orte_dss.get((void**)&(contact_info), keyval->value, ORTE_STRING); orte_rml.set_contact_info(contact_info); diff --git a/orte/mca/schema/schema_types.h b/orte/mca/schema/schema_types.h index 226ddf6dcd..dd88481765 100644 --- a/orte/mca/schema/schema_types.h +++ b/orte/mca/schema/schema_types.h @@ -115,7 +115,7 @@ #define ORTE_PROC_NUM_AT_STG3 "orte-proc-num-stg3" #define ORTE_PROC_NUM_FINALIZED "orte-proc-num-finalized" #define ORTE_PROC_NUM_TERMINATED "orte-proc-num-terminated" -#define ORTE_PROC_RML_IP_ADDRESS_KEY "orte-proc-rml-ip-addr" +#define ORTE_PROC_RML_CONTACT_KEY "orte-proc-rml-contact" #define ORTE_PROC_CPU_LIST_KEY "orte-proc-cpu-list" #define ORTE_JOB_CKPT_STATE_KEY 
"orte-job-ckpt-state"