1
1

Per Nathan, with a little cleanup by me: update the PMI support to aggregate modex info, thus reducing the number of keys required so it fits within Cray default constraints

This commit was SVN r26456.
Этот коммит содержится в:
Ralph Castain 2012-05-19 16:12:52 +00:00
родитель 1ce59d08b5
Коммит c4f8043064

Просмотреть файл

@ -69,11 +69,14 @@ static int setup_pmi(void);
static int setup_key(const orte_process_name_t *name, const char *key); static int setup_key(const orte_process_name_t *name, const char *key);
/* Local variables */ /* Local variables */
static char *pmi_packed_data = NULL;
static char *pmi_kvs_name = NULL; static char *pmi_kvs_name = NULL;
static char *pmi_kvs_key = NULL; static char *pmi_kvs_key = NULL;
static char *pmi_attr_val = NULL; static char *pmi_attr_val = NULL;
static int pmi_vallen_max = -1; static int pmi_vallen_max = -1;
static int pmi_keylen_max = -1; static int pmi_keylen_max = -1;
static int pmi_pack_key = 0;
static int pmi_packed_data_off = 0;
/* Because Cray uses PMI2 extensions for some, but not all, /* Because Cray uses PMI2 extensions for some, but not all,
* PMI functions, we define a set of wrappers for those * PMI functions, we define a set of wrappers for those
@ -133,6 +136,10 @@ static int init(void)
*/ */
static void finalize(void) static void finalize(void)
{ {
if (NULL != pmi_packed_data) {
free(pmi_packed_data);
pmi_packed_data = NULL;
}
if (NULL != pmi_kvs_name) { if (NULL != pmi_kvs_name) {
free(pmi_kvs_name); free(pmi_kvs_name);
pmi_kvs_name = NULL; pmi_kvs_name = NULL;
@ -214,16 +221,39 @@ static int pmi_allgather(orte_grpcomm_collective_t *coll)
return ORTE_ERR_NOT_SUPPORTED; return ORTE_ERR_NOT_SUPPORTED;
} }
static int pmi_put_last_key (void) {
char tmp_key[32];
int rc;
if (pmi_packed_data_off == 0) {
/* nothing to write */
return ORTE_SUCCESS;
}
sprintf (tmp_key, "key%d", pmi_pack_key);
if (ORTE_SUCCESS != (rc = setup_key(ORTE_PROC_MY_NAME, tmp_key))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = kvs_put(pmi_kvs_key, pmi_packed_data);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
return ORTE_ERROR;
}
pmi_packed_data_off = 0;
pmi_pack_key ++;
return ORTE_SUCCESS;
}
static int pmi_set_proc_attr(const char *attr_name, static int pmi_set_proc_attr(const char *attr_name,
const void *buffer, size_t size) const void *buffer, size_t size)
{ {
int rc; int rc;
if (ORTE_SUCCESS != (rc = setup_key(ORTE_PROC_MY_NAME, attr_name))) {
ORTE_ERROR_LOG(rc);
return rc;
}
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output, OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
"%s grpcomm:pmi: set attr %s of size %lu in KVS %s", "%s grpcomm:pmi: set attr %s of size %lu in KVS %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attr_name, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attr_name,
@ -234,12 +264,14 @@ static int pmi_set_proc_attr(const char* attr_name,
return rc; return rc;
} }
rc = kvs_put(pmi_kvs_key, pmi_attr_val); if ((int)(pmi_packed_data_off + strlen (attr_name) + strlen (pmi_attr_val) + 2) > pmi_vallen_max) {
if (PMI_SUCCESS != rc) { pmi_put_last_key ();
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
return ORTE_ERROR;
} }
/* pack attribute */
pmi_packed_data_off += sprintf (pmi_packed_data + pmi_packed_data_off, "%s%s:%s",
pmi_packed_data_off ? "," : "", attr_name, pmi_attr_val);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -247,6 +279,9 @@ static int pmi_get_proc_attr(const orte_process_name_t name,
const char* attr_name, const char* attr_name,
void **buffer, size_t *size) void **buffer, size_t *size)
{ {
char tmp_val[1024];
char *tmp, *tok_ctx, *tmp2;
int remote_key;
int rc; int rc;
/* set default */ /* set default */
@ -258,40 +293,67 @@ static int pmi_get_proc_attr(const orte_process_name_t name,
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attr_name, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attr_name,
ORTE_NAME_PRINT(&name), pmi_kvs_name)); ORTE_NAME_PRINT(&name), pmi_kvs_name));
if (ORTE_SUCCESS != (rc = setup_key(&name, attr_name))) { for (remote_key = 0 ; ; ++remote_key) {
char tmp_key[32];
sprintf (tmp_key, "key%d", remote_key);
if (ORTE_SUCCESS != (rc = setup_key(&name, tmp_key))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max); rc = kvs_get(pmi_kvs_key, tmp_val, pmi_vallen_max);
if (PMI_SUCCESS != rc) { if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get"); break;
return ORTE_ERROR;
} }
tmp = strtok_r (tmp_val, ",", &tok_ctx);
do {
tmp2 = strchr (tmp, ':');
if (NULL == tmp2) {
continue;
}
*tmp2 = '\0';
if (strcmp (tmp, attr_name) == 0) {
strcpy (pmi_attr_val, tmp2 + 1);
*buffer = pmi_decode(size); *buffer = pmi_decode(size);
if (NULL == *buffer) { if (NULL == *buffer) {
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
break;
}
} while (NULL != (tmp = strtok_r (NULL, ",", &tok_ctx)));
if (NULL != *buffer) {
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output, OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
"%s grpcomm:pmi: got attr %s of size %lu", "%s grpcomm:pmi: got attr %s of size %lu",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
attr_name, (unsigned long)(*size))); attr_name, (unsigned long)(*size)));
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
}
return ORTE_ERROR;
}
/*** MODEX SECTION ***/ /*** MODEX SECTION ***/
static int modex(orte_grpcomm_collective_t *coll) static int modex(orte_grpcomm_collective_t *coll)
{ {
int rc, i; int rc, i;
size_t len; size_t len;
char *rml_uri, val[64]; char *rml_uri;
orte_vpid_t v; orte_vpid_t v;
orte_process_name_t name; orte_process_name_t name;
orte_jmap_t *jmap; orte_jmap_t *jmap;
orte_nid_t *nid, *loc; orte_nid_t *nid, *loc;
orte_pmap_t *pmap; orte_pmap_t *pmap;
void *tmp_val;
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output, OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
"%s grpcomm:pmi: modex entered", "%s grpcomm:pmi: modex entered",
@ -302,65 +364,33 @@ static int modex(orte_grpcomm_collective_t *coll)
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
return ORTE_ERR_VALUE_OUT_OF_BOUNDS; return ORTE_ERR_VALUE_OUT_OF_BOUNDS;
} }
if (ORTE_SUCCESS != (rc = setup_key(ORTE_PROC_MY_NAME, "HOSTNAME"))) {
ORTE_ERROR_LOG(rc);
rc = pmi_set_proc_attr ("HOSTNAME", orte_process_info.nodename, strlen(orte_process_info.nodename));
if (ORTE_SUCCESS != rc) {
return rc; return rc;
} }
rc = kvs_put(pmi_kvs_key, orte_process_info.nodename);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
return ORTE_ERROR;
}
/* add our oob endpoint info so that oob communications /* add our oob endpoint info so that oob communications
* can be supported * can be supported
*/ */
rml_uri = orte_rml.get_contact_info(); rml_uri = orte_rml.get_contact_info();
if (strlen(rml_uri) > (size_t)pmi_vallen_max) { rc = pmi_set_proc_attr ("RMLURI", rml_uri, strlen (rml_uri));
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); if (ORTE_SUCCESS != rc) {
return ORTE_ERROR;
}
if (ORTE_SUCCESS != (rc = setup_key(ORTE_PROC_MY_NAME, "RMLURI"))) {
ORTE_ERROR_LOG(rc);
free(rml_uri);
return rc; return rc;
} }
/* NTH: some characters are not allowed in pmi2 land so we need to encode */
if (ORTE_SUCCESS != (rc = pmi_encode(rml_uri, strlen(rml_uri)))) {
ORTE_ERROR_LOG(rc);
free(rml_uri);
return rc;
}
/* encoding puts the encoded value in pmi_attr_val */
rc = kvs_put(pmi_kvs_key, pmi_attr_val);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
free(rml_uri);
return ORTE_ERROR;
}
free(rml_uri); free(rml_uri);
#if OPAL_HAVE_HWLOC #if OPAL_HAVE_HWLOC
if (ORTE_SUCCESS != (rc = setup_key(ORTE_PROC_MY_NAME, "BIND_LEVEL"))) { rc = pmi_set_proc_attr ("BIND_LEVEL", &orte_process_info.bind_level, sizeof (orte_process_info.bind_level));
ORTE_ERROR_LOG(rc); if (ORTE_SUCCESS != rc) {
return rc; return rc;
} }
snprintf(val, 64, "%u", (unsigned int)orte_process_info.bind_level);
rc = kvs_put(pmi_kvs_key, val); rc = pmi_set_proc_attr ("BIND_IDX", &orte_process_info.bind_idx, sizeof (orte_process_info.bind_idx));
if (PMI_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
return ORTE_ERROR;
}
if (ORTE_SUCCESS != (rc = setup_key(ORTE_PROC_MY_NAME, "BIND_IDX"))) {
ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
snprintf(val, 64, "%u", orte_process_info.bind_idx);
rc = kvs_put(pmi_kvs_key, val);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
return ORTE_ERROR;
}
#endif #endif
/* get the job map for this job */ /* get the job map for this job */
@ -368,26 +398,18 @@ static int modex(orte_grpcomm_collective_t *coll)
/* get my pidmap entry */ /* get my pidmap entry */
pmap = (orte_pmap_t*)opal_pointer_array_get_item(&jmap->pmap, ORTE_PROC_MY_NAME->vpid); pmap = (orte_pmap_t*)opal_pointer_array_get_item(&jmap->pmap, ORTE_PROC_MY_NAME->vpid);
/* add our local/node rank info */ rc = pmi_set_proc_attr ("LOCALRANK", &pmap->local_rank, sizeof (pmap->local_rank));
if (ORTE_SUCCESS != (rc = setup_key(ORTE_PROC_MY_NAME, "LOCALRANK"))) { if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
snprintf(val, 64, "%lu", (unsigned long)pmap->local_rank); rc = pmi_set_proc_attr ("NODERANK", &pmap->node_rank, sizeof (pmap->node_rank));
rc = kvs_put(pmi_kvs_key, val); if (ORTE_SUCCESS != rc) {
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
return ORTE_ERROR;
}
if (ORTE_SUCCESS != (rc = setup_key(ORTE_PROC_MY_NAME, "NODERANK"))) {
ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
snprintf(val, 64, "%lu", (unsigned long)pmap->node_rank);
rc = kvs_put(pmi_kvs_key, val); rc = pmi_put_last_key ();
if (PMI_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put"); return rc;
return ORTE_ERROR;
} }
/* commit our modex info */ /* commit our modex info */
@ -406,22 +428,14 @@ static int modex(orte_grpcomm_collective_t *coll)
if (v == ORTE_PROC_MY_NAME->vpid) { if (v == ORTE_PROC_MY_NAME->vpid) {
continue; continue;
} }
name.vpid = v; name.vpid = v;
if (ORTE_SUCCESS != (rc = setup_key(&name, "RMLURI"))) { rc = pmi_get_proc_attr (name, "RMLURI", (void **) &rml_uri, &len);
ORTE_ERROR_LOG(rc); if (ORTE_SUCCESS != rc) {
return rc; return rc;
} }
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get");
return ORTE_ERROR;
}
/* Had to encode to protect against pmi2-prohibited chars */
rml_uri = pmi_decode(&len);
if (NULL == rml_uri) {
return ORTE_ERROR;
}
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output, OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi: proc %s oob endpoint %s", "%s grpcomm:pmi: proc %s oob endpoint %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@ -433,40 +447,38 @@ static int modex(orte_grpcomm_collective_t *coll)
} }
free(rml_uri); free(rml_uri);
if (ORTE_SUCCESS != (rc = setup_key(&name, "HOSTNAME"))) { rc = pmi_get_proc_attr (name, "HOSTNAME", &tmp_val, &len);
ORTE_ERROR_LOG(rc); if (ORTE_SUCCESS != rc) {
return rc; return rc;
} }
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get");
return ORTE_ERROR;
}
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output, OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi: proc %s location %s", "%s grpcomm:pmi: proc %s location %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name), pmi_attr_val)); ORTE_NAME_PRINT(&name), (char *) tmp_val));
/* see if this node is already in nidmap */ /* see if this node is already in nidmap */
loc = NULL; for (i = 0, loc = NULL; i < orte_nidmap.size; i++) {
for (i=0; i < orte_nidmap.size; i++) {
if (NULL == (nid = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) { if (NULL == (nid = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) {
continue; continue;
} }
if (0 == strcmp(pmi_attr_val, nid->name)) { if (0 == strcmp(tmp_val, nid->name)) {
/* found it */ /* found it */
loc = nid; loc = nid;
free (tmp_val);
break; break;
} }
} }
if (NULL == loc) { if (NULL == loc) {
/* new node - save it */ /* new node - save it */
loc = OBJ_NEW(orte_nid_t); loc = OBJ_NEW(orte_nid_t);
loc->name = strdup(pmi_attr_val); loc->name = tmp_val;
loc->index = opal_pointer_array_add(&orte_nidmap, loc); loc->index = opal_pointer_array_add(&orte_nidmap, loc);
loc->daemon = loc->index; loc->daemon = loc->index;
/* keep track */ /* keep track */
orte_process_info.num_nodes++; orte_process_info.num_nodes++;
} }
/* see if this proc is already in the pidmap */ /* see if this proc is already in the pidmap */
if (NULL == (pmap = opal_pointer_array_get_item(&jmap->pmap, v))) { if (NULL == (pmap = opal_pointer_array_get_item(&jmap->pmap, v))) {
/* nope - add it */ /* nope - add it */
@ -477,46 +489,27 @@ static int modex(orte_grpcomm_collective_t *coll)
return rc; return rc;
} }
} }
/* get the proc's local/node rank info */
if (ORTE_SUCCESS != (rc = setup_key(&name, "LOCALRANK"))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get");
return ORTE_ERROR;
}
pmap->local_rank = (orte_local_rank_t)strtoul(pmi_attr_val, NULL, 10);
if (ORTE_SUCCESS != (rc = setup_key(&name, "NODERANK"))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get");
return ORTE_ERROR;
}
pmap->node_rank = (orte_node_rank_t)strtoul(pmi_attr_val, NULL, 10);
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi: proc %s lrank %u nrank %u",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name),
(unsigned int)pmap->local_rank,
(unsigned int)pmap->node_rank));
#if OPAL_HAVE_HWLOC #if OPAL_HAVE_HWLOC
{ {
opal_hwloc_level_t bind_level; opal_hwloc_level_t bind_level;
unsigned int bind_idx; unsigned int bind_idx;
/* get the proc's locality info, if available */ /* get the proc's locality info, if available */
if (ORTE_SUCCESS != (rc = setup_key(&name, "BIND_LEVEL"))) { pmi_get_proc_attr (name, "BIND_LEVEL", &tmp_val, &len);
ORTE_ERROR_LOG(rc); if (ORTE_SUCCESS == rc && 0 < len) {
return rc; assert (len == sizeof (bind_level));
memmove (&bind_level, tmp_val, len);
free (tmp_val);
} }
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
/* don't error out here - if not found, that's okay */ rc = pmi_get_proc_attr (name, "BIND_IDX", &tmp_val, &len);
if (PMI_SUCCESS == rc) { if (ORTE_SUCCESS == rc && 0 < len) {
assert (len == sizeof (bind_idx));
memmove (&bind_idx, tmp_val, len);
free (tmp_val);
}
if (name.jobid == ORTE_PROC_MY_NAME->jobid && if (name.jobid == ORTE_PROC_MY_NAME->jobid &&
name.vpid == ORTE_PROC_MY_NAME->vpid) { name.vpid == ORTE_PROC_MY_NAME->vpid) {
/* if this data is from myself, then set locality to all */ /* if this data is from myself, then set locality to all */
@ -524,38 +517,48 @@ static int modex(orte_grpcomm_collective_t *coll)
} else if (loc->daemon != ORTE_PROC_MY_DAEMON->vpid) { } else if (loc->daemon != ORTE_PROC_MY_DAEMON->vpid) {
/* this is on a different node, then mark as non-local */ /* this is on a different node, then mark as non-local */
pmap->locality = OPAL_PROC_NON_LOCAL; pmap->locality = OPAL_PROC_NON_LOCAL;
} else if (0 == strlen(pmi_attr_val)){ } else if (0 == len) {
/* if we share a node, but we don't know anything more, then /* if we share a node, but we don't know anything more, then
* mark us as on the node as this is all we know * mark us as on the node as this is all we know
*/ */
pmap->locality = OPAL_PROC_ON_NODE; pmap->locality = OPAL_PROC_ON_NODE;
} else { } else {
bind_level = strtol(pmi_attr_val, NULL, 10);
if (ORTE_SUCCESS != (rc = setup_key(&name, "BIND_IDX"))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
if (PMI_SUCCESS != rc) {
/* all we know is we share a node */
pmap->locality = OPAL_PROC_ON_NODE;
} else {
bind_idx = strtol(pmi_attr_val, NULL, 10);
/* determine relative location on our node */ /* determine relative location on our node */
pmap->locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, pmap->locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
orte_process_info.bind_level, orte_process_info.bind_level,
orte_process_info.bind_idx, orte_process_info.bind_idx,
bind_level, bind_idx); bind_level, bind_idx);
} }
}
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output, OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
"%s grpcomm:pmi setting proc %s locale %s", "%s grpcomm:pmi setting proc %s locale %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name), ORTE_NAME_PRINT(&name),
opal_hwloc_base_print_locality(pmap->locality))); opal_hwloc_base_print_locality(pmap->locality)));
} }
}
#endif #endif
/* get the proc's local/node rank info */
rc = pmi_get_proc_attr (name, "LOCALRANK", &tmp_val, &len);
if (ORTE_SUCCESS != rc) {
return rc;
}
assert (len == sizeof (pmap->local_rank));
memmove (&pmap->local_rank, tmp_val, len);
free (tmp_val);
rc = pmi_get_proc_attr (name, "NODERANK", &tmp_val, &len);
if (ORTE_SUCCESS != rc) {
return rc;
}
assert (len == sizeof (pmap->node_rank));
memmove (&pmap->node_rank, tmp_val, len);
free (tmp_val);
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi: proc %s lrank %u nrank %u",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name),
(unsigned int)pmap->local_rank,
(unsigned int)pmap->node_rank));
} }
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output, OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
@ -576,50 +579,110 @@ static int purge_proc_attrs(void)
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/* Map a 6-bit value (0..63) to its character in the base64 alphabet
 * A-Z, a-z, 0-9, '+', '/'.  Values above 63 trip the assert. */
static inline unsigned char pmi_base64_encsym (unsigned char value) {
    assert (value < 64);

    if (value < 26) {
        return 'A' + value;
    } else if (value < 52) {
        return 'a' + (value - 26);
    } else if (value < 62) {
        return '0' + (value - 52);
    }

    return (62 == value) ? '+' : '/';
}

/* Map a base64 alphabet character back to its 6-bit value (0..63).
 *
 * Returns 64 for the padding character ' ' and for every character that is
 * not part of the alphabet.  Each range is checked on both ends so that
 * stray characters (e.g. '!', ':', '@', '[') can no longer wrap around into
 * a valid-looking value. */
static inline unsigned char pmi_base64_decsym (unsigned char value) {
    if (value >= 'A' && value <= 'Z') {
        return value - 'A';
    } else if (value >= 'a' && value <= 'z') {
        return (value - 'a') + 26;
    } else if (value >= '0' && value <= '9') {
        return (value - '0') + 52;
    } else if ('+' == value) {
        return 62;
    } else if ('/' == value) {
        return 63;
    }

    /* padding (' ') or a character outside the alphabet */
    return 64;
}

/* Encode up to three input bytes into four output characters.
 *
 * in   input bytes; only the first len bytes are read
 * out  receives exactly four characters (not NUL terminated)
 * len  number of valid bytes in in; expected to be at least 1, values
 *      above 3 are treated as 3
 *
 * Bytes missing from a short final block are treated as zero, so the
 * function never reads past the end of the caller's data and the output
 * does not depend on whatever follows it in memory. */
static inline void pmi_base64_encode_block (unsigned char in[3], unsigned char out[4], int len) {
    const unsigned char b0 = in[0];
    const unsigned char b1 = (1 < len) ? in[1] : 0;
    const unsigned char b2 = (2 < len) ? in[2] : 0;

    out[0] = pmi_base64_encsym (b0 >> 2);
    out[1] = pmi_base64_encsym (((b0 & 0x03) << 4) | ((b1 & 0xf0) >> 4));
    /* Cray PMI doesn't allow = in PMI attributes so pad with spaces */
    out[2] = 1 < len ? pmi_base64_encsym (((b1 & 0x0f) << 2) | ((b2 & 0xc0) >> 6)) : ' ';
    out[3] = 2 < len ? pmi_base64_encsym (b2 & 0x3f) : ' ';
}

/* Decode four base64 characters into up to three output bytes.
 *
 * in   four encoded characters
 * out  receives the decoded bytes; only the first <return value> bytes
 *      are written
 *
 * Returns the number of bytes decoded: 3 for a full block, 2 or 1 when the
 * block ends in padding, and 0 when either of the first two characters is
 * padding or invalid (such a block cannot describe even a single byte). */
static inline int pmi_base64_decode_block (unsigned char in[4], unsigned char out[3]) {
    unsigned char in_dec[4];

    in_dec[0] = pmi_base64_decsym (in[0]);
    in_dec[1] = pmi_base64_decsym (in[1]);
    in_dec[2] = pmi_base64_decsym (in[2]);
    in_dec[3] = pmi_base64_decsym (in[3]);

    if (64 == in_dec[0] || 64 == in_dec[1]) {
        return 0;
    }

    out[0] = (unsigned char) (in_dec[0] << 2 | in_dec[1] >> 4);
    if (64 == in_dec[2]) {
        return 1;
    }

    out[1] = (unsigned char) (in_dec[1] << 4 | in_dec[2] >> 2);
    if (64 == in_dec[3]) {
        return 2;
    }

    out[2] = (unsigned char) (((in_dec[2] << 6) & 0xc0) | in_dec[3]);
    return 3;
}
/* PMI only supports strings. For now, do a simple base16 /* PMI only supports strings. For now, do a simple base16
* encoding. Should do something smarter, both with the * encoding. Should do something smarter, both with the
* algorithm used and its implementation. */ * algorithm used and its implementation. */
static int pmi_encode(const void *val, size_t vallen) { static int pmi_encode(const void *val, size_t vallen) {
static unsigned char encodings[] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; unsigned char *tmp = (unsigned char *) pmi_attr_val;
size_t i; size_t i;
/* check for size */ /* check for size */
if ((size_t)pmi_vallen_max < ((vallen * 2) + 1)) { if ((size_t)pmi_vallen_max < (2 + vallen * 4) / 3 + 1) {
return ORTE_ERR_VALUE_OUT_OF_BOUNDS; return ORTE_ERR_VALUE_OUT_OF_BOUNDS;
} }
for (i = 0; i < vallen; i++) {
pmi_attr_val[2 * i] = encodings[((unsigned char *)val)[i] & 0xf]; for (i = 0 ; i < vallen ; i += 3, tmp += 4) {
pmi_attr_val[2 * i + 1] = encodings[((unsigned char *)val)[i] >> 4]; pmi_base64_encode_block ((unsigned char *) val + i, tmp, vallen - i);
} }
pmi_attr_val[vallen * 2] = '\0';
tmp[0] = '\0';
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
static void *pmi_decode (size_t *retlen) { static void *pmi_decode (size_t *retlen) {
size_t input_len = strlen (pmi_attr_val) / 4;
unsigned char *ret, *val; unsigned char *ret, *val;
int out_len;
size_t i; size_t i;
*retlen = strlen(pmi_attr_val)/2;
ret = calloc(1, *retlen + 1); ret = calloc (1, 3 * input_len + 1);
if (NULL == ret) { if (NULL == ret) {
return ret; return ret;
} }
val = (unsigned char *) pmi_attr_val; val = (unsigned char *) pmi_attr_val;
for (i = 0; i < *retlen; i++) { for (i = 0, out_len = 0 ; i < input_len ; i++, val += 4) {
if (*val >= '0' && *val <= '9') { out_len += pmi_base64_decode_block (val, ret + 3 * i);
ret[i] = *val - '0';
} else {
ret[i] = *val - 'a' + 10;
}
val++;
if (*val >= '0' && *val <= '9') {
ret[i] |= ((*val - '0') << 4);
} else {
ret[i] |= ((*val - 'a' + 10) << 4);
}
val++;
} }
ret[out_len] = '\0';
*retlen = out_len;
return ret; return ret;
} }
@ -641,6 +704,11 @@ static int setup_pmi(void)
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
pmi_packed_data = malloc (pmi_vallen_max);
if (NULL == pmi_packed_data) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
#if WANT_CRAY_PMI2_EXT #if WANT_CRAY_PMI2_EXT
/* TODO -- is this ok */ /* TODO -- is this ok */
max_length = 1024; max_length = 1024;