1
1

Clean up the pmi grpcomm module so it passes non-btl modex data correctly.

This commit was SVN r26992.
Этот коммит содержится в:
Ralph Castain 2012-08-10 20:35:50 +00:00
родитель 159bd2e62e
Коммит c4ee297a60
2 изменённых файла: 83 добавления и 17 удалений

Просмотреть файл

@ -193,9 +193,9 @@ static int store(const orte_process_name_t *proc,
opal_byte_object_t *boptr; opal_byte_object_t *boptr;
OPAL_OUTPUT_VERBOSE((5, orte_db_base.output, OPAL_OUTPUT_VERBOSE((5, orte_db_base.output,
"%s db:hash:store: storing key %s data type %d for proc %s", "%s db:hash:store: storing key %s[%s] for proc %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
key, (int)type, ORTE_NAME_PRINT(proc))); key, opal_dss.lookup_data_type(type), ORTE_NAME_PRINT(proc)));
/* get the job data object for this proc */ /* get the job data object for this proc */
jtable = NULL; jtable = NULL;
@ -331,9 +331,11 @@ static int fetch(const orte_process_name_t *proc,
opal_byte_object_t *boptr; opal_byte_object_t *boptr;
OPAL_OUTPUT_VERBOSE((5, orte_db_base.output, OPAL_OUTPUT_VERBOSE((5, orte_db_base.output,
"%s db:hash:fetch: searching for key %s on proc %s", "%s db:hash:fetch: searching for key %s[%s] on proc %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == key) ? "NULL" : key, ORTE_NAME_PRINT(proc))); (NULL == key) ? "NULL" : key,
opal_dss.lookup_data_type(type),
ORTE_NAME_PRINT(proc)));
/* if the key is NULL, that is an error */ /* if the key is NULL, that is an error */
if (NULL == key) { if (NULL == key) {

Просмотреть файл

@ -337,7 +337,7 @@ static int pmi_get_proc_attr(const orte_process_name_t name,
/*** MODEX SECTION ***/ /*** MODEX SECTION ***/
static int modex(orte_grpcomm_collective_t *coll) static int modex(orte_grpcomm_collective_t *coll)
{ {
int rc; int rc, ival;
size_t len; size_t len;
char *rml_uri; char *rml_uri;
orte_vpid_t v; orte_vpid_t v;
@ -348,7 +348,7 @@ static int modex(orte_grpcomm_collective_t *coll)
opal_list_t modex_data; opal_list_t modex_data;
opal_value_t *kv; opal_value_t *kv;
uint32_t arch; uint32_t arch;
uint8_t th_level; uint16_t ui16;
opal_byte_object_t bo; opal_byte_object_t bo;
char *hostname; char *hostname;
@ -600,31 +600,95 @@ static int modex(orte_grpcomm_collective_t *coll)
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
return rc; return rc;
} }
assert (len == sizeof (uint8_t)); bo.bytes = tmp_val;
memmove (&th_level, tmp_val, len); bo.size = len;
free (tmp_val);
bo.bytes = &th_level;
bo.size = 1;
if (ORTE_SUCCESS != (rc = orte_db.store(&name, "MPI_THREAD_LEVEL", (void*)&bo, OPAL_BYTE_OBJECT))) { if (ORTE_SUCCESS != (rc = orte_db.store(&name, "MPI_THREAD_LEVEL", (void*)&bo, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
free(tmp_val);
/* harvest all btl info that we know about and store it */ /* harvest all other info for keys we know about and store it */
OBJ_CONSTRUCT(&modex_data, opal_list_t); OBJ_CONSTRUCT(&modex_data, opal_list_t);
if (ORTE_SUCCESS != (rc = orte_db.fetch_multiple(ORTE_PROC_MY_NAME, "btl.*", &modex_data))) { if (ORTE_SUCCESS != (rc = orte_db.fetch_multiple(ORTE_PROC_MY_NAME, NULL, &modex_data))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
while (NULL != (kv = (opal_value_t*)opal_list_remove_first(&modex_data))) { while (NULL != (kv = (opal_value_t*)opal_list_remove_first(&modex_data))) {
if (ORTE_SUCCESS != (rc = pmi_get_proc_attr(name, kv->key, &tmp_val, &len))) { /* if this is an entry we already handled, then don't include it here */
return rc; if (0 == strcmp(kv->key, ORTE_DB_HOSTNAME) ||
0 == strcmp(kv->key, ORTE_DB_DAEMON_VPID) ||
0 == strcmp(kv->key, ORTE_DB_NODERANK) ||
0 == strcmp(kv->key, ORTE_DB_LOCALRANK) ||
0 == strcmp(kv->key, ORTE_DB_BIND_LEVEL) ||
0 == strcmp(kv->key, ORTE_DB_BIND_INDEX)) {
/* do NOT release the kv object here as we only
* have a pointer to it!
*/
continue;
} }
if (ORTE_SUCCESS != (rc = orte_db.store(&name, kv->key, (void*)&(kv->data.bo), OPAL_BYTE_OBJECT))) { if (ORTE_SUCCESS != (rc = pmi_get_proc_attr(name, kv->key, &tmp_val, &len))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
OBJ_RELEASE(kv); OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
"%s grpcomm:pmi: got modex value for proc %s key %s[%s] len %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name), kv->key,
opal_dss.lookup_data_type(kv->type), (int)len));
/* must be stored as same type so the fetch works correctly */
switch (kv->type) {
case OPAL_STRING:
if (ORTE_SUCCESS != (rc = orte_db.store(&name, kv->key, tmp_val, kv->type))) {
ORTE_ERROR_LOG(rc);
return rc;
}
free(tmp_val);
break;
case OPAL_INT:
assert (len == sizeof (int));
memmove(&ival, tmp_val, len);
free(tmp_val);
if (ORTE_SUCCESS != (rc = orte_db.store(&name, kv->key, &ival, kv->type))) {
ORTE_ERROR_LOG(rc);
return rc;
}
break;
case ORTE_VPID:
case OPAL_UINT32:
assert (len == sizeof (uint32_t));
memmove(&arch, tmp_val, len);
free(tmp_val);
if (ORTE_SUCCESS != (rc = orte_db.store(&name, kv->key, &arch, kv->type))) {
ORTE_ERROR_LOG(rc);
return rc;
}
break;
case OPAL_UINT16:
assert (len == sizeof (uint16_t));
memmove(&ui16, tmp_val, len);
free(tmp_val);
if (ORTE_SUCCESS != (rc = orte_db.store(&name, kv->key, &ui16, kv->type))) {
ORTE_ERROR_LOG(rc);
return rc;
}
break;
case OPAL_BYTE_OBJECT:
bo.bytes = (uint8_t*)tmp_val;
bo.size = len;
if (ORTE_SUCCESS != (rc = orte_db.store(&name, kv->key, (void*)&bo, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
return rc;
}
free(tmp_val);
break;
default:
ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
return ORTE_ERR_NOT_SUPPORTED;
}
/* do NOT release the kv object here as we only
* have a pointer to it!
*/
} }
OBJ_DESTRUCT(&modex_data); OBJ_DESTRUCT(&modex_data);
} }