1
1

Several things in this commit - shouldn't impact any existing work:

1. Added pid_t to the dps

2. Processes now "register" their local pid and update their location (i.e., nodename) on the registry during mpi_init

3. Added a new error code for values that exceed the maximum for their data type (useful when transferring a value from one variable to another of a different size)

4. Fixed a few places where size_t was being incorrectly handled

5. Updated dps_test to cover pid_t types

This should now provide support for TotalView connection - which David is pursuing.

This commit was SVN r5623.
Этот коммит содержится в:
Ralph Castain 2005-05-06 17:00:06 +00:00
родитель 22685e5055
Коммит 659d57f300
27 изменённых файлов: 349 добавлений и 33 удалений

Просмотреть файл

@ -90,6 +90,21 @@ extern "C" {
#error Unsupported int size!
#endif
/*
* ORTE type corresponding to pid_t
*
* Selects the fixed-width unsigned ORTE data type whose width equals
* SIZEOF_PID_T. Only the size is matched here; compilation stops with an
* error for any pid_t size other than 1, 2, 4 or 8 bytes.
* NOTE(review): pid_t is a signed type on POSIX systems; presumably the
* unsigned type serves purely as a same-width container - confirm.
*/
#if SIZEOF_PID_T == 1
#define DPS_TYPE_PID_T ORTE_UINT8
#elif SIZEOF_PID_T == 2
#define DPS_TYPE_PID_T ORTE_UINT16
#elif SIZEOF_PID_T == 4
#define DPS_TYPE_PID_T ORTE_UINT32
#elif SIZEOF_PID_T == 8
#define DPS_TYPE_PID_T ORTE_UINT64
#else
#error Unsupported pid_t size!
#endif
/**
* Internal struct used for holding registered dps functions
*/
@ -172,6 +187,9 @@ extern orte_pointer_array_t *orte_dps_types;
int orte_dps_pack_sizet(orte_buffer_t *buffer, void *src,
size_t num_vals, orte_data_type_t type);
/* Pack num_vals pid_t-sized values from src into buffer. The definition
 * always packs them as DPS_TYPE_PID_T and does not consult 'type'. */
int orte_dps_pack_pid(orte_buffer_t *buffer, void *src,
size_t num_vals, orte_data_type_t type);
int orte_dps_pack_string(orte_buffer_t *buffer, void *src,
size_t num_vals, orte_data_type_t type);
@ -205,6 +223,9 @@ extern orte_pointer_array_t *orte_dps_types;
int orte_dps_unpack_sizet(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type);
/* Unpack pid_t-sized values from buffer into dest. The definition always
 * unpacks them as DPS_TYPE_PID_T and does not consult 'type'; num_vals is
 * handed through unchanged to orte_dps_unpack_buffer. */
int orte_dps_unpack_pid(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type);
int orte_dps_unpack_string(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type);

Просмотреть файл

@ -152,6 +152,23 @@ int orte_dps_pack_sizet(orte_buffer_t *buffer, void *src,
return ret;
}
/*
 * PID_T
 *
 * Packs num_vals entries from src into buffer by forwarding to
 * orte_dps_pack_buffer with the fixed-width type selected for pid_t
 * (DPS_TYPE_PID_T). The 'type' argument is not consulted.
 *
 * Returns the code produced by orte_dps_pack_buffer; any code other than
 * ORTE_SUCCESS is also reported through ORTE_ERROR_LOG.
 */
int orte_dps_pack_pid(orte_buffer_t *buffer, void *src,
                      size_t num_vals, orte_data_type_t type)
{
    /* delegate to the generic packer using the underlying fixed-width type */
    int status = orte_dps_pack_buffer(buffer, src, num_vals, DPS_TYPE_PID_T);

    if (ORTE_SUCCESS == status) {
        return status;
    }

    ORTE_ERROR_LOG(status);
    return status;
}
/* PACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */

Просмотреть файл

@ -204,6 +204,23 @@ int orte_dps_unpack_sizet(orte_buffer_t *buffer, void *dest,
return ret;
}
/*
 * PID_T
 *
 * Unpacks entries from buffer into dest by forwarding to
 * orte_dps_unpack_buffer with the fixed-width type selected for pid_t
 * (DPS_TYPE_PID_T). The 'type' argument is not consulted and num_vals is
 * passed through untouched.
 *
 * Returns the code produced by orte_dps_unpack_buffer; any code other than
 * ORTE_SUCCESS is also reported through ORTE_ERROR_LOG.
 */
int orte_dps_unpack_pid(orte_buffer_t *buffer, void *dest,
                        size_t *num_vals, orte_data_type_t type)
{
    /* delegate to the generic unpacker using the underlying fixed-width type */
    int status = orte_dps_unpack_buffer(buffer, dest, num_vals, DPS_TYPE_PID_T);

    if (ORTE_SUCCESS == status) {
        return status;
    }

    ORTE_ERROR_LOG(status);
    return status;
}
/* UNPACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */

Просмотреть файл

@ -213,6 +213,13 @@ int orte_dps_open(void)
ORTE_ERROR_LOG(rc);
return rc;
}
tmp = ORTE_PID;
if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_dps_pack_pid,
orte_dps_unpack_pid,
"ORTE_PID", &tmp))) {
ORTE_ERROR_LOG(rc);
return rc;
}
tmp = ORTE_STRING;
if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_dps_pack_string,
orte_dps_unpack_string,

Просмотреть файл

@ -65,7 +65,8 @@ enum {
OMPI_ERR_NOT_AVAILABLE = -43,
OMPI_ERR_GPR_DATA_CORRUPT = -44,
OMPI_ERR_PERM = -45, /* no permission */
OMPI_ERR_TYPE_MISMATCH = -46
OMPI_ERR_TYPE_MISMATCH = -46,
OMPI_ERR_VALUE_OUT_OF_BOUNDS = -47
};
#define OMPI_NAMESPACE_SEGMENT "ompi-namespace"

Просмотреть файл

@ -70,7 +70,8 @@ enum {
ORTE_ERR_NOT_AVAILABLE = -43,
ORTE_ERR_GPR_DATA_CORRUPT = -44,
ORTE_ERR_PERM = -45, /* no permission */
ORTE_ERR_TYPE_MISMATCH = -46
ORTE_ERR_TYPE_MISMATCH = -46,
ORTE_ERR_VALUE_OUT_OF_BOUNDS = -47
};
#endif /* ORTE_CONSTANTS_H */

Просмотреть файл

@ -99,4 +99,11 @@ typedef struct {
#define ORTE_SIZE_T_PRINTF "%lu"
#endif
/*
 * Define a printf conversion that handles the variations in pid_t.
 *
 * pid_t is a signed integer type, so the signed conversions are used: a
 * negative value (e.g. an error sentinel of -1) is then printed as such
 * instead of wrapping to a large unsigned number.
 *
 * Stop the build with a clear message when pid_t matches neither int nor
 * long, rather than leaving the macro undefined and failing obscurely at
 * each use site.
 */
#if SIZEOF_PID_T == SIZEOF_INT
#define ORTE_PID_T_PRINTF "%d"
#elif SIZEOF_PID_T == SIZEOF_LONG
#define ORTE_PID_T_PRINTF "%ld"
#else
#error Unsupported pid_t size!
#endif
#endif

Просмотреть файл

@ -222,6 +222,11 @@ void orte_gpr_base_dump_keyval_value(orte_buffer_t *buffer, orte_gpr_keyval_t *i
orte_gpr_base_dump_load_string(buffer, &tmp_out);
break;
case ORTE_PID:
asprintf(&tmp_out, "\t\t\tData type: ORTE_PID:\tValue: " ORTE_PID_T_PRINTF, iptr->value.pid);
orte_gpr_base_dump_load_string(buffer, &tmp_out);
break;
case ORTE_INT:
asprintf(&tmp_out, "\t\t\tData type: ORTE_INT: no value field");
orte_gpr_base_dump_load_string(buffer, &tmp_out);

Просмотреть файл

@ -31,6 +31,10 @@
#include "orte_config.h"
#include "include/orte_types.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#include "mca/schema/schema.h"
#include "class/ompi_object.h"
#include "dps/dps_types.h"
@ -126,6 +130,7 @@ typedef uint16_t orte_gpr_addr_mode_t;
typedef union { /* shared storage for the value */
char *strptr;
size_t size;
pid_t pid;
uint8_t ui8;
uint16_t ui16;
uint32_t ui32;

Просмотреть файл

@ -488,6 +488,11 @@ void orte_gpr_replica_dump_itagval_value(orte_buffer_t *buffer,
iptr->value.size);
break;
case ORTE_PID:
sprintf(tmp, "\t\tData type: ORTE_PID\tValue: " ORTE_PID_T_PRINTF,
iptr->value.pid);
break;
case ORTE_INT:
sprintf(tmp, "\t\tData type: ORTE_INT\tValue: %d", (int)iptr->value.i32);
break;

Просмотреть файл

@ -293,6 +293,10 @@ int orte_gpr_replica_get_value(void *value, orte_gpr_replica_itagval_t *ival)
*((size_t*)value) = src->size;
break;
case ORTE_PID:
*((pid_t*)value) = src->pid;
break;
case ORTE_UINT8:
*((uint8_t*)value) = src->ui8;
break;
@ -377,6 +381,10 @@ int orte_gpr_replica_xfer_payload(orte_gpr_value_union_t *dest,
dest->size = src->size;
break;
case ORTE_PID:
dest->pid = src->pid;
break;
case ORTE_STRING:
dest->strptr = strdup(src->strptr);
if (NULL == dest->strptr) {
@ -532,6 +540,16 @@ int orte_gpr_replica_compare_values(int *cmp, orte_gpr_replica_itagval_t *ival1,
}
break;
case ORTE_PID:
if (ival1->value.pid == ival2->value.pid) {
*cmp = 0;
} else if (ival1->value.pid < ival2->value.pid) {
*cmp = -1;
} else {
*cmp = 1;
}
break;
case ORTE_UINT8:
if (ival1->value.ui8 == ival2->value.ui8) {
*cmp = 0;

Просмотреть файл

@ -88,7 +88,8 @@ int orte_ns_base_set_my_name(void)
int orte_ns_base_get_peers(orte_process_name_t **procs,
size_t *num_procs, size_t *self)
{
int i, rc;
size_t i;
int rc;
orte_cellid_t mycellid;
orte_jobid_t myjobid;
orte_vpid_t myvpid;
@ -120,7 +121,7 @@ int orte_ns_base_get_peers(orte_process_name_t **procs,
(*procs)[i].vpid = orte_process_info.vpid_start + i;
}
*num_procs = (size_t)orte_process_info.num_procs;
*num_procs = orte_process_info.num_procs;
*self = (size_t)(myvpid - orte_process_info.vpid_start);
return ORTE_SUCCESS;

Просмотреть файл

@ -114,8 +114,8 @@ int orte_ns_nds_env_get(void)
return ORTE_ERR_NOT_FOUND;
}
orte_process_info.vpid_start = vpid_start;
orte_process_info.num_procs = num_procs;
orte_process_info.vpid_start = (orte_vpid_t)vpid_start;
orte_process_info.num_procs = (size_t)num_procs;
return ORTE_SUCCESS;
}
@ -185,7 +185,7 @@ int orte_ns_nds_env_put(const orte_process_name_t* name,
free(param);
free(vpid);
asprintf(&value, "%d", vpid_start);
asprintf(&value, ORTE_SIZE_T_PRINTF, vpid_start);
if(NULL == (param = mca_base_param_environ_variable("ns","nds","vpid_start"))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
@ -194,7 +194,7 @@ int orte_ns_nds_env_put(const orte_process_name_t* name,
free(param);
free(value);
asprintf(&value, "%d", (int)num_procs);
asprintf(&value, ORTE_SIZE_T_PRINTF, num_procs);
if(NULL == (param = mca_base_param_environ_variable("ns","nds","num_procs"))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;

Просмотреть файл

@ -25,6 +25,7 @@
#include "mca/gpr/gpr.h"
#include "mca/soh/soh_types.h"
#include "mca/errmgr/errmgr.h"
#include "mca/schema/schema.h"
/**
@ -36,7 +37,7 @@ int orte_pls_base_set_proc_pid(const orte_process_name_t* name, pid_t pid)
{
orte_gpr_value_t* values[1];
orte_gpr_value_t value;
orte_gpr_keyval_t kv_pid = {{OBJ_CLASS(orte_gpr_keyval_t),0},ORTE_PROC_PID_KEY,ORTE_UINT32};
orte_gpr_keyval_t kv_pid = {{OBJ_CLASS(orte_gpr_keyval_t),0},ORTE_PROC_PID_KEY,ORTE_PID};
orte_gpr_keyval_t kv_state = {{OBJ_CLASS(orte_gpr_keyval_t),0},ORTE_PROC_STATE_KEY,ORTE_PROC_STATE};
orte_gpr_keyval_t* keyvals[2];
size_t i;
@ -53,7 +54,7 @@ int orte_pls_base_set_proc_pid(const orte_process_name_t* name, pid_t pid)
return rc;
}
kv_pid.value.ui32 = pid;
kv_pid.value.pid = pid;
kv_state.value.proc_state = ORTE_PROC_STATE_LAUNCHED;
keyvals[0] = &kv_pid;
keyvals[1] = &kv_state;
@ -126,7 +127,7 @@ int orte_pls_base_get_proc_pid(const orte_process_name_t* name, pid_t* pid)
ORTE_ERROR_LOG(rc);
goto cleanup;
}
*pid = values[0]->keyvals[0]->value.ui32;
*pid = values[0]->keyvals[0]->value.pid;
cleanup:
if(NULL != values) {
@ -179,7 +180,7 @@ int orte_pls_base_get_proc_pids(orte_jobid_t jobid, pid_t **pids, size_t* num_pi
} else {
*pids = (pid_t*)malloc(sizeof(pid_t)*num_values);
for(i=0; i<num_values; i++) {
(*pids)[i] = values[i]->keyvals[0]->value.ui32;
(*pids)[i] = values[i]->keyvals[0]->value.pid;
}
}
*num_pids = num_values;
@ -206,7 +207,7 @@ int orte_pls_base_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid
{
orte_gpr_value_t* values[1];
orte_gpr_value_t value;
orte_gpr_keyval_t kv_pid = {{OBJ_CLASS(orte_gpr_keyval_t),0},ORTE_PROC_PID_KEY,ORTE_UINT32};
orte_gpr_keyval_t kv_pid = {{OBJ_CLASS(orte_gpr_keyval_t),0},ORTE_PROC_PID_KEY,ORTE_PID};
orte_gpr_keyval_t* keyvals[1];
char* jobid_string;
size_t i;
@ -221,7 +222,7 @@ int orte_pls_base_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid
asprintf(&kv_pid.key, "%s-%s", ORTE_PROC_PID_KEY, jobid_string);
free(jobid_string);
kv_pid.value.ui32 = pid;
kv_pid.value.pid = pid;
keyvals[0] = &kv_pid;
value.segment = ORTE_NODE_SEGMENT;
@ -280,7 +281,7 @@ int orte_pls_base_get_node_pids(orte_jobid_t jobid, pid_t **pids, size_t* num_pi
} else {
*pids = (pid_t*)malloc(sizeof(pid_t)*num_values);
for(i=0; i<num_values; i++) {
(*pids)[i] = values[i]->keyvals[0]->value.ui32;
(*pids)[i] = values[i]->keyvals[0]->value.pid;
}
}
*num_pids = num_values;

Просмотреть файл

@ -20,7 +20,7 @@
* entire components just to query their version and parameters.
*/
#include "ompi_config.h"
#include "orte_config.h"
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
@ -265,8 +265,8 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
goto cleanup;
}
/* need integer value for command line parameter - NOT hex */
asprintf(&jobid_string, "%lu", (unsigned long)jobid);
/* need integer value for command line parameter */
asprintf(&jobid_string, ORTE_SIZE_T_PRINTF, jobid);
/*
* Build argv/env arrays.

Просмотреть файл

@ -65,6 +65,8 @@ OBJ_CLASS_INSTANCE(
/*
 * Initializer for orte_rmaps_base_proc_t: clears the node pointer and
 * zeroes both pid fields.
 */
static void orte_rmaps_base_proc_construct(orte_rmaps_base_proc_t* proc)
{
    /* no pid has been recorded yet */
    proc->local_pid = 0;
    proc->pid = 0;

    /* no node has been attached yet */
    proc->proc_node = NULL;
}
static void orte_rmaps_base_proc_destruct(orte_rmaps_base_proc_t* proc)
@ -210,20 +212,25 @@ int orte_rmaps_base_get_map(orte_jobid_t jobid, ompi_list_t* mapping_list)
ORTE_PROC_RANK_KEY,
ORTE_PROC_NAME_KEY,
ORTE_PROC_APP_CONTEXT_KEY,
ORTE_PROC_PID_KEY,
ORTE_PROC_LOCAL_PID_KEY,
ORTE_NODE_NAME_KEY,
NULL
};
/* query the application context */
if(ORTE_SUCCESS != (rc = orte_rmgr_base_get_app_context(jobid, &app_context, &num_context))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(NULL == (mapping = malloc(sizeof(orte_rmaps_base_map_t*) * num_context))) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if(ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_str, jobid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
@ -235,6 +242,7 @@ int orte_rmaps_base_get_map(orte_jobid_t jobid, ompi_list_t* mapping_list)
if(NULL == map->procs) {
OBJ_RELEASE(map);
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
map->num_procs = 0;
@ -250,8 +258,10 @@ int orte_rmaps_base_get_map(orte_jobid_t jobid, ompi_list_t* mapping_list)
keys,
&num_values,
&values);
if(ORTE_SUCCESS != rc)
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* sort the response */
qsort(values, num_values, sizeof(orte_gpr_value_t*),
@ -268,6 +278,7 @@ int orte_rmaps_base_get_map(orte_jobid_t jobid, ompi_list_t* mapping_list)
proc = OBJ_NEW(orte_rmaps_base_proc_t);
if(NULL == proc) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
@ -284,13 +295,21 @@ int orte_rmaps_base_get_map(orte_jobid_t jobid, ompi_list_t* mapping_list)
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
size_t app_index = keyval->value.size;
if(app_index >= num_context) {
ompi_output(0, "orte_rmaps_base_get_map: invalid context\n");
rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
map = mapping[app_index];
continue;
}
if (strcmp(keyval->key, ORTE_PROC_PID_KEY) == 0) {
proc->pid = keyval->value.pid;
continue;
}
if (strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
proc->local_pid = keyval->value.pid;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
node_name = keyval->value.strptr;
continue;
@ -358,20 +377,25 @@ int orte_rmaps_base_get_node_map(
ORTE_PROC_RANK_KEY,
ORTE_PROC_NAME_KEY,
ORTE_PROC_APP_CONTEXT_KEY,
ORTE_PROC_PID_KEY,
ORTE_PROC_LOCAL_PID_KEY,
ORTE_NODE_NAME_KEY,
NULL
};
/* query the application context */
if(ORTE_SUCCESS != (rc = orte_rmgr_base_get_app_context(jobid, &app_context, &num_context))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(NULL == (mapping = malloc(sizeof(orte_rmaps_base_map_t*) * num_context))) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if(ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_str, jobid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
@ -384,6 +408,7 @@ int orte_rmaps_base_get_node_map(
if(NULL == map->procs) {
OBJ_RELEASE(map);
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
map->num_procs = 0;
@ -399,8 +424,10 @@ int orte_rmaps_base_get_node_map(
keys,
&num_values,
&values);
if(ORTE_SUCCESS != rc)
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* sort the response */
qsort(values, num_values, sizeof(orte_gpr_value_t*),
@ -417,6 +444,7 @@ int orte_rmaps_base_get_node_map(
proc = OBJ_NEW(orte_rmaps_base_proc_t);
if(NULL == proc) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
@ -435,11 +463,20 @@ int orte_rmaps_base_get_node_map(
if(app_index >= num_context) {
ompi_output(0, "orte_rmaps_base_get_map: invalid context\n");
rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
map = mapping[app_index];
continue;
}
if (strcmp(keyval->key, ORTE_PROC_PID_KEY) == 0) {
proc->pid = keyval->value.pid;
continue;
}
if (strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
proc->local_pid = keyval->value.pid;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
node_name = keyval->value.strptr;
continue;
@ -519,13 +556,16 @@ int orte_rmaps_base_set_map(orte_jobid_t jobid, ompi_list_t* mapping_list)
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item;
num_procs += map->num_procs;
}
if(num_procs == 0)
if(num_procs == 0) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
/* allocate value array */
size = sizeof(orte_gpr_value_t*) * num_procs;
values = (orte_gpr_value_t**)malloc(size);
if(NULL == values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
for(i=0; i<num_procs; i++) {
@ -536,6 +576,7 @@ int orte_rmaps_base_set_map(orte_jobid_t jobid, ompi_list_t* mapping_list)
OBJ_RELEASE(values[j]);
}
free(values);
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
}
@ -554,17 +595,19 @@ int orte_rmaps_base_set_map(orte_jobid_t jobid, ompi_list_t* mapping_list)
size_t kv;
/* allocate keyval array */
size = sizeof(orte_gpr_keyval_t*) * 5;
size = sizeof(orte_gpr_keyval_t*) * 7;
keyvals = (orte_gpr_keyval_t**)malloc(size);
if(NULL == keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* allocate keyvals */
for(kv=0; kv < 5; kv++) {
for(kv=0; kv < 7; kv++) {
orte_gpr_keyval_t* value = OBJ_NEW(orte_gpr_keyval_t);
if(value == NULL) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
@ -592,20 +635,32 @@ int orte_rmaps_base_set_map(orte_jobid_t jobid, ompi_list_t* mapping_list)
keyvals[4]->type = ORTE_PROC_STATE;
keyvals[4]->value.proc_state = ORTE_PROC_STATE_INIT;
value->cnt = 5;
keyvals[5]->key = strdup(ORTE_PROC_PID_KEY);
keyvals[5]->type = ORTE_PID;
keyvals[5]->value.pid = proc->pid;
keyvals[6]->key = strdup(ORTE_PROC_LOCAL_PID_KEY);
keyvals[6]->type = ORTE_PID;
keyvals[6]->value.pid = proc->local_pid;
value->cnt = 7;
value->addr_mode = ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND;
value->keyvals = keyvals;
if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&value->segment,jobid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if(ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&value->tokens,&value->num_tokens,&proc->proc_name))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
}
}
/* insert all values in one call */
rc = orte_gpr.put(num_procs, values);
if (ORTE_SUCCESS != (rc = orte_gpr.put(num_procs, values))) {
ORTE_ERROR_LOG(rc);
}
cleanup:
for(i=0; i<num_procs; i++) {
@ -642,8 +697,10 @@ int orte_rmaps_base_set_vpid_range(orte_jobid_t jobid, orte_vpid_t start, orte_v
value.cnt = 2;
values = &value;
if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&value.segment, jobid)))
if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&value.segment, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
vpid_start.value.vpid = start;
vpid_range.value.vpid = range;
@ -667,8 +724,10 @@ int orte_rmaps_base_get_vpid_range(orte_jobid_t jobid, orte_vpid_t *start, orte_
int rc;
/* query the job segment on the registry */
if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid)))
if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
tokens[0] = ORTE_JOB_GLOBALS;
tokens[1] = NULL;

Просмотреть файл

@ -26,6 +26,10 @@
#include "orte_config.h"
#include "include/orte_constants.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#include "class/ompi_list.h"
#include "mca/mca.h"
#include "mca/ns/ns_types.h"
@ -68,6 +72,8 @@ struct orte_rmaps_base_proc_t {
orte_rmaps_base_node_t* proc_node;
orte_process_name_t proc_name;
size_t proc_rank;
pid_t pid; /* PLS-assigned pid */
pid_t local_pid; /* pid found by local process */
};
typedef struct orte_rmaps_base_proc_t orte_rmaps_base_proc_t;

Просмотреть файл

@ -15,6 +15,8 @@
*/
#include "orte_config.h"
#include "include/orte_constants.h"
#include "include/orte_types.h"
#include <sys/types.h>
#if HAVE_NETINET_IN_H
@ -91,6 +93,11 @@ int orte_rmgr_base_unpack_app_context(orte_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc);
return rc;
}
if (INT_MAX < temp) {
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
return ORTE_ERR_VALUE_OUT_OF_BOUNDS;
}
app_context[i]->argc = (int)temp;
}
/* get the number of env strings */

Просмотреть файл

@ -341,6 +341,7 @@ static int orte_rmgr_urm_finalize(void)
return ORTE_SUCCESS;
}
#if 0
static void orte_rmgr_urm_recv(
int status,
orte_process_name_t* peer,
@ -350,4 +351,5 @@ static void orte_rmgr_urm_recv(
{
return;
}
#endif

Просмотреть файл

@ -59,7 +59,7 @@ int orte_schema_base_get_node_tokens(char ***tokens, size_t* num_tokens, orte_ce
int orte_schema_base_get_cell_tokens(char ***tokens, size_t* num_tokens, orte_cellid_t cellid);
int orte_schema_base_get_job_segment_name(char **name, orte_jobid_t jobid);
int orte_schema_base_extract_jobid_from_segment_name(orte_jobid_t *jobid, char *name);
/* Put this process's pid (orte_process_info.pid) and node name
 * (orte_system_info.nodename) on the registry. */
int orte_schema_base_store_my_info(void);
#if defined(c_plusplus) || defined(__cplusplus)

Просмотреть файл

@ -29,7 +29,10 @@
#include "include/orte_constants.h"
#include "util/output.h"
#include "util/proc_info.h"
#include "util/sys_info.h"
#include "mca/ns/ns.h"
#include "mca/gpr/gpr.h"
#include "mca/errmgr/errmgr.h"
#include "mca/schema/base/base.h"
@ -156,3 +159,51 @@ int orte_schema_base_extract_jobid_from_segment_name(orte_jobid_t *jobid, char *
return ORTE_SUCCESS;
}
/**
* Set the process mapping in the registry.
*/
int orte_schema_base_store_my_info(void)
{
int rc = ORTE_SUCCESS;
orte_gpr_value_t value, *values;
orte_gpr_keyval_t local_pid = { {OBJ_CLASS(ompi_object_t),0}, ORTE_PROC_LOCAL_PID_KEY, ORTE_PID };
orte_gpr_keyval_t nodename = { {OBJ_CLASS(ompi_object_t),0}, ORTE_NODE_NAME_KEY, ORTE_STRING };
orte_gpr_keyval_t* keyvals[2];
size_t i;
/* NOTE: cannot destruct the value object since the keyval's are statically
* defined, so don't construct it either
*/
keyvals[0] = &local_pid;
keyvals[1] = &nodename;
value.addr_mode = ORTE_GPR_OVERWRITE;
if (ORTE_SUCCESS != (rc = orte_schema_base_get_proc_tokens(&value.tokens,
&value.num_tokens, orte_process_info.my_name))) {
ORTE_ERROR_LOG(rc);
return rc;
}
value.keyvals = keyvals;
value.cnt = 2;
values = &value;
local_pid.value.pid = orte_process_info.pid;
nodename.value.strptr = strdup(orte_system_info.nodename);
/* insert values into registry */
if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &values))) {
ORTE_ERROR_LOG(rc);
}
/* cleanup memory */
for (i=0; i < value.num_tokens; i++) {
free(value.tokens[i]);
}
return rc;
}

Просмотреть файл

@ -41,7 +41,8 @@ OMPI_DECLSPEC orte_schema_base_module_t orte_schema = {
orte_schema_base_get_node_tokens,
orte_schema_base_get_cell_tokens,
orte_schema_base_get_job_segment_name,
orte_schema_base_extract_jobid_from_segment_name
orte_schema_base_extract_jobid_from_segment_name,
orte_schema_base_store_my_info
};

Просмотреть файл

@ -47,6 +47,8 @@ typedef int (*orte_schema_get_job_segment_name_fn_t)(char **name, orte_jobid_t j
typedef int (*orte_schema_extract_jobid_from_segment_name_fn_t)(orte_jobid_t *jobid, char *name);
/* Signature of the schema module's store_my_info entry point: takes no
 * arguments and returns an int status code. */
typedef int (*orte_schema_store_my_info_fn_t)(void);
/*
* Ver 1.0.0
*/
@ -56,6 +58,7 @@ struct orte_schema_base_module_1_0_0_t {
orte_schema_get_cell_tokens_fn_t get_cell_tokens;
orte_schema_get_job_segment_name_fn_t get_job_segment_name;
orte_schema_extract_jobid_from_segment_name_fn_t extract_jobid_from_segment_name;
orte_schema_store_my_info_fn_t store_my_info;
};

Просмотреть файл

@ -70,6 +70,7 @@ extern char *orte_error_strings[];
#define ORTE_PROC_NAME_KEY "orte-proc-name"
#define ORTE_PROC_RANK_KEY "orte-proc-rank"
#define ORTE_PROC_PID_KEY "orte-proc-pid"
#define ORTE_PROC_LOCAL_PID_KEY "orte-proc-local-pid"
#define ORTE_PROC_STATE_KEY "orte-proc-state"
#define ORTE_PROC_APP_CONTEXT_KEY "orte-proc-app-context"
#define ORTE_PROC_EXIT_CODE_KEY "orte-proc-exit-code"

Просмотреть файл

@ -57,6 +57,7 @@
#include "mca/ns/ns.h"
#include "mca/gpr/gpr.h"
#include "mca/rml/rml.h"
#include "mca/schema/schema.h"
#include "mca/soh/soh.h"
#include "mca/soh/base/base.h"
#include "mca/errmgr/errmgr.h"
@ -295,6 +296,13 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
goto error;
}
/* store our process info on registry */
if (ORTE_SUCCESS != (ret = orte_schema.store_my_info())) {
ORTE_ERROR_LOG(ret);
error = "could not store my info on registry";
goto error;
}
/* Let system know we are at STG1 Barrier */
if (ORTE_SUCCESS != (ret = orte_soh.set_proc_soh(orte_process_info.my_name,
ORTE_PROC_STATE_AT_STG1, 0))) {

Просмотреть файл

@ -107,6 +107,7 @@ char *orte_error_strings[] = {
"ORTE_ERR_NOT_AVAILABLE",
"ORTE_ERR_GPR_DATA_CORRUPT",
"ORTE_ERR_PERM",
"ORTE_ERR_TYPE_MISMATCH"
"ORTE_ERR_TYPE_MISMATCH",
"ORTE_ERR_VALUE_OUT_OF_BOUNDS"
};

Просмотреть файл

@ -47,8 +47,8 @@
#include "../src/mca/rmgr/base/base.h"
#include "../src/mca/soh/base/base.h"
#define NUM_ITERS 2
#define NUM_ELEMS 3
#define NUM_ITERS 3
#define NUM_ELEMS 10
static bool test1(void); /* verify different buffer inits */
static bool test2(void); /* verify int16 */
@ -66,6 +66,7 @@ static bool test11(void); /* verify size_t */
static bool test12(void); /* verify APP_CONTEXT */
static bool test13(void); /* verify ORTE_GPR_SUBSCRIPTION */
static bool test14(void); /* verify ORTE_GPR_NOTIFY_DATA */
static bool test15(void); /* verify pid_t */
FILE *test_out;
@ -284,6 +285,14 @@ int main (int argc, char* argv[])
test_failure("orte_dps test14 failed");
}
fprintf(test_out, "executing test15\n");
if (test15()) {
test_success();
}
else {
test_failure("orte_dps test15 failed");
}
ret = test_finalize();
fclose(test_out);
return ret;
@ -1706,3 +1715,65 @@ static bool test14(void)
return (true);
}
/*
 * pid_t pack/unpack
 *
 * Packs an array of NUM_ELEMS pid_t values into a buffer NUM_ITERS times,
 * then unpacks it NUM_ITERS times and compares every element against the
 * source array. Finally releases the buffer and checks that OBJ_RELEASE
 * reset the pointer to NULL.
 *
 * Returns true when every step succeeds, false otherwise.
 */
static bool test15(void)
{
    orte_buffer_t *bufA;
    int rc;
    size_t i;
    pid_t src[NUM_ELEMS];
    pid_t dst[NUM_ELEMS];

    for (i = 0; i < NUM_ELEMS; i++) {
        src[i] = (pid_t)i;
    }

    bufA = OBJ_NEW(orte_buffer_t);
    if (NULL == bufA) {
        test_comment("orte_buffer failed init in OBJ_NEW");
        fprintf(test_out, "OBJ_NEW failed\n");
        return false;
    }

    for (i = 0; i < NUM_ITERS; i++) {
        rc = orte_dps.pack(bufA, src, NUM_ELEMS, ORTE_PID);
        if (ORTE_SUCCESS != rc) {
            test_comment ("orte_dps.pack failed");
            fprintf(test_out, "orte_pack pid_t failed with return code %d\n", rc);
            OBJ_RELEASE(bufA);
            return(false);
        }
    }

    for (i = 0; i < NUM_ITERS; i++) {
        size_t j;
        size_t count;

        /* wipe the destination so results left over from a previous
         * iteration cannot hide an unpack that wrote nothing
         */
        for (j = 0; j < NUM_ELEMS; j++) {
            dst[j] = (pid_t)-1;
        }

        count = NUM_ELEMS;
        rc = orte_dps.unpack(bufA, dst, &count, ORTE_PID);
        if (ORTE_SUCCESS != rc || count != NUM_ELEMS) {
            test_comment ("orte_dps.unpack failed");
            fprintf(test_out, "orte_unpack pid_t failed with return code %d\n", rc);
            OBJ_RELEASE(bufA);
            return(false);
        }

        for (j = 0; j < NUM_ELEMS; j++) {
            if (src[j] != dst[j]) {
                test_comment ("test15: invalid results from unpack");
                OBJ_RELEASE(bufA);
                return(false);
            }
        }
    }

    OBJ_RELEASE(bufA);
    if (NULL != bufA) {
        test_comment("OBJ_RELEASE did not NULL the buffer pointer");
        fprintf(test_out, "OBJ_RELEASE did not NULL the buffer pointer\n");
        return false;
    }

    return (true);
}