Commit
305dc5317b
@ -167,6 +167,9 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
for (i=0; i < size; i++) {
|
||||
rc = opal_convert_process_name_to_string(&nstring, &(proc_list[i]->super.proc_name));
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
if (!dense) {
|
||||
free(proc_list);
|
||||
}
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
opal_argv_append_nosize(&members, nstring);
|
||||
|
@ -581,7 +581,7 @@ int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid)
|
||||
pmix_info_t *pinfo = NULL;
|
||||
pmix_app_t *papps;
|
||||
size_t napps, n, m, ninfo = 0;
|
||||
char nspace[PMIX_MAX_NSLEN];
|
||||
char nspace[PMIX_MAX_NSLEN+1];
|
||||
opal_pmix_info_t *info;
|
||||
opal_pmix_app_t *app;
|
||||
|
||||
@ -630,7 +630,7 @@ static void spcbfunc(pmix_status_t status,
|
||||
{
|
||||
pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata;
|
||||
int rc;
|
||||
opal_jobid_t jobid;
|
||||
opal_jobid_t jobid=OPAL_JOBID_INVALID;
|
||||
|
||||
rc = pmix1_convert_rc(status);
|
||||
if (PMIX_SUCCESS == status) {
|
||||
@ -855,11 +855,13 @@ int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid,
|
||||
if (NULL != nspace) {
|
||||
free(nspace);
|
||||
}
|
||||
PMIX_PROC_FREE(array, nprocs);
|
||||
return rc;
|
||||
}
|
||||
nm->name.vpid = array[n].rank;
|
||||
}
|
||||
}
|
||||
PMIX_PROC_FREE(array, nprocs);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
@ -162,7 +162,7 @@ opal_net_init(void)
|
||||
for( i = 0; i < count; i++ ) {
|
||||
arg = args[i];
|
||||
|
||||
sscanf( arg, "%u.%u.%u.%u/%u", &a, &b, &c, &d, &bits );
|
||||
(void)sscanf( arg, "%u.%u.%u.%u/%u", &a, &b, &c, &d, &bits );
|
||||
|
||||
if( (a > 255) || (b > 255) || (c > 255) ||
|
||||
(d > 255) || (bits > 32) ) {
|
||||
|
@ -91,7 +91,7 @@ static int rte_init(void)
|
||||
size_t sz;
|
||||
int u32, *u32ptr;
|
||||
uint16_t u16, *u16ptr;
|
||||
char **peers, **cpusets, *mycpuset;
|
||||
char **peers=NULL, **cpusets=NULL, *mycpuset;
|
||||
opal_process_name_t name;
|
||||
size_t i;
|
||||
|
||||
@ -326,6 +326,8 @@ static int rte_init(void)
|
||||
ret = opal_pmix.store_local(&name, kv);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
error = "local store of locality";
|
||||
opal_argv_free(cpusets);
|
||||
opal_argv_free(peers);
|
||||
goto error;
|
||||
}
|
||||
OBJ_RELEASE(kv);
|
||||
@ -374,11 +376,15 @@ static int rte_init(void)
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "pmix store local";
|
||||
opal_argv_free(cpusets);
|
||||
opal_argv_free(peers);
|
||||
goto error;
|
||||
}
|
||||
OBJ_RELEASE(kv);
|
||||
}
|
||||
#endif
|
||||
opal_argv_free(peers);
|
||||
opal_argv_free(cpusets);
|
||||
|
||||
/* we don't need to force the routed system to pick the
|
||||
* "direct" component as that should happen automatically
|
||||
|
@ -74,6 +74,7 @@ static bool added_transport_keys=false;
|
||||
static bool added_num_procs = false;
|
||||
static bool added_app_ctx = false;
|
||||
static bool added_pmix_envs = false;
|
||||
static char *pmixenvars[4];
|
||||
|
||||
static int fork_hnp(void);
|
||||
|
||||
@ -415,7 +416,7 @@ static int fork_hnp(void)
|
||||
sigset_t sigs;
|
||||
int buffer_length, num_chars_read, chunk;
|
||||
char *orted_uri;
|
||||
int rc;
|
||||
int rc, i;
|
||||
|
||||
/* A pipe is used to communicate between the parent and child to
|
||||
indicate whether the exec ultimately succeeded or failed. The
|
||||
@ -556,12 +557,14 @@ static int fork_hnp(void)
|
||||
chunk = ORTE_URI_MSG_LGTH-1;
|
||||
num_chars_read = 0;
|
||||
orted_uri = (char*)malloc(buffer_length);
|
||||
memset(orted_uri, 0, buffer_length);
|
||||
|
||||
while (chunk == (rc = read(p[0], &orted_uri[num_chars_read], chunk))) {
|
||||
/* we read an entire buffer - better get more */
|
||||
num_chars_read += chunk;
|
||||
orted_uri = realloc((void*)orted_uri, buffer_length+ORTE_URI_MSG_LGTH);
|
||||
memset(&orted_uri[buffer_length], 0, ORTE_URI_MSG_LGTH);
|
||||
buffer_length += ORTE_URI_MSG_LGTH;
|
||||
orted_uri = realloc((void*)orted_uri, buffer_length);
|
||||
}
|
||||
num_chars_read += rc;
|
||||
|
||||
@ -612,10 +615,11 @@ static int fork_hnp(void)
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
/* push each piece into the environment */
|
||||
putenv(argv[0]);
|
||||
putenv(argv[1]);
|
||||
putenv(argv[2]);
|
||||
putenv(argv[3]);
|
||||
for (i=0; i < 4; i++) {
|
||||
pmixenvars[i] = strdup(argv[i]);
|
||||
putenv(pmixenvars[i]);
|
||||
}
|
||||
opal_argv_free(argv);
|
||||
added_pmix_envs = true;
|
||||
|
||||
/* all done - report success */
|
||||
|
@ -323,7 +323,9 @@ static void mrorted_complete(const orte_job_t *jdata)
|
||||
/* get the stdout target */
|
||||
stdout_target = ORTE_JOBID_INVALID;
|
||||
jbptr = &stdout_target;
|
||||
orte_get_attribute(&((orte_job_t*)jdata)->attributes, ORTE_JOB_STDOUT_TARGET, (void**)&jbptr, ORTE_JOBID);
|
||||
if (!orte_get_attribute(&((orte_job_t*)jdata)->attributes, ORTE_JOB_STDOUT_TARGET, (void**)&jbptr, ORTE_JOBID)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* the job is complete - close out the stdin
|
||||
* of any procs it was feeding
|
||||
|
@ -1030,14 +1030,14 @@ static bool tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd,
|
||||
"%s connect ack received error %s from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
strerror(opal_socket_errno),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
return false;
|
||||
} else {
|
||||
opal_output(0,
|
||||
"%s tcp_peer_recv_blocking: "
|
||||
"recv() failed for %s: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
peer->state = MCA_OOB_TCP_FAILED;
|
||||
|
@ -145,7 +145,9 @@ void orte_rml_base_open_channel(int fd, short flags, void *cbdata)
|
||||
// associate open channel request and the newly created channel object
|
||||
open_chan->channel = channel;
|
||||
type = &type_val;
|
||||
orte_get_attribute( open_chan->qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8);
|
||||
if (!orte_get_attribute( open_chan->qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) {
|
||||
return;
|
||||
}
|
||||
open_chan->channel->qos = (void*) orte_qos_get_module (open_chan->qos_attributes);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
|
||||
"%s rml_open_channel type = %d to peer %s ",
|
||||
@ -343,7 +345,10 @@ void orte_rml_open_channel_recv_callback (int status,
|
||||
/* unpack attributes first */
|
||||
if ( ORTE_SUCCESS == unpack_channel_attributes( buffer, &qos_attributes)) {
|
||||
type = &type_val;
|
||||
orte_get_attribute( &qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8);
|
||||
if (!orte_get_attribute( &qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) {
|
||||
OPAL_LIST_DESTRUCT(&qos_attributes);
|
||||
return;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
|
||||
"rml_open_channel_recv_callback type =%d",
|
||||
type_val));
|
||||
@ -398,6 +403,7 @@ void orte_rml_open_channel_recv_callback (int status,
|
||||
//reply with error message
|
||||
send_open_channel_reply (peer, NULL, false);
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&qos_attributes);
|
||||
}
|
||||
|
||||
static int send_open_channel_reply (orte_process_name_t *peer,
|
||||
|
@ -457,8 +457,9 @@ static void set(orte_job_t *jdata,
|
||||
|
||||
/* see if the job has the min freq attribute set */
|
||||
fptr = &minfreq;
|
||||
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_MIN_FREQ, (void**)&fptr, OPAL_FLOAT)) {
|
||||
minfreq = -1.0;
|
||||
orte_get_attribute(&jdata->attributes, ORTE_JOB_MIN_FREQ, (void**)&fptr, OPAL_FLOAT);
|
||||
}
|
||||
|
||||
/* see if the job has the max freq attribute set */
|
||||
fptr = &freq;
|
||||
|
@ -443,13 +443,15 @@ static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender,
|
||||
/* check the request out of the tracking hotel */
|
||||
opal_hotel_checkout_and_return_occupant(&orte_pmix_server_globals.reqs, room_num, (void**)&req);
|
||||
/* return the returned data to the requestor */
|
||||
if (NULL != req && NULL != req->mdxcbfunc) {
|
||||
if (NULL != req) {
|
||||
if (NULL != req->mdxcbfunc) {
|
||||
req->mdxcbfunc(ret, (char*)data, ndata, req->cbdata, relcbfunc, data);
|
||||
}
|
||||
OBJ_RELEASE(req);
|
||||
}
|
||||
if (NULL != data) {
|
||||
free(data);
|
||||
}
|
||||
OBJ_RELEASE(req);
|
||||
}
|
||||
|
||||
|
||||
|
@ -455,6 +455,7 @@ void pmix_server_keyval_client(int status, orte_process_name_t* sender,
|
||||
opal_hotel_checkout_and_return_occupant(&orte_pmix_server_globals.reqs, room_num, (void**)&req);
|
||||
|
||||
release:
|
||||
if (NULL != req) {
|
||||
/* pass down the response */
|
||||
if (NULL != req->opcbfunc) {
|
||||
req->opcbfunc(ret, req->cbdata);
|
||||
@ -465,4 +466,5 @@ void pmix_server_keyval_client(int status, orte_process_name_t* sender,
|
||||
/* cleanup */
|
||||
OBJ_RELEASE(req);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -63,9 +63,9 @@ static void opcbfunc(int status, void *cbdata)
|
||||
opal_list_remove_item(lt, &k1->super);
|
||||
OBJ_RELEASE(k1);
|
||||
}
|
||||
}
|
||||
OBJ_RELEASE(lt);
|
||||
}
|
||||
}
|
||||
|
||||
/* stuff proc attributes for sending back to a proc */
|
||||
int orte_pmix_server_register_nspace(orte_job_t *jdata)
|
||||
|
Loading…
Reference in new issue
Block a user