Коммит
305dc5317b
@ -167,6 +167,9 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
|||||||
for (i=0; i < size; i++) {
|
for (i=0; i < size; i++) {
|
||||||
rc = opal_convert_process_name_to_string(&nstring, &(proc_list[i]->super.proc_name));
|
rc = opal_convert_process_name_to_string(&nstring, &(proc_list[i]->super.proc_name));
|
||||||
if (OPAL_SUCCESS != rc) {
|
if (OPAL_SUCCESS != rc) {
|
||||||
|
if (!dense) {
|
||||||
|
free(proc_list);
|
||||||
|
}
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
opal_argv_append_nosize(&members, nstring);
|
opal_argv_append_nosize(&members, nstring);
|
||||||
|
@ -581,7 +581,7 @@ int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid)
|
|||||||
pmix_info_t *pinfo = NULL;
|
pmix_info_t *pinfo = NULL;
|
||||||
pmix_app_t *papps;
|
pmix_app_t *papps;
|
||||||
size_t napps, n, m, ninfo = 0;
|
size_t napps, n, m, ninfo = 0;
|
||||||
char nspace[PMIX_MAX_NSLEN];
|
char nspace[PMIX_MAX_NSLEN+1];
|
||||||
opal_pmix_info_t *info;
|
opal_pmix_info_t *info;
|
||||||
opal_pmix_app_t *app;
|
opal_pmix_app_t *app;
|
||||||
|
|
||||||
@ -630,7 +630,7 @@ static void spcbfunc(pmix_status_t status,
|
|||||||
{
|
{
|
||||||
pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata;
|
pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata;
|
||||||
int rc;
|
int rc;
|
||||||
opal_jobid_t jobid;
|
opal_jobid_t jobid=OPAL_JOBID_INVALID;
|
||||||
|
|
||||||
rc = pmix1_convert_rc(status);
|
rc = pmix1_convert_rc(status);
|
||||||
if (PMIX_SUCCESS == status) {
|
if (PMIX_SUCCESS == status) {
|
||||||
@ -855,11 +855,13 @@ int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid,
|
|||||||
if (NULL != nspace) {
|
if (NULL != nspace) {
|
||||||
free(nspace);
|
free(nspace);
|
||||||
}
|
}
|
||||||
|
PMIX_PROC_FREE(array, nprocs);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
nm->name.vpid = array[n].rank;
|
nm->name.vpid = array[n].rank;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
PMIX_PROC_FREE(array, nprocs);
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
@ -162,7 +162,7 @@ opal_net_init(void)
|
|||||||
for( i = 0; i < count; i++ ) {
|
for( i = 0; i < count; i++ ) {
|
||||||
arg = args[i];
|
arg = args[i];
|
||||||
|
|
||||||
sscanf( arg, "%u.%u.%u.%u/%u", &a, &b, &c, &d, &bits );
|
(void)sscanf( arg, "%u.%u.%u.%u/%u", &a, &b, &c, &d, &bits );
|
||||||
|
|
||||||
if( (a > 255) || (b > 255) || (c > 255) ||
|
if( (a > 255) || (b > 255) || (c > 255) ||
|
||||||
(d > 255) || (bits > 32) ) {
|
(d > 255) || (bits > 32) ) {
|
||||||
|
@ -91,7 +91,7 @@ static int rte_init(void)
|
|||||||
size_t sz;
|
size_t sz;
|
||||||
int u32, *u32ptr;
|
int u32, *u32ptr;
|
||||||
uint16_t u16, *u16ptr;
|
uint16_t u16, *u16ptr;
|
||||||
char **peers, **cpusets, *mycpuset;
|
char **peers=NULL, **cpusets=NULL, *mycpuset;
|
||||||
opal_process_name_t name;
|
opal_process_name_t name;
|
||||||
size_t i;
|
size_t i;
|
||||||
|
|
||||||
@ -326,6 +326,8 @@ static int rte_init(void)
|
|||||||
ret = opal_pmix.store_local(&name, kv);
|
ret = opal_pmix.store_local(&name, kv);
|
||||||
if (OPAL_SUCCESS != ret) {
|
if (OPAL_SUCCESS != ret) {
|
||||||
error = "local store of locality";
|
error = "local store of locality";
|
||||||
|
opal_argv_free(cpusets);
|
||||||
|
opal_argv_free(peers);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
OBJ_RELEASE(kv);
|
OBJ_RELEASE(kv);
|
||||||
@ -374,11 +376,15 @@ static int rte_init(void)
|
|||||||
if (OPAL_SUCCESS != ret) {
|
if (OPAL_SUCCESS != ret) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "pmix store local";
|
error = "pmix store local";
|
||||||
|
opal_argv_free(cpusets);
|
||||||
|
opal_argv_free(peers);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
OBJ_RELEASE(kv);
|
OBJ_RELEASE(kv);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
opal_argv_free(peers);
|
||||||
|
opal_argv_free(cpusets);
|
||||||
|
|
||||||
/* we don't need to force the routed system to pick the
|
/* we don't need to force the routed system to pick the
|
||||||
* "direct" component as that should happen automatically
|
* "direct" component as that should happen automatically
|
||||||
|
@ -74,6 +74,7 @@ static bool added_transport_keys=false;
|
|||||||
static bool added_num_procs = false;
|
static bool added_num_procs = false;
|
||||||
static bool added_app_ctx = false;
|
static bool added_app_ctx = false;
|
||||||
static bool added_pmix_envs = false;
|
static bool added_pmix_envs = false;
|
||||||
|
static char *pmixenvars[4];
|
||||||
|
|
||||||
static int fork_hnp(void);
|
static int fork_hnp(void);
|
||||||
|
|
||||||
@ -319,7 +320,7 @@ static int rte_init(void)
|
|||||||
OBJ_RELEASE(kv);
|
OBJ_RELEASE(kv);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* use the std app init to complete the procedure */
|
/* use the std app init to complete the procedure */
|
||||||
if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup(true))) {
|
if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup(true))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
@ -415,7 +416,7 @@ static int fork_hnp(void)
|
|||||||
sigset_t sigs;
|
sigset_t sigs;
|
||||||
int buffer_length, num_chars_read, chunk;
|
int buffer_length, num_chars_read, chunk;
|
||||||
char *orted_uri;
|
char *orted_uri;
|
||||||
int rc;
|
int rc, i;
|
||||||
|
|
||||||
/* A pipe is used to communicate between the parent and child to
|
/* A pipe is used to communicate between the parent and child to
|
||||||
indicate whether the exec ultimately succeeded or failed. The
|
indicate whether the exec ultimately succeeded or failed. The
|
||||||
@ -556,12 +557,14 @@ static int fork_hnp(void)
|
|||||||
chunk = ORTE_URI_MSG_LGTH-1;
|
chunk = ORTE_URI_MSG_LGTH-1;
|
||||||
num_chars_read = 0;
|
num_chars_read = 0;
|
||||||
orted_uri = (char*)malloc(buffer_length);
|
orted_uri = (char*)malloc(buffer_length);
|
||||||
|
memset(orted_uri, 0, buffer_length);
|
||||||
|
|
||||||
while (chunk == (rc = read(p[0], &orted_uri[num_chars_read], chunk))) {
|
while (chunk == (rc = read(p[0], &orted_uri[num_chars_read], chunk))) {
|
||||||
/* we read an entire buffer - better get more */
|
/* we read an entire buffer - better get more */
|
||||||
num_chars_read += chunk;
|
num_chars_read += chunk;
|
||||||
|
orted_uri = realloc((void*)orted_uri, buffer_length+ORTE_URI_MSG_LGTH);
|
||||||
|
memset(&orted_uri[buffer_length], 0, ORTE_URI_MSG_LGTH);
|
||||||
buffer_length += ORTE_URI_MSG_LGTH;
|
buffer_length += ORTE_URI_MSG_LGTH;
|
||||||
orted_uri = realloc((void*)orted_uri, buffer_length);
|
|
||||||
}
|
}
|
||||||
num_chars_read += rc;
|
num_chars_read += rc;
|
||||||
|
|
||||||
@ -612,10 +615,11 @@ static int fork_hnp(void)
|
|||||||
return ORTE_ERR_BAD_PARAM;
|
return ORTE_ERR_BAD_PARAM;
|
||||||
}
|
}
|
||||||
/* push each piece into the environment */
|
/* push each piece into the environment */
|
||||||
putenv(argv[0]);
|
for (i=0; i < 4; i++) {
|
||||||
putenv(argv[1]);
|
pmixenvars[i] = strdup(argv[i]);
|
||||||
putenv(argv[2]);
|
putenv(pmixenvars[i]);
|
||||||
putenv(argv[3]);
|
}
|
||||||
|
opal_argv_free(argv);
|
||||||
added_pmix_envs = true;
|
added_pmix_envs = true;
|
||||||
|
|
||||||
/* all done - report success */
|
/* all done - report success */
|
||||||
|
@ -323,7 +323,9 @@ static void mrorted_complete(const orte_job_t *jdata)
|
|||||||
/* get the stdout target */
|
/* get the stdout target */
|
||||||
stdout_target = ORTE_JOBID_INVALID;
|
stdout_target = ORTE_JOBID_INVALID;
|
||||||
jbptr = &stdout_target;
|
jbptr = &stdout_target;
|
||||||
orte_get_attribute(&((orte_job_t*)jdata)->attributes, ORTE_JOB_STDOUT_TARGET, (void**)&jbptr, ORTE_JOBID);
|
if (!orte_get_attribute(&((orte_job_t*)jdata)->attributes, ORTE_JOB_STDOUT_TARGET, (void**)&jbptr, ORTE_JOBID)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* the job is complete - close out the stdin
|
/* the job is complete - close out the stdin
|
||||||
* of any procs it was feeding
|
* of any procs it was feeding
|
||||||
|
@ -1030,14 +1030,14 @@ static bool tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd,
|
|||||||
"%s connect ack received error %s from %s",
|
"%s connect ack received error %s from %s",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
strerror(opal_socket_errno),
|
strerror(opal_socket_errno),
|
||||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));
|
ORTE_NAME_PRINT(&(peer->name)));
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
opal_output(0,
|
opal_output(0,
|
||||||
"%s tcp_peer_recv_blocking: "
|
"%s tcp_peer_recv_blocking: "
|
||||||
"recv() failed for %s: %s (%d)\n",
|
"recv() failed for %s: %s (%d)\n",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)),
|
ORTE_NAME_PRINT(&(peer->name)),
|
||||||
strerror(opal_socket_errno),
|
strerror(opal_socket_errno),
|
||||||
opal_socket_errno);
|
opal_socket_errno);
|
||||||
peer->state = MCA_OOB_TCP_FAILED;
|
peer->state = MCA_OOB_TCP_FAILED;
|
||||||
|
@ -145,7 +145,9 @@ void orte_rml_base_open_channel(int fd, short flags, void *cbdata)
|
|||||||
// associate open channel request and the newly created channel object
|
// associate open channel request and the newly created channel object
|
||||||
open_chan->channel = channel;
|
open_chan->channel = channel;
|
||||||
type = &type_val;
|
type = &type_val;
|
||||||
orte_get_attribute( open_chan->qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8);
|
if (!orte_get_attribute( open_chan->qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
open_chan->channel->qos = (void*) orte_qos_get_module (open_chan->qos_attributes);
|
open_chan->channel->qos = (void*) orte_qos_get_module (open_chan->qos_attributes);
|
||||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
|
||||||
"%s rml_open_channel type = %d to peer %s ",
|
"%s rml_open_channel type = %d to peer %s ",
|
||||||
@ -343,7 +345,10 @@ void orte_rml_open_channel_recv_callback (int status,
|
|||||||
/* unpack attributes first */
|
/* unpack attributes first */
|
||||||
if ( ORTE_SUCCESS == unpack_channel_attributes( buffer, &qos_attributes)) {
|
if ( ORTE_SUCCESS == unpack_channel_attributes( buffer, &qos_attributes)) {
|
||||||
type = &type_val;
|
type = &type_val;
|
||||||
orte_get_attribute( &qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8);
|
if (!orte_get_attribute( &qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) {
|
||||||
|
OPAL_LIST_DESTRUCT(&qos_attributes);
|
||||||
|
return;
|
||||||
|
}
|
||||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
|
||||||
"rml_open_channel_recv_callback type =%d",
|
"rml_open_channel_recv_callback type =%d",
|
||||||
type_val));
|
type_val));
|
||||||
@ -398,6 +403,7 @@ void orte_rml_open_channel_recv_callback (int status,
|
|||||||
//reply with error message
|
//reply with error message
|
||||||
send_open_channel_reply (peer, NULL, false);
|
send_open_channel_reply (peer, NULL, false);
|
||||||
}
|
}
|
||||||
|
OPAL_LIST_DESTRUCT(&qos_attributes);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int send_open_channel_reply (orte_process_name_t *peer,
|
static int send_open_channel_reply (orte_process_name_t *peer,
|
||||||
|
@ -457,8 +457,9 @@ static void set(orte_job_t *jdata,
|
|||||||
|
|
||||||
/* see if the job has the min freq attribute set */
|
/* see if the job has the min freq attribute set */
|
||||||
fptr = &minfreq;
|
fptr = &minfreq;
|
||||||
minfreq = -1.0;
|
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_MIN_FREQ, (void**)&fptr, OPAL_FLOAT)) {
|
||||||
orte_get_attribute(&jdata->attributes, ORTE_JOB_MIN_FREQ, (void**)&fptr, OPAL_FLOAT);
|
minfreq = -1.0;
|
||||||
|
}
|
||||||
|
|
||||||
/* see if the job has the max freq attribute set */
|
/* see if the job has the max freq attribute set */
|
||||||
fptr = &freq;
|
fptr = &freq;
|
||||||
|
@ -443,13 +443,15 @@ static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender,
|
|||||||
/* check the request out of the tracking hotel */
|
/* check the request out of the tracking hotel */
|
||||||
opal_hotel_checkout_and_return_occupant(&orte_pmix_server_globals.reqs, room_num, (void**)&req);
|
opal_hotel_checkout_and_return_occupant(&orte_pmix_server_globals.reqs, room_num, (void**)&req);
|
||||||
/* return the returned data to the requestor */
|
/* return the returned data to the requestor */
|
||||||
if (NULL != req && NULL != req->mdxcbfunc) {
|
if (NULL != req) {
|
||||||
req->mdxcbfunc(ret, (char*)data, ndata, req->cbdata, relcbfunc, data);
|
if (NULL != req->mdxcbfunc) {
|
||||||
|
req->mdxcbfunc(ret, (char*)data, ndata, req->cbdata, relcbfunc, data);
|
||||||
|
}
|
||||||
|
OBJ_RELEASE(req);
|
||||||
}
|
}
|
||||||
if (NULL != data) {
|
if (NULL != data) {
|
||||||
free(data);
|
free(data);
|
||||||
}
|
}
|
||||||
OBJ_RELEASE(req);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -455,14 +455,16 @@ void pmix_server_keyval_client(int status, orte_process_name_t* sender,
|
|||||||
opal_hotel_checkout_and_return_occupant(&orte_pmix_server_globals.reqs, room_num, (void**)&req);
|
opal_hotel_checkout_and_return_occupant(&orte_pmix_server_globals.reqs, room_num, (void**)&req);
|
||||||
|
|
||||||
release:
|
release:
|
||||||
/* pass down the response */
|
if (NULL != req) {
|
||||||
if (NULL != req->opcbfunc) {
|
/* pass down the response */
|
||||||
req->opcbfunc(ret, req->cbdata);
|
if (NULL != req->opcbfunc) {
|
||||||
} else {
|
req->opcbfunc(ret, req->cbdata);
|
||||||
req->lkcbfunc(ret, info, req->cbdata);
|
} else {
|
||||||
}
|
req->lkcbfunc(ret, info, req->cbdata);
|
||||||
|
}
|
||||||
|
|
||||||
/* cleanup */
|
/* cleanup */
|
||||||
OBJ_RELEASE(req);
|
OBJ_RELEASE(req);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,8 +63,8 @@ static void opcbfunc(int status, void *cbdata)
|
|||||||
opal_list_remove_item(lt, &k1->super);
|
opal_list_remove_item(lt, &k1->super);
|
||||||
OBJ_RELEASE(k1);
|
OBJ_RELEASE(k1);
|
||||||
}
|
}
|
||||||
|
OBJ_RELEASE(lt);
|
||||||
}
|
}
|
||||||
OBJ_RELEASE(lt);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* stuff proc attributes for sending back to a proc */
|
/* stuff proc attributes for sending back to a proc */
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user