1
1

Merge pull request #2977 from rhc54/topic/spawn

Fix comm_spawn by registering nspace info only when needed
Этот коммит содержится в:
Ralph Castain 2017-02-15 04:31:54 -08:00 коммит произвёл GitHub
родитель 6ff74dde05 68b53e2179
Коммит f7fe2f7189
6 изменённых файлов: 20 добавлений и 15 удалений

Просмотреть файл

@ -612,10 +612,10 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata)
} }
if (NULL == nptr) { if (NULL == nptr) {
/* /*
* We may not have this namespace because someone asked about this namespace * We may not have this namespace because someone asked about this namespace
* but there are no processes from it running on this host * but there are no processes from it running on this host
*/ */
nptr = PMIX_NEW(pmix_nspace_t); nptr = PMIX_NEW(pmix_nspace_t);
(void)strncpy(nptr->nspace, caddy->lcd->proc.nspace, PMIX_MAX_NSLEN); (void)strncpy(nptr->nspace, caddy->lcd->proc.nspace, PMIX_MAX_NSLEN);
nptr->server = PMIX_NEW(pmix_server_nspace_t); nptr->server = PMIX_NEW(pmix_server_nspace_t);

Просмотреть файл

@ -481,16 +481,16 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
} }
} }
COMPLETE: COMPLETE:
/* register this job with the PMIx server - need to wait until after we /* register this job with the PMIx server - need to wait until after we
* have computed the #local_procs before calling the function */ * have computed the #local_procs before calling the function */
if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata))) { if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata, false))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto REPORT_ERROR; goto REPORT_ERROR;
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
REPORT_ERROR: REPORT_ERROR:
/* we have to report an error back to the HNP so we don't just /* we have to report an error back to the HNP so we don't just
* hang. Although there shouldn't be any errors once this is * hang. Although there shouldn't be any errors once this is
* all debugged, it is still good practice to have a way * all debugged, it is still good practice to have a way

Просмотреть файл

@ -573,7 +573,7 @@ int orte_daemon(int argc, char *argv[])
orte_pre_condition_transports(jdata); orte_pre_condition_transports(jdata);
/* register the singleton's nspace with our PMIx server */ /* register the singleton's nspace with our PMIx server */
if (ORTE_SUCCESS != (ret = orte_pmix_server_register_nspace(jdata))) { if (ORTE_SUCCESS != (ret = orte_pmix_server_register_nspace(jdata, false))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
goto DONE; goto DONE;
} }

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2006-2013 Los Alamos National Security, LLC. * Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -34,9 +34,8 @@ ORTE_DECLSPEC void pmix_server_finalize(void);
ORTE_DECLSPEC void pmix_server_register_params(void); ORTE_DECLSPEC void pmix_server_register_params(void);
ORTE_DECLSPEC int orte_pmix_server_register_nspace(orte_job_t *jdata); ORTE_DECLSPEC int orte_pmix_server_register_nspace(orte_job_t *jdata, bool force);
END_C_DECLS END_C_DECLS
#endif /* PMIX_SERVER_H_ */ #endif /* PMIX_SERVER_H_ */

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc. * Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved. * All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science * Copyright (c) 2014-2016 Research Organization for Information Science
@ -377,7 +377,7 @@ static void _cnlk(int status, opal_list_t *data, void *cbdata)
goto release; goto release;
} }
OBJ_DESTRUCT(&buf); OBJ_DESTRUCT(&buf);
if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata))) { if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata, true))) {
OBJ_RELEASE(jdata); OBJ_RELEASE(jdata);
goto release; goto release;
} }
@ -441,7 +441,7 @@ static void _cnct(int sd, short args, void *cbdata)
* registered with the local PMIx server */ * registered with the local PMIx server */
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_NSPACE_REGISTERED, NULL, OPAL_BOOL)) { if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_NSPACE_REGISTERED, NULL, OPAL_BOOL)) {
/* it hasn't been registered yet, so register it now */ /* it hasn't been registered yet, so register it now */
if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata))) { if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata, true))) {
goto release; goto release;
} }
} }

Просмотреть файл

@ -51,7 +51,7 @@
#include "pmix_server.h" #include "pmix_server.h"
/* stuff proc attributes for sending back to a proc */ /* stuff proc attributes for sending back to a proc */
int orte_pmix_server_register_nspace(orte_job_t *jdata) int orte_pmix_server_register_nspace(orte_job_t *jdata, bool force)
{ {
int rc; int rc;
orte_proc_t *pptr; orte_proc_t *pptr;
@ -74,6 +74,12 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid)); ORTE_JOBID_PRINT(jdata->jobid));
/* if this job has no local procs, then no need to register
* it unless the job info is needed by connecting jobs */
if (!force && 0 == jdata->num_local_procs) {
return ORTE_SUCCESS;
}
/* setup the info list */ /* setup the info list */
info = OBJ_NEW(opal_list_t); info = OBJ_NEW(opal_list_t);
uid = geteuid(); uid = geteuid();