Merge pull request #1528 from hpcraink/pr/osx_sun_path
OSX tempdir too long for sun_path
Этот коммит содержится в:
Коммит
5b8a40ad65
@ -143,6 +143,7 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
|
||||
{
|
||||
int debug_level;
|
||||
char *tdir, *evar;
|
||||
char * pmix_pid;
|
||||
pid_t pid;
|
||||
|
||||
/* initialize the output system */
|
||||
@ -219,7 +220,14 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
|
||||
/* now set the address - we use the pid here to reduce collisions */
|
||||
memset(&myaddress, 0, sizeof(struct sockaddr_un));
|
||||
myaddress.sun_family = AF_UNIX;
|
||||
snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/pmix-%d", tdir, pid);
|
||||
asprintf(&pmix_pid, "pmix-%d", pid);
|
||||
// If the temporary directory name set above plus the pmix-PID string
|
||||
// plus the '/' separator are too long, just fail, so the caller
|
||||
// may provide the user with proper help... *Cough*, *Cough* OSX...
|
||||
if ((strlen(tdir) + strlen(pmix_pid) + 1) > sizeof(myaddress.sun_path)-1) {
|
||||
return PMIX_ERR_INVALID_LENGTH;
|
||||
}
|
||||
snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/%s", tdir, pmix_pid);
|
||||
asprintf(&myuri, "%s:%lu:%s", pmix_globals.myid.nspace, (unsigned long)pmix_globals.myid.rank, myaddress.sun_path);
|
||||
|
||||
|
||||
|
@ -72,19 +72,20 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address)
|
||||
{
|
||||
int flags;
|
||||
pmix_status_t rc;
|
||||
unsigned int addrlen;
|
||||
socklen_t addrlen;
|
||||
char *ptr;
|
||||
|
||||
/* create a listen socket for incoming connection attempts */
|
||||
pmix_server_globals.listen_socket = socket(PF_UNIX, SOCK_STREAM, 0);
|
||||
if (pmix_server_globals.listen_socket < 0) {
|
||||
printf("%s:%d socket() failed", __FILE__, __LINE__);
|
||||
printf("%s:%d socket() failed\n", __FILE__, __LINE__);
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
|
||||
addrlen = sizeof(struct sockaddr_un);
|
||||
if (bind(pmix_server_globals.listen_socket, (struct sockaddr*)address, addrlen) < 0) {
|
||||
printf("%s:%d bind() failed", __FILE__, __LINE__);
|
||||
printf("%s:%d bind() failed error:%s\n", __FILE__, __LINE__,
|
||||
strerror(errno));
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
/* set the mode as required */
|
||||
@ -95,18 +96,18 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address)
|
||||
|
||||
/* setup listen backlog to maximum allowed by kernel */
|
||||
if (listen(pmix_server_globals.listen_socket, SOMAXCONN) < 0) {
|
||||
printf("%s:%d listen() failed", __FILE__, __LINE__);
|
||||
printf("%s:%d listen() failed\n", __FILE__, __LINE__);
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
|
||||
/* set socket up to be non-blocking, otherwise accept could block */
|
||||
if ((flags = fcntl(pmix_server_globals.listen_socket, F_GETFL, 0)) < 0) {
|
||||
printf("%s:%d fcntl(F_GETFL) failed", __FILE__, __LINE__);
|
||||
printf("%s:%d fcntl(F_GETFL) failed\n", __FILE__, __LINE__);
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
flags |= O_NONBLOCK;
|
||||
if (fcntl(pmix_server_globals.listen_socket, F_SETFL, flags) < 0) {
|
||||
printf("%s:%d fcntl(F_SETFL) failed", __FILE__, __LINE__);
|
||||
printf("%s:%d fcntl(F_SETFL) failed\n", __FILE__, __LINE__);
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
|
||||
|
@ -515,7 +515,7 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
/* setup the PMIx server */
|
||||
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "pmix server init";
|
||||
error = "Try a shorter TMPDIR var. or change your computer's name (see uname -n), since pmix_server_init";
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
@ -634,7 +634,7 @@ static int rte_init(void)
|
||||
/* setup the PMIx server */
|
||||
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "pmix server init";
|
||||
error = "Try a shorter TMPDIR var. or change your computer's name (see uname -n), since pmix_server_init";
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
@ -246,6 +246,9 @@ int pmix_server_init(void)
|
||||
if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server, &info))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
/* memory cleanup will occur when finalize is called */
|
||||
orte_show_help("help-orterun.txt", "orterun:pmix-failed", true,
|
||||
orte_process_info.proc_session_dir);
|
||||
return rc;
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&info);
|
||||
|
||||
|
@ -660,3 +660,12 @@ method and try launching your job again.
|
||||
|
||||
Your job will now abort.
|
||||
#
|
||||
[orterun:pmix-failed]
|
||||
The call to pmix_server_init() failed. This may be due to your
|
||||
system's restriction on the path length of Unix domain sockets.
|
||||
|
||||
orte_proc_session_dir: %s
|
||||
|
||||
Please try to set TMPDIR to something short (like /tmp) or change
|
||||
your computer's name (see uname -n).
|
||||
#
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user