1
1

Merge pull request #910 from hppritcha/topic/odls_alps_use_907_stuff

odls/alps: do smarter close of fds in child
Этот коммит содержится в:
Howard Pritchard 2015-09-20 07:37:55 -06:00
родитель 984418dd83 a31cc21bea
Коммит 1367a442b6
2 изменённых файлов: 66 добавлений и 25 удалений

Просмотреть файл

@ -144,4 +144,9 @@ WARNING: Open MPI call to Alps alps_app_lli_pipes function failed.
Application name: %s Application name: %s
Location: %s:%d Location: %s:%d
Return Value: %d Return Value: %d
#
[close fds]
WARNING: Closing fds in child failed.
Local host: %s
Application name: %s
Location: %s:%d

Просмотреть файл

@ -101,6 +101,10 @@
#ifdef HAVE_SYS_SELECT_H #ifdef HAVE_SYS_SELECT_H
#include <sys/select.h> #include <sys/select.h>
#endif #endif
#ifdef HAVE_DIRENT_H
#include <dirent.h>
#endif
#include "opal/mca/hwloc/hwloc.h" #include "opal/mca/hwloc/hwloc.h"
#include "opal/mca/hwloc/base/base.h" #include "opal/mca/hwloc/base/base.h"
@ -350,6 +354,59 @@ static void send_error_show_help(int fd, int exit_status,
exit(exit_status); exit(exit_status);
} }
/*
 * Close the file descriptors listed in /proc/<pid>/fd for the calling
 * process, keeping only the ones the child still needs.
 *
 * Descriptors that are left open:
 *   - 0, 1 and 2 (anything below 3),
 *   - opts.p_internal[1],
 *   - write_fd,
 *   - XTAPI_FD_IDENTITY and XTAPI_FD_RESILIENCY,
 *   - the four descriptors reported by alps_app_lli_pipes(),
 *   - the descriptor backing the directory stream used for the scan
 *     (released by closedir() once the scan is finished).
 *
 * @param write_fd  descriptor that must stay open (see list above)
 * @param opts      IOF configuration; only opts.p_internal[1] is read
 *
 * @return ORTE_SUCCESS on success,
 *         ORTE_ERR_OUT_OF_RESOURCE if the /proc path could not be built,
 *         ORTE_ERR_FILE_OPEN_FAILURE if the fd directory could not be
 *         opened or alps_app_lli_pipes() failed,
 *         ORTE_ERR_TYPE_MISMATCH if a directory entry is not a valid
 *         descriptor number.
 */
static int close_open_file_descriptors(int write_fd, orte_iof_base_io_conf_t opts)
{
    int rc, scan_fd;
    int ret = ORTE_SUCCESS;
    long fd;
    char *fds_dir = NULL;
    char *end = NULL;
    DIR *dir = NULL;
    struct dirent *files;
    int app_alps_filedes[2], alps_app_filedes[2];

    rc = asprintf(&fds_dir, "/proc/%d/fd", (int)getpid());
    if (rc < 0) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    dir = opendir(fds_dir);
    free(fds_dir);
    if (NULL == dir) {
        return ORTE_ERR_FILE_OPEN_FAILURE;
    }

    /* The directory stream owns a descriptor that shows up in the listing
       itself; remember it so the loop below does not close it mid-scan. */
    scan_fd = dirfd(dir);
    if (scan_fd < 0) {
        closedir(dir);
        return ORTE_ERR_FILE_OPEN_FAILURE;
    }

    /* close all file descriptors w/ exception of stdin/stdout/stderr,
       the pipe used for the IOF INTERNAL messages, and the pipe up to
       the parent.  Be careful to retain all of the pipe fd's set up
       by the apshepherd.  These are needed for obtaining RDMA credentials,
       synchronizing with aprun, etc. */
    rc = alps_app_lli_pipes(app_alps_filedes, alps_app_filedes);
    if (0 != rc) {
        closedir(dir);
        return ORTE_ERR_FILE_OPEN_FAILURE;
    }

    while (NULL != (files = readdir(dir))) {
        /* skip "." and ".." (and any other dot entry, as before) */
        if ('.' == files->d_name[0]) {
            continue;
        }

        /* strtol only sets errno on failure, so clear it first; otherwise
           a stale value from an earlier call would be misread as a
           conversion error. */
        errno = 0;
        fd = strtol(files->d_name, &end, 10);
        if (0 != errno || end == files->d_name || '\0' != *end ||
            fd < 0 || (long)(int)fd != fd) {
            ret = ORTE_ERR_TYPE_MISMATCH;
            break;
        }

        /* never close the descriptor we are iterating with */
        if (fd == scan_fd) {
            continue;
        }

        /*
         * skip over the pipes we have open to apshepherd or slurmd
         */
        if (fd == XTAPI_FD_IDENTITY) {
            continue;
        }
        if (fd == XTAPI_FD_RESILIENCY) {
            continue;
        }
        if ((fd == app_alps_filedes[0]) ||
            (fd == app_alps_filedes[1]) ||
            (fd == alps_app_filedes[0]) ||
            (fd == alps_app_filedes[1])) {
            continue;
        }

        if (fd >= 3 && fd != opts.p_internal[1] && fd != write_fd) {
            close((int)fd);
        }
    }

    /* releases the directory stream together with scan_fd */
    closedir(dir);
    return ret;
}
static int do_child(orte_app_context_t* context, static int do_child(orte_app_context_t* context,
orte_proc_t *child, orte_proc_t *child,
char **environ_copy, char **environ_copy,
@ -357,9 +414,7 @@ static int do_child(orte_app_context_t* context,
orte_iof_base_io_conf_t opts) orte_iof_base_io_conf_t opts)
{ {
int i, rc; int i, rc;
int app_alps_filedes[2],alps_app_filedes[2];
sigset_t sigs; sigset_t sigs;
long fd, fdmax = sysconf(_SC_OPEN_MAX);
char *param, *msg; char *param, *msg;
if (orte_forward_job_control) { if (orte_forward_job_control) {
@ -428,30 +483,11 @@ static int do_child(orte_app_context_t* context,
opal_unsetenv(param, &environ_copy); opal_unsetenv(param, &environ_copy);
free(param); free(param);
/* close all file descriptors w/ exception of stdin/stdout/stderr, if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, opts)) {
the pipe used for the IOF INTERNAL messages, and the pipe up to
the parent. Be careful to retain all of the pipe fd's set up
by the apshephered. These are needed for obtaining RDMA credentials,
synchronizing with aprun, etc. */
rc = alps_app_lli_pipes(app_alps_filedes,alps_app_filedes);
if (0 != rc) {
send_error_show_help(write_fd, 1, "help-orte-odls-alps.txt", send_error_show_help(write_fd, 1, "help-orte-odls-alps.txt",
"alps_app_lli_pipes", "close fds",
orte_process_info.nodename, context->app, orte_process_info.nodename, context->app,
__FILE__, __LINE__, rc); __FILE__, __LINE__);
}
for(fd=3; fd<fdmax; fd++) {
if (fd == XTAPI_FD_IDENTITY) continue;
if (fd == XTAPI_FD_RESILIENCY) continue;
if ((fd == app_alps_filedes[0]) ||
(fd == app_alps_filedes[1]) ||
(fd == alps_app_filedes[0]) ||
(fd == alps_app_filedes[1])) continue;
if (fd != opts.p_internal[1] && fd != write_fd) {
close(fd);
}
} }