orte/iof: Address the case when output is a regular file
Regular files are always write-ready, so non-blocking I/O does not give any benefits for them. More than that - if libevent is using "epoll" to track fd events, epoll_ctl will refuse attempt to add an fd pointing to a regular file descriptor with EPERM. This fix checks the object referenced by fd and avoids event_add using event_active instead. In the original configuration that uncovered this issue "epoll" was used in libevent, it was triggering the following warning message: "[warn] Epoll ADD(1) on fd 0 failed. Old events were 0; read change was 1 (add); write change was 0 (none): Operation not permitted" And the side effect was accumulation of all output in mpirun memory and actually writing it only at mpirun exit. Signed-off-by: Artem Polyakov <artpol84@gmail.com>
Этот коммит содержится в:
родитель
d1c5955b73
Коммит
d9ad918a14
@ -1,6 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -11,6 +12,14 @@
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
@ -89,3 +98,31 @@ int opal_fd_set_cloexec(int fd)
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
bool opal_fd_is_regular(int fd)
|
||||
{
|
||||
struct stat buf;
|
||||
if (fstat(fd, &buf)) {
|
||||
return false;
|
||||
}
|
||||
return S_ISREG(buf.st_mode);
|
||||
}
|
||||
|
||||
bool opal_fd_is_chardev(int fd)
|
||||
{
|
||||
struct stat buf;
|
||||
if (fstat(fd, &buf)) {
|
||||
return false;
|
||||
}
|
||||
return S_ISCHR(buf.st_mode);
|
||||
}
|
||||
|
||||
bool opal_fd_is_blkdev(int fd)
|
||||
{
|
||||
struct stat buf;
|
||||
if (fstat(fd, &buf)) {
|
||||
return false;
|
||||
}
|
||||
return S_ISBLK(buf.st_mode);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -63,6 +64,37 @@ OPAL_DECLSPEC int opal_fd_write(int fd, int len, const void *buffer);
|
||||
*/
|
||||
OPAL_DECLSPEC int opal_fd_set_cloexec(int fd);
|
||||
|
||||
/**
|
||||
* Convenience function to check if fd point to an accessible regular file.
|
||||
*
|
||||
* @param fd File descriptor
|
||||
*
|
||||
* @returns true if "fd" points to a regular file.
|
||||
* @returns false otherwise.
|
||||
*/
|
||||
OPAL_DECLSPEC bool opal_fd_is_regular(int fd);
|
||||
|
||||
/**
|
||||
* Convenience function to check if fd point to an accessible character device.
|
||||
*
|
||||
* @param fd File descriptor
|
||||
*
|
||||
* @returns true if "fd" points to a regular file.
|
||||
* @returns false otherwise.
|
||||
*/
|
||||
OPAL_DECLSPEC bool opal_fd_is_chardev(int fd);
|
||||
|
||||
/**
|
||||
* Convenience function to check if fd point to an accessible block device.
|
||||
*
|
||||
* @param fd File descriptor
|
||||
*
|
||||
* @returns true if "fd" points to a regular file.
|
||||
* @returns false otherwise.
|
||||
*/
|
||||
OPAL_DECLSPEC bool opal_fd_is_blkdev(int fd);
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -14,6 +14,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -48,6 +49,7 @@
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "orte/mca/mca.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "opal/util/fd.h"
|
||||
|
||||
#include "orte/mca/iof/iof.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
@ -84,6 +86,7 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_job_t);
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
bool pending;
|
||||
bool always_writable;
|
||||
opal_event_t *ev;
|
||||
int fd;
|
||||
opal_list_t outputs;
|
||||
@ -157,6 +160,9 @@ typedef struct orte_iof_base_t orte_iof_base_t;
|
||||
ep->tag = (tg); \
|
||||
if (0 <= (fid)) { \
|
||||
ep->wev->fd = (fid); \
|
||||
ep->wev->always_writable = opal_fd_is_regular(fid) || \
|
||||
opal_fd_is_chardev(fid) || \
|
||||
opal_fd_is_blkdev(fid); \
|
||||
opal_event_set(orte_event_base, \
|
||||
ep->wev->ev, ep->wev->fd, \
|
||||
OPAL_EV_WRITE, \
|
||||
|
@ -15,6 +15,7 @@
|
||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -298,6 +299,7 @@ OBJ_CLASS_INSTANCE(orte_iof_read_event_t,
|
||||
static void orte_iof_base_write_event_construct(orte_iof_write_event_t* wev)
|
||||
{
|
||||
wev->pending = false;
|
||||
wev->always_writable = false;
|
||||
wev->fd = -1;
|
||||
OBJ_CONSTRUCT(&wev->outputs, opal_list_t);
|
||||
wev->ev = opal_event_alloc();
|
||||
|
@ -11,6 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -259,13 +260,22 @@ int orte_iof_base_write_output(const orte_process_name_t *name, orte_iof_tag_t s
|
||||
|
||||
/* is the write event issued? */
|
||||
if (!channel->pending) {
|
||||
int rc = -1;
|
||||
/* issue it */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
|
||||
"%s write:output adding write event",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
channel->pending = true;
|
||||
ORTE_POST_OBJECT(channel);
|
||||
opal_event_add(channel->ev, 0);
|
||||
if (channel->always_writable) {
|
||||
/* Regular is always write ready. Activate the handler. */
|
||||
opal_event_active (channel->ev, OPAL_EV_WRITE, 1);
|
||||
} else {
|
||||
rc = opal_event_add(channel->ev, 0);
|
||||
if (rc) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return num_buffered;
|
||||
@ -297,13 +307,14 @@ void orte_iof_base_static_dump_output(orte_iof_read_event_t *rev)
|
||||
}
|
||||
}
|
||||
|
||||
#define ORTE_IOF_REGULARF_BLOCK (1024)
|
||||
void orte_iof_base_write_handler(int fd, short event, void *cbdata)
|
||||
{
|
||||
orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata;
|
||||
orte_iof_write_event_t *wev = sink->wev;
|
||||
opal_list_item_t *item;
|
||||
orte_iof_write_output_t *output;
|
||||
int num_written;
|
||||
int num_written, total_written = 0;
|
||||
|
||||
ORTE_ACQUIRE_OBJECT(sink);
|
||||
|
||||
@ -333,6 +344,10 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata)
|
||||
/* leave the write event running so it will call us again
|
||||
* when the fd is ready.
|
||||
*/
|
||||
if(wev->always_writable){
|
||||
/* Schedule another event */
|
||||
opal_event_active (wev->ev, OPAL_EV_WRITE, 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
/* otherwise, something bad happened so all we can do is abort
|
||||
@ -356,12 +371,29 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata)
|
||||
/* leave the write event running so it will call us again
|
||||
* when the fd is ready
|
||||
*/
|
||||
if(wev->always_writable){
|
||||
/* Schedule another event */
|
||||
opal_event_active (wev->ev, OPAL_EV_WRITE, 1);
|
||||
|
||||
}
|
||||
return;
|
||||
}
|
||||
OBJ_RELEASE(output);
|
||||
|
||||
total_written += num_written;
|
||||
if(wev->always_writable && (ORTE_IOF_REGULARF_BLOCK <= total_written)){
|
||||
/* If this is a regular file it will never tell us it will block
|
||||
* Write no more than ORTE_IOF_REGULARF_BLOCK at a time allowing
|
||||
* other fds to progress
|
||||
*/
|
||||
opal_event_active (wev->ev, OPAL_EV_WRITE, 1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
ABORT:
|
||||
opal_event_del(wev->ev);
|
||||
if (!wev->always_writable){
|
||||
opal_event_del(wev->ev);
|
||||
}
|
||||
wev->pending = false;
|
||||
ORTE_POST_OBJECT(wev);
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user