orte/iof: Address the case when output is a regular file
Regular files are always write-ready, so non-blocking I/O provides no benefit for them. Moreover, if libevent uses "epoll" to track fd events, epoll_ctl rejects any attempt to add an fd that refers to a regular file, failing with EPERM. This fix checks the type of the object referenced by the fd and, for such objects, calls event_active instead of event_add. In the original configuration that uncovered this issue, libevent was using "epoll" and emitted the following warning message: "[warn] Epoll ADD(1) on fd 0 failed. Old events were 0; read change was 1 (add); write change was 0 (none): Operation not permitted" The side effect was that all output accumulated in mpirun's memory and was actually written only when mpirun exited. Signed-off-by: Artem Polyakov <artpol84@gmail.com>
Этот коммит содержится в:
родитель
d1c5955b73
Коммит
d9ad918a14
@ -1,6 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
||||||
|
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||||
*
|
*
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -11,6 +12,14 @@
|
|||||||
|
|
||||||
#include "opal_config.h"
|
#include "opal_config.h"
|
||||||
|
|
||||||
|
#ifdef HAVE_SYS_TYPES_H
|
||||||
|
#include <sys/types.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_SYS_STAT_H
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_UNISTD_H
|
#ifdef HAVE_UNISTD_H
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
@ -89,3 +98,31 @@ int opal_fd_set_cloexec(int fd)
|
|||||||
|
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool opal_fd_is_regular(int fd)
|
||||||
|
{
|
||||||
|
struct stat buf;
|
||||||
|
if (fstat(fd, &buf)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return S_ISREG(buf.st_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool opal_fd_is_chardev(int fd)
|
||||||
|
{
|
||||||
|
struct stat buf;
|
||||||
|
if (fstat(fd, &buf)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return S_ISCHR(buf.st_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool opal_fd_is_blkdev(int fd)
|
||||||
|
{
|
||||||
|
struct stat buf;
|
||||||
|
if (fstat(fd, &buf)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return S_ISBLK(buf.st_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
||||||
|
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||||
*
|
*
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -63,6 +64,37 @@ OPAL_DECLSPEC int opal_fd_write(int fd, int len, const void *buffer);
|
|||||||
*/
|
*/
|
||||||
OPAL_DECLSPEC int opal_fd_set_cloexec(int fd);
|
OPAL_DECLSPEC int opal_fd_set_cloexec(int fd);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience function to check if fd points to an accessible regular file.
|
||||||
|
*
|
||||||
|
* @param fd File descriptor
|
||||||
|
*
|
||||||
|
* @returns true if "fd" points to a regular file.
|
||||||
|
* @returns false otherwise.
|
||||||
|
*/
|
||||||
|
OPAL_DECLSPEC bool opal_fd_is_regular(int fd);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience function to check if fd points to an accessible character device.
|
||||||
|
*
|
||||||
|
* @param fd File descriptor
|
||||||
|
*
|
||||||
|
* @returns true if "fd" points to a character device.
|
||||||
|
* @returns false otherwise.
|
||||||
|
*/
|
||||||
|
OPAL_DECLSPEC bool opal_fd_is_chardev(int fd);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience function to check if fd points to an accessible block device.
|
||||||
|
*
|
||||||
|
* @param fd File descriptor
|
||||||
|
*
|
||||||
|
* @returns true if "fd" points to a block device.
|
||||||
|
* @returns false otherwise.
|
||||||
|
*/
|
||||||
|
OPAL_DECLSPEC bool opal_fd_is_blkdev(int fd);
|
||||||
|
|
||||||
|
|
||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||||
|
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -48,6 +49,7 @@
|
|||||||
#include "opal/class/opal_bitmap.h"
|
#include "opal/class/opal_bitmap.h"
|
||||||
#include "orte/mca/mca.h"
|
#include "orte/mca/mca.h"
|
||||||
#include "opal/mca/event/event.h"
|
#include "opal/mca/event/event.h"
|
||||||
|
#include "opal/util/fd.h"
|
||||||
|
|
||||||
#include "orte/mca/iof/iof.h"
|
#include "orte/mca/iof/iof.h"
|
||||||
#include "orte/runtime/orte_globals.h"
|
#include "orte/runtime/orte_globals.h"
|
||||||
@ -84,6 +86,7 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_job_t);
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
opal_list_item_t super;
|
opal_list_item_t super;
|
||||||
bool pending;
|
bool pending;
|
||||||
|
bool always_writable;
|
||||||
opal_event_t *ev;
|
opal_event_t *ev;
|
||||||
int fd;
|
int fd;
|
||||||
opal_list_t outputs;
|
opal_list_t outputs;
|
||||||
@ -157,6 +160,9 @@ typedef struct orte_iof_base_t orte_iof_base_t;
|
|||||||
ep->tag = (tg); \
|
ep->tag = (tg); \
|
||||||
if (0 <= (fid)) { \
|
if (0 <= (fid)) { \
|
||||||
ep->wev->fd = (fid); \
|
ep->wev->fd = (fid); \
|
||||||
|
ep->wev->always_writable = opal_fd_is_regular(fid) || \
|
||||||
|
opal_fd_is_chardev(fid) || \
|
||||||
|
opal_fd_is_blkdev(fid); \
|
||||||
opal_event_set(orte_event_base, \
|
opal_event_set(orte_event_base, \
|
||||||
ep->wev->ev, ep->wev->fd, \
|
ep->wev->ev, ep->wev->fd, \
|
||||||
OPAL_EV_WRITE, \
|
OPAL_EV_WRITE, \
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||||
|
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -298,6 +299,7 @@ OBJ_CLASS_INSTANCE(orte_iof_read_event_t,
|
|||||||
static void orte_iof_base_write_event_construct(orte_iof_write_event_t* wev)
|
static void orte_iof_base_write_event_construct(orte_iof_write_event_t* wev)
|
||||||
{
|
{
|
||||||
wev->pending = false;
|
wev->pending = false;
|
||||||
|
wev->always_writable = false;
|
||||||
wev->fd = -1;
|
wev->fd = -1;
|
||||||
OBJ_CONSTRUCT(&wev->outputs, opal_list_t);
|
OBJ_CONSTRUCT(&wev->outputs, opal_list_t);
|
||||||
wev->ev = opal_event_alloc();
|
wev->ev = opal_event_alloc();
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2017 Intel, Inc. All rights reserved.
|
* Copyright (c) 2017 Intel, Inc. All rights reserved.
|
||||||
|
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -259,13 +260,22 @@ int orte_iof_base_write_output(const orte_process_name_t *name, orte_iof_tag_t s
|
|||||||
|
|
||||||
/* is the write event issued? */
|
/* is the write event issued? */
|
||||||
if (!channel->pending) {
|
if (!channel->pending) {
|
||||||
|
int rc = -1;
|
||||||
/* issue it */
|
/* issue it */
|
||||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
|
||||||
"%s write:output adding write event",
|
"%s write:output adding write event",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
channel->pending = true;
|
channel->pending = true;
|
||||||
ORTE_POST_OBJECT(channel);
|
ORTE_POST_OBJECT(channel);
|
||||||
opal_event_add(channel->ev, 0);
|
if (channel->always_writable) {
|
||||||
|
/* Regular files and character/block devices are always write-ready. Activate the handler directly. */
|
||||||
|
opal_event_active (channel->ev, OPAL_EV_WRITE, 1);
|
||||||
|
} else {
|
||||||
|
rc = opal_event_add(channel->ev, 0);
|
||||||
|
if (rc) {
|
||||||
|
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return num_buffered;
|
return num_buffered;
|
||||||
@ -297,13 +307,14 @@ void orte_iof_base_static_dump_output(orte_iof_read_event_t *rev)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define ORTE_IOF_REGULARF_BLOCK (1024)
|
||||||
void orte_iof_base_write_handler(int fd, short event, void *cbdata)
|
void orte_iof_base_write_handler(int fd, short event, void *cbdata)
|
||||||
{
|
{
|
||||||
orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata;
|
orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata;
|
||||||
orte_iof_write_event_t *wev = sink->wev;
|
orte_iof_write_event_t *wev = sink->wev;
|
||||||
opal_list_item_t *item;
|
opal_list_item_t *item;
|
||||||
orte_iof_write_output_t *output;
|
orte_iof_write_output_t *output;
|
||||||
int num_written;
|
int num_written, total_written = 0;
|
||||||
|
|
||||||
ORTE_ACQUIRE_OBJECT(sink);
|
ORTE_ACQUIRE_OBJECT(sink);
|
||||||
|
|
||||||
@ -333,6 +344,10 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata)
|
|||||||
/* leave the write event running so it will call us again
|
/* leave the write event running so it will call us again
|
||||||
* when the fd is ready.
|
* when the fd is ready.
|
||||||
*/
|
*/
|
||||||
|
if(wev->always_writable){
|
||||||
|
/* Schedule another event */
|
||||||
|
opal_event_active (wev->ev, OPAL_EV_WRITE, 1);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
/* otherwise, something bad happened so all we can do is abort
|
/* otherwise, something bad happened so all we can do is abort
|
||||||
@ -356,12 +371,29 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata)
|
|||||||
/* leave the write event running so it will call us again
|
/* leave the write event running so it will call us again
|
||||||
* when the fd is ready
|
* when the fd is ready
|
||||||
*/
|
*/
|
||||||
|
if(wev->always_writable){
|
||||||
|
/* Schedule another event */
|
||||||
|
opal_event_active (wev->ev, OPAL_EV_WRITE, 1);
|
||||||
|
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
OBJ_RELEASE(output);
|
OBJ_RELEASE(output);
|
||||||
|
|
||||||
|
total_written += num_written;
|
||||||
|
if(wev->always_writable && (ORTE_IOF_REGULARF_BLOCK <= total_written)){
|
||||||
|
/* If this is a regular file it will never tell us it will block
|
||||||
|
* Write no more than ORTE_IOF_REGULARF_BLOCK at a time allowing
|
||||||
|
* other fds to progress
|
||||||
|
*/
|
||||||
|
opal_event_active (wev->ev, OPAL_EV_WRITE, 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
ABORT:
|
ABORT:
|
||||||
opal_event_del(wev->ev);
|
if (!wev->always_writable){
|
||||||
|
opal_event_del(wev->ev);
|
||||||
|
}
|
||||||
wev->pending = false;
|
wev->pending = false;
|
||||||
ORTE_POST_OBJECT(wev);
|
ORTE_POST_OBJECT(wev);
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user