diff --git a/config/ompi_configure_options.m4 b/config/ompi_configure_options.m4
index b66316c0c3..fcb3d85e36 100644
--- a/config/ompi_configure_options.m4
+++ b/config/ompi_configure_options.m4
@@ -275,5 +275,11 @@
 AC_DEFINE_UNQUOTED([OMPI_BUILD_FORTRAN_F08_SUBARRAYS],
                    [$OMPI_BUILD_FORTRAN_F08_SUBARRAYS],
                    [Whether we built the 'use mpi_f08' prototype subarray-based implementation or not (i.e., whether to build the use-mpi-f08-desc prototype or the regular use-mpi-f08 implementation)])
 
+dnl We no longer support the old OMPI_ENABLE_PROGRESS_THREADS.  At
+dnl some point, this should die.
+AC_DEFINE([OMPI_ENABLE_PROGRESS_THREADS],
+          [0],
+          [Whether we want OMPI progress threads enabled])
+
 ])dnl
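Note on the configury hunk above: the macro is hard-wired to 0 by configure, so every #if OMPI_ENABLE_PROGRESS_THREADS region introduced below compiles away, while the symbol itself stays defined and can therefore also be passed around as a plain integer (as the select/find_available calls later in this patch do). A minimal, self-contained C sketch of that dual use; guard_demo and the fallback #define are illustrative, not part of the patch:

    #include <stdio.h>

    /* Normally defined by configure in the generated config header; the
       fallback here only makes the sketch self-contained. */
    #ifndef OMPI_ENABLE_PROGRESS_THREADS
    #define OMPI_ENABLE_PROGRESS_THREADS 0
    #endif

    static void guard_demo(void)
    {
    #if OMPI_ENABLE_PROGRESS_THREADS == 1
        puts("progress-thread path");   /* compiled out when the macro is 0 */
    #else
        puts("polling path");
    #endif
    }

    int main(void)
    {
        guard_demo();
        /* Defined to 0 or 1, never left undefined, so it also works as an
           ordinary value: */
        printf("as a value: %d\n", OMPI_ENABLE_PROGRESS_THREADS);
        return 0;
    }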
diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_module.c b/ompi/mca/bcol/iboffload/bcol_iboffload_module.c
index 8ea3d11be2..d32292ed58 100644
--- a/ompi/mca/bcol/iboffload/bcol_iboffload_module.c
+++ b/ompi/mca/bcol/iboffload/bcol_iboffload_module.c
@@ -775,7 +775,11 @@ int mca_bcol_iboffload_adjust_cq(mca_bcol_iboffload_device_t *device,
 
     if (NULL == *ib_cq) {
         *ib_cq = ibv_create_cq_compat(device->dev.ib_dev_context, cq_size,
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
                 device, device->ib_channel,
+#else
+                NULL, NULL,
+#endif
                 0);
 
         if (NULL == *ib_cq) {
diff --git a/ompi/mca/bml/r2/bml_r2_ft.c b/ompi/mca/bml/r2/bml_r2_ft.c
index 70221030ec..fc164521db 100644
--- a/ompi/mca/bml/r2/bml_r2_ft.c
+++ b/ompi/mca/bml/r2/bml_r2_ft.c
@@ -178,7 +178,8 @@ int mca_bml_r2_ft_event(int state)
      * This will cause the BTL components to discover the available
      * network options on this machine, and post proper modex informaiton.
      */
-    if( OMPI_SUCCESS != (ret = mca_btl_base_select(1, OMPI_ENABLE_THREAD_MULTIPLE) ) ) {
+    if( OMPI_SUCCESS != (ret = mca_btl_base_select(OMPI_ENABLE_PROGRESS_THREADS,
+                                                   OMPI_ENABLE_THREAD_MULTIPLE) ) ) {
         opal_output(0, "bml:r2: ft_event(Restart): Failed to select in BTL framework\n");
         return ret;
     }
@@ -272,7 +273,8 @@ int mca_bml_r2_ft_event(int state)
      * This will cause the BTL components to discover the available
      * network options on this machine, and post proper modex informaiton.
      */
-    if( OMPI_SUCCESS != (ret = mca_btl_base_select(1, OMPI_ENABLE_THREAD_MULTIPLE) ) ) {
+    if( OMPI_SUCCESS != (ret = mca_btl_base_select(OMPI_ENABLE_PROGRESS_THREADS,
+                                                   OMPI_ENABLE_THREAD_MULTIPLE) ) ) {
         opal_output(0, "bml:r2: ft_event(Restart): Failed to select in BTL framework\n");
         return ret;
     }
diff --git a/ompi/mca/btl/openib/btl_openib.c b/ompi/mca/btl/openib/btl_openib.c
index c716b0325c..ffa4209bbe 100644
--- a/ompi/mca/btl/openib/btl_openib.c
+++ b/ompi/mca/btl/openib/btl_openib.c
@@ -179,7 +179,11 @@ static int adjust_cq(mca_btl_openib_device_t *device, const int cq)
 
     if(NULL == device->ib_cq[cq]) {
         device->ib_cq[cq] = create_cq_compat(device->ib_dev_context, cq_size,
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
                 device, device->ib_channel,
+#else
+                NULL, NULL,
+#endif
                 0);
 
         if (NULL == device->ib_cq[cq]) {
@@ -188,6 +192,7 @@ static int adjust_cq(mca_btl_openib_device_t *device, const int cq)
             return OMPI_ERROR;
         }
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
         if(ibv_req_notify_cq(device->ib_cq[cq], 0)) {
             mca_btl_openib_show_init_error(__FILE__, __LINE__,
                                            "ibv_req_notify_cq",
@@ -205,6 +210,7 @@ static int adjust_cq(mca_btl_openib_device_t *device, const int cq)
             }
         }
         OPAL_THREAD_UNLOCK(&device->device_lock);
+#endif
     }
 #ifdef HAVE_IBV_RESIZE_CQ
     else if (cq_size > mca_btl_openib_component.ib_cq_size[cq]){
@@ -730,13 +736,14 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device)
             return OMPI_ERROR;
         }
     }
-
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     /* Prepare data for thread, but not starting it */
     OBJ_CONSTRUCT(&device->thread, opal_thread_t);
     device->thread.t_run = mca_btl_openib_progress_thread;
     device->thread.t_arg = device;
     device->progress = false;
 #endif
+#endif
 
 #if HAVE_XRC
     /* if user configured to run with XRC qp and the device doesn't
diff --git a/ompi/mca/btl/openib/btl_openib.h b/ompi/mca/btl/openib/btl_openib.h
index cdf271277c..42e380bb08 100644
--- a/ompi/mca/btl/openib/btl_openib.h
+++ b/ompi/mca/btl/openib/btl_openib.h
@@ -372,9 +372,11 @@ struct mca_btl_base_endpoint_t;
 typedef struct mca_btl_openib_device_t {
     opal_object_t super;
     struct ibv_device *ib_dev;           /* the ib device */
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     struct ibv_comp_channel *ib_channel; /* Channel event for the device */
     opal_thread_t thread;                /* Progress thread */
     volatile bool progress;              /* Progress status */
+#endif
     opal_mutex_t device_lock;            /* device level lock */
     struct ibv_context *ib_dev_context;
     struct ibv_device_attr ib_dev_attr;
@@ -503,7 +505,9 @@ struct mca_btl_openib_reg_t {
 };
 typedef struct mca_btl_openib_reg_t mca_btl_openib_reg_t;
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
 extern void* mca_btl_openib_progress_thread(opal_object_t*);
+#endif
 
 /**
diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c
index d1a83528b5..91576083c0 100644
--- a/ompi/mca/btl/openib/btl_openib_component.c
+++ b/ompi/mca/btl/openib/btl_openib_component.c
@@ -868,7 +868,9 @@ static void device_construct(mca_btl_openib_device_t *device)
     device->ib_dev_context = NULL;
     device->ib_pd = NULL;
     device->mpool = NULL;
+#if OMPI_ENABLE_PROGRESS_THREADS
     device->ib_channel = NULL;
+#endif
     device->btls = 0;
     device->endpoints = NULL;
     device->device_btls = NULL;
@@ -903,6 +905,7 @@ static void device_destruct(mca_btl_openib_device_t *device)
     int i;
 
 #if OPAL_HAVE_THREADS
+#if OMPI_ENABLE_PROGRESS_THREADS
     if(device->progress) {
         device->progress = false;
         if (pthread_cancel(device->thread.t_handle)) {
@@ -916,6 +919,7 @@ static void device_destruct(mca_btl_openib_device_t *device)
         BTL_VERBOSE(("Failed to close comp_channel"));
         goto device_error;
     }
+#endif
     /* signaling to async_tread to stop poll for this device */
     if (mca_btl_openib_component.use_async_event_thread &&
         -1 != mca_btl_openib_component.async_pipe[1]) {
@@ -1624,7 +1628,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
             device->use_eager_rdma = values.use_eager_rdma;
         }
         /* Eager RDMA is not currently supported with progress threads */
-        if (device->use_eager_rdma) {
+        if (device->use_eager_rdma && OMPI_ENABLE_PROGRESS_THREADS) {
             device->use_eager_rdma = 0;
             opal_show_help("help-mpi-btl-openib.txt",
                            "eager RDMA and progress threads", true);
@@ -1644,6 +1648,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
         goto error;
     }
 
+#if OMPI_ENABLE_PROGRESS_THREADS
     device->ib_channel = ibv_create_comp_channel(device->ib_dev_context);
     if (NULL == device->ib_channel) {
         BTL_ERROR(("error creating channel for %s errno says %s",
@@ -1651,6 +1656,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
                    strerror(errno)));
         goto error;
     }
+#endif
 
     ret = OMPI_SUCCESS;
 
@@ -2018,9 +2024,11 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
     }
 
 error:
+#if OMPI_ENABLE_PROGRESS_THREADS
     if (device->ib_channel) {
         ibv_destroy_comp_channel(device->ib_channel);
     }
+#endif
     if (device->mpool) {
         mca_mpool_base_module_destroy(device->mpool);
     }
@@ -3438,6 +3446,7 @@ error:
     return count;
 }
 
+#if OMPI_ENABLE_PROGRESS_THREADS
 void* mca_btl_openib_progress_thread(opal_object_t* arg)
 {
     opal_thread_t* thread = (opal_thread_t*)arg;
@@ -3475,6 +3484,7 @@ void* mca_btl_openib_progress_thread(opal_object_t* arg)
 
     return PTHREAD_CANCELED;
 }
+#endif
 
 static int progress_one_device(mca_btl_openib_device_t *device)
 {
diff --git a/ompi/mca/btl/self/btl_self_endpoint.h b/ompi/mca/btl/self/btl_self_endpoint.h
index 31ab10299e..79c35b955b 100644
--- a/ompi/mca/btl/self/btl_self_endpoint.h
+++ b/ompi/mca/btl/self/btl_self_endpoint.h
@@ -21,7 +21,9 @@
 #ifndef MCA_BTL_SELF_ENDPOINT_H
 #define MCA_BTL_SELF_ENDPOINT_H
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
 #include "opal/mca/event/event.h"
+#endif
 
 /**
  * An abstraction that represents a connection to a endpoint process.
diff --git a/ompi/mca/btl/sm/btl_sm.c b/ompi/mca/btl/sm/btl_sm.c
index 41f03336b9..8dd1bb1974 100644
--- a/ompi/mca/btl/sm/btl_sm.c
+++ b/ompi/mca/btl/sm/btl_sm.c
@@ -424,7 +424,10 @@ static struct mca_btl_base_endpoint_t *
 create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
 {
     struct mca_btl_base_endpoint_t *ep;
+
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     char path[PATH_MAX];
+#endif
 
     ep = (struct mca_btl_base_endpoint_t*)
         malloc(sizeof(struct mca_btl_base_endpoint_t));
@@ -434,6 +437,7 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
     OBJ_CONSTRUCT(&ep->pending_sends, opal_list_t);
     OBJ_CONSTRUCT(&ep->endpoint_lock, opal_mutex_t);
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     sprintf(path, "%s"OPAL_PATH_SEP"sm_fifo.%lu",
             ompi_process_info.job_session_dir,
             (unsigned long)proc->proc_name.vpid);
@@ -444,6 +448,7 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
         free(ep);
         return NULL;
     }
+#endif
     return ep;
 }
diff --git a/ompi/mca/btl/sm/btl_sm.h b/ompi/mca/btl/sm/btl_sm.h
index e82bae6c63..ab0ed676ac 100644
--- a/ompi/mca/btl/sm/btl_sm.h
+++ b/ompi/mca/btl/sm/btl_sm.h
@@ -113,8 +113,10 @@ typedef struct sm_fifo_t sm_fifo_t;
  * Shared Memory resource managment
  */
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
 #define DATA (char)0
 #define DONE (char)1
+#endif
 
 typedef struct mca_btl_sm_mem_node_t {
     mca_mpool_base_module_t* sm_mpool; /**< shared memory pool */
@@ -167,9 +169,11 @@ struct mca_btl_sm_component_t {
     int mem_node;
     int num_mem_nodes;
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     char sm_fifo_path[PATH_MAX];   /**< path to fifo used to signal this process */
     int  sm_fifo_fd;               /**< file descriptor corresponding to opened fifo */
     opal_thread_t sm_fifo_thread;
+#endif
     struct mca_btl_sm_t **sm_btls;
     struct mca_btl_sm_frag_t **table;
     size_t sm_num_btls;
@@ -538,8 +542,11 @@ extern void mca_btl_sm_dump(struct mca_btl_base_module_t* btl,
  */
 int mca_btl_sm_ft_event(int state);
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
 void mca_btl_sm_component_event_thread(opal_object_t*);
+#endif
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
 #define MCA_BTL_SM_SIGNAL_PEER(peer) \
 { \
     unsigned char cmd = DATA; \
@@ -547,6 +554,9 @@ void mca_btl_sm_component_event_thread(opal_object_t*);
         opal_output(0, "mca_btl_sm_send: write fifo failed: errno=%d\n", errno); \
     } \
 }
+#else
+#define MCA_BTL_SM_SIGNAL_PEER(peer)
+#endif
 
 END_C_DECLS
diff --git a/ompi/mca/btl/sm/btl_sm_component.c b/ompi/mca/btl/sm/btl_sm_component.c
index 5cc56ecab4..6c1652c7cc 100644
--- a/ompi/mca/btl/sm/btl_sm_component.c
+++ b/ompi/mca/btl/sm/btl_sm_component.c
@@ -347,6 +347,7 @@ static int mca_btl_sm_component_close(void)
         OBJ_RELEASE(mca_btl_sm_component.sm_seg);
     }
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     /* close/cleanup fifo create for event notification */
     if(mca_btl_sm_component.sm_fifo_fd > 0) {
         /* write a done message down the pipe */
@@ -360,6 +361,7 @@ static int mca_btl_sm_component_close(void)
         close(mca_btl_sm_component.sm_fifo_fd);
         unlink(mca_btl_sm_component.sm_fifo_path);
     }
+#endif
 
 CLEANUP:
 
@@ -743,6 +745,7 @@ mca_btl_sm_component_init(int *num_btls,
         return NULL;
     }
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     /* create a named pipe to receive events  */
     sprintf( mca_btl_sm_component.sm_fifo_path,
              "%s"OPAL_PATH_SEP"sm_fifo.%lu", ompi_process_info.job_session_dir,
@@ -764,6 +767,7 @@ mca_btl_sm_component_init(int *num_btls,
     mca_btl_sm_component.sm_fifo_thread.t_run =
         (opal_thread_fn_t)mca_btl_sm_component_event_thread;
     opal_thread_start(&mca_btl_sm_component.sm_fifo_thread);
+#endif
 
     mca_btl_sm_component.sm_btls =
         (mca_btl_sm_t **)malloc(mca_btl_sm_component.sm_max_btls *
@@ -921,6 +925,7 @@ mca_btl_sm_component_init(int *num_btls,
  *  SM component progress.
  */
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
 void mca_btl_sm_component_event_thread(opal_object_t* thread)
 {
     while(1) {
@@ -936,6 +941,7 @@ void mca_btl_sm_component_event_thread(opal_object_t* thread)
         mca_btl_sm_component_progress();
     }
 }
+#endif
 
 void btl_sm_process_pending_sends(struct mca_btl_base_endpoint_t *ep)
 {
diff --git a/ompi/mca/btl/sm/btl_sm_endpoint.h b/ompi/mca/btl/sm/btl_sm_endpoint.h
index 485cbdb576..096abf4ccd 100644
--- a/ompi/mca/btl/sm/btl_sm_endpoint.h
+++ b/ompi/mca/btl/sm/btl_sm_endpoint.h
@@ -33,7 +33,9 @@ struct mca_btl_base_endpoint_t {
                         *   SMP specfic data structures. */
     int peer_smp_rank;  /**< My peer's SMP process rank.  Used for accessing
                         *   SMP specfic data structures. */
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     int fifo_fd;        /**< pipe/fifo used to signal endpoint that data is queued */
+#endif
     opal_list_t pending_sends; /**< pending data to send */
 
     /** lock for concurrent access to endpoint state */
diff --git a/ompi/mca/btl/smcuda/btl_smcuda.c b/ompi/mca/btl/smcuda/btl_smcuda.c
index 037cc6496a..0acf8b90e8 100644
--- a/ompi/mca/btl/smcuda/btl_smcuda.c
+++ b/ompi/mca/btl/smcuda/btl_smcuda.c
@@ -436,7 +436,10 @@ static struct mca_btl_base_endpoint_t *
 create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
 {
     struct mca_btl_base_endpoint_t *ep;
+
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     char path[PATH_MAX];
+#endif
 
     ep = (struct mca_btl_base_endpoint_t*)
         malloc(sizeof(struct mca_btl_base_endpoint_t));
@@ -446,6 +449,7 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
     OBJ_CONSTRUCT(&ep->pending_sends, opal_list_t);
     OBJ_CONSTRUCT(&ep->endpoint_lock, opal_mutex_t);
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     sprintf(path, "%s"OPAL_PATH_SEP"sm_fifo.%lu",
             ompi_process_info.job_session_dir,
             (unsigned long)proc->proc_name.vpid);
@@ -456,6 +460,7 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
         free(ep);
         return NULL;
     }
+#endif
 #if OPAL_CUDA_SUPPORT
     {
         mca_mpool_base_resources_t resources; /* unused, but needed */
diff --git a/ompi/mca/btl/smcuda/btl_smcuda.h b/ompi/mca/btl/smcuda/btl_smcuda.h
index 8f3125612e..6ab1ecdd18 100644
--- a/ompi/mca/btl/smcuda/btl_smcuda.h
+++ b/ompi/mca/btl/smcuda/btl_smcuda.h
@@ -110,8 +110,10 @@ typedef struct sm_fifo_t sm_fifo_t;
  * Shared Memory resource managment
  */
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
 #define DATA (char)0
 #define DONE (char)1
+#endif
 
 typedef struct mca_btl_smcuda_mem_node_t {
     mca_mpool_base_module_t* sm_mpool; /**< shared memory pool */
@@ -164,9 +166,11 @@ struct mca_btl_smcuda_component_t {
     int mem_node;
     int num_mem_nodes;
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     char sm_fifo_path[PATH_MAX];   /**< path to fifo used to signal this process */
     int  sm_fifo_fd;               /**< file descriptor corresponding to opened fifo */
     opal_thread_t sm_fifo_thread;
+#endif
     struct mca_btl_smcuda_t **sm_btls;
     struct mca_btl_smcuda_frag_t **table;
     size_t sm_num_btls;
@@ -529,8 +533,11 @@ extern void mca_btl_smcuda_dump(struct mca_btl_base_module_t* btl,
  */
 int mca_btl_smcuda_ft_event(int state);
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
 void mca_btl_smcuda_component_event_thread(opal_object_t*);
+#endif
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
 #define MCA_BTL_SMCUDA_SIGNAL_PEER(peer) \
 { \
     unsigned char cmd = DATA; \
@@ -538,6 +545,9 @@ void mca_btl_smcuda_component_event_thread(opal_object_t*);
         opal_output(0, "mca_btl_smcuda_send: write fifo failed: errno=%d\n", errno); \
     } \
 }
+#else
+#define MCA_BTL_SMCUDA_SIGNAL_PEER(peer)
+#endif
 
 END_C_DECLS
diff --git a/ompi/mca/btl/smcuda/btl_smcuda_component.c b/ompi/mca/btl/smcuda/btl_smcuda_component.c
index 8c7eb1684c..9bf2508b3a 100644
--- a/ompi/mca/btl/smcuda/btl_smcuda_component.c
+++ b/ompi/mca/btl/smcuda/btl_smcuda_component.c
@@ -278,6 +278,7 @@ static int mca_btl_smcuda_component_close(void)
         OBJ_RELEASE(mca_btl_smcuda_component.sm_seg);
     }
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     /* close/cleanup fifo create for event notification */
     if(mca_btl_smcuda_component.sm_fifo_fd > 0) {
         /* write a done message down the pipe */
@@ -291,6 +292,7 @@ static int mca_btl_smcuda_component_close(void)
         close(mca_btl_smcuda_component.sm_fifo_fd);
         unlink(mca_btl_smcuda_component.sm_fifo_path);
     }
+#endif
 
 CLEANUP:
 
@@ -881,6 +883,7 @@ mca_btl_smcuda_component_init(int *num_btls,
         return NULL;
     }
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     /* create a named pipe to receive events  */
     sprintf( mca_btl_smcuda_component.sm_fifo_path,
              "%s"OPAL_PATH_SEP"sm_fifo.%lu", ompi_process_info.job_session_dir,
@@ -902,6 +905,7 @@ mca_btl_smcuda_component_init(int *num_btls,
     mca_btl_smcuda_component.sm_fifo_thread.t_run =
         (opal_thread_fn_t)mca_btl_smcuda_component_event_thread;
     opal_thread_start(&mca_btl_smcuda_component.sm_fifo_thread);
+#endif
 
     mca_btl_smcuda_component.sm_btls =
         (mca_btl_smcuda_t **)malloc(mca_btl_smcuda_component.sm_max_btls *
@@ -947,6 +951,7 @@ mca_btl_smcuda_component_init(int *num_btls,
  *  SM component progress.
  */
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
 void mca_btl_smcuda_component_event_thread(opal_object_t* thread)
 {
     while(1) {
@@ -962,6 +967,7 @@ void mca_btl_smcuda_component_event_thread(opal_object_t* thread)
         mca_btl_smcuda_component_progress();
     }
 }
+#endif
 
 void btl_smcuda_process_pending_sends(struct mca_btl_base_endpoint_t *ep)
 {
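Note on the sm/smcuda hunks above: when progress threads are enabled, each process opens a named FIFO (sm_fifo.<vpid>) under the job session directory. MCA_BTL_SM_SIGNAL_PEER wakes the peer's event thread by writing one DATA byte into that peer's FIFO, and component_close() writes DONE so the thread exits. A runnable C sketch of the same wake-up protocol; it uses an anonymous pipe and single-process names (signal_peer, event_loop) purely for illustration:

    #include <errno.h>
    #include <stdio.h>
    #include <unistd.h>

    #define DATA (char)0          /* same values the btl_sm.h hunk guards */
    #define DONE (char)1

    static int fifo_fds[2];       /* anonymous pipe standing in for the FIFO */

    static void signal_peer(void)               /* cf. MCA_BTL_SM_SIGNAL_PEER */
    {
        unsigned char cmd = DATA;
        if (write(fifo_fds[1], &cmd, sizeof(cmd)) != sizeof(cmd)) {
            fprintf(stderr, "write fifo failed: errno=%d\n", errno);
        }
    }

    static void event_loop(void)     /* cf. mca_btl_sm_component_event_thread */
    {
        unsigned char cmd;
        for (;;) {
            if (read(fifo_fds[0], &cmd, sizeof(cmd)) != sizeof(cmd)) {
                continue;            /* short/interrupted read: retry */
            }
            if (DONE == cmd) {
                break;               /* component close wrote DONE */
            }
            /* DATA: a peer queued a fragment; run component_progress() here */
        }
    }

    int main(void)
    {
        unsigned char done = DONE;
        if (pipe(fifo_fds) != 0) {
            return 1;
        }
        signal_peer();                                    /* one DATA wake-up */
        (void)write(fifo_fds[1], &done, sizeof(done));    /* then shut down */
        event_loop();
        return 0;
    }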
diff --git a/ompi/mca/btl/smcuda/btl_smcuda_endpoint.h b/ompi/mca/btl/smcuda/btl_smcuda_endpoint.h
index 300998ce78..16feeec337 100644
--- a/ompi/mca/btl/smcuda/btl_smcuda_endpoint.h
+++ b/ompi/mca/btl/smcuda/btl_smcuda_endpoint.h
@@ -10,7 +10,7 @@
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
  * Copyright (c) 2006-2007 Voltaire. All rights reserved.
- * Copyright (c) 2012-2014 NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2012      NVIDIA Corporation.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -37,7 +37,9 @@ struct mca_btl_base_endpoint_t {
 #if OPAL_CUDA_SUPPORT
     mca_mpool_base_module_t *mpool;    /**< mpool for remotely registered memory */
 #endif /* OPAL_CUDA_SUPPORT */
+#if OMPI_ENABLE_PROGRESS_THREADS == 1
     int fifo_fd;               /**< pipe/fifo used to signal endpoint that data is queued */
+#endif
     opal_list_t pending_sends; /**< pending data to send */
 
     /** lock for concurrent access to endpoint state */
diff --git a/ompi/mca/io/base/io_base_file_select.c b/ompi/mca/io/base/io_base_file_select.c
index 6f84f42a26..50ad2f80ad 100644
--- a/ompi/mca/io/base/io_base_file_select.c
+++ b/ompi/mca/io/base/io_base_file_select.c
@@ -213,19 +213,23 @@ int mca_io_base_file_select(ompi_file_t *file,
         }
 
         if (OMPI_SUCCESS !=
-            (ret = mca_fs_base_find_available(1, OMPI_ENABLE_THREAD_MULTIPLE))) {
+            (ret = mca_fs_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
+                                              OMPI_ENABLE_THREAD_MULTIPLE))) {
             return err;
         }
         if (OMPI_SUCCESS !=
-            (ret = mca_fcoll_base_find_available(1, OMPI_ENABLE_THREAD_MULTIPLE))) {
+            (ret = mca_fcoll_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
+                                                 OMPI_ENABLE_THREAD_MULTIPLE))) {
            return err;
         }
         if (OMPI_SUCCESS !=
-            (ret = mca_fbtl_base_find_available(1, OMPI_ENABLE_THREAD_MULTIPLE))) {
+            (ret = mca_fbtl_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
+                                                OMPI_ENABLE_THREAD_MULTIPLE))) {
            return err;
         }
         if (OMPI_SUCCESS !=
-            (ret = mca_sharedfp_base_find_available(1, OMPI_ENABLE_THREAD_MULTIPLE))) {
+            (ret = mca_sharedfp_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
+                                                    OMPI_ENABLE_THREAD_MULTIPLE))) {
            return err;
         }
     }
diff --git a/ompi/mca/io/base/io_base_frame.c b/ompi/mca/io/base/io_base_frame.c
index 6e93ed6a36..0b41ec138e 100644
--- a/ompi/mca/io/base/io_base_frame.c
+++ b/ompi/mca/io/base/io_base_frame.c
@@ -38,7 +38,7 @@ static int mca_io_base_open(mca_base_open_flag_t flags)
         return ret;
     }
 
-    return mca_io_base_find_available(1, OMPI_ENABLE_THREAD_MULTIPLE);
+    return mca_io_base_find_available(OMPI_ENABLE_PROGRESS_THREADS, OMPI_ENABLE_THREAD_MULTIPLE);
 }
 
 MCA_BASE_FRAMEWORK_DECLARE(ompi, io, "I/O", NULL, mca_io_base_open, NULL,
diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c
index a5d65ccc67..3f7fc05639 100644
--- a/ompi/mca/osc/rdma/osc_rdma_active_target.c
+++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c
@@ -442,6 +442,10 @@ ompi_osc_rdma_test(ompi_win_t *win,
     ompi_group_t *group;
     int ret = OMPI_SUCCESS;
 
+#if !OMPI_ENABLE_PROGRESS_THREADS
+    opal_progress();
+#endif
+
     if (NULL == module->pw_group) {
         return OMPI_ERR_RMA_SYNC;
     }
diff --git a/ompi/mca/pml/v/pml_v_component.c b/ompi/mca/pml/v/pml_v_component.c
index b8debfde8c..63c4af9b47 100644
--- a/ompi/mca/pml/v/pml_v_component.c
+++ b/ompi/mca/pml/v/pml_v_component.c
@@ -54,7 +54,7 @@ mca_pml_base_component_2_0_0_t mca_pml_v_component =
     mca_pml_v_component_finalize  /* component finalize */
 };
 
-static bool pml_v_enable_progress_treads = 1;
+static bool pml_v_enable_progress_treads = OMPI_ENABLE_PROGRESS_THREADS;
 static bool pml_v_enable_mpi_thread_multiple = OMPI_ENABLE_THREAD_MULTIPLE;
 
 static char *ompi_pml_vprotocol_include_list;
diff --git a/ompi/mca/topo/base/topo_base_lazy_init.c b/ompi/mca/topo/base/topo_base_lazy_init.c
index c538f646c5..84d3da8e8f 100644
--- a/ompi/mca/topo/base/topo_base_lazy_init.c
+++ b/ompi/mca/topo/base/topo_base_lazy_init.c
@@ -38,7 +38,8 @@ int mca_topo_base_lazy_init(void)
     if (0 == opal_list_get_size(&ompi_topo_base_framework.framework_components)) {
         ompi_topo_base_framework.framework_open(MCA_BASE_OPEN_FIND_COMPONENTS);
         if (OMPI_SUCCESS !=
-            (err = mca_topo_base_find_available(1, OMPI_ENABLE_THREAD_MULTIPLE))) {
+            (err = mca_topo_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
+                                                OMPI_ENABLE_THREAD_MULTIPLE))) {
             return err;
         }
     }
diff --git a/ompi/mpi/c/request_get_status.c b/ompi/mpi/c/request_get_status.c
index 5ed2027fa6..2e6e97fed1 100644
--- a/ompi/mpi/c/request_get_status.c
+++ b/ompi/mpi/c/request_get_status.c
@@ -44,6 +44,10 @@ static const char FUNC_NAME[] = "MPI_Request_get_status";
 int MPI_Request_get_status(MPI_Request request, int *flag,
                            MPI_Status *status)
 {
+#if OMPI_ENABLE_PROGRESS_THREADS == 0
+    int do_it_once = 0;
+#endif
+
     MEMCHECKER(
         memchecker_request(&request);
     );
@@ -60,6 +64,9 @@ int MPI_Request_get_status(MPI_Request request, int *flag,
         }
     }
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 0
+ recheck_request_status:
+#endif
     opal_atomic_mb();
     if( (request == MPI_REQUEST_NULL) || (request->req_state == OMPI_REQUEST_INACTIVE) ) {
         *flag = true;
@@ -81,6 +88,16 @@ int MPI_Request_get_status(MPI_Request request, int *flag,
         }
         return MPI_SUCCESS;
     }
+#if OMPI_ENABLE_PROGRESS_THREADS == 0
+    if( 0 == do_it_once ) {
+        /* If we run the opal_progress then check the status of the
+           request before leaving. We will call the opal_progress only
+           once per call. */
+        opal_progress();
+        do_it_once++;
+        goto recheck_request_status;
+    }
+#endif
     *flag = false;
     return MPI_SUCCESS;
 }
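Note on the request_get_status hunk above (the req_test.c hunks that follow use the same idiom): in a build without progress threads, outstanding communication only advances when somebody calls opal_progress(), so the non-blocking status checks drive the progress engine at most once per call and then re-check before returning false. A runnable C sketch of that retry-once shape, with fake_progress as a hypothetical stand-in for opal_progress():

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the real request state and progress engine. */
    static bool req_complete = false;
    static void fake_progress(void) { req_complete = true; }

    /* The do_it_once pattern: re-check after driving progress exactly once,
       so a call never spins but still notices freshly completed requests. */
    static bool test_once(void)
    {
        int do_it_once = 0;

    recheck_request_status:
        if (req_complete) {
            return true;
        }
        if (0 == do_it_once) {
            fake_progress();         /* opal_progress() in the real code */
            do_it_once++;
            goto recheck_request_status;
        }
        return false;
    }

    int main(void)
    {
        printf("completed: %s\n", test_once() ? "yes" : "no");
        return 0;
    }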
diff --git a/ompi/request/req_test.c b/ompi/request/req_test.c
index 1b9256148b..632f022bcb 100644
--- a/ompi/request/req_test.c
+++ b/ompi/request/req_test.c
@@ -32,7 +32,11 @@ int ompi_request_default_test( ompi_request_t ** rptr,
                                ompi_status_public_t * status )
 {
     ompi_request_t *request = *rptr;
+#if OMPI_ENABLE_PROGRESS_THREADS == 0
+    int do_it_once = 0;
+ recheck_request_status:
+#endif
     opal_atomic_mb();
     if( request->req_state == OMPI_REQUEST_INACTIVE ) {
         *completed = true;
@@ -77,6 +81,17 @@ int ompi_request_default_test( ompi_request_t ** rptr,
            later! */
         return ompi_request_free(rptr);
     }
+#if OMPI_ENABLE_PROGRESS_THREADS == 0
+    if( 0 == do_it_once ) {
+        /**
+         * If we run the opal_progress then check the status of the request before
+         * leaving. We will call the opal_progress only once per call.
+         */
+        opal_progress();
+        do_it_once++;
+        goto recheck_request_status;
+    }
+#endif
     *completed = false;
     return OMPI_SUCCESS;
 }
@@ -148,6 +163,9 @@ int ompi_request_default_test_any(
     *index = MPI_UNDEFINED;
     if(num_requests_null_inactive != count) {
         *completed = false;
+#if OMPI_ENABLE_PROGRESS_THREADS == 0
+        opal_progress();
+#endif
     } else {
         *completed = true;
         if (MPI_STATUS_IGNORE != status) {
@@ -183,6 +201,9 @@ int ompi_request_default_test_all(
 
     if (num_completed != count) {
         *completed = false;
+#if OMPI_ENABLE_PROGRESS_THREADS == 0
+        opal_progress();
+#endif
         return OMPI_SUCCESS;
     }
@@ -291,6 +312,9 @@ int ompi_request_default_test_some(
     *outcount = num_requests_done;
 
     if (num_requests_done == 0) {
+#if OMPI_ENABLE_PROGRESS_THREADS == 0
+        opal_progress();
+#endif
         return OMPI_SUCCESS;
     }
diff --git a/ompi/request/req_wait.c b/ompi/request/req_wait.c
index 2f8c68851b..a07532576e 100644
--- a/ompi/request/req_wait.c
+++ b/ompi/request/req_wait.c
@@ -29,7 +29,9 @@
 #include "ompi/mca/crcp/crcp.h"
 #include "ompi/mca/pml/base/pml_base_request.h"
 
+#if OMPI_ENABLE_PROGRESS_THREADS
 static int ompi_progress_thread_count=0;
+#endif
 
 int ompi_request_default_wait(
     ompi_request_t ** req_ptr,
@@ -87,13 +89,16 @@ int ompi_request_default_wait_any(
     int *index,
     ompi_status_public_t * status)
 {
+#if OMPI_ENABLE_PROGRESS_THREADS
     int c;
+#endif
     size_t i=0, num_requests_null_inactive=0;
     int rc = OMPI_SUCCESS;
     int completed = -1;
     ompi_request_t **rptr=NULL;
     ompi_request_t *request=NULL;
 
+#if OMPI_ENABLE_PROGRESS_THREADS
     /* poll for completion */
     OPAL_THREAD_ADD32(&ompi_progress_thread_count,1);
     for (c = 0; completed < 0 && c < opal_progress_spin_count; c++) {
@@ -122,6 +127,7 @@ int ompi_request_default_wait_any(
         opal_progress();
     }
     OPAL_THREAD_ADD32(&ompi_progress_thread_count,-1);
+#endif
 
     /* give up and sleep until completion */
     OPAL_THREAD_LOCK(&ompi_request_lock);
@@ -159,7 +165,10 @@ int ompi_request_default_wait_any(
     ompi_request_waiting--;
     OPAL_THREAD_UNLOCK(&ompi_request_lock);
 
+#if OMPI_ENABLE_PROGRESS_THREADS
 finished:
+#endif  /* OMPI_ENABLE_PROGRESS_THREADS */
+
     if(num_requests_null_inactive == count) {
         *index = MPI_UNDEFINED;
         if (MPI_STATUS_IGNORE != status) {
@@ -438,7 +447,9 @@ int ompi_request_default_wait_some(
     int * indices,
     ompi_status_public_t * statuses)
 {
+#if OMPI_ENABLE_PROGRESS_THREADS
     int c;
+#endif
     size_t i, num_requests_null_inactive=0, num_requests_done=0;
     int rc = MPI_SUCCESS;
     ompi_request_t **rptr=NULL;
@@ -449,6 +460,7 @@ int ompi_request_default_wait_some(
         indices[i] = 0;
     }
 
+#if OMPI_ENABLE_PROGRESS_THREADS
     /* poll for completion */
     OPAL_THREAD_ADD32(&ompi_progress_thread_count,1);
     for (c = 0; c < opal_progress_spin_count; c++) {
@@ -478,6 +490,7 @@ int ompi_request_default_wait_some(
         opal_progress();
     }
     OPAL_THREAD_ADD32(&ompi_progress_thread_count,-1);
+#endif
 
     /*
      * We only get here when outcount still is 0.
@@ -512,7 +525,10 @@ int ompi_request_default_wait_some(
     ompi_request_waiting--;
     OPAL_THREAD_UNLOCK(&ompi_request_lock);
 
+#if OMPI_ENABLE_PROGRESS_THREADS
 finished:
+#endif  /* OMPI_ENABLE_PROGRESS_THREADS */
+
 #if OPAL_ENABLE_FT_CR == 1
     if( opal_cr_is_enabled) {
         rptr = requests;
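Note on the req_wait.c hunks above: with a progress thread available, the wait paths first spin on opal_progress() for up to opal_progress_spin_count iterations (tracking active pollers in ompi_progress_thread_count) and only then fall back to sleeping on the request condition variable; without one, the polling block compiles out entirely. A runnable pthreads sketch of that spin-then-block shape; all names are hypothetical, and progress() here completes the request on the first spin so the example terminates (with the macro at 0 the caller would block until another thread signals, which is exactly why the patch adds opal_progress() calls to the test paths):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    #ifndef OMPI_ENABLE_PROGRESS_THREADS
    #define OMPI_ENABLE_PROGRESS_THREADS 1   /* normally set by configure */
    #endif
    #define SPIN_COUNT 10000                 /* cf. opal_progress_spin_count */

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
    static volatile bool complete = false;

    /* Stand-in for opal_progress(); completes the "request" immediately. */
    static void progress(void) { complete = true; }

    static void wait_for_completion(void)
    {
    #if OMPI_ENABLE_PROGRESS_THREADS
        /* Cheap path first: bounded spin, driving progress, before sleeping. */
        for (int c = 0; !complete && c < SPIN_COUNT; c++) {
            progress();
        }
        if (complete) {
            return;
        }
    #endif
        /* Give up and sleep until another thread signals completion. */
        pthread_mutex_lock(&lock);
        while (!complete) {
            pthread_cond_wait(&cond, &lock);
        }
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        wait_for_completion();
        puts("request complete");
        return 0;
    }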
diff --git a/ompi/request/request.h b/ompi/request/request.h
index 3ea627f3c5..20737ae0ae 100644
--- a/ompi/request/request.h
+++ b/ompi/request/request.h
@@ -370,9 +370,11 @@ static inline int ompi_request_free(ompi_request_t** request)
 static inline void ompi_request_wait_completion(ompi_request_t *req)
 {
     if(false == req->req_complete) {
+#if OMPI_ENABLE_PROGRESS_THREADS
         if(opal_progress_spin(&req->req_complete)) {
             return;
         }
+#endif
         OPAL_THREAD_LOCK(&ompi_request_lock);
         ompi_request_waiting++;
         while(false == req->req_complete) {
diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c
index bf670e2396..72b326ebe7 100644
--- a/ompi/runtime/ompi_mpi_finalize.c
+++ b/ompi/runtime/ompi_mpi_finalize.c
@@ -139,6 +139,10 @@ int ompi_mpi_finalize(void)
      */
     (void)mca_pml_base_bsend_detach(NULL, NULL);
 
+#if OMPI_ENABLE_PROGRESS_THREADS == 0
+    opal_progress_set_event_flag(OPAL_EVLOOP_ONCE | OPAL_EVLOOP_NONBLOCK);
+#endif
+
     /* Redo ORTE calling opal_progress_event_users_increment() during
        MPI lifetime, to get better latency when not using TCP */
     opal_progress_event_users_increment();
diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c
index e0400221db..1db2a160ff 100644
--- a/ompi/runtime/ompi_mpi_init.c
+++ b/ompi/runtime/ompi_mpi_init.c
@@ -543,7 +543,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
         goto error;
     }
     if (OMPI_SUCCESS !=
-        (ret = ompi_op_base_find_available(1, OMPI_ENABLE_THREAD_MULTIPLE))) {
+        (ret = ompi_op_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
+                                           OMPI_ENABLE_THREAD_MULTIPLE))) {
         error = "ompi_op_base_find_available() failed";
         goto error;
     }
@@ -600,13 +601,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
     /* Select which MPI components to use */
 
     if (OMPI_SUCCESS !=
-        (ret = mca_mpool_base_init(1, OMPI_ENABLE_THREAD_MULTIPLE))) {
+        (ret = mca_mpool_base_init(OMPI_ENABLE_PROGRESS_THREADS,
+                                   OMPI_ENABLE_THREAD_MULTIPLE))) {
         error = "mca_mpool_base_init() failed";
         goto error;
     }
 
     if (OMPI_SUCCESS !=
-        (ret = mca_pml_base_select(1, OMPI_ENABLE_THREAD_MULTIPLE))) {
+        (ret = mca_pml_base_select(OMPI_ENABLE_PROGRESS_THREADS,
+                                   OMPI_ENABLE_THREAD_MULTIPLE))) {
         error = "mca_pml_base_select() failed";
         goto error;
     }
@@ -655,13 +658,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
     }
 
     if (OMPI_SUCCESS !=
-        (ret = mca_coll_base_find_available(1, OMPI_ENABLE_THREAD_MULTIPLE))) {
+        (ret = mca_coll_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
+                                            OMPI_ENABLE_THREAD_MULTIPLE))) {
         error = "mca_coll_base_find_available() failed";
         goto error;
     }
 
     if (OMPI_SUCCESS !=
-        (ret = ompi_osc_base_find_available(1, OMPI_ENABLE_THREAD_MULTIPLE))) {
+        (ret = ompi_osc_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
+                                            OMPI_ENABLE_THREAD_MULTIPLE))) {
         error = "ompi_osc_base_find_available() failed";
         goto error;
     }
@@ -752,7 +757,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
     /* If thread support was enabled, then setup OPAL to allow for them.
      */
-    if ((*provided != MPI_THREAD_SINGLE)) {
+    if ((OMPI_ENABLE_PROGRESS_THREADS == 1) ||
+        (*provided != MPI_THREAD_SINGLE)) {
         opal_set_using_threads(true);
     }
@@ -832,7 +838,17 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
         gettimeofday(&ompistart, NULL);
     }
 
-    /* wire up the mpi interface, if requested.  Do this after the
+#if OMPI_ENABLE_PROGRESS_THREADS == 0
+    /* Start setting up the event engine for MPI operations.  Don't
+       block in the event library, so that communications don't take
+       forever between procs in the dynamic code.  This will increase
+       CPU utilization for the remainder of MPI_INIT when we are
+       blocking on RTE-level events, but may greatly reduce non-TCP
+       latency. */
+    opal_progress_set_event_flag(OPAL_EVLOOP_NONBLOCK);
+#endif
+
+    /* wire up the mpi interface, if requested.  Do this after the
        non-block switch for non-TCP performance.  Do before the
        polling change as anyone with a complex wire-up is going to be
        using the oob. */
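Note on the init/finalize hunks above: every MCA framework's select/find_available entry point takes the pair (enable_progress_threads, enable_mpi_threads), and the patch threads the compile-time constant through instead of a hard-coded 1. A runnable C sketch of that convention; framework_find_available is a simplified stand-in, not the real signature:

    #include <stdbool.h>

    #ifndef OMPI_ENABLE_PROGRESS_THREADS
    #define OMPI_ENABLE_PROGRESS_THREADS 0    /* normally from configure */
    #endif
    #ifndef OMPI_ENABLE_THREAD_MULTIPLE
    #define OMPI_ENABLE_THREAD_MULTIPLE 0
    #endif

    /* Simplified stand-in for the per-framework entry points such as
       mca_pml_base_select() and ompi_op_base_find_available(). */
    static int framework_find_available(bool enable_progress_threads,
                                        bool enable_mpi_threads)
    {
        /* Components that cannot operate under the requested threading
           model would disqualify themselves here. */
        (void)enable_progress_threads;
        (void)enable_mpi_threads;
        return 0;                             /* OMPI_SUCCESS */
    }

    int main(void)
    {
        /* Before this patch: framework_find_available(1, ...);
           after it, the first flag reflects the actual build settings. */
        return framework_find_available(OMPI_ENABLE_PROGRESS_THREADS,
                                        OMPI_ENABLE_THREAD_MULTIPLE);
    }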
"yes" : "no"); #endif } else { threads = strdup("no"); diff --git a/orte/mca/oob/ud/oob_ud_event.c b/orte/mca/oob/ud/oob_ud_event.c index 7c5e6fb829..27084b008f 100644 --- a/orte/mca/oob/ud/oob_ud_event.c +++ b/orte/mca/oob/ud/oob_ud_event.c @@ -50,6 +50,9 @@ static bool event_started = false; void mca_oob_ud_event_start_monitor (mca_oob_ud_device_t *device) { if (!event_started) { +#if !ORTE_ENABLE_PROGRESS_THREADS + opal_progress_event_users_increment (); +#endif opal_event_set (orte_event_base, &device->event, device->ib_channel->fd, OPAL_EV_READ, mca_oob_ud_event_dispatch, (void *) device); opal_event_add (&device->event, NULL); @@ -60,6 +63,9 @@ void mca_oob_ud_event_start_monitor (mca_oob_ud_device_t *device) void mca_oob_ud_event_stop_monitor (mca_oob_ud_device_t *device) { if (event_started) { +#if !ORTE_ENABLE_PROGRESS_THREADS + opal_progress_event_users_decrement (); +#endif opal_event_del (&device->event); mca_oob_ud_stop_events (device); event_started = false; diff --git a/oshmem/request/request.h b/oshmem/request/request.h index 40a7ace4db..41580eb591 100644 --- a/oshmem/request/request.h +++ b/oshmem/request/request.h @@ -395,9 +395,11 @@ static inline void oshmem_request_wait_any_completion(void) static inline void oshmem_request_wait_completion(oshmem_request_t *req) { if (false == req->req_complete) { +#if OMPI_ENABLE_PROGRESS_THREADS if(opal_progress_spin(&req->req_complete)) { return; } +#endif OPAL_THREAD_LOCK(&oshmem_request_lock); oshmem_request_waiting++; while (false == req->req_complete) { diff --git a/oshmem/runtime/oshmem_shmem_init.c b/oshmem/runtime/oshmem_shmem_init.c index b2a1e318e5..7be7978bfd 100644 --- a/oshmem/runtime/oshmem_shmem_init.c +++ b/oshmem/runtime/oshmem_shmem_init.c @@ -346,14 +346,16 @@ static int _shmem_init(int argc, char **argv, int requested, int *provided) } if (OSHMEM_SUCCESS - != (ret = mca_spml_base_select(1, OMPI_ENABLE_THREAD_MULTIPLE))) { + != (ret = mca_spml_base_select(OMPI_ENABLE_PROGRESS_THREADS, + OMPI_ENABLE_THREAD_MULTIPLE))) { error = "mca_spml_base_select() failed"; goto error; } if (OSHMEM_SUCCESS != (ret = - mca_scoll_base_find_available(1, OMPI_ENABLE_THREAD_MULTIPLE))) { + mca_scoll_base_find_available(OMPI_ENABLE_PROGRESS_THREADS, + OMPI_ENABLE_THREAD_MULTIPLE))) { error = "mca_scoll_base_find_available() failed"; goto error; } @@ -428,7 +430,8 @@ static int _shmem_init(int argc, char **argv, int requested, int *provided) if (OSHMEM_SUCCESS != (ret = - mca_atomic_base_find_available(1, OMPI_ENABLE_THREAD_MULTIPLE))) { + mca_atomic_base_find_available(OMPI_ENABLE_PROGRESS_THREADS, + OMPI_ENABLE_THREAD_MULTIPLE))) { error = "mca_atomic_base_find_available() failed"; goto error; } diff --git a/oshmem/tools/oshmem_info/param.c b/oshmem/tools/oshmem_info/param.c index e70886eaec..9e106be2da 100644 --- a/oshmem/tools/oshmem_info/param.c +++ b/oshmem/tools/oshmem_info/param.c @@ -260,12 +260,14 @@ void oshmem_info_do_config(bool want_all) (void)asprintf(&threads, "%s (MPI_THREAD_MULTIPLE: %s, OPAL support: %s, OMPI progress: %s, ORTE progress: yes, Event lib: yes)", (OPAL_HAVE_POSIX_THREADS ? "posix" : "type unknown"), /* "type unknown" can presumably never happen */ OMPI_ENABLE_THREAD_MULTIPLE ? "yes" : "no", - OPAL_ENABLE_MULTI_THREADS ? "yes" : "no", "yes"); + OPAL_ENABLE_MULTI_THREADS ? "yes" : "no", + OMPI_ENABLE_PROGRESS_THREADS ? "yes" : "no"); #else (void)asprintf(&threads, "%s (MPI_THREAD_MULTIPLE: %s, OPAL support: %s, OMPI progress: %s, Event lib: yes)", (OPAL_HAVE_POSIX_THREADS ? 
"posix" : "type unknown"), /* "type unknown" can presumably never happen */ OMPI_ENABLE_THREAD_MULTIPLE ? "yes" : "no", - OPAL_ENABLE_MULTI_THREADS ? "yes" : "no", "yes"); + OPAL_ENABLE_MULTI_THREADS ? "yes" : "no", + OMPI_ENABLE_PROGRESS_THREADS ? "yes" : "no"); #endif } else { threads = strdup("no");