From c2cd717f823d1184287e759987b885a2d95d0613 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 9 Nov 2016 18:10:11 -0500 Subject: [PATCH] Don't refcount the predefined datatypes. Signed-off-by: George Bosilca --- ompi/datatype/ompi_datatype.h | 21 +++++++ ompi/mca/osc/base/osc_base_obj_convert.h | 2 +- ompi/mca/osc/pt2pt/osc_pt2pt_comm.c | 16 +++--- ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c | 26 ++++----- ompi/mca/pml/base/pml_base_recvreq.h | 4 +- ompi/mca/pml/base/pml_base_sendreq.h | 4 +- ompi/mca/pml/cm/pml_cm_recvreq.h | 12 ++-- ompi/mca/pml/cm/pml_cm_sendreq.h | 12 ++-- ompi/mca/pml/ob1/pml_ob1.c | 2 +- ompi/mca/pml/ob1/pml_ob1.h | 6 ++ ompi/mca/pml/ob1/pml_ob1_progress.c | 70 +++++++++++++++--------- ompi/mca/pml/ob1/pml_ob1_sendreq.h | 1 + ompi/mca/pml/ucx/pml_ucx_datatype.c | 6 +- ompi/mca/pml/yalla/pml_yalla_datatype.c | 4 +- ompi/mca/pml/yalla/pml_yalla_datatype.h | 2 +- opal/include/opal/constants.h | 2 +- opal/mca/common/cuda/common_cuda.c | 2 +- 17 files changed, 119 insertions(+), 73 deletions(-) diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h index ff6a1b0b2f..15284f1fd3 100644 --- a/ompi/datatype/ompi_datatype.h +++ b/ompi/datatype/ompi_datatype.h @@ -36,6 +36,7 @@ #include "ompi/constants.h" #include "opal/datatype/opal_convertor.h" +#include "opal/util/output.h" #include "mpi.h" BEGIN_C_DECLS @@ -374,5 +375,25 @@ OMPI_DECLSPEC int ompi_datatype_unpack_external( const char datarep[], const voi OMPI_DECLSPEC int ompi_datatype_pack_external_size( const char datarep[], int incount, ompi_datatype_t *datatype, MPI_Aint *size); +#define OMPI_DATATYPE_RETAIN(ddt) \ + { \ + if( !ompi_datatype_is_predefined((ddt)) ) { \ + OBJ_RETAIN((ddt)); \ + OPAL_OUTPUT_VERBOSE((0, 100, "Datatype %p [%s] refcount %d in file %s:%d\n", \ + (void*)(ddt), (ddt)->name, (ddt)->super.super.obj_reference_count, \ + __FILE__, __LINE__)); \ + } \ + } + +#define OMPI_DATATYPE_RELEASE(ddt) \ + { \ + if( !ompi_datatype_is_predefined((ddt)) ) { \ + OPAL_OUTPUT_VERBOSE((0, 100, "Datatype %p [%s] refcount %d in file %s:%d\n", \ + (void*)(ddt), (ddt)->name, (ddt)->super.super.obj_reference_count, \ + __func__, __LINE__)); \ + OBJ_RELEASE((ddt)); \ + } \ + } + END_C_DECLS #endif /* OMPI_DATATYPE_H_HAS_BEEN_INCLUDED */ diff --git a/ompi/mca/osc/base/osc_base_obj_convert.h b/ompi/mca/osc/base/osc_base_obj_convert.h index c6514bbbe0..c470959450 100644 --- a/ompi/mca/osc/base/osc_base_obj_convert.h +++ b/ompi/mca/osc/base/osc_base_obj_convert.h @@ -54,7 +54,7 @@ ompi_osc_base_datatype_create(ompi_proc_t *remote_proc, void **payload) struct ompi_datatype_t *datatype = ompi_datatype_create_from_packed_description(payload, remote_proc); if (NULL == datatype) return NULL; - if (ompi_datatype_is_predefined(datatype)) OBJ_RETAIN(datatype); + OMPI_DATATYPE_RETAIN(datatype); return datatype; } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c index 4d16f1a8ce..937dc9caf2 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c @@ -91,7 +91,7 @@ static int ompi_osc_pt2pt_dt_send_complete (ompi_request_t *request) ompi_datatype_t *datatype = (ompi_datatype_t *) request->req_complete_cb_data; ompi_osc_pt2pt_module_t *module = NULL; - OBJ_RELEASE(datatype); + OMPI_DATATYPE_RELEASE(datatype); OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.lock); (void) opal_hash_table_get_value_uint32(&mca_osc_pt2pt_component.modules, @@ -360,7 +360,7 @@ static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_ /* the datatype does not fit in an eager message. send it seperately */ header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE; - OBJ_RETAIN(target_dt); + OMPI_DATATYPE_RETAIN(target_dt); ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, target, tag_to_target(tag), module->comm, @@ -516,7 +516,7 @@ ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count, /* the datatype does not fit in an eager message. send it seperately */ header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE; - OBJ_RETAIN(target_dt); + OMPI_DATATYPE_RETAIN(target_dt); ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, target, tag_to_target(tag), module->comm, @@ -620,7 +620,7 @@ int ompi_osc_pt2pt_compare_and_swap (const void *origin_addr, const void *compar request->type = OMPI_OSC_PT2PT_HDR_TYPE_CSWAP; request->origin_addr = origin_addr; request->internal = true; - OBJ_RETAIN(dt); + OMPI_DATATYPE_RETAIN(dt); request->origin_dt = dt; /* Compute datatype and payload lengths. Note that the datatype description @@ -775,7 +775,7 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co pt2pt_request->type = OMPI_OSC_PT2PT_HDR_TYPE_GET; pt2pt_request->origin_addr = origin_addr; pt2pt_request->origin_count = origin_count; - OBJ_RETAIN(origin_dt); + OMPI_DATATYPE_RETAIN(origin_dt); pt2pt_request->origin_dt = origin_dt; /* Compute datatype length. Note that the datatype description @@ -825,7 +825,7 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co /* the datatype does not fit in an eager message. send it seperately */ header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE; - OBJ_RETAIN(target_dt); + OMPI_DATATYPE_RETAIN(target_dt); ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, target, tag_to_target(tag), module->comm, @@ -983,7 +983,7 @@ int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin pt2pt_request->type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC; pt2pt_request->origin_addr = origin_addr; pt2pt_request->origin_count = origin_count; - OBJ_RETAIN(origin_datatype); + OMPI_DATATYPE_RETAIN(origin_datatype); pt2pt_request->origin_dt = origin_datatype; /* Compute datatype and payload lengths. Note that the datatype description @@ -1044,7 +1044,7 @@ int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin /* the datatype does not fit in an eager message. send it seperately */ header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE; - OBJ_RETAIN(target_datatype); + OMPI_DATATYPE_RETAIN(target_datatype); ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, target_rank, tag_to_target(tag), module->comm, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c index 04333d2add..f65d2fa274 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c @@ -75,7 +75,7 @@ static void osc_pt2pt_accumulate_data_destructor (osc_pt2pt_accumulate_data_t *a } if (acc_data->datatype) { - OBJ_RELEASE(acc_data->datatype); + OMPI_DATATYPE_RELEASE(acc_data->datatype); } } @@ -118,7 +118,7 @@ static void osc_pt2pt_pending_acc_destructor (osc_pt2pt_pending_acc_t *pending) } if (NULL != pending->datatype) { - OBJ_RELEASE(pending->datatype); + OMPI_DATATYPE_RELEASE(pending->datatype); } } @@ -370,7 +370,7 @@ static inline int process_put(ompi_osc_pt2pt_module_t* module, int source, osc_pt2pt_copy_on_recv (target, data, data_len, proc, put_header->count, datatype); - OBJ_RELEASE(datatype); + OMPI_DATATYPE_RELEASE(datatype); return put_header->len; } @@ -407,7 +407,7 @@ static inline int process_put_long(ompi_osc_pt2pt_module_t* module, int source, return OMPI_ERROR; } - OBJ_RELEASE(datatype); + OMPI_DATATYPE_RELEASE(datatype); return put_header->len; } @@ -532,7 +532,7 @@ static inline int process_get (ompi_osc_pt2pt_module_t* module, int target, ret = osc_pt2pt_get_post_send (module, source, get_header->count, datatype, target, tag_to_origin(get_header->tag)); - OBJ_RELEASE(datatype); + OMPI_DATATYPE_RELEASE(datatype); return OMPI_SUCCESS == ret ? (int) get_header->len : ret; } @@ -634,7 +634,7 @@ static int osc_pt2pt_accumulate_allocate (ompi_osc_pt2pt_module_t *module, int p acc_data->proc = proc; acc_data->count = count; acc_data->datatype = datatype; - OBJ_RETAIN(datatype); + OMPI_DATATYPE_RETAIN(datatype); acc_data->op = op; acc_data->request_count = request_count; @@ -728,7 +728,7 @@ static int ompi_osc_pt2pt_acc_op_queue (ompi_osc_pt2pt_module_t *module, ompi_os /* save the datatype */ pending_acc->datatype = datatype; - OBJ_RETAIN(datatype); + OMPI_DATATYPE_RETAIN(datatype); /* save the header */ switch (header->base.type) { @@ -1166,7 +1166,7 @@ static inline int process_acc (ompi_osc_pt2pt_module_t *module, int source, } /* Release datatype & op */ - OBJ_RELEASE(datatype); + OMPI_DATATYPE_RELEASE(datatype); return (OMPI_SUCCESS == ret) ? (int) acc_header->len : ret; } @@ -1197,7 +1197,7 @@ static inline int process_acc_long (ompi_osc_pt2pt_module_t* module, int source, } /* Release datatype & op */ - OBJ_RELEASE(datatype); + OMPI_DATATYPE_RELEASE(datatype); return (OMPI_SUCCESS == ret) ? (int) acc_header->len : ret; } @@ -1231,7 +1231,7 @@ static inline int process_get_acc(ompi_osc_pt2pt_module_t *module, int source, uint32_t primitive_count; buffer = malloc (data_len); if (OPAL_UNLIKELY(NULL == buffer)) { - OBJ_RELEASE(datatype); + OMPI_DATATYPE_RELEASE(datatype); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -1250,7 +1250,7 @@ static inline int process_get_acc(ompi_osc_pt2pt_module_t *module, int source, } /* Release datatype & op */ - OBJ_RELEASE(datatype); + OMPI_DATATYPE_RELEASE(datatype); return (OMPI_SUCCESS == ret) ? (int) acc_header->len : ret; } @@ -1281,7 +1281,7 @@ static inline int process_get_acc_long(ompi_osc_pt2pt_module_t *module, int sour } /* Release datatype & op */ - OBJ_RELEASE(datatype); + OMPI_DATATYPE_RELEASE(datatype); return OMPI_SUCCESS == ret ? (int) acc_header->len : ret; } @@ -1313,7 +1313,7 @@ static inline int process_cswap (ompi_osc_pt2pt_module_t *module, int source, } /* Release datatype */ - OBJ_RELEASE(datatype); + OMPI_DATATYPE_RELEASE(datatype); return (OMPI_SUCCESS == ret) ? (int) cswap_header->len : ret; } diff --git a/ompi/mca/pml/base/pml_base_recvreq.h b/ompi/mca/pml/base/pml_base_recvreq.h index 154225f508..d20663e12f 100644 --- a/ompi/mca/pml/base/pml_base_recvreq.h +++ b/ompi/mca/pml/base/pml_base_recvreq.h @@ -67,7 +67,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_base_recv_request_t); { \ /* increment reference count on communicator */ \ OBJ_RETAIN(comm); \ - OBJ_RETAIN(datatype); \ + OMPI_DATATYPE_RETAIN(datatype); \ \ OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, persistent); \ (request)->req_base.req_ompi.req_mpi_object.comm = comm; \ @@ -117,7 +117,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_base_recv_request_t); do { \ OMPI_REQUEST_FINI(&(request)->req_base.req_ompi); \ OBJ_RELEASE( (request)->req_base.req_comm); \ - OBJ_RELEASE( (request)->req_base.req_datatype ); \ + OMPI_DATATYPE_RELEASE( (request)->req_base.req_datatype ); \ opal_convertor_cleanup( &((request)->req_base.req_convertor) ); \ } while (0) diff --git a/ompi/mca/pml/base/pml_base_sendreq.h b/ompi/mca/pml/base/pml_base_sendreq.h index 6fb50d0f90..1e85d8044a 100644 --- a/ompi/mca/pml/base/pml_base_sendreq.h +++ b/ompi/mca/pml/base/pml_base_sendreq.h @@ -98,7 +98,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t ); \ /* initialize datatype convertor for this request */ \ if( count > 0 ) { \ - OBJ_RETAIN(datatype); \ + OMPI_DATATYPE_RETAIN(datatype); \ /* We will create a convertor specialized for the */ \ /* remote architecture and prepared with the datatype. */ \ opal_convertor_copy_and_prepare_for_send( \ @@ -145,7 +145,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t ); OMPI_REQUEST_FINI(&(request)->req_base.req_ompi); \ OBJ_RELEASE((request)->req_base.req_comm); \ if( 0 != (request)->req_base.req_count ) \ - OBJ_RELEASE((request)->req_base.req_datatype); \ + OMPI_DATATYPE_RELEASE((request)->req_base.req_datatype); \ opal_convertor_cleanup( &((request)->req_base.req_convertor) ); \ } while (0) diff --git a/ompi/mca/pml/cm/pml_cm_recvreq.h b/ompi/mca/pml/cm/pml_cm_recvreq.h index 9dd3319cfe..d0774bac1c 100644 --- a/ompi/mca/pml/cm/pml_cm_recvreq.h +++ b/ompi/mca/pml/cm/pml_cm_recvreq.h @@ -101,7 +101,7 @@ do { \ request->req_base.req_comm = comm; \ request->req_base.req_datatype = datatype; \ OBJ_RETAIN(comm); \ - OBJ_RETAIN(datatype); \ + OMPI_DATATYPE_RETAIN(datatype); \ \ if( MPI_ANY_SOURCE == src ) { \ ompi_proc = ompi_proc_local_proc; \ @@ -132,7 +132,7 @@ do { \ request->req_base.req_comm = comm; \ request->req_base.req_datatype = datatype; \ OBJ_RETAIN(comm); \ - OBJ_RETAIN(datatype); \ + OMPI_DATATYPE_RETAIN(datatype); \ \ opal_convertor_copy_and_prepare_for_recv( \ ompi_mpi_local_convertor, \ @@ -166,7 +166,7 @@ do { \ request->req_addr = addr; \ request->req_count = count; \ OBJ_RETAIN(comm); \ - OBJ_RETAIN(datatype); \ + OMPI_DATATYPE_RETAIN(datatype); \ \ if( MPI_ANY_SOURCE == src ) { \ ompi_proc = ompi_proc_local_proc; \ @@ -203,7 +203,7 @@ do { \ request->req_addr = addr; \ request->req_count = count; \ OBJ_RETAIN(comm); \ - OBJ_RETAIN(datatype); \ + OMPI_DATATYPE_RETAIN(datatype); \ \ opal_convertor_copy_and_prepare_for_recv( \ ompi_mpi_local_convertor, \ @@ -349,7 +349,7 @@ do { \ #define MCA_PML_CM_HVY_RECV_REQUEST_RETURN(recvreq) \ { \ OBJ_RELEASE((recvreq)->req_base.req_comm); \ - OBJ_RELEASE((recvreq)->req_base.req_datatype); \ + OMPI_DATATYPE_RELEASE((recvreq)->req_base.req_datatype); \ OMPI_REQUEST_FINI(&(recvreq)->req_base.req_ompi); \ opal_convertor_cleanup( &((recvreq)->req_base.req_convertor) ); \ opal_free_list_return ( &mca_pml_base_recv_requests, \ @@ -362,7 +362,7 @@ do { \ #define MCA_PML_CM_THIN_RECV_REQUEST_RETURN(recvreq) \ { \ OBJ_RELEASE((recvreq)->req_base.req_comm); \ - OBJ_RELEASE((recvreq)->req_base.req_datatype); \ + OMPI_DATATYPE_RELEASE((recvreq)->req_base.req_datatype); \ OMPI_REQUEST_FINI(&(recvreq)->req_base.req_ompi); \ opal_convertor_cleanup( &((recvreq)->req_base.req_convertor) ); \ opal_free_list_return ( &mca_pml_base_recv_requests, \ diff --git a/ompi/mca/pml/cm/pml_cm_sendreq.h b/ompi/mca/pml/cm/pml_cm_sendreq.h index d0c22a9cea..e03eebf092 100644 --- a/ompi/mca/pml/cm/pml_cm_sendreq.h +++ b/ompi/mca/pml/cm/pml_cm_sendreq.h @@ -128,7 +128,7 @@ do { \ count) \ { \ OBJ_RETAIN(comm); \ - OBJ_RETAIN(datatype); \ + OMPI_DATATYPE_RETAIN(datatype); \ (req_send)->req_base.req_comm = comm; \ (req_send)->req_base.req_datatype = datatype; \ opal_convertor_copy_and_prepare_for_send( \ @@ -157,7 +157,7 @@ do { \ count) \ { \ OBJ_RETAIN(comm); \ - OBJ_RETAIN(datatype); \ + OMPI_DATATYPE_RETAIN(datatype); \ (req_send)->req_base.req_comm = comm; \ (req_send)->req_base.req_datatype = datatype; \ opal_convertor_copy_and_prepare_for_send( \ @@ -188,7 +188,7 @@ do { \ count) \ { \ OBJ_RETAIN(comm); \ - OBJ_RETAIN(datatype); \ + OMPI_DATATYPE_RETAIN(datatype); \ (req_send)->req_base.req_comm = comm; \ (req_send)->req_base.req_datatype = datatype; \ opal_convertor_copy_and_prepare_for_send( \ @@ -218,7 +218,7 @@ do { \ count) \ { \ OBJ_RETAIN(comm); \ - OBJ_RETAIN(datatype); \ + OMPI_DATATYPE_RETAIN(datatype); \ (req_send)->req_base.req_comm = comm; \ (req_send)->req_base.req_datatype = datatype; \ if (opal_datatype_is_contiguous_memory_layout(&datatype->super, count)) { \ @@ -434,7 +434,7 @@ do { #define MCA_PML_CM_HVY_SEND_REQUEST_RETURN(sendreq) \ { \ /* Let the base handle the reference counts */ \ - OBJ_RELEASE(sendreq->req_send.req_base.req_datatype); \ + OMPI_DATATYPE_RETAIN(sendreq->req_send.req_base.req_datatype); \ OBJ_RELEASE(sendreq->req_send.req_base.req_comm); \ OMPI_REQUEST_FINI(&sendreq->req_send.req_base.req_ompi); \ opal_convertor_cleanup( &(sendreq->req_send.req_base.req_convertor) ); \ @@ -471,7 +471,7 @@ do { \ #define MCA_PML_CM_THIN_SEND_REQUEST_RETURN(sendreq) \ { \ /* Let the base handle the reference counts */ \ - OBJ_RELEASE(sendreq->req_send.req_base.req_datatype); \ + OMPI_DATATYPE_RETAIN(sendreq->req_send.req_base.req_datatype); \ OBJ_RELEASE(sendreq->req_send.req_base.req_comm); \ OMPI_REQUEST_FINI(&sendreq->req_send.req_base.req_ompi); \ opal_convertor_cleanup( &(sendreq->req_send.req_base.req_convertor) ); \ diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index ec44ab96c2..fc941df071 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -58,7 +58,7 @@ mca_pml_ob1_t mca_pml_ob1 = { mca_pml_ob1_add_procs, mca_pml_ob1_del_procs, mca_pml_ob1_enable, - mca_pml_ob1_progress, + NULL, /* mca_pml_ob1_progress, */ mca_pml_ob1_add_comm, mca_pml_ob1_del_comm, mca_pml_ob1_irecv_init, diff --git a/ompi/mca/pml/ob1/pml_ob1.h b/ompi/mca/pml/ob1/pml_ob1.h index 8f0f510d7a..6c135faecf 100644 --- a/ompi/mca/pml/ob1/pml_ob1.h +++ b/ompi/mca/pml/ob1/pml_ob1.h @@ -399,4 +399,10 @@ mca_pml_ob1_calc_weighted_length( mca_pml_ob1_com_btl_t *btls, int num_btls, siz btls[0].length += length_left; } +/** + * A thread-safe function that should be called every time we need the OB1 + * progress to be turned (or kept) on. + */ +int mca_pml_ob1_enable_progress(int32_t count); + #endif diff --git a/ompi/mca/pml/ob1/pml_ob1_progress.c b/ompi/mca/pml/ob1/pml_ob1_progress.c index ea283293e3..96935b6021 100644 --- a/ompi/mca/pml/ob1/pml_ob1_progress.c +++ b/ompi/mca/pml/ob1/pml_ob1_progress.c @@ -26,9 +26,42 @@ #include "opal/mca/common/cuda/common_cuda.h" #include "pml_ob1_recvreq.h" #include "opal/runtime/opal_params.h" -static void mca_pml_ob1_process_pending_cuda_async_copies(void); + +/** + * Return the number of completed events allowing the upper level + * to know when no pending events are expected so that it can + * unregister the progress function. + */ +static inline int mca_pml_ob1_process_pending_cuda_async_copies(void) +{ + mca_btl_base_descriptor_t *frag; + int progress, count = 0; + + do { + progress = progress_one_cuda_htod_event(&frag); + if (1 == progress) { + /* Call the finish function to make progress. */ + mca_pml_ob1_recv_request_frag_copy_finished(NULL, NULL, frag, 0); + count++; + } + } while (progress > 0); + /* Consider progressing dtoh events here in future */ + + return count; +} #endif /* OPAL_CUDA_SUPPORT */ +static int mca_pml_ob1_progress_needed = 0; +int mca_pml_ob1_enable_progress(int32_t count) +{ + int32_t old = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, count); + if( 0 != old ) + return 0; /* progress was already on */ + + opal_progress_register(mca_pml_ob1_progress); + return 1; +} + int mca_pml_ob1_progress(void) { int i, queue_length = opal_list_get_size(&mca_pml_ob1.send_pending); @@ -36,12 +69,10 @@ int mca_pml_ob1_progress(void) bool send_succedded; #if OPAL_CUDA_SUPPORT - mca_pml_ob1_process_pending_cuda_async_copies(); + if (opal_cuda_support) + completed_requests += mca_pml_ob1_process_pending_cuda_async_copies(); #endif /* OPAL_CUDA_SUPPORT */ - if( OPAL_LIKELY(0 == queue_length) ) - return 0; - for( i = 0; i < queue_length; i++ ) { mca_pml_ob1_send_pending_t pending_type = MCA_PML_OB1_SEND_PENDING_NONE; mca_pml_ob1_send_request_t* sendreq; @@ -84,26 +115,13 @@ int mca_pml_ob1_progress(void) } } } + + if( 0 != completed_requests ) { + j = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, -completed_requests); + if( j == completed_requests ) { + opal_progress_unregister(mca_pml_ob1_progress); + } + } + return completed_requests; } - -#if OPAL_CUDA_SUPPORT -static void mca_pml_ob1_process_pending_cuda_async_copies(void) -{ - mca_btl_base_descriptor_t *frag; - int progress; - - if (!opal_cuda_support) - return; - - do { - progress = progress_one_cuda_htod_event(&frag); - if (1 == progress) { - /* Call the finish function to make progress. */ - mca_pml_ob1_recv_request_frag_copy_finished(NULL, NULL, frag, 0); - } - } while (progress > 0); - /* Consider progressing dtoh events here in future */ - -} -#endif /* OPAL_CUDA_SUPPORT */ diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 3ebde85d28..80acc93f4e 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -99,6 +99,7 @@ add_request_to_send_pending(mca_pml_ob1_send_request_t* sendreq, opal_list_prepend(&mca_pml_ob1.send_pending, item); OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); + mca_pml_ob1_enable_progress(1); } static inline mca_pml_ob1_send_request_t* diff --git a/ompi/mca/pml/ucx/pml_ucx_datatype.c b/ompi/mca/pml/ucx/pml_ucx_datatype.c index 488642fcda..9970a64c1b 100644 --- a/ompi/mca/pml/ucx/pml_ucx_datatype.c +++ b/ompi/mca/pml/ucx/pml_ucx_datatype.c @@ -22,7 +22,7 @@ static void* pml_ucx_generic_datatype_start_pack(void *context, const void *buff convertor = (mca_pml_ucx_convertor_t *)PML_UCX_FREELIST_GET(&ompi_pml_ucx.convs); - OBJ_RETAIN(datatype); + OMPI_DATATYPE_RETAIN(datatype); convertor->datatype = datatype; opal_convertor_copy_and_prepare_for_send(ompi_proc_local_proc->super.proc_convertor, &datatype->super, count, buffer, 0, @@ -38,7 +38,7 @@ static void* pml_ucx_generic_datatype_start_unpack(void *context, void *buffer, convertor = (mca_pml_ucx_convertor_t *)PML_UCX_FREELIST_GET(&ompi_pml_ucx.convs); - OBJ_RETAIN(datatype); + OMPI_DATATYPE_RETAIN(datatype); convertor->datatype = datatype; opal_convertor_copy_and_prepare_for_recv(ompi_proc_local_proc->super.proc_convertor, &datatype->super, count, buffer, 0, @@ -95,7 +95,7 @@ static void pml_ucx_generic_datatype_finish(void *state) mca_pml_ucx_convertor_t *convertor = state; opal_convertor_cleanup(&convertor->opal_conv); - OBJ_RELEASE(convertor->datatype); + OMPI_DATATYPE_RELEASE(convertor->datatype); PML_UCX_FREELIST_RETURN(&ompi_pml_ucx.convs, &convertor->super); } diff --git a/ompi/mca/pml/yalla/pml_yalla_datatype.c b/ompi/mca/pml/yalla/pml_yalla_datatype.c index b87340c007..0ab1608c82 100644 --- a/ompi/mca/pml/yalla/pml_yalla_datatype.c +++ b/ompi/mca/pml/yalla/pml_yalla_datatype.c @@ -20,7 +20,7 @@ static mca_pml_yalla_convertor_t *mca_pml_yalla_get_send_convertor(void *buf, si mca_pml_yalla_convertor_t *convertor = (mca_pml_yalla_convertor_t *)PML_YALLA_FREELIST_GET(&ompi_pml_yalla.convs); convertor->datatype = datatype; - OBJ_RETAIN(datatype); + OMPI_DATATYPE_RETAIN(datatype); opal_convertor_copy_and_prepare_for_send(ompi_proc_local_proc->super.proc_convertor, &datatype->super, count, buf, 0, &convertor->convertor); @@ -33,7 +33,7 @@ static mca_pml_yalla_convertor_t *mca_pml_yalla_get_recv_convertor(void *buf, si mca_pml_yalla_convertor_t *convertor = (mca_pml_yalla_convertor_t *)PML_YALLA_FREELIST_GET(&ompi_pml_yalla.convs); convertor->datatype = datatype; - OBJ_RETAIN(datatype); + OMPI_DATATYPE_RETAIN(datatype); opal_convertor_copy_and_prepare_for_recv(ompi_proc_local_proc->super.proc_convertor, &datatype->super, count, buf, 0, &convertor->convertor); diff --git a/ompi/mca/pml/yalla/pml_yalla_datatype.h b/ompi/mca/pml/yalla/pml_yalla_datatype.h index 2ba4e755a8..c77dfd41ba 100644 --- a/ompi/mca/pml/yalla/pml_yalla_datatype.h +++ b/ompi/mca/pml/yalla/pml_yalla_datatype.h @@ -53,7 +53,7 @@ OBJ_CLASS_DECLARATION(mca_pml_yalla_convertor_t); static inline void mca_pml_yalla_convertor_free(mca_pml_yalla_convertor_t *convertor) { opal_convertor_cleanup(&convertor->convertor); - OBJ_RELEASE(convertor->datatype); + OMPI_DATATYPE_RELEASE(convertor->datatype); PML_YALLA_FREELIST_RETURN(&ompi_pml_yalla.convs, &convertor->super); } diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index 61b56be20f..f05e53b6cd 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -36,7 +36,7 @@ enum { OPAL_ERR_FATAL = (OPAL_ERR_BASE - 6), OPAL_ERR_NOT_IMPLEMENTED = (OPAL_ERR_BASE - 7), OPAL_ERR_NOT_SUPPORTED = (OPAL_ERR_BASE - 8), - OPAL_ERR_INTERRUPTED = (OPAL_ERR_BASE - 9), + OPAL_ERR_INTERRUPTED = (OPAL_ERR_BASE - 9), OPAL_ERR_WOULD_BLOCK = (OPAL_ERR_BASE - 10), OPAL_ERR_IN_ERRNO = (OPAL_ERR_BASE - 11), OPAL_ERR_UNREACH = (OPAL_ERR_BASE - 12), diff --git a/opal/mca/common/cuda/common_cuda.c b/opal/mca/common/cuda/common_cuda.c index 94886739fb..138ad7e658 100644 --- a/opal/mca/common/cuda/common_cuda.c +++ b/opal/mca/common/cuda/common_cuda.c @@ -1635,7 +1635,7 @@ int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **frag) { return 1; } OPAL_THREAD_UNLOCK(&common_cuda_htod_lock); - return 0; + return OPAL_ERR_RESOURCE_BUSY; }