Don't refcount the predefined datatypes.
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
родитель
a49422fe84
Коммит
c2cd717f82
@ -36,6 +36,7 @@
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "mpi.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
@ -374,5 +375,25 @@ OMPI_DECLSPEC int ompi_datatype_unpack_external( const char datarep[], const voi
|
||||
OMPI_DECLSPEC int ompi_datatype_pack_external_size( const char datarep[], int incount,
|
||||
ompi_datatype_t *datatype, MPI_Aint *size);
|
||||
|
||||
#define OMPI_DATATYPE_RETAIN(ddt) \
|
||||
{ \
|
||||
if( !ompi_datatype_is_predefined((ddt)) ) { \
|
||||
OBJ_RETAIN((ddt)); \
|
||||
OPAL_OUTPUT_VERBOSE((0, 100, "Datatype %p [%s] refcount %d in file %s:%d\n", \
|
||||
(void*)(ddt), (ddt)->name, (ddt)->super.super.obj_reference_count, \
|
||||
__FILE__, __LINE__)); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define OMPI_DATATYPE_RELEASE(ddt) \
|
||||
{ \
|
||||
if( !ompi_datatype_is_predefined((ddt)) ) { \
|
||||
OPAL_OUTPUT_VERBOSE((0, 100, "Datatype %p [%s] refcount %d in file %s:%d\n", \
|
||||
(void*)(ddt), (ddt)->name, (ddt)->super.super.obj_reference_count, \
|
||||
__func__, __LINE__)); \
|
||||
OBJ_RELEASE((ddt)); \
|
||||
} \
|
||||
}
|
||||
|
||||
END_C_DECLS
|
||||
#endif /* OMPI_DATATYPE_H_HAS_BEEN_INCLUDED */
|
||||
|
@ -54,7 +54,7 @@ ompi_osc_base_datatype_create(ompi_proc_t *remote_proc, void **payload)
|
||||
struct ompi_datatype_t *datatype =
|
||||
ompi_datatype_create_from_packed_description(payload, remote_proc);
|
||||
if (NULL == datatype) return NULL;
|
||||
if (ompi_datatype_is_predefined(datatype)) OBJ_RETAIN(datatype);
|
||||
OMPI_DATATYPE_RETAIN(datatype);
|
||||
return datatype;
|
||||
}
|
||||
|
||||
|
@ -91,7 +91,7 @@ static int ompi_osc_pt2pt_dt_send_complete (ompi_request_t *request)
|
||||
ompi_datatype_t *datatype = (ompi_datatype_t *) request->req_complete_cb_data;
|
||||
ompi_osc_pt2pt_module_t *module = NULL;
|
||||
|
||||
OBJ_RELEASE(datatype);
|
||||
OMPI_DATATYPE_RELEASE(datatype);
|
||||
|
||||
OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.lock);
|
||||
(void) opal_hash_table_get_value_uint32(&mca_osc_pt2pt_component.modules,
|
||||
@ -360,7 +360,7 @@ static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_
|
||||
/* the datatype does not fit in an eager message. send it seperately */
|
||||
header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE;
|
||||
|
||||
OBJ_RETAIN(target_dt);
|
||||
OMPI_DATATYPE_RETAIN(target_dt);
|
||||
|
||||
ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE,
|
||||
target, tag_to_target(tag), module->comm,
|
||||
@ -516,7 +516,7 @@ ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count,
|
||||
/* the datatype does not fit in an eager message. send it seperately */
|
||||
header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE;
|
||||
|
||||
OBJ_RETAIN(target_dt);
|
||||
OMPI_DATATYPE_RETAIN(target_dt);
|
||||
|
||||
ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE,
|
||||
target, tag_to_target(tag), module->comm,
|
||||
@ -620,7 +620,7 @@ int ompi_osc_pt2pt_compare_and_swap (const void *origin_addr, const void *compar
|
||||
request->type = OMPI_OSC_PT2PT_HDR_TYPE_CSWAP;
|
||||
request->origin_addr = origin_addr;
|
||||
request->internal = true;
|
||||
OBJ_RETAIN(dt);
|
||||
OMPI_DATATYPE_RETAIN(dt);
|
||||
request->origin_dt = dt;
|
||||
|
||||
/* Compute datatype and payload lengths. Note that the datatype description
|
||||
@ -775,7 +775,7 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co
|
||||
pt2pt_request->type = OMPI_OSC_PT2PT_HDR_TYPE_GET;
|
||||
pt2pt_request->origin_addr = origin_addr;
|
||||
pt2pt_request->origin_count = origin_count;
|
||||
OBJ_RETAIN(origin_dt);
|
||||
OMPI_DATATYPE_RETAIN(origin_dt);
|
||||
pt2pt_request->origin_dt = origin_dt;
|
||||
|
||||
/* Compute datatype length. Note that the datatype description
|
||||
@ -825,7 +825,7 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co
|
||||
/* the datatype does not fit in an eager message. send it seperately */
|
||||
header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE;
|
||||
|
||||
OBJ_RETAIN(target_dt);
|
||||
OMPI_DATATYPE_RETAIN(target_dt);
|
||||
|
||||
ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE,
|
||||
target, tag_to_target(tag), module->comm,
|
||||
@ -983,7 +983,7 @@ int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin
|
||||
pt2pt_request->type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC;
|
||||
pt2pt_request->origin_addr = origin_addr;
|
||||
pt2pt_request->origin_count = origin_count;
|
||||
OBJ_RETAIN(origin_datatype);
|
||||
OMPI_DATATYPE_RETAIN(origin_datatype);
|
||||
pt2pt_request->origin_dt = origin_datatype;
|
||||
|
||||
/* Compute datatype and payload lengths. Note that the datatype description
|
||||
@ -1044,7 +1044,7 @@ int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin
|
||||
/* the datatype does not fit in an eager message. send it seperately */
|
||||
header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE;
|
||||
|
||||
OBJ_RETAIN(target_datatype);
|
||||
OMPI_DATATYPE_RETAIN(target_datatype);
|
||||
|
||||
ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE,
|
||||
target_rank, tag_to_target(tag), module->comm,
|
||||
|
@ -75,7 +75,7 @@ static void osc_pt2pt_accumulate_data_destructor (osc_pt2pt_accumulate_data_t *a
|
||||
}
|
||||
|
||||
if (acc_data->datatype) {
|
||||
OBJ_RELEASE(acc_data->datatype);
|
||||
OMPI_DATATYPE_RELEASE(acc_data->datatype);
|
||||
}
|
||||
}
|
||||
|
||||
@ -118,7 +118,7 @@ static void osc_pt2pt_pending_acc_destructor (osc_pt2pt_pending_acc_t *pending)
|
||||
}
|
||||
|
||||
if (NULL != pending->datatype) {
|
||||
OBJ_RELEASE(pending->datatype);
|
||||
OMPI_DATATYPE_RELEASE(pending->datatype);
|
||||
}
|
||||
}
|
||||
|
||||
@ -370,7 +370,7 @@ static inline int process_put(ompi_osc_pt2pt_module_t* module, int source,
|
||||
|
||||
osc_pt2pt_copy_on_recv (target, data, data_len, proc, put_header->count, datatype);
|
||||
|
||||
OBJ_RELEASE(datatype);
|
||||
OMPI_DATATYPE_RELEASE(datatype);
|
||||
|
||||
return put_header->len;
|
||||
}
|
||||
@ -407,7 +407,7 @@ static inline int process_put_long(ompi_osc_pt2pt_module_t* module, int source,
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
OBJ_RELEASE(datatype);
|
||||
OMPI_DATATYPE_RELEASE(datatype);
|
||||
|
||||
return put_header->len;
|
||||
}
|
||||
@ -532,7 +532,7 @@ static inline int process_get (ompi_osc_pt2pt_module_t* module, int target,
|
||||
ret = osc_pt2pt_get_post_send (module, source, get_header->count, datatype,
|
||||
target, tag_to_origin(get_header->tag));
|
||||
|
||||
OBJ_RELEASE(datatype);
|
||||
OMPI_DATATYPE_RELEASE(datatype);
|
||||
|
||||
return OMPI_SUCCESS == ret ? (int) get_header->len : ret;
|
||||
}
|
||||
@ -634,7 +634,7 @@ static int osc_pt2pt_accumulate_allocate (ompi_osc_pt2pt_module_t *module, int p
|
||||
acc_data->proc = proc;
|
||||
acc_data->count = count;
|
||||
acc_data->datatype = datatype;
|
||||
OBJ_RETAIN(datatype);
|
||||
OMPI_DATATYPE_RETAIN(datatype);
|
||||
acc_data->op = op;
|
||||
acc_data->request_count = request_count;
|
||||
|
||||
@ -728,7 +728,7 @@ static int ompi_osc_pt2pt_acc_op_queue (ompi_osc_pt2pt_module_t *module, ompi_os
|
||||
|
||||
/* save the datatype */
|
||||
pending_acc->datatype = datatype;
|
||||
OBJ_RETAIN(datatype);
|
||||
OMPI_DATATYPE_RETAIN(datatype);
|
||||
|
||||
/* save the header */
|
||||
switch (header->base.type) {
|
||||
@ -1166,7 +1166,7 @@ static inline int process_acc (ompi_osc_pt2pt_module_t *module, int source,
|
||||
}
|
||||
|
||||
/* Release datatype & op */
|
||||
OBJ_RELEASE(datatype);
|
||||
OMPI_DATATYPE_RELEASE(datatype);
|
||||
|
||||
return (OMPI_SUCCESS == ret) ? (int) acc_header->len : ret;
|
||||
}
|
||||
@ -1197,7 +1197,7 @@ static inline int process_acc_long (ompi_osc_pt2pt_module_t* module, int source,
|
||||
}
|
||||
|
||||
/* Release datatype & op */
|
||||
OBJ_RELEASE(datatype);
|
||||
OMPI_DATATYPE_RELEASE(datatype);
|
||||
|
||||
return (OMPI_SUCCESS == ret) ? (int) acc_header->len : ret;
|
||||
}
|
||||
@ -1231,7 +1231,7 @@ static inline int process_get_acc(ompi_osc_pt2pt_module_t *module, int source,
|
||||
uint32_t primitive_count;
|
||||
buffer = malloc (data_len);
|
||||
if (OPAL_UNLIKELY(NULL == buffer)) {
|
||||
OBJ_RELEASE(datatype);
|
||||
OMPI_DATATYPE_RELEASE(datatype);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -1250,7 +1250,7 @@ static inline int process_get_acc(ompi_osc_pt2pt_module_t *module, int source,
|
||||
}
|
||||
|
||||
/* Release datatype & op */
|
||||
OBJ_RELEASE(datatype);
|
||||
OMPI_DATATYPE_RELEASE(datatype);
|
||||
|
||||
return (OMPI_SUCCESS == ret) ? (int) acc_header->len : ret;
|
||||
}
|
||||
@ -1281,7 +1281,7 @@ static inline int process_get_acc_long(ompi_osc_pt2pt_module_t *module, int sour
|
||||
}
|
||||
|
||||
/* Release datatype & op */
|
||||
OBJ_RELEASE(datatype);
|
||||
OMPI_DATATYPE_RELEASE(datatype);
|
||||
|
||||
return OMPI_SUCCESS == ret ? (int) acc_header->len : ret;
|
||||
}
|
||||
@ -1313,7 +1313,7 @@ static inline int process_cswap (ompi_osc_pt2pt_module_t *module, int source,
|
||||
}
|
||||
|
||||
/* Release datatype */
|
||||
OBJ_RELEASE(datatype);
|
||||
OMPI_DATATYPE_RELEASE(datatype);
|
||||
|
||||
return (OMPI_SUCCESS == ret) ? (int) cswap_header->len : ret;
|
||||
}
|
||||
|
@ -67,7 +67,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_base_recv_request_t);
|
||||
{ \
|
||||
/* increment reference count on communicator */ \
|
||||
OBJ_RETAIN(comm); \
|
||||
OBJ_RETAIN(datatype); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
\
|
||||
OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, persistent); \
|
||||
(request)->req_base.req_ompi.req_mpi_object.comm = comm; \
|
||||
@ -117,7 +117,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_base_recv_request_t);
|
||||
do { \
|
||||
OMPI_REQUEST_FINI(&(request)->req_base.req_ompi); \
|
||||
OBJ_RELEASE( (request)->req_base.req_comm); \
|
||||
OBJ_RELEASE( (request)->req_base.req_datatype ); \
|
||||
OMPI_DATATYPE_RELEASE( (request)->req_base.req_datatype ); \
|
||||
opal_convertor_cleanup( &((request)->req_base.req_convertor) ); \
|
||||
} while (0)
|
||||
|
||||
|
@ -98,7 +98,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t );
|
||||
\
|
||||
/* initialize datatype convertor for this request */ \
|
||||
if( count > 0 ) { \
|
||||
OBJ_RETAIN(datatype); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
/* We will create a convertor specialized for the */ \
|
||||
/* remote architecture and prepared with the datatype. */ \
|
||||
opal_convertor_copy_and_prepare_for_send( \
|
||||
@ -145,7 +145,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t );
|
||||
OMPI_REQUEST_FINI(&(request)->req_base.req_ompi); \
|
||||
OBJ_RELEASE((request)->req_base.req_comm); \
|
||||
if( 0 != (request)->req_base.req_count ) \
|
||||
OBJ_RELEASE((request)->req_base.req_datatype); \
|
||||
OMPI_DATATYPE_RELEASE((request)->req_base.req_datatype); \
|
||||
opal_convertor_cleanup( &((request)->req_base.req_convertor) ); \
|
||||
} while (0)
|
||||
|
||||
|
@ -101,7 +101,7 @@ do { \
|
||||
request->req_base.req_comm = comm; \
|
||||
request->req_base.req_datatype = datatype; \
|
||||
OBJ_RETAIN(comm); \
|
||||
OBJ_RETAIN(datatype); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
\
|
||||
if( MPI_ANY_SOURCE == src ) { \
|
||||
ompi_proc = ompi_proc_local_proc; \
|
||||
@ -132,7 +132,7 @@ do { \
|
||||
request->req_base.req_comm = comm; \
|
||||
request->req_base.req_datatype = datatype; \
|
||||
OBJ_RETAIN(comm); \
|
||||
OBJ_RETAIN(datatype); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
\
|
||||
opal_convertor_copy_and_prepare_for_recv( \
|
||||
ompi_mpi_local_convertor, \
|
||||
@ -166,7 +166,7 @@ do { \
|
||||
request->req_addr = addr; \
|
||||
request->req_count = count; \
|
||||
OBJ_RETAIN(comm); \
|
||||
OBJ_RETAIN(datatype); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
\
|
||||
if( MPI_ANY_SOURCE == src ) { \
|
||||
ompi_proc = ompi_proc_local_proc; \
|
||||
@ -203,7 +203,7 @@ do { \
|
||||
request->req_addr = addr; \
|
||||
request->req_count = count; \
|
||||
OBJ_RETAIN(comm); \
|
||||
OBJ_RETAIN(datatype); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
\
|
||||
opal_convertor_copy_and_prepare_for_recv( \
|
||||
ompi_mpi_local_convertor, \
|
||||
@ -349,7 +349,7 @@ do { \
|
||||
#define MCA_PML_CM_HVY_RECV_REQUEST_RETURN(recvreq) \
|
||||
{ \
|
||||
OBJ_RELEASE((recvreq)->req_base.req_comm); \
|
||||
OBJ_RELEASE((recvreq)->req_base.req_datatype); \
|
||||
OMPI_DATATYPE_RELEASE((recvreq)->req_base.req_datatype); \
|
||||
OMPI_REQUEST_FINI(&(recvreq)->req_base.req_ompi); \
|
||||
opal_convertor_cleanup( &((recvreq)->req_base.req_convertor) ); \
|
||||
opal_free_list_return ( &mca_pml_base_recv_requests, \
|
||||
@ -362,7 +362,7 @@ do { \
|
||||
#define MCA_PML_CM_THIN_RECV_REQUEST_RETURN(recvreq) \
|
||||
{ \
|
||||
OBJ_RELEASE((recvreq)->req_base.req_comm); \
|
||||
OBJ_RELEASE((recvreq)->req_base.req_datatype); \
|
||||
OMPI_DATATYPE_RELEASE((recvreq)->req_base.req_datatype); \
|
||||
OMPI_REQUEST_FINI(&(recvreq)->req_base.req_ompi); \
|
||||
opal_convertor_cleanup( &((recvreq)->req_base.req_convertor) ); \
|
||||
opal_free_list_return ( &mca_pml_base_recv_requests, \
|
||||
|
@ -128,7 +128,7 @@ do { \
|
||||
count) \
|
||||
{ \
|
||||
OBJ_RETAIN(comm); \
|
||||
OBJ_RETAIN(datatype); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
(req_send)->req_base.req_comm = comm; \
|
||||
(req_send)->req_base.req_datatype = datatype; \
|
||||
opal_convertor_copy_and_prepare_for_send( \
|
||||
@ -157,7 +157,7 @@ do { \
|
||||
count) \
|
||||
{ \
|
||||
OBJ_RETAIN(comm); \
|
||||
OBJ_RETAIN(datatype); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
(req_send)->req_base.req_comm = comm; \
|
||||
(req_send)->req_base.req_datatype = datatype; \
|
||||
opal_convertor_copy_and_prepare_for_send( \
|
||||
@ -188,7 +188,7 @@ do { \
|
||||
count) \
|
||||
{ \
|
||||
OBJ_RETAIN(comm); \
|
||||
OBJ_RETAIN(datatype); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
(req_send)->req_base.req_comm = comm; \
|
||||
(req_send)->req_base.req_datatype = datatype; \
|
||||
opal_convertor_copy_and_prepare_for_send( \
|
||||
@ -218,7 +218,7 @@ do { \
|
||||
count) \
|
||||
{ \
|
||||
OBJ_RETAIN(comm); \
|
||||
OBJ_RETAIN(datatype); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
(req_send)->req_base.req_comm = comm; \
|
||||
(req_send)->req_base.req_datatype = datatype; \
|
||||
if (opal_datatype_is_contiguous_memory_layout(&datatype->super, count)) { \
|
||||
@ -434,7 +434,7 @@ do {
|
||||
#define MCA_PML_CM_HVY_SEND_REQUEST_RETURN(sendreq) \
|
||||
{ \
|
||||
/* Let the base handle the reference counts */ \
|
||||
OBJ_RELEASE(sendreq->req_send.req_base.req_datatype); \
|
||||
OMPI_DATATYPE_RETAIN(sendreq->req_send.req_base.req_datatype); \
|
||||
OBJ_RELEASE(sendreq->req_send.req_base.req_comm); \
|
||||
OMPI_REQUEST_FINI(&sendreq->req_send.req_base.req_ompi); \
|
||||
opal_convertor_cleanup( &(sendreq->req_send.req_base.req_convertor) ); \
|
||||
@ -471,7 +471,7 @@ do { \
|
||||
#define MCA_PML_CM_THIN_SEND_REQUEST_RETURN(sendreq) \
|
||||
{ \
|
||||
/* Let the base handle the reference counts */ \
|
||||
OBJ_RELEASE(sendreq->req_send.req_base.req_datatype); \
|
||||
OMPI_DATATYPE_RETAIN(sendreq->req_send.req_base.req_datatype); \
|
||||
OBJ_RELEASE(sendreq->req_send.req_base.req_comm); \
|
||||
OMPI_REQUEST_FINI(&sendreq->req_send.req_base.req_ompi); \
|
||||
opal_convertor_cleanup( &(sendreq->req_send.req_base.req_convertor) ); \
|
||||
|
@ -58,7 +58,7 @@ mca_pml_ob1_t mca_pml_ob1 = {
|
||||
mca_pml_ob1_add_procs,
|
||||
mca_pml_ob1_del_procs,
|
||||
mca_pml_ob1_enable,
|
||||
mca_pml_ob1_progress,
|
||||
NULL, /* mca_pml_ob1_progress, */
|
||||
mca_pml_ob1_add_comm,
|
||||
mca_pml_ob1_del_comm,
|
||||
mca_pml_ob1_irecv_init,
|
||||
|
@ -399,4 +399,10 @@ mca_pml_ob1_calc_weighted_length( mca_pml_ob1_com_btl_t *btls, int num_btls, siz
|
||||
btls[0].length += length_left;
|
||||
}
|
||||
|
||||
/**
|
||||
* A thread-safe function that should be called every time we need the OB1
|
||||
* progress to be turned (or kept) on.
|
||||
*/
|
||||
int mca_pml_ob1_enable_progress(int32_t count);
|
||||
|
||||
#endif
|
||||
|
@ -26,9 +26,42 @@
|
||||
#include "opal/mca/common/cuda/common_cuda.h"
|
||||
#include "pml_ob1_recvreq.h"
|
||||
#include "opal/runtime/opal_params.h"
|
||||
static void mca_pml_ob1_process_pending_cuda_async_copies(void);
|
||||
|
||||
/**
|
||||
* Return the number of completed events allowing the upper level
|
||||
* to know when no pending events are expected so that it can
|
||||
* unregister the progress function.
|
||||
*/
|
||||
static inline int mca_pml_ob1_process_pending_cuda_async_copies(void)
|
||||
{
|
||||
mca_btl_base_descriptor_t *frag;
|
||||
int progress, count = 0;
|
||||
|
||||
do {
|
||||
progress = progress_one_cuda_htod_event(&frag);
|
||||
if (1 == progress) {
|
||||
/* Call the finish function to make progress. */
|
||||
mca_pml_ob1_recv_request_frag_copy_finished(NULL, NULL, frag, 0);
|
||||
count++;
|
||||
}
|
||||
} while (progress > 0);
|
||||
/* Consider progressing dtoh events here in future */
|
||||
|
||||
return count;
|
||||
}
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
static int mca_pml_ob1_progress_needed = 0;
|
||||
int mca_pml_ob1_enable_progress(int32_t count)
|
||||
{
|
||||
int32_t old = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, count);
|
||||
if( 0 != old )
|
||||
return 0; /* progress was already on */
|
||||
|
||||
opal_progress_register(mca_pml_ob1_progress);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int mca_pml_ob1_progress(void)
|
||||
{
|
||||
int i, queue_length = opal_list_get_size(&mca_pml_ob1.send_pending);
|
||||
@ -36,12 +69,10 @@ int mca_pml_ob1_progress(void)
|
||||
bool send_succedded;
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
mca_pml_ob1_process_pending_cuda_async_copies();
|
||||
if (opal_cuda_support)
|
||||
completed_requests += mca_pml_ob1_process_pending_cuda_async_copies();
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
if( OPAL_LIKELY(0 == queue_length) )
|
||||
return 0;
|
||||
|
||||
for( i = 0; i < queue_length; i++ ) {
|
||||
mca_pml_ob1_send_pending_t pending_type = MCA_PML_OB1_SEND_PENDING_NONE;
|
||||
mca_pml_ob1_send_request_t* sendreq;
|
||||
@ -84,26 +115,13 @@ int mca_pml_ob1_progress(void)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( 0 != completed_requests ) {
|
||||
j = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, -completed_requests);
|
||||
if( j == completed_requests ) {
|
||||
opal_progress_unregister(mca_pml_ob1_progress);
|
||||
}
|
||||
}
|
||||
|
||||
return completed_requests;
|
||||
}
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
static void mca_pml_ob1_process_pending_cuda_async_copies(void)
|
||||
{
|
||||
mca_btl_base_descriptor_t *frag;
|
||||
int progress;
|
||||
|
||||
if (!opal_cuda_support)
|
||||
return;
|
||||
|
||||
do {
|
||||
progress = progress_one_cuda_htod_event(&frag);
|
||||
if (1 == progress) {
|
||||
/* Call the finish function to make progress. */
|
||||
mca_pml_ob1_recv_request_frag_copy_finished(NULL, NULL, frag, 0);
|
||||
}
|
||||
} while (progress > 0);
|
||||
/* Consider progressing dtoh events here in future */
|
||||
|
||||
}
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
@ -99,6 +99,7 @@ add_request_to_send_pending(mca_pml_ob1_send_request_t* sendreq,
|
||||
opal_list_prepend(&mca_pml_ob1.send_pending, item);
|
||||
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
mca_pml_ob1_enable_progress(1);
|
||||
}
|
||||
|
||||
static inline mca_pml_ob1_send_request_t*
|
||||
|
@ -22,7 +22,7 @@ static void* pml_ucx_generic_datatype_start_pack(void *context, const void *buff
|
||||
|
||||
convertor = (mca_pml_ucx_convertor_t *)PML_UCX_FREELIST_GET(&ompi_pml_ucx.convs);
|
||||
|
||||
OBJ_RETAIN(datatype);
|
||||
OMPI_DATATYPE_RETAIN(datatype);
|
||||
convertor->datatype = datatype;
|
||||
opal_convertor_copy_and_prepare_for_send(ompi_proc_local_proc->super.proc_convertor,
|
||||
&datatype->super, count, buffer, 0,
|
||||
@ -38,7 +38,7 @@ static void* pml_ucx_generic_datatype_start_unpack(void *context, void *buffer,
|
||||
|
||||
convertor = (mca_pml_ucx_convertor_t *)PML_UCX_FREELIST_GET(&ompi_pml_ucx.convs);
|
||||
|
||||
OBJ_RETAIN(datatype);
|
||||
OMPI_DATATYPE_RETAIN(datatype);
|
||||
convertor->datatype = datatype;
|
||||
opal_convertor_copy_and_prepare_for_recv(ompi_proc_local_proc->super.proc_convertor,
|
||||
&datatype->super, count, buffer, 0,
|
||||
@ -95,7 +95,7 @@ static void pml_ucx_generic_datatype_finish(void *state)
|
||||
mca_pml_ucx_convertor_t *convertor = state;
|
||||
|
||||
opal_convertor_cleanup(&convertor->opal_conv);
|
||||
OBJ_RELEASE(convertor->datatype);
|
||||
OMPI_DATATYPE_RELEASE(convertor->datatype);
|
||||
PML_UCX_FREELIST_RETURN(&ompi_pml_ucx.convs, &convertor->super);
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,7 @@ static mca_pml_yalla_convertor_t *mca_pml_yalla_get_send_convertor(void *buf, si
|
||||
mca_pml_yalla_convertor_t *convertor = (mca_pml_yalla_convertor_t *)PML_YALLA_FREELIST_GET(&ompi_pml_yalla.convs);
|
||||
|
||||
convertor->datatype = datatype;
|
||||
OBJ_RETAIN(datatype);
|
||||
OMPI_DATATYPE_RETAIN(datatype);
|
||||
opal_convertor_copy_and_prepare_for_send(ompi_proc_local_proc->super.proc_convertor,
|
||||
&datatype->super, count, buf, 0,
|
||||
&convertor->convertor);
|
||||
@ -33,7 +33,7 @@ static mca_pml_yalla_convertor_t *mca_pml_yalla_get_recv_convertor(void *buf, si
|
||||
mca_pml_yalla_convertor_t *convertor = (mca_pml_yalla_convertor_t *)PML_YALLA_FREELIST_GET(&ompi_pml_yalla.convs);
|
||||
|
||||
convertor->datatype = datatype;
|
||||
OBJ_RETAIN(datatype);
|
||||
OMPI_DATATYPE_RETAIN(datatype);
|
||||
opal_convertor_copy_and_prepare_for_recv(ompi_proc_local_proc->super.proc_convertor,
|
||||
&datatype->super, count, buf, 0,
|
||||
&convertor->convertor);
|
||||
|
@ -53,7 +53,7 @@ OBJ_CLASS_DECLARATION(mca_pml_yalla_convertor_t);
|
||||
static inline void mca_pml_yalla_convertor_free(mca_pml_yalla_convertor_t *convertor)
|
||||
{
|
||||
opal_convertor_cleanup(&convertor->convertor);
|
||||
OBJ_RELEASE(convertor->datatype);
|
||||
OMPI_DATATYPE_RELEASE(convertor->datatype);
|
||||
PML_YALLA_FREELIST_RETURN(&ompi_pml_yalla.convs, &convertor->super);
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ enum {
|
||||
OPAL_ERR_FATAL = (OPAL_ERR_BASE - 6),
|
||||
OPAL_ERR_NOT_IMPLEMENTED = (OPAL_ERR_BASE - 7),
|
||||
OPAL_ERR_NOT_SUPPORTED = (OPAL_ERR_BASE - 8),
|
||||
OPAL_ERR_INTERRUPTED = (OPAL_ERR_BASE - 9),
|
||||
OPAL_ERR_INTERRUPTED = (OPAL_ERR_BASE - 9),
|
||||
OPAL_ERR_WOULD_BLOCK = (OPAL_ERR_BASE - 10),
|
||||
OPAL_ERR_IN_ERRNO = (OPAL_ERR_BASE - 11),
|
||||
OPAL_ERR_UNREACH = (OPAL_ERR_BASE - 12),
|
||||
|
@ -1635,7 +1635,7 @@ int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **frag) {
|
||||
return 1;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&common_cuda_htod_lock);
|
||||
return 0;
|
||||
return OPAL_ERR_RESOURCE_BUSY;
|
||||
}
|
||||
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user