1
1

A more consistent version. As we now share the send and receive queue, we

have to construct/destruct them only once. Therefore, the construction
happens before digging for a PML, while the destruction happens just before
finalizing the component.

Add some OPAL_LIKELY/OPAL_UNLIKELY.

This commit was SVN r15347.
Этот коммит содержится в:
George Bosilca 2007-07-10 23:45:23 +00:00
родитель 433f8a7694
Коммит e19777e910
15 изменённых файлов: 752 добавлений и 848 удалений

Просмотреть файл

@ -25,32 +25,41 @@
#include "opal/mca/base/base.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/pml/base/pml_base_request.h"
#include "opal/runtime/opal_progress.h"
int mca_pml_base_close(void)
{
/* turn off the progress code for the pml */
opal_progress_unregister(mca_pml.pml_progress);
/* turn off the progress code for the pml */
opal_progress_unregister(mca_pml.pml_progress);
/* Blatently ignore the return code (what would we do to recover,
anyway? This module is going away, so errors don't matter
anymore) */
/* Blatently ignore the return code (what would we do to recover,
anyway? This module is going away, so errors don't matter
anymore) */
mca_pml.pml_progress = mca_pml_base_progress;
if (NULL != mca_pml_base_selected_component.pmlm_finalize) {
mca_pml_base_selected_component.pmlm_finalize();
}
/**
* Destruct the send and receive queues. The ompi_free_list_t destructor
* will return the memory to the mpool, so this has to be done before the
* mpool get released by the PML close function.
*/
OBJ_DESTRUCT(&mca_pml_base_send_requests);
OBJ_DESTRUCT(&mca_pml_base_recv_requests);
OBJ_DESTRUCT(&mca_pml_base_pml);
mca_pml.pml_progress = mca_pml_base_progress;
if (NULL != mca_pml_base_selected_component.pmlm_finalize) {
mca_pml_base_selected_component.pmlm_finalize();
}
/* Close all remaining available modules (may be one if this is a
OMPI RTE program, or [possibly] multiple if this is ompi_info) */
OBJ_DESTRUCT(&mca_pml_base_pml);
mca_base_components_close(mca_pml_base_output,
&mca_pml_base_components_available, NULL);
/* Close all remaining available modules (may be one if this is a
OMPI RTE program, or [possibly] multiple if this is ompi_info) */
/* All done */
mca_base_components_close(mca_pml_base_output,
&mca_pml_base_components_available, NULL);
return OMPI_SUCCESS;
/* All done */
return OMPI_SUCCESS;
}

Просмотреть файл

@ -29,6 +29,7 @@
#include "ompi/constants.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/pml/base/pml_base_request.h"
/*
* The following file was created by configure. It contains extern
@ -129,7 +130,6 @@ int mca_pml_base_open(void)
ompi_pointer_array_add(&mca_pml_base_pml,
stringify(MCA_pml_DIRECT_CALL_COMPONENT));
#else
mca_base_param_reg_string_name("pml", NULL,
"Specify a specific PML to use",
@ -159,6 +159,16 @@ int mca_pml_base_open(void)
#endif
/**
* Construct the send and receive request queues. There are 2 reasons to do it
* here. First, as they are globals it's better to construct them in one common
* place. Second, in order to be able to allow the external debuggers to show
* their content, they should get constructed as soon as possible once the MPI
* process is started.
*/
OBJ_CONSTRUCT(&mca_pml_base_send_requests, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_pml_base_recv_requests, ompi_free_list_t);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -56,6 +56,24 @@ mca_pml_cm_enable(bool enable)
{
    /* BWB - FIX ME - need to have this actually do something,
       maybe? */

    /*
     * The shared base request lists are constructed in mca_pml_base_open();
     * here they are only initialized with the CM request sizes. Each element
     * must be able to hold either the thin or the heavy request flavor, plus
     * the MTL's private per-request area (ompi_mtl->mtl_request_size).
     */
    ompi_free_list_init(&mca_pml_base_send_requests,
                        MAX(sizeof(mca_pml_cm_thin_send_request_t),
                            sizeof(mca_pml_cm_hvy_send_request_t)) + ompi_mtl->mtl_request_size,
                        OBJ_CLASS(mca_pml_cm_hvy_send_request_t),
                        ompi_pml_cm.free_list_num,
                        ompi_pml_cm.free_list_max,
                        ompi_pml_cm.free_list_inc,
                        NULL);

    /*
     * Receive requests go into the *receive* list. The previous code passed
     * mca_pml_base_send_requests here a second time, which re-initialized the
     * send list with receive-request parameters and left the receive list
     * uninitialized.
     */
    ompi_free_list_init(&mca_pml_base_recv_requests,
                        MAX(sizeof(mca_pml_cm_thin_recv_request_t),
                            sizeof(mca_pml_cm_hvy_recv_request_t)) + ompi_mtl->mtl_request_size,
                        OBJ_CLASS(mca_pml_cm_hvy_recv_request_t),
                        ompi_pml_cm.free_list_num,
                        ompi_pml_cm.free_list_max,
                        ompi_pml_cm.free_list_inc,
                        NULL);

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -34,6 +34,10 @@ struct mca_mtl_request_t;
/**
 * CM PML module state: the base PML module followed by the tunables that
 * the component registers as MCA integer parameters.
 */
struct ompi_pml_cm_t {
/* base PML module; must stay first so the struct can be used as one */
mca_pml_base_module_t super;
/* "free_list_num" MCA parameter; passed to ompi_free_list_init() for the request lists */
int free_list_num;
/* "free_list_max" MCA parameter; passed to ompi_free_list_init() for the request lists */
int free_list_max;
/* "free_list_inc" MCA parameter; passed to ompi_free_list_init() for the request lists */
int free_list_inc;
/* "priority" MCA parameter; reported through *priority by the component init function */
int default_priority;
};
typedef struct ompi_pml_cm_t ompi_pml_cm_t;
/* the single CM module instance */
extern ompi_pml_cm_t ompi_pml_cm;

Просмотреть файл

@ -64,11 +64,6 @@ mca_pml_base_component_1_0_0_t mca_pml_cm_component = {
mca_pml_cm_component_fini /* component finalize */
};
static int free_list_num = 0;
static int free_list_max = 0;
static int free_list_inc = 0;
static int default_priority = 30;
static int
mca_pml_cm_component_open(void)
{
@ -83,7 +78,7 @@ mca_pml_cm_component_open(void)
false,
false,
4,
&free_list_num);
&ompi_pml_cm.free_list_num);
mca_base_param_reg_int(&mca_pml_cm_component.pmlm_version,
"free_list_max",
@ -91,7 +86,7 @@ mca_pml_cm_component_open(void)
false,
false,
-1,
&free_list_max);
&ompi_pml_cm.free_list_max);
mca_base_param_reg_int(&mca_pml_cm_component.pmlm_version,
"free_list_inc",
@ -99,7 +94,7 @@ mca_pml_cm_component_open(void)
false,
false,
64,
&free_list_inc);
&ompi_pml_cm.free_list_inc);
mca_base_param_reg_int(&mca_pml_cm_component.pmlm_version,
"priority",
@ -107,7 +102,7 @@ mca_pml_cm_component_open(void)
false,
false,
30,
&default_priority);
&ompi_pml_cm.default_priority);
return OMPI_SUCCESS;
}
@ -126,11 +121,12 @@ mca_pml_cm_component_init(int* priority,
bool enable_mpi_threads)
{
int ret;
if((*priority) > default_priority) {
*priority = default_priority;
if((*priority) > ompi_pml_cm.default_priority) {
*priority = ompi_pml_cm.default_priority;
return NULL;
}
*priority = default_priority;
*priority = ompi_pml_cm.default_priority;
opal_output_verbose( 10, 0,
"in cm pml priority is %d\n", *priority);
/* find a useable MTL */
@ -152,26 +148,6 @@ mca_pml_cm_component_init(int* priority,
ompi_pml_cm.super.pml_max_contextid = ompi_mtl->mtl_max_contextid;
ompi_pml_cm.super.pml_max_tag = ompi_mtl->mtl_max_tag;
OBJ_CONSTRUCT(&mca_pml_base_send_requests, ompi_free_list_t);
ompi_free_list_init(&mca_pml_base_send_requests,
MAX(sizeof(mca_pml_cm_thin_send_request_t),
sizeof(mca_pml_cm_hvy_send_request_t)) + ompi_mtl->mtl_request_size,
OBJ_CLASS(mca_pml_cm_hvy_send_request_t),
free_list_num,
free_list_max,
free_list_inc,
NULL);
OBJ_CONSTRUCT(&mca_pml_base_recv_requests, ompi_free_list_t);
ompi_free_list_init(&mca_pml_base_send_requests,
MAX(sizeof(mca_pml_cm_thin_recv_request_t),
sizeof(mca_pml_cm_hvy_recv_request_t)) + ompi_mtl->mtl_request_size,
OBJ_CLASS(mca_pml_cm_hvy_recv_request_t),
free_list_num,
free_list_max,
free_list_inc,
NULL);
/* initialize buffered send code */
if(OMPI_SUCCESS != mca_pml_base_bsend_init(enable_mpi_threads)) {
opal_output(0, "mca_pml_cm_component_init: mca_pml_bsend_init failed\n");

Просмотреть файл

@ -62,13 +62,56 @@ mca_pml_dr_t mca_pml_dr = {
}
};
void mca_pml_dr_error_handler(
struct mca_btl_base_module_t* btl,
int32_t flags);
void mca_pml_dr_error_handler( struct mca_btl_base_module_t* btl,
int32_t flags );
/**
 * Enable hook of the DR PML module.
 *
 * When called with enable == false nothing is set up and success is
 * reported. Otherwise the shared base send/receive request lists are
 * initialized with the DR request sizes, the DR-private free lists, pending
 * lists, endpoint array and lock are constructed, and the module is marked
 * as enabled.
 *
 * @param enable  true to set the module up, false to do nothing.
 * @return OMPI_SUCCESS in all cases; the return values of
 *         ompi_free_list_init() are not checked.
 */
int mca_pml_dr_enable(bool enable)
{
if( false == enable ) return OMPI_SUCCESS;
/* requests: the shared base lists are only initialized here, not
 * constructed (mca_pml_base_open() constructs them) */
ompi_free_list_init( &mca_pml_base_send_requests,
sizeof(mca_pml_dr_send_request_t),
OBJ_CLASS(mca_pml_dr_send_request_t),
mca_pml_dr.free_list_num,
mca_pml_dr.free_list_max,
mca_pml_dr.free_list_inc,
NULL );
ompi_free_list_init( &mca_pml_base_recv_requests,
sizeof(mca_pml_dr_recv_request_t),
OBJ_CLASS(mca_pml_dr_recv_request_t),
mca_pml_dr.free_list_num,
mca_pml_dr.free_list_max,
mca_pml_dr.free_list_inc,
NULL );
/* fragments: DR-private free lists, constructed and initialized here */
OBJ_CONSTRUCT(&mca_pml_dr.recv_frags, ompi_free_list_t);
ompi_free_list_init( &mca_pml_dr.recv_frags,
sizeof(mca_pml_dr_recv_frag_t),
OBJ_CLASS(mca_pml_dr_recv_frag_t),
mca_pml_dr.free_list_num,
mca_pml_dr.free_list_max,
mca_pml_dr.free_list_inc,
NULL );
OBJ_CONSTRUCT(&mca_pml_dr.vfrags, ompi_free_list_t);
ompi_free_list_init( &mca_pml_dr.vfrags,
sizeof(mca_pml_dr_vfrag_t),
OBJ_CLASS(mca_pml_dr_vfrag_t),
mca_pml_dr.free_list_num,
mca_pml_dr.free_list_max,
mca_pml_dr.free_list_inc,
NULL );
/* pending-operation lists */
OBJ_CONSTRUCT(&mca_pml_dr.send_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_dr.send_active, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_dr.acks_pending, opal_list_t);
/* buffers is constructed but not initialized in this function */
OBJ_CONSTRUCT(&mca_pml_dr.buffers, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_pml_dr.endpoints, ompi_pointer_array_t);
OBJ_CONSTRUCT(&mca_pml_dr.lock, opal_mutex_t);
/* mca_pml_dr_component_fini() checks this flag before tearing down */
mca_pml_dr.enabled = true;
return OMPI_SUCCESS;
}
@ -234,7 +277,16 @@ int mca_pml_dr_del_procs(ompi_proc_t** procs, size_t nprocs)
/**
 * Finalize the DR PML component.
 *
 * Does nothing if the module was never enabled. Otherwise clears the
 * enabled flag and destructs the pending lists, the receive-fragment free
 * list and the buffers free list.
 *
 * NOTE(review): mca_pml_dr_enable() also constructs mca_pml_dr.vfrags,
 * mca_pml_dr.endpoints and mca_pml_dr.lock, none of which are destructed
 * here -- confirm whether that is intentional.
 *
 * @return OMPI_SUCCESS in all cases.
 */
int mca_pml_dr_component_fini(void)
{
/* FIX */
if(!mca_pml_dr.enabled)
return OMPI_SUCCESS; /* never selected.. return success.. */
mca_pml_dr.enabled = false; /* not anymore */
OBJ_DESTRUCT(&mca_pml_dr.send_pending);
OBJ_DESTRUCT(&mca_pml_dr.send_active);
OBJ_DESTRUCT(&mca_pml_dr.acks_pending);
OBJ_DESTRUCT(&mca_pml_dr.recv_frags);
OBJ_DESTRUCT(&mca_pml_dr.buffers);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -112,59 +112,9 @@ int mca_pml_dr_component_open(void)
/* default is to csum all data */
mca_pml_dr.enable_csum =
mca_pml_dr_param_register_int("enable_csum", 1);
/* requests */
OBJ_CONSTRUCT(&mca_pml_base_send_requests, ompi_free_list_t);
ompi_free_list_init(
&mca_pml_base_send_requests,
sizeof(mca_pml_dr_send_request_t),
OBJ_CLASS(mca_pml_dr_send_request_t),
mca_pml_dr.free_list_num,
mca_pml_dr.free_list_max,
mca_pml_dr.free_list_inc,
NULL);
OBJ_CONSTRUCT(&mca_pml_base_recv_requests, ompi_free_list_t);
ompi_free_list_init(
&mca_pml_base_recv_requests,
sizeof(mca_pml_dr_recv_request_t),
OBJ_CLASS(mca_pml_dr_recv_request_t),
mca_pml_dr.free_list_num,
mca_pml_dr.free_list_max,
mca_pml_dr.free_list_inc,
NULL);
/* fragments */
OBJ_CONSTRUCT(&mca_pml_dr.recv_frags, ompi_free_list_t);
ompi_free_list_init(
&mca_pml_dr.recv_frags,
sizeof(mca_pml_dr_recv_frag_t),
OBJ_CLASS(mca_pml_dr_recv_frag_t),
mca_pml_dr.free_list_num,
mca_pml_dr.free_list_max,
mca_pml_dr.free_list_inc,
NULL);
OBJ_CONSTRUCT(&mca_pml_dr.vfrags, ompi_free_list_t);
ompi_free_list_init(
&mca_pml_dr.vfrags,
sizeof(mca_pml_dr_vfrag_t),
OBJ_CLASS(mca_pml_dr_vfrag_t),
mca_pml_dr.free_list_num,
mca_pml_dr.free_list_max,
mca_pml_dr.free_list_inc,
NULL);
OBJ_CONSTRUCT(&mca_pml_dr.send_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_dr.send_active, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_dr.acks_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_dr.buffers, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_pml_dr.endpoints, ompi_pointer_array_t);
OBJ_CONSTRUCT(&mca_pml_dr.lock, opal_mutex_t);
mca_pml_dr.enabled = false;
return mca_bml_base_open();
return mca_bml_base_open();
}
@ -172,19 +122,9 @@ int mca_pml_dr_component_close(void)
{
int rc;
if(!mca_pml_dr.enabled)
return OMPI_SUCCESS; /* never selected.. return success.. */
if(OMPI_SUCCESS != (rc = mca_bml_base_close()))
return rc;
OBJ_DESTRUCT(&mca_pml_dr.send_pending);
OBJ_DESTRUCT(&mca_pml_dr.send_active);
OBJ_DESTRUCT(&mca_pml_dr.acks_pending);
OBJ_DESTRUCT(&mca_pml_base_recv_requests);
OBJ_DESTRUCT(&mca_pml_base_send_requests);
OBJ_DESTRUCT(&mca_pml_dr.recv_frags);
OBJ_DESTRUCT(&mca_pml_dr.buffers);
return OMPI_SUCCESS;
}
@ -207,8 +147,7 @@ mca_pml_base_module_t* mca_pml_dr_component_init(int* priority,
if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads,
enable_mpi_threads
)) {
enable_mpi_threads )) {
return NULL;
}

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -46,36 +46,106 @@
mca_pml_ob1_t mca_pml_ob1 = {
{
mca_pml_ob1_add_procs,
mca_pml_ob1_del_procs,
mca_pml_ob1_enable,
mca_pml_ob1_progress,
mca_pml_ob1_add_comm,
mca_pml_ob1_del_comm,
mca_pml_ob1_irecv_init,
mca_pml_ob1_irecv,
mca_pml_ob1_recv,
mca_pml_ob1_isend_init,
mca_pml_ob1_isend,
mca_pml_ob1_send,
mca_pml_ob1_iprobe,
mca_pml_ob1_probe,
mca_pml_ob1_start,
mca_pml_ob1_dump,
mca_pml_ob1_ft_event,
32768,
INT_MAX
mca_pml_ob1_add_procs,
mca_pml_ob1_del_procs,
mca_pml_ob1_enable,
mca_pml_ob1_progress,
mca_pml_ob1_add_comm,
mca_pml_ob1_del_comm,
mca_pml_ob1_irecv_init,
mca_pml_ob1_irecv,
mca_pml_ob1_recv,
mca_pml_ob1_isend_init,
mca_pml_ob1_isend,
mca_pml_ob1_send,
mca_pml_ob1_iprobe,
mca_pml_ob1_probe,
mca_pml_ob1_start,
mca_pml_ob1_dump,
mca_pml_ob1_ft_event,
32768,
INT_MAX
}
};
void mca_pml_ob1_error_handler(
struct mca_btl_base_module_t* btl,
int32_t flags);
void mca_pml_ob1_error_handler( struct mca_btl_base_module_t* btl,
int32_t flags );
/**
 * Enable hook of the OB1 PML module.
 *
 * When called with enable == false nothing is set up and success is
 * reported. Otherwise the OB1-private lock, fragment free lists and
 * pending-operation lists are constructed, the shared base send/receive
 * request lists are initialized with the OB1 request sizes, and the module
 * is marked as enabled.
 *
 * @param enable  true to set the module up, false to do nothing.
 * @return OMPI_SUCCESS in all cases; the return values of
 *         ompi_free_list_init() are not checked.
 */
int mca_pml_ob1_enable(bool enable)
{
if( false == enable ) return OMPI_SUCCESS;
OBJ_CONSTRUCT(&mca_pml_ob1.lock, opal_mutex_t);
/* fragments */
OBJ_CONSTRUCT(&mca_pml_ob1.rdma_frags, ompi_free_list_t);
ompi_free_list_init( &mca_pml_ob1.rdma_frags,
sizeof(mca_pml_ob1_rdma_frag_t),
OBJ_CLASS(mca_pml_ob1_rdma_frag_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL );
OBJ_CONSTRUCT(&mca_pml_ob1.recv_frags, ompi_free_list_t);
/* each receive fragment is allocated with unexpected_limit extra bytes
 * behind the descriptor */
ompi_free_list_init( &mca_pml_ob1.recv_frags,
sizeof(mca_pml_ob1_recv_frag_t) + mca_pml_ob1.unexpected_limit,
OBJ_CLASS(mca_pml_ob1_recv_frag_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL );
OBJ_CONSTRUCT(&mca_pml_ob1.pending_pckts, ompi_free_list_t);
ompi_free_list_init( &mca_pml_ob1.pending_pckts,
sizeof(mca_pml_ob1_pckt_pending_t),
OBJ_CLASS(mca_pml_ob1_pckt_pending_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL );
/* buffers is constructed but not initialized in this function */
OBJ_CONSTRUCT(&mca_pml_ob1.buffers, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_pml_ob1.send_ranges, ompi_free_list_t);
/* element size leaves room for max_send_per_range BTL entries; presumably
 * mca_pml_ob1_send_range_t already embeds one of them -- TODO confirm */
ompi_free_list_init( &mca_pml_ob1.send_ranges,
sizeof(mca_pml_ob1_send_range_t) +
(mca_pml_ob1.max_send_per_range - 1) * sizeof(mca_pml_ob1_com_btl_t),
OBJ_CLASS(mca_pml_ob1_send_range_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL );
/* pending operations */
OBJ_CONSTRUCT(&mca_pml_ob1.send_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_ob1.recv_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_ob1.pckt_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_ob1.rdma_pending, opal_list_t);
/**
 * Reaching this point means this PML was selected for the run. The
 * shared send and receive request lists are only initialized here
 * (mca_pml_base_open() constructs them), using the size of our own
 * request types.
 */
ompi_free_list_init( &mca_pml_base_send_requests,
sizeof(mca_pml_ob1_send_request_t),
OBJ_CLASS(mca_pml_ob1_send_request_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL );
ompi_free_list_init( &mca_pml_base_recv_requests,
sizeof(mca_pml_ob1_recv_request_t),
OBJ_CLASS(mca_pml_ob1_recv_request_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL );
/* mca_pml_ob1_component_fini() checks this flag before tearing down */
mca_pml_ob1.enabled = true;
return OMPI_SUCCESS;
}
@ -192,24 +262,6 @@ int mca_pml_ob1_del_procs(ompi_proc_t** procs, size_t nprocs)
return mca_bml.bml_del_procs(nprocs, procs);
}
int mca_pml_ob1_component_fini(void)
{
int rc;
/* Shutdown BML */
if(OMPI_SUCCESS != (rc = mca_bml.bml_finalize()))
return rc;
/* Shutdown buffered send */
if(OMPI_SUCCESS != (rc = mca_pml_base_bsend_fini())) {
return rc;
}
/* FIX */
return OMPI_SUCCESS;
}
/*
* diagnostics
*/
@ -237,11 +289,10 @@ int mca_pml_ob1_dump(struct ompi_communicator_t* comm, int verbose)
return OMPI_SUCCESS;
}
static void mca_pml_ob1_fin_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status)
static void mca_pml_ob1_fin_completion( mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status )
{
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
@ -252,13 +303,11 @@ static void mca_pml_ob1_fin_completion(
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
}
int mca_pml_ob1_send_fin(
ompi_proc_t* proc,
mca_bml_base_btl_t* bml_btl,
void *hdr_des,
uint8_t order,
uint32_t status
)
int mca_pml_ob1_send_fin( ompi_proc_t* proc,
mca_bml_base_btl_t* bml_btl,
void *hdr_des,
uint8_t order,
uint32_t status )
{
mca_btl_base_descriptor_t* fin;
mca_pml_ob1_fin_hdr_t* hdr;

Просмотреть файл

@ -37,9 +37,8 @@
#include "ompi/proc/proc.h"
#include "ompi/mca/allocator/base/base.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
BEGIN_C_DECLS
/**
* OB1 PML module
*/
@ -84,25 +83,6 @@ typedef struct mca_pml_ob1_t mca_pml_ob1_t;
extern mca_pml_ob1_t mca_pml_ob1;
/*
* PML module functions.
*/
extern int mca_pml_ob1_component_open(void);
extern int mca_pml_ob1_component_close(void);
extern mca_pml_base_module_t* mca_pml_ob1_component_init(
int *priority,
bool enable_progress_threads,
bool enable_mpi_threads
);
extern int mca_pml_ob1_component_fini(void);
/*
* PML interface functions.
*/
@ -125,106 +105,80 @@ extern int mca_pml_ob1_del_procs(
size_t nprocs
);
extern int mca_pml_ob1_enable(
bool enable
);
extern int mca_pml_ob1_enable( bool enable );
extern int mca_pml_ob1_progress(void);
extern int mca_pml_ob1_iprobe(
int dst,
int tag,
struct ompi_communicator_t* comm,
int *matched,
ompi_status_public_t* status
);
extern int mca_pml_ob1_iprobe( int dst,
int tag,
struct ompi_communicator_t* comm,
int *matched,
ompi_status_public_t* status );
extern int mca_pml_ob1_probe(
int dst,
int tag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status
);
extern int mca_pml_ob1_probe( int dst,
int tag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status );
extern int mca_pml_ob1_isend_init(
void *buf,
size_t count,
ompi_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
struct ompi_request_t **request
);
extern int mca_pml_ob1_isend_init( void *buf,
size_t count,
ompi_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
struct ompi_request_t **request );
extern int mca_pml_ob1_isend(
void *buf,
size_t count,
ompi_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
struct ompi_request_t **request
);
extern int mca_pml_ob1_isend( void *buf,
size_t count,
ompi_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
struct ompi_request_t **request );
extern int mca_pml_ob1_send(
void *buf,
size_t count,
ompi_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm
);
extern int mca_pml_ob1_send( void *buf,
size_t count,
ompi_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm );
extern int mca_pml_ob1_irecv_init(
void *buf,
size_t count,
ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
struct ompi_request_t **request
);
extern int mca_pml_ob1_irecv_init( void *buf,
size_t count,
ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
struct ompi_request_t **request );
extern int mca_pml_ob1_irecv(
void *buf,
size_t count,
ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
struct ompi_request_t **request
);
extern int mca_pml_ob1_irecv( void *buf,
size_t count,
ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
struct ompi_request_t **request );
extern int mca_pml_ob1_recv(
void *buf,
size_t count,
ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status
);
extern int mca_pml_ob1_recv( void *buf,
size_t count,
ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status );
extern int mca_pml_ob1_dump(
struct ompi_communicator_t* comm,
int verbose
);
extern int mca_pml_ob1_dump( struct ompi_communicator_t* comm,
int verbose );
extern int mca_pml_ob1_start(
size_t count,
ompi_request_t** requests
);
extern int mca_pml_ob1_start( size_t count,
ompi_request_t** requests );
extern int mca_pml_ob1_ft_event(
int state
);
extern int mca_pml_ob1_ft_event( int state );
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
END_C_DECLS
#define MCA_PML_OB1_DES_ALLOC(bml_btl, des, order, size) \
MCA_BML_BASE_BTL_DES_ALLOC(bml_btl, des, order, \
@ -300,6 +254,7 @@ void mca_pml_ob1_process_pending_rdma(void);
if(opal_list_get_size(&mca_pml_ob1.rdma_pending)) \
mca_pml_ob1_process_pending_rdma(); \
} while (0)
/*
* Compute the total number of bytes on supplied descriptor
*/
@ -325,28 +280,28 @@ int mca_pml_ob1_com_btl_comp(const void *v1, const void *v2);
/* Calculate what percentage of a message to send through each BTL according to
* relative weight */
static inline void mca_pml_ob1_calc_weighted_length(
mca_pml_ob1_com_btl_t *btls, int num_btls, size_t size,
double weight_total)
static inline void
mca_pml_ob1_calc_weighted_length( mca_pml_ob1_com_btl_t *btls, int num_btls, size_t size,
double weight_total )
{
int i;
size_t length_left = size;
size_t length_left;
/* shortcut for common case for only one BTL */
if(num_btls == 1) {
if( OPAL_LIKELY(1 == num_btls) ) {
btls[0].length = size;
return;
}
/* sort BTLs according of their weights so BTLs with smaller weight will
* not hijack all of the traffic */
qsort(btls, num_btls, sizeof(mca_pml_ob1_com_btl_t),
mca_pml_ob1_com_btl_comp);
qsort( btls, num_btls, sizeof(mca_pml_ob1_com_btl_t),
mca_pml_ob1_com_btl_comp );
for(i = 0; i < num_btls; i++) {
for(length_left = size, i = 0; i < num_btls; i++) {
mca_bml_base_btl_t* bml_btl = btls[i].bml_btl;
size_t length = 0;
if(length_left != 0) {
if( OPAL_UNLIKELY(0 != length_left) ) {
length = (length_left > bml_btl->btl_eager_limit)?
((size_t)(size * (bml_btl->btl_weight / weight_total))) :
length_left;

Просмотреть файл

@ -35,12 +35,17 @@
#include "pml_ob1_component.h"
#include "ompi/mca/allocator/base/base.h"
OBJ_CLASS_INSTANCE(
mca_pml_ob1_pckt_pending_t,
ompi_free_list_item_t,
NULL,
NULL
);
OBJ_CLASS_INSTANCE( mca_pml_ob1_pckt_pending_t,
ompi_free_list_item_t,
NULL,
NULL );
static int mca_pml_ob1_component_open(void);
static int mca_pml_ob1_component_close(void);
static mca_pml_base_module_t*
mca_pml_ob1_component_init( int* priority, bool enable_progress_threads,
bool enable_mpi_threads );
static int mca_pml_ob1_component_fini(void);
mca_pml_base_component_1_0_0_t mca_pml_ob1_component = {
@ -90,11 +95,10 @@ static inline int mca_pml_ob1_param_register_int(
return param_value;
}
int mca_pml_ob1_component_open(void)
static int mca_pml_ob1_component_open(void)
{
mca_allocator_base_component_t* allocator_component;
mca_allocator_base_component_t* allocator_component;
mca_pml_ob1.free_list_num =
mca_pml_ob1_param_register_int("free_list_num", 4);
mca_pml_ob1.free_list_max =
@ -126,75 +130,19 @@ int mca_pml_ob1_component_open(void)
"bucket",
&mca_pml_ob1.allocator_name);
allocator_component = mca_allocator_component_lookup( mca_pml_ob1.allocator_name );
if(NULL == allocator_component) {
opal_output(0, "mca_pml_ob1_component_open: can't find allocator: %s\n", mca_pml_ob1.allocator_name);
return OMPI_ERROR;
}
mca_pml_ob1.allocator = allocator_component->allocator_init(true,
mca_pml_ob1_seg_alloc, mca_pml_ob1_seg_free, NULL);
mca_pml_ob1_seg_alloc,
mca_pml_ob1_seg_free, NULL);
if(NULL == mca_pml_ob1.allocator) {
opal_output(0, "mca_pml_ob1_component_open: unable to initialize allocator\n");
return OMPI_ERROR;
}
OBJ_CONSTRUCT(&mca_pml_ob1.lock, opal_mutex_t);
/* fragments */
OBJ_CONSTRUCT(&mca_pml_ob1.rdma_frags, ompi_free_list_t);
ompi_free_list_init(
&mca_pml_ob1.rdma_frags,
sizeof(mca_pml_ob1_rdma_frag_t),
OBJ_CLASS(mca_pml_ob1_rdma_frag_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL);
OBJ_CONSTRUCT(&mca_pml_ob1.recv_frags, ompi_free_list_t);
ompi_free_list_init(
&mca_pml_ob1.recv_frags,
sizeof(mca_pml_ob1_recv_frag_t) + mca_pml_ob1.unexpected_limit,
OBJ_CLASS(mca_pml_ob1_recv_frag_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL);
OBJ_CONSTRUCT(&mca_pml_ob1.pending_pckts, ompi_free_list_t);
ompi_free_list_init(
&mca_pml_ob1.pending_pckts,
sizeof(mca_pml_ob1_pckt_pending_t),
OBJ_CLASS(mca_pml_ob1_pckt_pending_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL);
OBJ_CONSTRUCT(&mca_pml_ob1.buffers, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_pml_ob1.send_ranges, ompi_free_list_t);
ompi_free_list_init(
&mca_pml_ob1.send_ranges,
sizeof(mca_pml_ob1_send_range_t) +
(mca_pml_ob1.max_send_per_range - 1) * sizeof(mca_pml_ob1_com_btl_t),
OBJ_CLASS(mca_pml_ob1_send_range_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL);
/* pending operations */
OBJ_CONSTRUCT(&mca_pml_ob1.send_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_ob1.recv_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_ob1.pckt_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_ob1.rdma_pending, opal_list_t);
mca_pml_ob1.leave_pinned = ompi_mpi_leave_pinned;
mca_pml_ob1.leave_pinned_pipeline = (int) ompi_mpi_leave_pinned_pipeline;
@ -204,55 +152,21 @@ int mca_pml_ob1_component_open(void)
}
int mca_pml_ob1_component_close(void)
static int mca_pml_ob1_component_close(void)
{
int rc;
if(!mca_pml_ob1.enabled)
return OMPI_SUCCESS; /* never selected.. return success.. */
if(OMPI_SUCCESS != (rc = mca_bml_base_close()))
return rc;
OBJ_DESTRUCT(&mca_pml_ob1.rdma_pending);
OBJ_DESTRUCT(&mca_pml_ob1.pckt_pending);
OBJ_DESTRUCT(&mca_pml_ob1.recv_pending);
OBJ_DESTRUCT(&mca_pml_ob1.send_pending);
OBJ_DESTRUCT(&mca_pml_ob1.buffers);
OBJ_DESTRUCT(&mca_pml_ob1.pending_pckts);
OBJ_DESTRUCT(&mca_pml_ob1.recv_frags);
OBJ_DESTRUCT(&mca_pml_ob1.rdma_frags);
OBJ_DESTRUCT(&mca_pml_ob1.lock);
/* destroy the global free lists */
OBJ_DESTRUCT(&mca_pml_base_send_requests);
OBJ_DESTRUCT(&mca_pml_base_recv_requests);
if(OMPI_SUCCESS != (rc = mca_pml_ob1.allocator->alc_finalize(mca_pml_ob1.allocator))) {
return rc;
}
#if 0
if (mca_pml_ob1.send_requests.fl_num_allocated !=
mca_pml_ob1.send_requests.super.opal_list_length) {
opal_output(0, "ob1 send requests: %d allocated %d returned\n",
mca_pml_ob1.send_requests.fl_num_allocated,
mca_pml_ob1.send_requests.super.opal_list_length);
}
if (mca_pml_ob1.recv_requests.fl_num_allocated !=
mca_pml_ob1.recv_requests.super.opal_list_length) {
opal_output(0, "ob1 recv requests: %d allocated %d returned\n",
mca_pml_ob1.recv_requests.fl_num_allocated,
mca_pml_ob1.recv_requests.super.opal_list_length);
}
#endif
return OMPI_SUCCESS;
}
mca_pml_base_module_t* mca_pml_ob1_component_init(int* priority,
bool enable_progress_threads,
bool enable_mpi_threads)
static mca_pml_base_module_t*
mca_pml_ob1_component_init( int* priority,
bool enable_progress_threads,
bool enable_mpi_threads )
{
opal_output_verbose( 10, 0,
"in ob1, my priority is %d\n", mca_pml_ob1.priority);
@ -282,32 +196,59 @@ mca_pml_base_module_t* mca_pml_ob1_component_init(int* priority,
*/
mca_pml_ob1.super.pml_progress = mca_bml.bml_progress;
/**
* If we get here this is the PML who get selected for the run. We
* should get ownership for the send and receive requests list, and
* initialize them with the size of our own requests.
*/
OBJ_CONSTRUCT(&mca_pml_base_send_requests, ompi_free_list_t);
ompi_free_list_init( &mca_pml_base_send_requests,
sizeof(mca_pml_ob1_send_request_t),
OBJ_CLASS(mca_pml_ob1_send_request_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL );
OBJ_CONSTRUCT(&mca_pml_base_recv_requests, ompi_free_list_t);
ompi_free_list_init( &mca_pml_base_recv_requests,
sizeof(mca_pml_ob1_recv_request_t),
OBJ_CLASS(mca_pml_ob1_recv_request_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL );
return &mca_pml_ob1.super;
}
/**
 * Finalize the OB1 PML component.
 *
 * The BML and the buffered-send support are shut down first, regardless of
 * whether the module was enabled. If the module was enabled, every object
 * constructed by mca_pml_ob1_enable() is then destructed and the allocator
 * is finalized. The shared base send/receive request lists are not touched
 * here.
 *
 * @return OMPI_SUCCESS on success, otherwise the first error code returned
 *         by the BML finalize, the bsend finalize or the allocator finalize.
 */
int mca_pml_ob1_component_fini(void)
{
    int rc;

    /* Shutdown BML */
    if(OMPI_SUCCESS != (rc = mca_bml.bml_finalize()))
        return rc;

    /* Shutdown buffered send */
    if(OMPI_SUCCESS != (rc = mca_pml_base_bsend_fini())) {
        return rc;
    }

    if(!mca_pml_ob1.enabled)
        return OMPI_SUCCESS; /* never selected.. return success.. */
    mca_pml_ob1.enabled = false;  /* not anymore */

    /* Tear down in the reverse order of mca_pml_ob1_enable(). */
    OBJ_DESTRUCT(&mca_pml_ob1.rdma_pending);
    OBJ_DESTRUCT(&mca_pml_ob1.pckt_pending);
    OBJ_DESTRUCT(&mca_pml_ob1.recv_pending);
    OBJ_DESTRUCT(&mca_pml_ob1.send_pending);
    /* send_ranges is constructed and initialized in mca_pml_ob1_enable();
     * it was previously never destructed, leaking its elements. */
    OBJ_DESTRUCT(&mca_pml_ob1.send_ranges);
    OBJ_DESTRUCT(&mca_pml_ob1.buffers);
    OBJ_DESTRUCT(&mca_pml_ob1.pending_pckts);
    OBJ_DESTRUCT(&mca_pml_ob1.recv_frags);
    OBJ_DESTRUCT(&mca_pml_ob1.rdma_frags);
    OBJ_DESTRUCT(&mca_pml_ob1.lock);

    if(OMPI_SUCCESS != (rc = mca_pml_ob1.allocator->alc_finalize(mca_pml_ob1.allocator))) {
        return rc;
    }

#if 0
    if (mca_pml_base_send_requests.fl_num_allocated !=
        mca_pml_base_send_requests.super.opal_list_length) {
        opal_output(0, "ob1 send requests: %d allocated %d returned\n",
                    mca_pml_base_send_requests.fl_num_allocated,
                    mca_pml_base_send_requests.super.opal_list_length);
    }
    if (mca_pml_base_recv_requests.fl_num_allocated !=
        mca_pml_base_recv_requests.super.opal_list_length) {
        opal_output(0, "ob1 recv requests: %d allocated %d returned\n",
                    mca_pml_base_recv_requests.fl_num_allocated,
                    mca_pml_base_recv_requests.super.opal_list_length);
    }
#endif

    return OMPI_SUCCESS;
}
void *mca_pml_ob1_seg_alloc( struct mca_mpool_base_module_t* mpool,
size_t* size,
mca_mpool_base_registration_t** registration) {

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -42,11 +42,11 @@ int mca_pml_ob1_isend_init(void *buf,
return rc;
MCA_PML_OB1_SEND_REQUEST_INIT(sendreq,
buf,
count,
datatype,
dst, tag,
comm, sendmode, true);
buf,
count,
datatype,
dst, tag,
comm, sendmode, true);
*request = (ompi_request_t *) sendreq;
return OMPI_SUCCESS;
@ -69,11 +69,11 @@ int mca_pml_ob1_isend(void *buf,
return rc;
MCA_PML_OB1_SEND_REQUEST_INIT(sendreq,
buf,
count,
datatype,
dst, tag,
comm, sendmode, false);
buf,
count,
datatype,
dst, tag,
comm, sendmode, false);
MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc);
*request = (ompi_request_t *) sendreq;

Просмотреть файл

@ -37,19 +37,15 @@
#include "ompi/datatype/dt_arch.h"
#include "ompi/peruse/peruse-internal.h"
OBJ_CLASS_INSTANCE(
mca_pml_ob1_buffer_t,
ompi_free_list_item_t,
NULL,
NULL
);
OBJ_CLASS_INSTANCE( mca_pml_ob1_buffer_t,
ompi_free_list_item_t,
NULL,
NULL );
OBJ_CLASS_INSTANCE(
mca_pml_ob1_recv_frag_t,
opal_list_item_t,
NULL,
NULL
);
OBJ_CLASS_INSTANCE( mca_pml_ob1_recv_frag_t,
opal_list_item_t,
NULL,
NULL );
/**
* Static functions.
@ -74,15 +70,15 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl,
* Callback from BTL on receive.
*/
void mca_pml_ob1_recv_frag_callback(
mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* des,
void* cbdata)
void mca_pml_ob1_recv_frag_callback( mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* des,
void* cbdata )
{
mca_btl_base_segment_t* segments = des->des_dst;
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
if(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) {
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) {
return;
}
@ -234,7 +230,7 @@ void mca_pml_ob1_recv_frag_callback(
((opal_list_item_t *)generic_recv)->opal_list_next) { \
/* Check for a match */ \
recv_tag = generic_recv->req_recv.req_base.req_tag; \
if ( frag_tag == recv_tag ) { \
if( OPAL_UNLIKELY(frag_tag == recv_tag) ) { \
break; \
} \
} \
@ -267,12 +263,12 @@ void mca_pml_ob1_recv_frag_callback(
* set by the upper level routine.
*/
#define MCA_PML_OB1_CHECK_WILD_RECEIVES_FOR_MATCH(hdr,comm,proc,return_match) \
do { \
/* local parameters */ \
opal_list_t* wild_receives = &comm->wild_receives; \
MCA_PML_OB1_MATCH_GENERIC_RECEIVES(hdr,wild_receives,proc,return_match); \
} while(0)
/*
 * Run the generic receive matcher over the wild_receives list of `comm`.
 *
 * hdr, proc and return_match are forwarded untouched to
 * MCA_PML_OB1_MATCH_GENERIC_RECEIVES, which does the actual list scan
 * (presumably reporting its result through return_match -- that macro is
 * only partly visible from here, TODO confirm).
 *
 * `comm` is parenthesized in the expansion so that an argument which is an
 * expression rather than a plain identifier still binds as intended.
 */
#define MCA_PML_OB1_CHECK_WILD_RECEIVES_FOR_MATCH(hdr,comm,proc,return_match) \
    do { \
        /* list handed to the generic matcher */ \
        opal_list_t* wild_receives = &(comm)->wild_receives; \
        MCA_PML_OB1_MATCH_GENERIC_RECEIVES(hdr,wild_receives,proc,return_match); \
    } while(0)
/**
@ -289,12 +285,12 @@ do { \
* This routine assumes that the appropriate matching locks are
* set by the upper level routine.
*/
#define MCA_PML_OB1_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(hdr,comm,proc,return_match) \
do { \
/* local variables */ \
opal_list_t* specific_receives = &proc->specific_receives; \
MCA_PML_OB1_MATCH_GENERIC_RECEIVES(hdr,specific_receives,proc,return_match); \
} while(0)
/*
 * Run the generic receive matcher over the specific_receives list of `proc`.
 *
 * hdr, proc and return_match are forwarded untouched to
 * MCA_PML_OB1_MATCH_GENERIC_RECEIVES, which does the actual list scan
 * (presumably reporting its result through return_match -- that macro is
 * only partly visible from here, TODO confirm). `comm` is accepted for
 * signature symmetry with the wild-receive variant and is not used.
 *
 * As the existing header comment states, the caller is expected to hold the
 * appropriate matching locks.
 *
 * `proc` is parenthesized in the expansion so that an argument which is an
 * expression rather than a plain identifier still binds as intended.
 */
#define MCA_PML_OB1_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(hdr,comm,proc,return_match) \
    do { \
        /* list handed to the generic matcher */ \
        opal_list_t* specific_receives = &(proc)->specific_receives; \
        MCA_PML_OB1_MATCH_GENERIC_RECEIVES(hdr,specific_receives,proc,return_match); \
    } while(0)
/**
* Try and match the incoming message fragment to the list of
@ -313,137 +309,120 @@ do { \
* set by the upper level routine.
*/
#define MCA_PML_OB1_CHECK_SPECIFIC_AND_WILD_RECEIVES_FOR_MATCH( \
hdr,comm,proc,return_match) \
do { \
/* local variables */ \
mca_pml_ob1_recv_request_t *specific_recv, *wild_recv; \
mca_pml_sequence_t wild_recv_seq, specific_recv_seq; \
int frag_tag, wild_recv_tag, specific_recv_tag; \
\
/* initialization */ \
frag_tag=hdr->hdr_tag; \
\
/* \
* We know that when this is called, both specific and wild irecvs \
* have been posted. \
*/ \
specific_recv = (mca_pml_ob1_recv_request_t *) \
opal_list_get_first(&(proc)->specific_receives); \
wild_recv = (mca_pml_ob1_recv_request_t *) \
opal_list_get_first(&comm->wild_receives); \
\
specific_recv_seq = specific_recv->req_recv.req_base.req_sequence; \
wild_recv_seq = wild_recv->req_recv.req_base.req_sequence; \
\
while (true) { \
if (wild_recv_seq < specific_recv_seq) { \
/* \
* wild recv is earlier than the specific one. \
*/ \
/* \
* try and match \
*/ \
wild_recv_tag = wild_recv->req_recv.req_base.req_tag; \
if ( (frag_tag == wild_recv_tag) || \
( (wild_recv_tag == OMPI_ANY_TAG) && (0 <= frag_tag) ) ) { \
/* \
* Match made \
*/ \
return_match=wild_recv; \
\
/* remove this recv from the wild receive queue */ \
opal_list_remove_item(&comm->wild_receives, \
(opal_list_item_t *)wild_recv); \
\
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, \
&(wild_recv->req_recv.req_base), \
PERUSE_RECV); \
\
break; \
} \
\
/* \
* No match, go to the next. \
*/ \
wild_recv=(mca_pml_ob1_recv_request_t *) \
((opal_list_item_t *)wild_recv)->opal_list_next; \
\
/* \
* If that was the last wild one, just look at the \
* rest of the specific ones. \
*/ \
if (wild_recv == (mca_pml_ob1_recv_request_t *) \
opal_list_get_end(&comm->wild_receives) ) \
{ \
MCA_PML_OB1_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(hdr, comm, proc, return_match); \
break; \
} \
\
/* \
* Get the sequence number for this recv, and go \
* back to the top of the loop. \
*/ \
wild_recv_seq = wild_recv->req_recv.req_base.req_sequence; \
\
} else { \
/* \
* specific recv is earlier than the wild one. \
*/ \
specific_recv_tag=specific_recv->req_recv.req_base.req_tag; \
if ( (frag_tag == specific_recv_tag) || \
( (specific_recv_tag == OMPI_ANY_TAG) && (0<=frag_tag)) ) \
{ \
/* \
* Match made \
*/ \
return_match = specific_recv; \
/* remove descriptor from specific receive list */ \
opal_list_remove_item(&(proc)->specific_receives, \
(opal_list_item_t *)specific_recv); \
\
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, \
&(specific_recv->req_recv.req_base), \
PERUSE_RECV); \
\
break; \
} \
\
/* \
* No match, go on to the next specific irecv. \
*/ \
specific_recv = (mca_pml_ob1_recv_request_t *) \
((opal_list_item_t *)specific_recv)->opal_list_next; \
\
/* \
* If that was the last specific irecv, process the \
* rest of the wild ones. \
*/ \
if (specific_recv == (mca_pml_ob1_recv_request_t *) \
opal_list_get_end(&(proc)->specific_receives)) \
{ \
MCA_PML_OB1_CHECK_WILD_RECEIVES_FOR_MATCH(hdr, comm, proc, return_match); \
break; \
} \
/* \
* Get the sequence number for this recv, and go \
* back to the top of the loop. \
*/ \
specific_recv_seq = specific_recv->req_recv.req_base.req_sequence; \
} \
} \
} while(0)
/*
 * Match an incoming fragment against BOTH posted-receive lists: the
 * specific_receives list of `proc` and the wild_receives list of `comm`.
 *
 * The two lists are walked together. At every step the candidate with the
 * smaller req_sequence is examined first (the wild one when
 * wild_recv_seq < specific_recv_seq, otherwise the specific one). A
 * candidate matches when its tag equals the fragment tag, or when its tag is
 * OMPI_ANY_TAG and the fragment tag is non-negative.
 *
 * On a match the request is removed from its list, a
 * PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q event is traced and return_match is
 * set to the request. When one list runs out, the remainder of the other
 * list is handled by the corresponding single-list macro, which then owns
 * the final value of return_match.
 *
 * Precondition (from the original comment): both lists hold at least one
 * posted receive when this is invoked; the list heads are dereferenced
 * without an emptiness check.
 *
 * The `hdr` and `comm` arguments are parenthesized in the expansion, the
 * same way `proc` already was, so non-trivial argument expressions bind
 * correctly.
 */
#define MCA_PML_OB1_CHECK_SPECIFIC_AND_WILD_RECEIVES_FOR_MATCH( hdr,comm,proc,return_match) \
    do { \
        /* local variables */ \
        mca_pml_ob1_recv_request_t *specific_recv, *wild_recv; \
        mca_pml_sequence_t wild_recv_seq, specific_recv_seq; \
        int frag_tag, wild_recv_tag, specific_recv_tag; \
        \
        /* tag carried by the incoming fragment */ \
        frag_tag = (hdr)->hdr_tag; \
        \
        /* \
         * We know that when this is called, both specific and wild irecvs \
         * have been posted. \
         */ \
        specific_recv = (mca_pml_ob1_recv_request_t *) \
            opal_list_get_first(&(proc)->specific_receives); \
        wild_recv = (mca_pml_ob1_recv_request_t *) \
            opal_list_get_first(&(comm)->wild_receives); \
        \
        specific_recv_seq = specific_recv->req_recv.req_base.req_sequence; \
        wild_recv_seq = wild_recv->req_recv.req_base.req_sequence; \
        \
        while (true) { \
            if (wild_recv_seq < specific_recv_seq) { \
                /* wild recv is earlier than the specific one. */ \
                /* try and match */ \
                wild_recv_tag = wild_recv->req_recv.req_base.req_tag; \
                if ( (frag_tag == wild_recv_tag) || \
                     ( (wild_recv_tag == OMPI_ANY_TAG) && (0 <= frag_tag) ) ) { \
                    /* Match made */ \
                    return_match=wild_recv; \
                    \
                    /* remove this recv from the wild receive queue */ \
                    opal_list_remove_item(&(comm)->wild_receives, \
                                          (opal_list_item_t *)wild_recv); \
                    \
                    PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, \
                                             &(wild_recv->req_recv.req_base), \
                                             PERUSE_RECV); \
                    \
                    break; \
                } \
                \
                /* No match, go to the next */ \
                wild_recv=(mca_pml_ob1_recv_request_t *) \
                    ((opal_list_item_t *)wild_recv)->opal_list_next; \
                \
                /* \
                 * If that was the last wild one, just look at the \
                 * rest of the specific ones. \
                 */ \
                if (wild_recv == (mca_pml_ob1_recv_request_t *) \
                    opal_list_get_end(&(comm)->wild_receives) ) \
                { \
                    MCA_PML_OB1_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(hdr, comm, proc, return_match); \
                    break; \
                } \
                \
                /* \
                 * Get the sequence number for this recv, and go \
                 * back to the top of the loop. \
                 */ \
                wild_recv_seq = wild_recv->req_recv.req_base.req_sequence; \
                \
            } else { \
                /* specific recv is earlier than the wild one. */ \
                specific_recv_tag=specific_recv->req_recv.req_base.req_tag; \
                if ( (frag_tag == specific_recv_tag) || \
                     ( (specific_recv_tag == OMPI_ANY_TAG) && (0<=frag_tag)) ) \
                { \
                    /* Match made */ \
                    return_match = specific_recv; \
                    /* remove descriptor from specific receive list */ \
                    opal_list_remove_item(&(proc)->specific_receives, \
                                          (opal_list_item_t *)specific_recv); \
                    \
                    PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, \
                                             &(specific_recv->req_recv.req_base), \
                                             PERUSE_RECV); \
                    \
                    break; \
                } \
                \
                /* No match, go on to the next specific irecv. */ \
                specific_recv = (mca_pml_ob1_recv_request_t *) \
                    ((opal_list_item_t *)specific_recv)->opal_list_next; \
                \
                /* \
                 * If that was the last specific irecv, process the \
                 * rest of the wild ones. \
                 */ \
                if (specific_recv == (mca_pml_ob1_recv_request_t *) \
                    opal_list_get_end(&(proc)->specific_receives)) \
                { \
                    MCA_PML_OB1_CHECK_WILD_RECEIVES_FOR_MATCH(hdr, comm, proc, return_match); \
                    break; \
                } \
                /* \
                 * Get the sequence number for this recv, and go \
                 * back to the top of the loop. \
                 */ \
                specific_recv_seq = specific_recv->req_recv.req_base.req_sequence; \
            } \
        } \
    } while(0)
/*
* Specialized matching routines for internal use only.
*/
static bool mca_pml_ob1_check_cantmatch_for_match(
opal_list_t *additional_matches,
mca_pml_ob1_comm_t* comm,
mca_pml_ob1_comm_proc_t *proc);
/*
 * Forward declaration; the definition appears further down in this file.
 * The call site visible in this file runs it only when
 * proc->frags_cant_match is non-empty, passes the address of a local list as
 * additional_matches, and keeps the bool result in additional_match.
 */
static bool mca_pml_ob1_check_cantmatch_for_match( opal_list_t *additional_matches,
mca_pml_ob1_comm_t* comm,
mca_pml_ob1_comm_proc_t *proc );
/**
* RCS/CTS receive side matching
@ -518,7 +497,7 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl,
/* get sequence number of next message that can be processed */
next_msg_seq_expected = (uint16_t)proc->expected_sequence;
if (frag_msg_seq == next_msg_seq_expected) {
if( OPAL_LIKELY(frag_msg_seq == next_msg_seq_expected) ) {
/*
* This is the sequence number we were expecting,
@ -563,7 +542,7 @@ rematch:
}
/* if match found, process data */
if (match) {
if( OPAL_LIKELY(match) ) {
match->req_recv.req_base.req_proc = proc->ompi_proc;
/*
@ -583,7 +562,7 @@ rematch:
/* if no match found, place on unexpected queue */
mca_pml_ob1_recv_frag_t* frag;
MCA_PML_OB1_RECV_FRAG_ALLOC(frag, rc);
if(OMPI_SUCCESS != rc) {
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
OPAL_THREAD_UNLOCK(&comm->matching_lock);
/**
* As we return from the match function, we should generate the expected event.
@ -610,7 +589,7 @@ rematch:
* any fragments on the c_c_frags_cant_match list
* may now be used to form new matches
*/
if (0 < opal_list_get_size(&proc->frags_cant_match)) {
if( OPAL_UNLIKELY(0 < opal_list_get_size(&proc->frags_cant_match)) ) {
additional_match = mca_pml_ob1_check_cantmatch_for_match(&additional_matches,comm,proc);
}
@ -622,7 +601,7 @@ rematch:
*/
mca_pml_ob1_recv_frag_t* frag;
MCA_PML_OB1_RECV_FRAG_ALLOC(frag, rc);
if(OMPI_SUCCESS != rc) {
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
OPAL_THREAD_UNLOCK(&comm->matching_lock);
return rc;
}
@ -633,13 +612,11 @@ rematch:
/* release matching lock before processing fragment */
OPAL_THREAD_UNLOCK(&comm->matching_lock);
if(match != NULL) {
if( OPAL_LIKELY(match != NULL) ) {
mca_pml_ob1_recv_request_progress(match,btl,segments,num_segments);
#if OMPI_WANT_PERUSE
} else {
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
#endif /* OMPI_WANT_PERUSE */
}
if( OPAL_UNLIKELY(additional_match) ) {
opal_list_item_t* item;
@ -669,10 +646,9 @@ rematch:
* set by the upper level routine.
*/
static bool mca_pml_ob1_check_cantmatch_for_match(
opal_list_t *additional_matches,
mca_pml_ob1_comm_t* comm,
mca_pml_ob1_comm_proc_t *proc)
static bool mca_pml_ob1_check_cantmatch_for_match( opal_list_t *additional_matches,
mca_pml_ob1_comm_t* comm,
mca_pml_ob1_comm_proc_t *proc )
{
/* local parameters */
int match_found;
@ -698,11 +674,11 @@ static bool mca_pml_ob1_check_cantmatch_for_match(
* number next_msg_seq_expected
*/
for(frag = (mca_pml_ob1_recv_frag_t *)
opal_list_get_first(&proc->frags_cant_match);
opal_list_get_first(&proc->frags_cant_match);
frag != (mca_pml_ob1_recv_frag_t *)
opal_list_get_end(&proc->frags_cant_match);
opal_list_get_end(&proc->frags_cant_match);
frag = (mca_pml_ob1_recv_frag_t *)
opal_list_get_next(frag))
opal_list_get_next(frag))
{
/*
* If the message has the next expected seq from that proc...
@ -750,7 +726,7 @@ rematch:
}
/* if match found, process data */
if (match) {
if( OPAL_LIKELY(match) ) {
match->req_recv.req_base.req_proc = proc->ompi_proc;
/*

Просмотреть файл

@ -44,7 +44,7 @@ void mca_pml_ob1_recv_request_process_pending(void)
recvreq = (mca_pml_ob1_recv_request_t*)
opal_list_remove_first(&mca_pml_ob1.recv_pending);
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
if(NULL == recvreq)
if( OPAL_UNLIKELY(NULL == recvreq) )
break;
recvreq->req_pending = false;
if(mca_pml_ob1_recv_request_schedule_exclusive(recvreq, NULL) ==
@ -157,7 +157,7 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)des->des_cbdata;
size_t bytes_received = 0;
if(status == OMPI_SUCCESS) {
if( OPAL_LIKELY(status == OMPI_SUCCESS) ) {
MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( des->des_dst, des->des_dst_cnt,
0, bytes_received );
}
@ -189,7 +189,7 @@ int mca_pml_ob1_recv_request_ack_send_btl(
/* allocate descriptor */
MCA_PML_OB1_DES_ALLOC(bml_btl, des, MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_ack_hdr_t));
if(NULL == des) {
if( OPAL_UNLIKELY(NULL == des) ) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -220,7 +220,7 @@ int mca_pml_ob1_recv_request_ack_send_btl(
des->des_cbfunc = mca_pml_ob1_recv_ctl_completion;
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
if(rc != OMPI_SUCCESS) {
if( OPAL_UNLIKELY(rc != OMPI_SUCCESS) ) {
mca_bml_base_free(bml_btl, des);
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -297,18 +297,17 @@ static int mca_pml_ob1_recv_request_ack(
* Return resources used by the RDMA
*/
static void mca_pml_ob1_rget_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status)
static void mca_pml_ob1_rget_completion( mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status )
{
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
mca_pml_ob1_rdma_frag_t* frag = (mca_pml_ob1_rdma_frag_t*)des->des_cbdata;
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)frag->rdma_req;
/* check completion status */
if(OMPI_SUCCESS != status) {
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
/* TSW - FIX */
ORTE_ERROR_LOG(status);
orte_errmgr.abort();
@ -373,7 +372,8 @@ int mca_pml_ob1_recv_request_get_frag( mca_pml_ob1_rdma_frag_t* frag )
frag->rdma_length, PERUSE_RECV);
/* queue up get request */
if(OMPI_SUCCESS != (rc = mca_bml_base_get(bml_btl,descriptor))) {
rc = mca_bml_base_get(bml_btl,descriptor);
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
mca_bml_base_free(bml_btl, descriptor);
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
@ -425,7 +425,7 @@ static void mca_pml_ob1_recv_request_rget(
size += frag->rdma_segs[i].seg_len;
}
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
if(NULL == frag->rdma_bml) {
if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
orte_errmgr.abort();
}
@ -445,11 +445,10 @@ static void mca_pml_ob1_recv_request_rget(
* received and actually delivered to the application.
*/
void mca_pml_ob1_recv_request_progress(
mca_pml_ob1_recv_request_t* recvreq,
mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments,
size_t num_segments)
void mca_pml_ob1_recv_request_progress( mca_pml_ob1_recv_request_t* recvreq,
mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_received = 0;
size_t bytes_delivered = 0;
@ -464,14 +463,13 @@ void mca_pml_ob1_recv_request_progress(
bytes_received -= sizeof(mca_pml_ob1_match_hdr_t);
recvreq->req_recv.req_bytes_packed = bytes_received;
MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq,&hdr->hdr_match);
MCA_PML_OB1_RECV_REQUEST_UNPACK(
recvreq,
segments,
num_segments,
sizeof(mca_pml_ob1_match_hdr_t),
data_offset,
bytes_received,
bytes_delivered);
MCA_PML_OB1_RECV_REQUEST_UNPACK( recvreq,
segments,
num_segments,
sizeof(mca_pml_ob1_match_hdr_t),
data_offset,
bytes_received,
bytes_delivered);
break;
case MCA_PML_OB1_HDR_TYPE_RNDV:
@ -505,14 +503,13 @@ void mca_pml_ob1_recv_request_progress(
* unpack.
*/
if( 0 < bytes_received ) {
MCA_PML_OB1_RECV_REQUEST_UNPACK(
recvreq,
segments,
num_segments,
sizeof(mca_pml_ob1_rendezvous_hdr_t),
data_offset,
bytes_received,
bytes_delivered);
MCA_PML_OB1_RECV_REQUEST_UNPACK( recvreq,
segments,
num_segments,
sizeof(mca_pml_ob1_rendezvous_hdr_t),
data_offset,
bytes_received,
bytes_delivered );
}
break;
@ -526,14 +523,13 @@ void mca_pml_ob1_recv_request_progress(
case MCA_PML_OB1_HDR_TYPE_FRAG:
bytes_received -= sizeof(mca_pml_ob1_frag_hdr_t);
data_offset = hdr->hdr_frag.hdr_frag_offset;
MCA_PML_OB1_RECV_REQUEST_UNPACK(
recvreq,
segments,
num_segments,
sizeof(mca_pml_ob1_frag_hdr_t),
data_offset,
bytes_received,
bytes_delivered);
MCA_PML_OB1_RECV_REQUEST_UNPACK( recvreq,
segments,
num_segments,
sizeof(mca_pml_ob1_frag_hdr_t),
data_offset,
bytes_received,
bytes_delivered );
break;
default:
@ -554,11 +550,10 @@ void mca_pml_ob1_recv_request_progress(
* Handle completion of a probe request
*/
void mca_pml_ob1_recv_request_matched_probe(
mca_pml_ob1_recv_request_t* recvreq,
mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments,
size_t num_segments)
void mca_pml_ob1_recv_request_matched_probe( mca_pml_ob1_recv_request_t* recvreq,
mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments,
size_t num_segments )
{
size_t bytes_packed = 0;
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
@ -600,9 +595,9 @@ int mca_pml_ob1_recv_request_schedule_exclusive(
int num_tries = recvreq->req_rdma_cnt;
size_t i;
size_t bytes_remaining = recvreq->req_send_offset -
recvreq->req_rdma_offset;
recvreq->req_rdma_offset;
if(bytes_remaining == 0) {
if( OPAL_UNLIKELY(0 == bytes_remaining)) {
OPAL_THREAD_ADD32(&recvreq->req_lock, -recvreq->req_lock);
return OMPI_SUCCESS;
}
@ -613,7 +608,7 @@ int mca_pml_ob1_recv_request_schedule_exclusive(
if(recvreq->req_rdma[i].bml_btl != start_bml_btl)
continue;
/* something left to be send? */
if(recvreq->req_rdma[i].length)
if( OPAL_LIKELY(recvreq->req_rdma[i].length) )
recvreq->req_rdma_idx = i;
break;
}
@ -688,7 +683,7 @@ int mca_pml_ob1_recv_request_schedule_exclusive(
}
MCA_PML_OB1_DES_ALLOC(bml_btl, ctl, MCA_BTL_NO_ORDER, hdr_size);
if(ctl == NULL) {
if( OPAL_UNLIKELY(NULL == ctl) ) {
mca_bml_base_free(bml_btl,dst);
continue;
}
@ -734,7 +729,7 @@ int mca_pml_ob1_recv_request_schedule_exclusive(
/* send rdma request to peer */
rc = mca_bml_base_send(bml_btl, ctl, MCA_BTL_TAG_PML);
if(rc == OMPI_SUCCESS) {
if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) {
/* update request state */
recvreq->req_rdma_offset += size;
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,1);

Просмотреть файл

@ -45,11 +45,12 @@ void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl)
mca_pml_ob1_send_pending_t pending_type;
mca_pml_ob1_send_request_t* sendreq;
mca_bml_base_btl_t *send_dst;
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
sendreq = (mca_pml_ob1_send_request_t*)
opal_list_remove_first(&mca_pml_ob1.send_pending);
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
if(NULL == sendreq)
if( OPAL_UNLIKELY(NULL == sendreq) )
break;
pending_type = sendreq->req_pending;
sendreq->req_pending = MCA_PML_OB1_SEND_PENDING_NONE;
@ -70,7 +71,7 @@ void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl)
* endpoint or no resources put request back on the list */
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
sendreq->req_pending = MCA_PML_OB1_SEND_PENDING_START;
if(NULL == send_dst) {
if( OPAL_UNLIKELY(NULL == send_dst) ) {
opal_list_append(&mca_pml_ob1.send_pending,
(opal_list_item_t*)sendreq);
} else {
@ -137,22 +138,21 @@ static void mca_pml_ob1_send_request_construct(mca_pml_ob1_send_request_t* req)
OBJ_CONSTRUCT(&req->req_send_ranges, opal_list_t);
}
OBJ_CLASS_INSTANCE(
mca_pml_ob1_send_request_t,
mca_pml_base_send_request_t,
mca_pml_ob1_send_request_construct,
NULL);
/*
 * Class instance for mca_pml_ob1_send_request_t, declared on top of
 * mca_pml_base_send_request_t. mca_pml_ob1_send_request_construct (defined
 * just above, where it constructs req_send_ranges) is supplied as the third
 * argument; the fourth is NULL -- presumably the destructor slot of
 * OBJ_CLASS_INSTANCE, i.e. no type-specific teardown (TODO confirm against
 * the OPAL object header).
 */
OBJ_CLASS_INSTANCE( mca_pml_ob1_send_request_t,
mca_pml_base_send_request_t,
mca_pml_ob1_send_request_construct,
NULL );
/**
* Completion of a short message - nothing left to schedule. Note that this
* function is only called for 0 sized messages.
*/
void mca_pml_ob1_match_completion_cache(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status)
static void
mca_pml_ob1_match_completion_cache( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status )
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
@ -163,7 +163,7 @@ void mca_pml_ob1_match_completion_cache(
}
/* check completion status */
if(OMPI_SUCCESS != status) {
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
/* TSW - FIX */
opal_output(0, "%s:%d FATAL", __FILE__, __LINE__);
orte_errmgr.abort();
@ -183,11 +183,11 @@ void mca_pml_ob1_match_completion_cache(
* Completion of a short message - nothing left to schedule.
*/
void mca_pml_ob1_match_completion_free(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status)
static void
mca_pml_ob1_match_completion_free( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status )
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
@ -198,7 +198,7 @@ void mca_pml_ob1_match_completion_free(
}
/* check completion status */
if(OMPI_SUCCESS != status) {
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
/* TSW - FIX */
opal_output(0, "%s:%d FATAL", __FILE__, __LINE__);
orte_errmgr.abort();
@ -218,11 +218,11 @@ void mca_pml_ob1_match_completion_free(
* Completion of the first fragment of a long message that
* requires an acknowledgement
*/
static void mca_pml_ob1_rndv_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status)
static void
mca_pml_ob1_rndv_completion( mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status )
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)descriptor->des_context;
@ -234,7 +234,7 @@ static void mca_pml_ob1_rndv_completion(
}
/* check completion status */
if(OMPI_SUCCESS != status) {
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
/* TSW - FIX */
opal_output(0, "%s:%d FATAL", __FILE__, __LINE__);
orte_errmgr.abort();
@ -284,11 +284,11 @@ static void mca_pml_ob1_rndv_completion(
* Completion of a get request.
*/
static void mca_pml_ob1_rget_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status)
static void
mca_pml_ob1_rget_completion( mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status )
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)des->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
@ -312,11 +312,11 @@ static void mca_pml_ob1_rget_completion(
* Completion of a control message - return resources.
*/
static void mca_pml_ob1_send_ctl_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status)
static void
mca_pml_ob1_send_ctl_completion( mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status )
{
/* return the descriptor */
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
@ -331,18 +331,18 @@ static void mca_pml_ob1_send_ctl_completion(
* to schedule additional fragments.
*/
static void mca_pml_ob1_frag_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status)
static void
mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status )
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
size_t req_bytes_delivered = 0;
/* check completion status */
if(OMPI_SUCCESS != status) {
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
/* TSW - FIX */
opal_output(0, "%s:%d FATAL", __FILE__, __LINE__);
orte_errmgr.abort();
@ -370,8 +370,6 @@ static void mca_pml_ob1_frag_completion(
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
}
/**
* Buffer the entire message and mark as complete.
*/
@ -392,7 +390,7 @@ int mca_pml_ob1_send_request_start_buffered(
/* allocate descriptor */
mca_bml_base_alloc(bml_btl, &descriptor,
MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_rendezvous_hdr_t) + size);
if(NULL == descriptor) {
if( OPAL_UNLIKELY(NULL == descriptor) ) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
segment = descriptor->des_src;
@ -445,7 +443,7 @@ int mca_pml_ob1_send_request_start_buffered(
/* buffer the remainder of the message */
rc = mca_pml_base_bsend_request_alloc((ompi_request_t*)sendreq);
if(OMPI_SUCCESS != rc) {
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
mca_bml_base_free(bml_btl, descriptor);
return rc;
}
@ -473,7 +471,7 @@ int mca_pml_ob1_send_request_start_buffered(
/* send */
rc = mca_bml_base_send(bml_btl, descriptor, MCA_BTL_TAG_PML);
if(OMPI_SUCCESS != rc) {
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
mca_bml_base_free(bml_btl, descriptor );
}
return rc;
@ -643,9 +641,9 @@ int mca_pml_ob1_send_request_start_prepare( mca_pml_ob1_send_request_t* sendreq,
*/
int mca_pml_ob1_send_request_start_rdma(
mca_pml_ob1_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl,
size_t size)
mca_pml_ob1_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl,
size_t size)
{
/*
* When req_rdma array is constructed the first element of the array always
@ -676,114 +674,114 @@ int mca_pml_ob1_send_request_start_rdma(
0,
&size,
&src );
if( OPAL_UNLIKELY(NULL == src) ) {
ompi_convertor_set_position(&sendreq->req_send.req_base.req_convertor,
&old_position);
return OMPI_ERR_OUT_OF_RESOURCE;
}
src->des_cbfunc = mca_pml_ob1_rget_completion;
src->des_cbdata = sendreq;
if( OPAL_UNLIKELY(NULL == src) ) {
ompi_convertor_set_position(&sendreq->req_send.req_base.req_convertor,
&old_position);
return OMPI_ERR_OUT_OF_RESOURCE;
}
src->des_cbfunc = mca_pml_ob1_rget_completion;
src->des_cbdata = sendreq;
/* allocate space for get hdr + segment list */
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER,
sizeof(mca_pml_ob1_rget_hdr_t) + (sizeof(mca_btl_base_segment_t)*(src->des_src_cnt-1)));
if(NULL == des) {
ompi_convertor_set_position(&sendreq->req_send.req_base.req_convertor,
&old_position);
mca_bml_base_free(bml_btl, src);
return OMPI_ERR_OUT_OF_RESOURCE;
}
segment = des->des_src;
/* allocate space for get hdr + segment list */
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER,
sizeof(mca_pml_ob1_rget_hdr_t) + (sizeof(mca_btl_base_segment_t)*(src->des_src_cnt-1)));
if( OPAL_UNLIKELY(NULL == des) ) {
ompi_convertor_set_position(&sendreq->req_send.req_base.req_convertor,
&old_position);
mca_bml_base_free(bml_btl, src);
return OMPI_ERR_OUT_OF_RESOURCE;
}
segment = des->des_src;
/* build match header */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RGET;
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
hdr->hdr_rget.hdr_des.pval = src;
hdr->hdr_rget.hdr_seg_cnt = src->des_src_cnt;
/* build match header */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RGET;
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
hdr->hdr_rget.hdr_des.pval = src;
hdr->hdr_rget.hdr_seg_cnt = src->des_src_cnt;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
#ifdef WORDS_BIGENDIAN
hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO;
hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO;
#else
/* if we are little endian and the remote side is big endian,
we're responsible for making sure the data is in network byte
order */
/* RDMA is currently disabled by bml if arch doesn't
match, so this shouldn't be needed. here to make sure
we remember if we ever change the bml. */
assert(0 == (sendreq->req_send.req_base.req_proc->proc_arch &
OMPI_ARCH_ISBIGENDIAN));
/* if we are little endian and the remote side is big endian,
we're responsible for making sure the data is in network byte
order */
/* RDMA is currently disabled by bml if arch doesn't
match, so this shouldn't be needed. here to make sure
we remember if we ever change the bml. */
assert(0 == (sendreq->req_send.req_base.req_proc->proc_arch &
OMPI_ARCH_ISBIGENDIAN));
#endif
#endif
for( i = 0; i < src->des_src_cnt; i++ ) {
hdr->hdr_rget.hdr_segs[i].seg_addr.lval = ompi_ptr_ptol(src->des_src[i].seg_addr.pval);
hdr->hdr_rget.hdr_segs[i].seg_len = src->des_src[i].seg_len;
hdr->hdr_rget.hdr_segs[i].seg_key.key64 = src->des_src[i].seg_key.key64;
}
for( i = 0; i < src->des_src_cnt; i++ ) {
hdr->hdr_rget.hdr_segs[i].seg_addr.lval = ompi_ptr_ptol(src->des_src[i].seg_addr.pval);
hdr->hdr_rget.hdr_segs[i].seg_len = src->des_src[i].seg_len;
hdr->hdr_rget.hdr_segs[i].seg_key.key64 = src->des_src[i].seg_key.key64;
}
des->des_cbfunc = mca_pml_ob1_send_ctl_completion;
des->des_cbfunc = mca_pml_ob1_send_ctl_completion;
/**
* Well, it's a get so we will not know when the peer get the data anyway.
* If we generate the PERUSE event here, at least we will know when do we
* sent the GET message ...
*/
if( sendreq->req_send.req_bytes_packed > 0 ) {
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
&(sendreq->req_send.req_base), PERUSE_SEND );
}
/**
* This is a get, so we will not know when the peer gets the data anyway.
* If we generate the PERUSE event here, at least we will know when we
* sent the GET message ...
*/
if( sendreq->req_send.req_bytes_packed > 0 ) {
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
&(sendreq->req_send.req_base), PERUSE_SEND );
}
} else {
} else {
/* allocate a rendezvous header - dont eager send any data
* receiver will schedule rdma put(s) of the entire message
*/
/* allocate a rendezvous header - dont eager send any data
* receiver will schedule rdma put(s) of the entire message
*/
mca_bml_base_alloc(bml_btl, &des,
MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_rendezvous_hdr_t));
if(NULL == des) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
segment = des->des_src;
mca_bml_base_alloc(bml_btl, &des,
MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_rendezvous_hdr_t));
if( OPAL_UNLIKELY(NULL == des)) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
segment = des->des_src;
/* build hdr */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
/* build hdr */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
#ifdef WORDS_BIGENDIAN
hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO;
hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO;
#else
/* if we are little endian and the remote side is big endian,
we're responsible for making sure the data is in network byte
order */
if (sendreq->req_send.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) {
hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO;
MCA_PML_OB1_RNDV_HDR_HTON(hdr->hdr_rndv);
}
/* if we are little endian and the remote side is big endian,
we're responsible for making sure the data is in network byte
order */
if (sendreq->req_send.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) {
hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO;
MCA_PML_OB1_RNDV_HDR_HTON(hdr->hdr_rndv);
}
#endif
#endif
/* update lengths with number of bytes actually packed */
segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t);
/* update lengths with number of bytes actually packed */
segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t);
/* first fragment of a long message */
des->des_cbfunc = mca_pml_ob1_rndv_completion;
/* first fragment of a long message */
des->des_cbfunc = mca_pml_ob1_rndv_completion;
}
des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
@ -791,7 +789,7 @@ int mca_pml_ob1_send_request_start_rdma(
/* send */
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
if(OMPI_SUCCESS != rc) {
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
mca_bml_base_free(bml_btl, des);
}
return rc;
@ -803,11 +801,10 @@ int mca_pml_ob1_send_request_start_rdma(
* the BTL's eager limit.
*/
int mca_pml_ob1_send_request_start_rndv(
mca_pml_ob1_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl,
size_t size,
int flags)
int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl,
size_t size,
int flags )
{
mca_btl_base_descriptor_t* des;
mca_btl_base_segment_t* segment;
@ -830,7 +827,7 @@ int mca_pml_ob1_send_request_start_rndv(
&des );
}
if( NULL == des ) {
if( OPAL_UNLIKELY(NULL == des) ) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
segment = des->des_src;
@ -867,14 +864,15 @@ int mca_pml_ob1_send_request_start_rndv(
/* send */
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
if(OMPI_SUCCESS != rc) {
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
mca_bml_base_free(bml_btl, des );
}
return rc;
}
void mca_pml_ob1_send_request_copy_in_out(mca_pml_ob1_send_request_t *sendreq,
uint64_t send_offset, uint64_t send_length)
void mca_pml_ob1_send_request_copy_in_out( mca_pml_ob1_send_request_t *sendreq,
uint64_t send_offset,
uint64_t send_length )
{
mca_pml_ob1_send_range_t *sr;
ompi_free_list_item_t *i;
@ -883,7 +881,7 @@ void mca_pml_ob1_send_request_copy_in_out(mca_pml_ob1_send_request_t *sendreq,
int rc = OMPI_SUCCESS, n;
double weight_total = 0;
if(0 == send_length)
if( OPAL_UNLIKELY(0 == send_length) )
return;
OMPI_FREE_LIST_WAIT(&mca_pml_ob1.send_ranges, i, rc);
@ -934,7 +932,7 @@ int mca_pml_ob1_send_request_schedule_exclusive(
opal_list_item_t *item;
mca_bml_base_btl_t* bml_btl;
if(NULL == range || 0 == range->range_send_length) {
if( OPAL_UNLIKELY(NULL == range || 0 == range->range_send_length) ) {
OPAL_THREAD_LOCK(&sendreq->req_send_range_lock);
if(range) {
opal_list_remove_first(&sendreq->req_send_ranges);
@ -964,7 +962,7 @@ int mca_pml_ob1_send_request_schedule_exclusive(
prev_bytes_remaining = range->range_send_length;
if (num_fail == range->range_btl_cnt) {
if( OPAL_UNLIKELY(num_fail == range->range_btl_cnt) ) {
assert(sendreq->req_pending == MCA_PML_OB1_SEND_PENDING_NONE);
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
sendreq->req_pending = MCA_PML_OB1_SEND_PENDING_SCHEDULE;
@ -1037,7 +1035,7 @@ int mca_pml_ob1_send_request_schedule_exclusive(
/* initiate send - note that this may complete before the call returns */
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
if(rc == OMPI_SUCCESS) {
if( OPAL_LIKELY(rc == OMPI_SUCCESS) ) {
/* update state */
range->range_btls[btl_idx].length -= size;
range->range_send_length -= size;
@ -1071,7 +1069,7 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
/* check completion status */
if(OMPI_SUCCESS != status) {
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
/* TSW - FIX */
ORTE_ERROR_LOG(status);
orte_errmgr.abort();
@ -1120,7 +1118,7 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag )
&frag->rdma_length,
&des );
if(NULL == des) {
if( OPAL_UNLIKELY(NULL == des) ) {
if(frag->retries < mca_pml_ob1.rdma_put_retries_limit) {
size_t offset = (size_t)frag->rdma_hdr.hdr_rdma.hdr_rdma_offset;
frag->rdma_length = save_size;
@ -1153,7 +1151,8 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag )
PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE,
&(((mca_pml_ob1_send_request_t*)frag->rdma_req)->req_send.req_base), save_size, PERUSE_SEND );
if(OMPI_SUCCESS != (rc = mca_bml_base_put(bml_btl, des))) {
rc = mca_bml_base_put(bml_btl, des);
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
mca_bml_base_free(bml_btl, des);
frag->rdma_length = save_size;
if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
@ -1193,7 +1192,7 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq,
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag, rc);
if(NULL == frag) {
if( OPAL_UNLIKELY(NULL == frag) ) {
/* TSW - FIX */
ORTE_ERROR_LOG(rc);
orte_errmgr.abort();

Просмотреть файл

@ -30,9 +30,7 @@
#include "ompi/datatype/dt_arch.h"
#include "ompi/mca/bml/bml.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
BEGIN_C_DECLS
typedef enum {
MCA_PML_OB1_SEND_PENDING_NONE,
@ -197,7 +195,7 @@ do {
MCA_PML_OB1_SEND_REQUEST_RETURN( sendreq ); \
} \
OPAL_THREAD_UNLOCK(&ompi_request_lock); \
} while (0)
} while (0)
/**
* Schedule additional fragments
@ -229,7 +227,7 @@ static inline void mca_pml_ob1_send_request_schedule(
MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); \
OMPI_FREE_LIST_RETURN( &mca_pml_base_send_requests, \
(ompi_free_list_item_t*)sendreq); \
}
}
/**
* Start the specified request
@ -261,15 +259,19 @@ int mca_pml_ob1_send_request_start_rndv(
size_t size,
int flags);
static inline int mca_pml_ob1_send_request_start_btl(
mca_pml_ob1_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl)
static inline int
mca_pml_ob1_send_request_start_btl( mca_pml_ob1_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl )
{
size_t size = sendreq->req_send.req_bytes_packed;
size_t eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t);
size_t eager_limit = bml_btl->btl_eager_limit;
int rc;
if(size <= eager_limit) {
if( eager_limit > mca_pml_ob1.eager_limit )
eager_limit = mca_pml_ob1.eager_limit;
eager_limit -= sizeof(mca_pml_ob1_hdr_t);
if( OPAL_LIKELY(size <= eager_limit) ) {
switch(sendreq->req_send.req_send_mode) {
case MCA_PML_BASE_SEND_SYNCHRONOUS:
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0);
@ -293,23 +295,23 @@ static inline int mca_pml_ob1_send_request_start_btl(
if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) {
rc = mca_pml_ob1_send_request_start_buffered(sendreq, bml_btl, size);
} else if
(ompi_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
(ompi_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
unsigned char *base;
ompi_convertor_get_current_pointer( &sendreq->req_send.req_base.req_convertor, (void**)&base );
if( 0 != (sendreq->req_rdma_cnt = (uint32_t)mca_pml_ob1_rdma_btls(
sendreq->req_endpoint,
base,
sendreq->req_send.req_bytes_packed,
sendreq->req_rdma))) {
sendreq->req_endpoint,
base,
sendreq->req_send.req_bytes_packed,
sendreq->req_rdma))) {
rc = mca_pml_ob1_send_request_start_rdma(sendreq, bml_btl,
sendreq->req_send.req_bytes_packed);
if(OMPI_SUCCESS != rc) {
sendreq->req_send.req_bytes_packed);
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
mca_pml_ob1_free_rdma_resources(sendreq);
}
} else {
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size,
MCA_PML_OB1_HDR_FLAGS_CONTIG);
MCA_PML_OB1_HDR_FLAGS_CONTIG);
}
} else {
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0);
@ -319,15 +321,15 @@ static inline int mca_pml_ob1_send_request_start_btl(
return rc;
}
static inline int mca_pml_ob1_send_request_start(
mca_pml_ob1_send_request_t* sendreq)
static inline int
mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* sendreq )
{
mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm;
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
sendreq->req_send.req_base.req_proc->proc_bml;
size_t i;
if(endpoint == NULL) {
if( OPAL_UNLIKELY(endpoint == NULL) ) {
return OMPI_ERR_UNREACH;
}
@ -349,7 +351,7 @@ static inline int mca_pml_ob1_send_request_start(
/* select a btl */
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
rc = mca_pml_ob1_send_request_start_btl(sendreq, bml_btl);
if(OMPI_ERR_OUT_OF_RESOURCE != rc)
if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc) )
return rc;
}
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
@ -360,34 +362,13 @@ static inline int mca_pml_ob1_send_request_start(
return OMPI_SUCCESS;
}
/**
* Completion callback on match header
* Cache descriptor.
*/
void mca_pml_ob1_match_completion_cache(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status);
/**
* Completion callback on match header
* Free descriptor.
*/
void mca_pml_ob1_match_completion_free(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status);
/**
* Initiate a put scheduled by the receiver.
*/
void mca_pml_ob1_send_request_put(
mca_pml_ob1_send_request_t* sendreq,
mca_btl_base_module_t* btl,
mca_pml_ob1_rdma_hdr_t* hdr);
void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq,
mca_btl_base_module_t* btl,
mca_pml_ob1_rdma_hdr_t* hdr );
int mca_pml_ob1_send_request_put_frag(mca_pml_ob1_rdma_frag_t* frag);