Optimize sm communication. Pass the message type (MCA_BTL_SM_FRAG_ACK/
MCA_BTL_SM_FRAG_SEND) and the success/fail status in the low bits of the pointers we pass through the circular buffer. The rank that receives an ACK no longer needs to look into the data it received, which is a big win because that data is not in the cache of that rank's CPU. (Note that we can use the low bits of the pointers because free_list always returns pointers aligned to at least the cache line size.) This commit was SVN r13922.
Этот коммит содержится в:
родитель
90fb58de4f
Коммит
8078ae5977
@ -895,9 +895,8 @@ int mca_btl_sm_send(
|
||||
mca_btl_sm_frag_t* frag = (mca_btl_sm_frag_t*)descriptor;
|
||||
int rc;
|
||||
|
||||
frag->hdr->u.s.len = frag->segment.seg_len;
|
||||
frag->hdr->u.s.tag = tag;
|
||||
frag->hdr->type = MCA_BTL_SM_FRAG_SEND;
|
||||
frag->hdr->len = frag->segment.seg_len;
|
||||
frag->hdr->tag = tag;
|
||||
|
||||
/*
|
||||
* post the descriptor in the queue - post with the relative
|
||||
|
@ -231,7 +231,6 @@ CLEANUP:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SM component initialization
|
||||
*/
|
||||
@ -392,28 +391,32 @@ int mca_btl_sm_component_progress(void)
|
||||
}
|
||||
|
||||
/* dispatch fragment by type */
|
||||
switch(hdr->type) {
|
||||
switch(((uintptr_t)hdr) & MCA_BTL_SM_FRAG_TYPE_MASK) {
|
||||
case MCA_BTL_SM_FRAG_ACK:
|
||||
{
|
||||
frag = hdr->frag;
|
||||
int status = (uintptr_t)hdr & MCA_BTL_SM_FRAG_STATUS_MASK;
|
||||
frag = (mca_btl_sm_frag_t *)((uintptr_t)hdr &
|
||||
(~(MCA_BTL_SM_FRAG_TYPE_MASK |
|
||||
MCA_BTL_SM_FRAG_STATUS_MASK)));
|
||||
/* completion callback */
|
||||
frag->base.des_cbfunc(&mca_btl_sm[0].super, frag->endpoint, &frag->base, hdr->u.rc);
|
||||
frag->base.des_cbfunc(&mca_btl_sm[0].super, frag->endpoint,
|
||||
&frag->base, status?OMPI_ERROR:OMPI_SUCCESS);
|
||||
break;
|
||||
}
|
||||
case MCA_BTL_SM_FRAG_SEND:
|
||||
{
|
||||
/* recv upcall */
|
||||
mca_btl_sm_recv_reg_t* reg = mca_btl_sm[0].sm_reg + hdr->u.s.tag;
|
||||
mca_btl_sm_recv_reg_t* reg = mca_btl_sm[0].sm_reg + hdr->tag;
|
||||
|
||||
MCA_BTL_SM_FRAG_ALLOC(frag, rc);
|
||||
frag->segment.seg_addr.pval = ((char*)hdr) +
|
||||
sizeof(mca_btl_sm_hdr_t);
|
||||
frag->segment.seg_len = hdr->u.s.len;
|
||||
reg->cbfunc(&mca_btl_sm[0].super,hdr->u.s.tag,&frag->base,reg->cbdata);
|
||||
frag->segment.seg_len = hdr->len;
|
||||
reg->cbfunc(&mca_btl_sm[0].super,hdr->tag,&frag->base,reg->cbdata);
|
||||
MCA_BTL_SM_FRAG_RETURN(frag);
|
||||
hdr->type = MCA_BTL_SM_FRAG_ACK;
|
||||
hdr->u.rc = OMPI_SUCCESS;
|
||||
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank],
|
||||
my_smp_rank, peer_smp_rank, hdr, rc );
|
||||
MCA_BTL_SM_FIFO_WRITE(
|
||||
mca_btl_sm_component.sm_peers[peer_smp_rank],
|
||||
my_smp_rank, peer_smp_rank, hdr->frag, rc);
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto err;
|
||||
break;
|
||||
@ -421,10 +424,11 @@ int mca_btl_sm_component_progress(void)
|
||||
default:
|
||||
{
|
||||
/* unknown */
|
||||
hdr->u.rc = OMPI_ERROR;
|
||||
hdr->type = MCA_BTL_SM_FRAG_ACK;
|
||||
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank],
|
||||
my_smp_rank, peer_smp_rank, hdr, rc );
|
||||
hdr = (mca_btl_sm_hdr_t*)((uintptr_t)hdr->frag |
|
||||
MCA_BTL_SM_FRAG_STATUS_MASK);
|
||||
MCA_BTL_SM_FIFO_WRITE(
|
||||
mca_btl_sm_component.sm_peers[peer_smp_rank],
|
||||
my_smp_rank, peer_smp_rank, hdr, rc);
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto err;
|
||||
break;
|
||||
@ -474,34 +478,37 @@ int mca_btl_sm_component_progress(void)
|
||||
opal_atomic_unlock(&(fifo->tail_lock));
|
||||
}
|
||||
|
||||
/* change the address from address relative to the shared
|
||||
* memory address, to a true virtual address */
|
||||
hdr = (mca_btl_sm_hdr_t *)( (char *)hdr +
|
||||
mca_btl_sm_component.sm_offset[peer_smp_rank]);
|
||||
|
||||
/* dispatch fragment by type */
|
||||
switch(hdr->type) {
|
||||
switch(((uintptr_t)hdr) & MCA_BTL_SM_FRAG_TYPE_MASK) {
|
||||
case MCA_BTL_SM_FRAG_ACK:
|
||||
{
|
||||
frag = hdr->frag;
|
||||
int status = (uintptr_t)hdr & MCA_BTL_SM_FRAG_STATUS_MASK;
|
||||
frag = (mca_btl_sm_frag_t *)((char*)((uintptr_t)hdr &
|
||||
(~(MCA_BTL_SM_FRAG_TYPE_MASK |
|
||||
MCA_BTL_SM_FRAG_STATUS_MASK))));
|
||||
/* completion callback */
|
||||
frag->base.des_cbfunc(&mca_btl_sm[1].super, frag->endpoint, &frag->base, hdr->u.rc);
|
||||
frag->base.des_cbfunc(&mca_btl_sm[1].super, frag->endpoint,
|
||||
&frag->base, status?OMPI_ERROR:OMPI_SUCCESS);
|
||||
break;
|
||||
}
|
||||
case MCA_BTL_SM_FRAG_SEND:
|
||||
{
|
||||
mca_btl_sm_recv_reg_t* reg;
|
||||
/* change the address from address relative to the shared
|
||||
* memory address, to a true virtual address */
|
||||
hdr = (mca_btl_sm_hdr_t *)( (char *)hdr +
|
||||
mca_btl_sm_component.sm_offset[peer_smp_rank]);
|
||||
/* recv upcall */
|
||||
mca_btl_sm_recv_reg_t* reg = mca_btl_sm[1].sm_reg + hdr->u.s.tag;
|
||||
reg = mca_btl_sm[1].sm_reg + hdr->tag;
|
||||
MCA_BTL_SM_FRAG_ALLOC(frag, rc);
|
||||
frag->segment.seg_addr.pval = ((char*)hdr) +
|
||||
sizeof(mca_btl_sm_hdr_t);
|
||||
frag->segment.seg_len = hdr->u.s.len;
|
||||
reg->cbfunc(&mca_btl_sm[1].super,hdr->u.s.tag,&frag->base,reg->cbdata);
|
||||
frag->segment.seg_len = hdr->len;
|
||||
reg->cbfunc(&mca_btl_sm[1].super,hdr->tag,&frag->base,reg->cbdata);
|
||||
MCA_BTL_SM_FRAG_RETURN(frag);
|
||||
hdr->type = MCA_BTL_SM_FRAG_ACK;
|
||||
hdr->u.rc = OMPI_SUCCESS;
|
||||
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank],
|
||||
my_smp_rank, peer_smp_rank, hdr, rc );
|
||||
MCA_BTL_SM_FIFO_WRITE(
|
||||
mca_btl_sm_component.sm_peers[peer_smp_rank],
|
||||
my_smp_rank, peer_smp_rank, hdr->frag, rc);
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto err;
|
||||
break;
|
||||
@ -509,10 +516,11 @@ int mca_btl_sm_component_progress(void)
|
||||
default:
|
||||
{
|
||||
/* unknown */
|
||||
hdr->u.rc = OMPI_ERROR;
|
||||
hdr->type = MCA_BTL_SM_FRAG_ACK;
|
||||
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank],
|
||||
my_smp_rank, peer_smp_rank, hdr, rc );
|
||||
hdr = (mca_btl_sm_hdr_t*)((uintptr_t)hdr->frag |
|
||||
MCA_BTL_SM_FRAG_STATUS_MASK);
|
||||
MCA_BTL_SM_FIFO_WRITE(
|
||||
mca_btl_sm_component.sm_peers[peer_smp_rank],
|
||||
my_smp_rank, peer_smp_rank, hdr, rc);
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto err;
|
||||
break;
|
||||
|
@ -21,10 +21,13 @@
|
||||
|
||||
static inline void mca_btl_sm_frag_common_constructor(mca_btl_sm_frag_t* frag)
|
||||
{
|
||||
frag->hdr = frag->base.super.ptr;
|
||||
if(frag->hdr != NULL)
|
||||
frag->hdr->frag = frag;
|
||||
frag->segment.seg_addr.pval = ((char*)frag->hdr) + sizeof(mca_btl_sm_hdr_t);
|
||||
frag->hdr = (mca_btl_sm_hdr_t*)frag->base.super.ptr;
|
||||
if(frag->hdr != NULL) {
|
||||
frag->hdr->frag = (mca_btl_sm_frag_t*)((uintptr_t)frag |
|
||||
MCA_BTL_SM_FRAG_ACK);
|
||||
frag->segment.seg_addr.pval = ((char*)frag->hdr) +
|
||||
sizeof(mca_btl_sm_hdr_t);
|
||||
}
|
||||
frag->segment.seg_len = frag->size;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
|
@ -27,29 +27,20 @@
|
||||
#include "btl_sm.h"
|
||||
|
||||
|
||||
/*typedef enum {
|
||||
MCA_BTL_SM_FRAG_SEND,
|
||||
MCA_BTL_SM_FRAG_PUT,
|
||||
MCA_BTL_SM_FRAG_GET,
|
||||
MCA_BTL_SM_FRAG_ACK
|
||||
} mca_btl_sm_frag_type_t; */
|
||||
#define MCA_BTL_SM_FRAG_TYPE_MASK ((uintptr_t)0x3)
|
||||
#define MCA_BTL_SM_FRAG_SEND ((uintptr_t)0x0)
|
||||
#define MCA_BTL_SM_FRAG_ACK ((uintptr_t)0x1)
|
||||
#define MCA_BTL_SM_FRAG_PUT ((uintptr_t)0x2)
|
||||
#define MCA_BTL_SM_FRAG_GET ((uintptr_t)0x3)
|
||||
|
||||
#define MCA_BTL_SM_FRAG_SEND 0
|
||||
#define MCA_BTL_SM_FRAG_ACK 1
|
||||
#define MCA_BTL_SM_FRAG_STATUS_MASK ((uintptr_t)0x4)
|
||||
|
||||
typedef uint8_t mca_btl_sm_frag_type_t;
|
||||
struct mca_btl_sm_frag_t;
|
||||
|
||||
struct mca_btl_sm_hdr_t {
|
||||
struct mca_btl_sm_frag_t *frag;
|
||||
union {
|
||||
struct {
|
||||
size_t len;
|
||||
mca_btl_base_tag_t tag;
|
||||
} s;
|
||||
int rc;
|
||||
} u;
|
||||
mca_btl_sm_frag_type_t type;
|
||||
size_t len;
|
||||
mca_btl_base_tag_t tag;
|
||||
};
|
||||
typedef struct mca_btl_sm_hdr_t mca_btl_sm_hdr_t;
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user