1
1

Optimize sm communication. Pass the message type (MCA_BTL_SM_FRAG_ACK/
MCA_BTL_SM_FRAG_SEND) and the success/fail status in the low bits of the
pointers we pass through the circular buffer. The rank that receives an ACK
doesn't need to look into the data it received, and this is a big win since
this data is not in the cache of the rank's CPU. (Note that we can use the low
bits of pointers because free_list always returns pointers aligned to at least
the cache line size.)

This commit was SVN r13922.
Этот коммит содержится в:
Gleb Natapov 2007-03-05 14:24:09 +00:00
родитель 90fb58de4f
Коммит 8078ae5977
4 изменённых файлов: 59 добавлений и 58 удалений

Просмотреть файл

@ -895,9 +895,8 @@ int mca_btl_sm_send(
mca_btl_sm_frag_t* frag = (mca_btl_sm_frag_t*)descriptor; mca_btl_sm_frag_t* frag = (mca_btl_sm_frag_t*)descriptor;
int rc; int rc;
frag->hdr->u.s.len = frag->segment.seg_len; frag->hdr->len = frag->segment.seg_len;
frag->hdr->u.s.tag = tag; frag->hdr->tag = tag;
frag->hdr->type = MCA_BTL_SM_FRAG_SEND;
/* /*
* post the descriptor in the queue - post with the relative * post the descriptor in the queue - post with the relative

Просмотреть файл

@ -231,7 +231,6 @@ CLEANUP:
return return_value; return return_value;
} }
/* /*
* SM component initialization * SM component initialization
*/ */
@ -392,28 +391,32 @@ int mca_btl_sm_component_progress(void)
} }
/* dispatch fragment by type */ /* dispatch fragment by type */
switch(hdr->type) { switch(((uintptr_t)hdr) & MCA_BTL_SM_FRAG_TYPE_MASK) {
case MCA_BTL_SM_FRAG_ACK: case MCA_BTL_SM_FRAG_ACK:
{ {
frag = hdr->frag; int status = (uintptr_t)hdr & MCA_BTL_SM_FRAG_STATUS_MASK;
frag = (mca_btl_sm_frag_t *)((uintptr_t)hdr &
(~(MCA_BTL_SM_FRAG_TYPE_MASK |
MCA_BTL_SM_FRAG_STATUS_MASK)));
/* completion callback */ /* completion callback */
frag->base.des_cbfunc(&mca_btl_sm[0].super, frag->endpoint, &frag->base, hdr->u.rc); frag->base.des_cbfunc(&mca_btl_sm[0].super, frag->endpoint,
&frag->base, status?OMPI_ERROR:OMPI_SUCCESS);
break; break;
} }
case MCA_BTL_SM_FRAG_SEND: case MCA_BTL_SM_FRAG_SEND:
{ {
/* recv upcall */ /* recv upcall */
mca_btl_sm_recv_reg_t* reg = mca_btl_sm[0].sm_reg + hdr->u.s.tag; mca_btl_sm_recv_reg_t* reg = mca_btl_sm[0].sm_reg + hdr->tag;
MCA_BTL_SM_FRAG_ALLOC(frag, rc); MCA_BTL_SM_FRAG_ALLOC(frag, rc);
frag->segment.seg_addr.pval = ((char*)hdr) + frag->segment.seg_addr.pval = ((char*)hdr) +
sizeof(mca_btl_sm_hdr_t); sizeof(mca_btl_sm_hdr_t);
frag->segment.seg_len = hdr->u.s.len; frag->segment.seg_len = hdr->len;
reg->cbfunc(&mca_btl_sm[0].super,hdr->u.s.tag,&frag->base,reg->cbdata); reg->cbfunc(&mca_btl_sm[0].super,hdr->tag,&frag->base,reg->cbdata);
MCA_BTL_SM_FRAG_RETURN(frag); MCA_BTL_SM_FRAG_RETURN(frag);
hdr->type = MCA_BTL_SM_FRAG_ACK; MCA_BTL_SM_FIFO_WRITE(
hdr->u.rc = OMPI_SUCCESS; mca_btl_sm_component.sm_peers[peer_smp_rank],
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank], my_smp_rank, peer_smp_rank, hdr->frag, rc);
my_smp_rank, peer_smp_rank, hdr, rc );
if(OMPI_SUCCESS != rc) if(OMPI_SUCCESS != rc)
goto err; goto err;
break; break;
@ -421,10 +424,11 @@ int mca_btl_sm_component_progress(void)
default: default:
{ {
/* unknown */ /* unknown */
hdr->u.rc = OMPI_ERROR; hdr = (mca_btl_sm_hdr_t*)((uintptr_t)hdr->frag |
hdr->type = MCA_BTL_SM_FRAG_ACK; MCA_BTL_SM_FRAG_STATUS_MASK);
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank], MCA_BTL_SM_FIFO_WRITE(
my_smp_rank, peer_smp_rank, hdr, rc ); mca_btl_sm_component.sm_peers[peer_smp_rank],
my_smp_rank, peer_smp_rank, hdr, rc);
if(OMPI_SUCCESS != rc) if(OMPI_SUCCESS != rc)
goto err; goto err;
break; break;
@ -474,34 +478,37 @@ int mca_btl_sm_component_progress(void)
opal_atomic_unlock(&(fifo->tail_lock)); opal_atomic_unlock(&(fifo->tail_lock));
} }
/* change the address from address relative to the shared
* memory address, to a true virtual address */
hdr = (mca_btl_sm_hdr_t *)( (char *)hdr +
mca_btl_sm_component.sm_offset[peer_smp_rank]);
/* dispatch fragment by type */ /* dispatch fragment by type */
switch(hdr->type) { switch(((uintptr_t)hdr) & MCA_BTL_SM_FRAG_TYPE_MASK) {
case MCA_BTL_SM_FRAG_ACK: case MCA_BTL_SM_FRAG_ACK:
{ {
frag = hdr->frag; int status = (uintptr_t)hdr & MCA_BTL_SM_FRAG_STATUS_MASK;
frag = (mca_btl_sm_frag_t *)((char*)((uintptr_t)hdr &
(~(MCA_BTL_SM_FRAG_TYPE_MASK |
MCA_BTL_SM_FRAG_STATUS_MASK))));
/* completion callback */ /* completion callback */
frag->base.des_cbfunc(&mca_btl_sm[1].super, frag->endpoint, &frag->base, hdr->u.rc); frag->base.des_cbfunc(&mca_btl_sm[1].super, frag->endpoint,
&frag->base, status?OMPI_ERROR:OMPI_SUCCESS);
break; break;
} }
case MCA_BTL_SM_FRAG_SEND: case MCA_BTL_SM_FRAG_SEND:
{ {
mca_btl_sm_recv_reg_t* reg;
/* change the address from address relative to the shared
* memory address, to a true virtual address */
hdr = (mca_btl_sm_hdr_t *)( (char *)hdr +
mca_btl_sm_component.sm_offset[peer_smp_rank]);
/* recv upcall */ /* recv upcall */
mca_btl_sm_recv_reg_t* reg = mca_btl_sm[1].sm_reg + hdr->u.s.tag; reg = mca_btl_sm[1].sm_reg + hdr->tag;
MCA_BTL_SM_FRAG_ALLOC(frag, rc); MCA_BTL_SM_FRAG_ALLOC(frag, rc);
frag->segment.seg_addr.pval = ((char*)hdr) + frag->segment.seg_addr.pval = ((char*)hdr) +
sizeof(mca_btl_sm_hdr_t); sizeof(mca_btl_sm_hdr_t);
frag->segment.seg_len = hdr->u.s.len; frag->segment.seg_len = hdr->len;
reg->cbfunc(&mca_btl_sm[1].super,hdr->u.s.tag,&frag->base,reg->cbdata); reg->cbfunc(&mca_btl_sm[1].super,hdr->tag,&frag->base,reg->cbdata);
MCA_BTL_SM_FRAG_RETURN(frag); MCA_BTL_SM_FRAG_RETURN(frag);
hdr->type = MCA_BTL_SM_FRAG_ACK; MCA_BTL_SM_FIFO_WRITE(
hdr->u.rc = OMPI_SUCCESS; mca_btl_sm_component.sm_peers[peer_smp_rank],
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank], my_smp_rank, peer_smp_rank, hdr->frag, rc);
my_smp_rank, peer_smp_rank, hdr, rc );
if(OMPI_SUCCESS != rc) if(OMPI_SUCCESS != rc)
goto err; goto err;
break; break;
@ -509,10 +516,11 @@ int mca_btl_sm_component_progress(void)
default: default:
{ {
/* unknown */ /* unknown */
hdr->u.rc = OMPI_ERROR; hdr = (mca_btl_sm_hdr_t*)((uintptr_t)hdr->frag |
hdr->type = MCA_BTL_SM_FRAG_ACK; MCA_BTL_SM_FRAG_STATUS_MASK);
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank], MCA_BTL_SM_FIFO_WRITE(
my_smp_rank, peer_smp_rank, hdr, rc ); mca_btl_sm_component.sm_peers[peer_smp_rank],
my_smp_rank, peer_smp_rank, hdr, rc);
if(OMPI_SUCCESS != rc) if(OMPI_SUCCESS != rc)
goto err; goto err;
break; break;

Просмотреть файл

@ -21,10 +21,13 @@
static inline void mca_btl_sm_frag_common_constructor(mca_btl_sm_frag_t* frag) static inline void mca_btl_sm_frag_common_constructor(mca_btl_sm_frag_t* frag)
{ {
frag->hdr = frag->base.super.ptr; frag->hdr = (mca_btl_sm_hdr_t*)frag->base.super.ptr;
if(frag->hdr != NULL) if(frag->hdr != NULL) {
frag->hdr->frag = frag; frag->hdr->frag = (mca_btl_sm_frag_t*)((uintptr_t)frag |
frag->segment.seg_addr.pval = ((char*)frag->hdr) + sizeof(mca_btl_sm_hdr_t); MCA_BTL_SM_FRAG_ACK);
frag->segment.seg_addr.pval = ((char*)frag->hdr) +
sizeof(mca_btl_sm_hdr_t);
}
frag->segment.seg_len = frag->size; frag->segment.seg_len = frag->size;
frag->base.des_src = &frag->segment; frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1; frag->base.des_src_cnt = 1;

Просмотреть файл

@ -27,29 +27,20 @@
#include "btl_sm.h" #include "btl_sm.h"
/*typedef enum { #define MCA_BTL_SM_FRAG_TYPE_MASK ((uintptr_t)0x3)
MCA_BTL_SM_FRAG_SEND, #define MCA_BTL_SM_FRAG_SEND ((uintptr_t)0x0)
MCA_BTL_SM_FRAG_PUT, #define MCA_BTL_SM_FRAG_ACK ((uintptr_t)0x1)
MCA_BTL_SM_FRAG_GET, #define MCA_BTL_SM_FRAG_PUT ((uintptr_t)0x2)
MCA_BTL_SM_FRAG_ACK #define MCA_BTL_SM_FRAG_GET ((uintptr_t)0x3)
} mca_btl_sm_frag_type_t; */
#define MCA_BTL_SM_FRAG_SEND 0 #define MCA_BTL_SM_FRAG_STATUS_MASK ((uintptr_t)0x4)
#define MCA_BTL_SM_FRAG_ACK 1
typedef uint8_t mca_btl_sm_frag_type_t;
struct mca_btl_sm_frag_t; struct mca_btl_sm_frag_t;
struct mca_btl_sm_hdr_t { struct mca_btl_sm_hdr_t {
struct mca_btl_sm_frag_t *frag; struct mca_btl_sm_frag_t *frag;
union { size_t len;
struct { mca_btl_base_tag_t tag;
size_t len;
mca_btl_base_tag_t tag;
} s;
int rc;
} u;
mca_btl_sm_frag_type_t type;
}; };
typedef struct mca_btl_sm_hdr_t mca_btl_sm_hdr_t; typedef struct mca_btl_sm_hdr_t mca_btl_sm_hdr_t;