1
1

Optimize sm communication. Pass the message type (MCA_BTL_SM_FRAG_ACK/
MCA_BTL_SM_FRAG_SEND) and the success/fail status in the low bits of the
pointers we pass through the circular buffer. The rank that receives an ACK
doesn't need to look into the data it received, and this is a big win since
that data is not in the cache of the rank's CPU. (Note that we can use the low
bits of the pointers because free_list always returns pointers aligned to at
least the cache line size.)

This commit was SVN r13922.
Этот коммит содержится в:
Gleb Natapov 2007-03-05 14:24:09 +00:00
родитель 90fb58de4f
Коммит 8078ae5977
4 изменённых файлов: 59 добавлений и 58 удалений

Просмотреть файл

@ -895,9 +895,8 @@ int mca_btl_sm_send(
mca_btl_sm_frag_t* frag = (mca_btl_sm_frag_t*)descriptor;
int rc;
frag->hdr->u.s.len = frag->segment.seg_len;
frag->hdr->u.s.tag = tag;
frag->hdr->type = MCA_BTL_SM_FRAG_SEND;
frag->hdr->len = frag->segment.seg_len;
frag->hdr->tag = tag;
/*
* post the descriptor in the queue - post with the relative

Просмотреть файл

@ -231,7 +231,6 @@ CLEANUP:
return return_value;
}
/*
* SM component initialization
*/
@ -392,28 +391,32 @@ int mca_btl_sm_component_progress(void)
}
/* dispatch fragment by type */
switch(hdr->type) {
switch(((uintptr_t)hdr) & MCA_BTL_SM_FRAG_TYPE_MASK) {
case MCA_BTL_SM_FRAG_ACK:
{
frag = hdr->frag;
int status = (uintptr_t)hdr & MCA_BTL_SM_FRAG_STATUS_MASK;
frag = (mca_btl_sm_frag_t *)((uintptr_t)hdr &
(~(MCA_BTL_SM_FRAG_TYPE_MASK |
MCA_BTL_SM_FRAG_STATUS_MASK)));
/* completion callback */
frag->base.des_cbfunc(&mca_btl_sm[0].super, frag->endpoint, &frag->base, hdr->u.rc);
frag->base.des_cbfunc(&mca_btl_sm[0].super, frag->endpoint,
&frag->base, status?OMPI_ERROR:OMPI_SUCCESS);
break;
}
case MCA_BTL_SM_FRAG_SEND:
{
/* recv upcall */
mca_btl_sm_recv_reg_t* reg = mca_btl_sm[0].sm_reg + hdr->u.s.tag;
mca_btl_sm_recv_reg_t* reg = mca_btl_sm[0].sm_reg + hdr->tag;
MCA_BTL_SM_FRAG_ALLOC(frag, rc);
frag->segment.seg_addr.pval = ((char*)hdr) +
sizeof(mca_btl_sm_hdr_t);
frag->segment.seg_len = hdr->u.s.len;
reg->cbfunc(&mca_btl_sm[0].super,hdr->u.s.tag,&frag->base,reg->cbdata);
frag->segment.seg_len = hdr->len;
reg->cbfunc(&mca_btl_sm[0].super,hdr->tag,&frag->base,reg->cbdata);
MCA_BTL_SM_FRAG_RETURN(frag);
hdr->type = MCA_BTL_SM_FRAG_ACK;
hdr->u.rc = OMPI_SUCCESS;
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank],
my_smp_rank, peer_smp_rank, hdr, rc );
MCA_BTL_SM_FIFO_WRITE(
mca_btl_sm_component.sm_peers[peer_smp_rank],
my_smp_rank, peer_smp_rank, hdr->frag, rc);
if(OMPI_SUCCESS != rc)
goto err;
break;
@ -421,10 +424,11 @@ int mca_btl_sm_component_progress(void)
default:
{
/* unknown */
hdr->u.rc = OMPI_ERROR;
hdr->type = MCA_BTL_SM_FRAG_ACK;
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank],
my_smp_rank, peer_smp_rank, hdr, rc );
hdr = (mca_btl_sm_hdr_t*)((uintptr_t)hdr->frag |
MCA_BTL_SM_FRAG_STATUS_MASK);
MCA_BTL_SM_FIFO_WRITE(
mca_btl_sm_component.sm_peers[peer_smp_rank],
my_smp_rank, peer_smp_rank, hdr, rc);
if(OMPI_SUCCESS != rc)
goto err;
break;
@ -474,34 +478,37 @@ int mca_btl_sm_component_progress(void)
opal_atomic_unlock(&(fifo->tail_lock));
}
/* change the address from address relative to the shared
* memory address, to a true virtual address */
hdr = (mca_btl_sm_hdr_t *)( (char *)hdr +
mca_btl_sm_component.sm_offset[peer_smp_rank]);
/* dispatch fragment by type */
switch(hdr->type) {
switch(((uintptr_t)hdr) & MCA_BTL_SM_FRAG_TYPE_MASK) {
case MCA_BTL_SM_FRAG_ACK:
{
frag = hdr->frag;
int status = (uintptr_t)hdr & MCA_BTL_SM_FRAG_STATUS_MASK;
frag = (mca_btl_sm_frag_t *)((char*)((uintptr_t)hdr &
(~(MCA_BTL_SM_FRAG_TYPE_MASK |
MCA_BTL_SM_FRAG_STATUS_MASK))));
/* completion callback */
frag->base.des_cbfunc(&mca_btl_sm[1].super, frag->endpoint, &frag->base, hdr->u.rc);
frag->base.des_cbfunc(&mca_btl_sm[1].super, frag->endpoint,
&frag->base, status?OMPI_ERROR:OMPI_SUCCESS);
break;
}
case MCA_BTL_SM_FRAG_SEND:
{
mca_btl_sm_recv_reg_t* reg;
/* change the address from address relative to the shared
* memory address, to a true virtual address */
hdr = (mca_btl_sm_hdr_t *)( (char *)hdr +
mca_btl_sm_component.sm_offset[peer_smp_rank]);
/* recv upcall */
mca_btl_sm_recv_reg_t* reg = mca_btl_sm[1].sm_reg + hdr->u.s.tag;
reg = mca_btl_sm[1].sm_reg + hdr->tag;
MCA_BTL_SM_FRAG_ALLOC(frag, rc);
frag->segment.seg_addr.pval = ((char*)hdr) +
sizeof(mca_btl_sm_hdr_t);
frag->segment.seg_len = hdr->u.s.len;
reg->cbfunc(&mca_btl_sm[1].super,hdr->u.s.tag,&frag->base,reg->cbdata);
frag->segment.seg_len = hdr->len;
reg->cbfunc(&mca_btl_sm[1].super,hdr->tag,&frag->base,reg->cbdata);
MCA_BTL_SM_FRAG_RETURN(frag);
hdr->type = MCA_BTL_SM_FRAG_ACK;
hdr->u.rc = OMPI_SUCCESS;
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank],
my_smp_rank, peer_smp_rank, hdr, rc );
MCA_BTL_SM_FIFO_WRITE(
mca_btl_sm_component.sm_peers[peer_smp_rank],
my_smp_rank, peer_smp_rank, hdr->frag, rc);
if(OMPI_SUCCESS != rc)
goto err;
break;
@ -509,10 +516,11 @@ int mca_btl_sm_component_progress(void)
default:
{
/* unknown */
hdr->u.rc = OMPI_ERROR;
hdr->type = MCA_BTL_SM_FRAG_ACK;
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank],
my_smp_rank, peer_smp_rank, hdr, rc );
hdr = (mca_btl_sm_hdr_t*)((uintptr_t)hdr->frag |
MCA_BTL_SM_FRAG_STATUS_MASK);
MCA_BTL_SM_FIFO_WRITE(
mca_btl_sm_component.sm_peers[peer_smp_rank],
my_smp_rank, peer_smp_rank, hdr, rc);
if(OMPI_SUCCESS != rc)
goto err;
break;

Просмотреть файл

@ -21,10 +21,13 @@
static inline void mca_btl_sm_frag_common_constructor(mca_btl_sm_frag_t* frag)
{
frag->hdr = frag->base.super.ptr;
if(frag->hdr != NULL)
frag->hdr->frag = frag;
frag->segment.seg_addr.pval = ((char*)frag->hdr) + sizeof(mca_btl_sm_hdr_t);
frag->hdr = (mca_btl_sm_hdr_t*)frag->base.super.ptr;
if(frag->hdr != NULL) {
frag->hdr->frag = (mca_btl_sm_frag_t*)((uintptr_t)frag |
MCA_BTL_SM_FRAG_ACK);
frag->segment.seg_addr.pval = ((char*)frag->hdr) +
sizeof(mca_btl_sm_hdr_t);
}
frag->segment.seg_len = frag->size;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;

Просмотреть файл

@ -27,29 +27,20 @@
#include "btl_sm.h"
/*typedef enum {
MCA_BTL_SM_FRAG_SEND,
MCA_BTL_SM_FRAG_PUT,
MCA_BTL_SM_FRAG_GET,
MCA_BTL_SM_FRAG_ACK
} mca_btl_sm_frag_type_t; */
#define MCA_BTL_SM_FRAG_TYPE_MASK ((uintptr_t)0x3)
#define MCA_BTL_SM_FRAG_SEND ((uintptr_t)0x0)
#define MCA_BTL_SM_FRAG_ACK ((uintptr_t)0x1)
#define MCA_BTL_SM_FRAG_PUT ((uintptr_t)0x2)
#define MCA_BTL_SM_FRAG_GET ((uintptr_t)0x3)
#define MCA_BTL_SM_FRAG_SEND 0
#define MCA_BTL_SM_FRAG_ACK 1
#define MCA_BTL_SM_FRAG_STATUS_MASK ((uintptr_t)0x4)
typedef uint8_t mca_btl_sm_frag_type_t;
struct mca_btl_sm_frag_t;
struct mca_btl_sm_hdr_t {
struct mca_btl_sm_frag_t *frag;
union {
struct {
size_t len;
mca_btl_base_tag_t tag;
} s;
int rc;
} u;
mca_btl_sm_frag_type_t type;
size_t len;
mca_btl_base_tag_t tag;
};
typedef struct mca_btl_sm_hdr_t mca_btl_sm_hdr_t;