From c70eb43e43536b88b243abea51fce3a11d74ebbd Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 28 Aug 2006 11:03:56 +0000 Subject: [PATCH] Align eager RDMA buffer so that last byte of the buffer is on the last byte of the CPU cache line. Improves zero byte latency a little bit because of L1 cache miss reduction. This commit was SVN r11465. --- ompi/mca/btl/openib/btl_openib_component.c | 6 +++--- ompi/mca/btl/openib/btl_openib_endpoint.c | 7 ++++++- ompi/mca/btl/openib/btl_openib_frag.h | 6 +++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c index 7d3080e518..04adead708 100644 --- a/ompi/mca/btl/openib/btl_openib_component.c +++ b/ompi/mca/btl/openib/btl_openib_component.c @@ -562,7 +562,7 @@ btl_openib_component_init(int *num_btl_modules, sizeof(mca_btl_openib_footer_t) + openib_btl->super.btl_eager_limit; - openib_btl->eager_rdma_frag_size = length; + openib_btl->eager_rdma_frag_size = (length + mca_btl_openib_component.buffer_alignment) & ~(mca_btl_openib_component.buffer_alignment-1); ompi_free_list_init_ex(&openib_btl->send_free_eager, length, @@ -879,7 +879,7 @@ static int btl_openib_component_progress(void) size + sizeof(mca_btl_openib_footer_t)); frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t); - + ret = btl_openib_handle_incoming_hp(openib_btl, frag->endpoint, frag, size - sizeof(mca_btl_openib_footer_t)); @@ -1011,7 +1011,7 @@ static int btl_openib_component_progress(void) break; } } - + ne=ibv_poll_cq(openib_btl->ib_cq_lp, 1, &wc ); if(ne < 0){ BTL_ERROR(("error polling LP CQ with %d errno says %s", ne, strerror(errno))); diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.c b/ompi/mca/btl/openib/btl_openib_endpoint.c index 75caa4d754..880d571959 100644 --- a/ompi/mca/btl/openib/btl_openib_endpoint.c +++ b/ompi/mca/btl/openib/btl_openib_endpoint.c @@ -1256,12 +1256,17 @@ void mca_btl_openib_endpoint_connect_eager_rdma( buf = openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool, openib_btl->eager_rdma_frag_size * - mca_btl_openib_component.eager_rdma_num, 0, 0, + mca_btl_openib_component.eager_rdma_num + + mca_btl_openib_component.buffer_alignment + + sizeof(mca_btl_openib_recv_frag_eager_t), 0, 0, (mca_mpool_base_registration_t**)&endpoint->eager_rdma_local.reg); if(!buf) goto unlock_rdma_local; + buf = (char*)(((uintptr_t)buf+mca_btl_openib_component.buffer_alignment) & ~(mca_btl_openib_component.buffer_alignment-1)); + buf = buf + openib_btl->eager_rdma_frag_size - sizeof(mca_btl_openib_footer_t) - openib_btl->super.btl_eager_limit - sizeof(mca_btl_openib_header_t) - sizeof(mca_btl_openib_frag_t); + for(i = 0; i < mca_btl_openib_component.eager_rdma_num; i++) { ompi_free_list_item_t *item = (ompi_free_list_item_t *)(buf + i*openib_btl->eager_rdma_frag_size); diff --git a/ompi/mca/btl/openib/btl_openib_frag.h b/ompi/mca/btl/openib/btl_openib_frag.h index 0eb25c33d4..f92ba167ab 100644 --- a/ompi/mca/btl/openib/btl_openib_frag.h +++ b/ompi/mca/btl/openib/btl_openib_frag.h @@ -78,8 +78,10 @@ typedef enum mca_btl_openib_frag_type_t mca_btl_openib_frag_type_t; */ struct mca_btl_openib_frag_t { mca_btl_base_descriptor_t base; - mca_btl_base_segment_t segment; struct mca_btl_base_endpoint_t *endpoint; + mca_btl_openib_footer_t *ftr; + mca_btl_openib_header_t *hdr; + mca_btl_base_segment_t segment; size_t size; int rc; mca_btl_openib_frag_type_t type; @@ -89,8 +91,6 @@ struct mca_btl_openib_frag_t { } wr_desc; struct ibv_sge sg_entry; struct ibv_mr *mr; - mca_btl_openib_header_t *hdr; - mca_btl_openib_footer_t *ftr; mca_mpool_openib_registration_t * openib_reg; }; typedef struct mca_btl_openib_frag_t mca_btl_openib_frag_t;