1
1

Correct a few alignment problems to address the issue brought up in ticket #2964

This commit was SVN r26078.
Этот коммит содержится в:
Terry Dontje 2012-03-01 17:29:40 +00:00
родитель f1525bdbff
Коммит 3e70cad203
5 изменённых файлов: 128 добавлений и 41 удалений

Просмотреть файл

@ -15,7 +15,7 @@
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2008-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
@ -938,6 +938,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
if(mca_btl_openib_component.use_message_coalescing &&
(flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
int prio = !(flags & MCA_BTL_DES_FLAGS_PRIORITY);
sfrag = check_coalescing(&ep->qps[qp].no_wqe_pending_frags[prio],
&ep->endpoint_lock, ep, size);
@ -982,9 +983,10 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
to_com_frag(sfrag)->sg_entry.addr = (uint64_t)(uintptr_t)sfrag->hdr;
}
cfrag->hdr = (mca_btl_openib_header_coalesced_t*)
(((unsigned char*)(sfrag->hdr + 1)) + sfrag->coalesced_length +
to_base_frag(sfrag)->segment.seg_len);
cfrag->hdr = (mca_btl_openib_header_coalesced_t*)((unsigned char*)(sfrag->hdr + 1) +
sfrag->coalesced_length +
to_base_frag(sfrag)->segment.seg_len);
cfrag->hdr = (mca_btl_openib_header_coalesced_t*)BTL_OPENIB_ALIGN_COALESCE_HDR(cfrag->hdr);
cfrag->hdr->alloc_size = size;
/* point coalesced frag pointer into a data buffer */

Просмотреть файл

@ -15,7 +15,7 @@
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
@ -484,29 +484,36 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
ep->eager_rdma_remote.tokens=mca_btl_openib_component.eager_rdma_num - 1;
break;
case MCA_BTL_OPENIB_CONTROL_COALESCED:
while(len > 0) {
size_t skip;
mca_btl_base_descriptor_t tmp_des;
mca_btl_base_segment_t tmp_seg;
{
size_t pad = 0;
while(len > 0) {
size_t skip;
mca_btl_openib_header_coalesced_t* unalign_hdr = 0;
mca_btl_base_descriptor_t tmp_des;
mca_btl_base_segment_t tmp_seg;
assert(len >= sizeof(*clsc_hdr));
assert(len >= sizeof(*clsc_hdr));
if(ep->nbo)
BTL_OPENIB_HEADER_COALESCED_NTOH(*clsc_hdr);
if(ep->nbo)
BTL_OPENIB_HEADER_COALESCED_NTOH(*clsc_hdr);
skip = (sizeof(*clsc_hdr) + clsc_hdr->alloc_size);
skip = (sizeof(*clsc_hdr) + clsc_hdr->alloc_size - pad);
tmp_des.des_dst = &tmp_seg;
tmp_des.des_dst_cnt = 1;
tmp_seg.seg_addr.pval = clsc_hdr + 1;
tmp_seg.seg_len = clsc_hdr->size;
tmp_des.des_dst = &tmp_seg;
tmp_des.des_dst_cnt = 1;
tmp_seg.seg_addr.pval = clsc_hdr + 1;
tmp_seg.seg_len = clsc_hdr->size;
/* call registered callback */
reg = mca_btl_base_active_message_trigger + clsc_hdr->tag;
reg->cbfunc( &obtl->super, clsc_hdr->tag, &tmp_des, reg->cbdata );
len -= skip;
clsc_hdr = (mca_btl_openib_header_coalesced_t*)
(((unsigned char*)clsc_hdr) + skip);
/* call registered callback */
reg = mca_btl_base_active_message_trigger + clsc_hdr->tag;
reg->cbfunc( &obtl->super, clsc_hdr->tag, &tmp_des, reg->cbdata );
len -= (skip + pad);
unalign_hdr = (mca_btl_openib_header_coalesced_t*)
((unsigned char*)clsc_hdr + skip);
pad = (size_t)BTL_OPENIB_COALESCE_HDR_PADDING(unalign_hdr);
clsc_hdr = (mca_btl_openib_header_coalesced_t*)((unsigned char*)unalign_hdr +
pad);
}
}
break;
case MCA_BTL_OPENIB_CONTROL_CTS:
@ -536,8 +543,8 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
#if BTL_OPENIB_FAILOVER_ENABLED
case MCA_BTL_OPENIB_CONTROL_EP_BROKEN:
case MCA_BTL_OPENIB_CONTROL_EP_EAGER_RDMA_ERROR:
btl_openib_handle_failover_control_messages(ctl_hdr, ep);
break;
btl_openib_handle_failover_control_messages(ctl_hdr, ep);
break;
#endif
default:
BTL_ERROR(("Unknown message type received by BTL"));
@ -1857,7 +1864,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
break;
}
if(IBV_PORT_ACTIVE == ib_port_attr.state) {
/* Select the lower of the HCA and port active speed. With QLogic
/* Select the lower of the HCA and port active speed. With QLogic
HCAs that are capable of 4K MTU we had an issue when connected
to switches with 2K MTU. This fix is valid for other IB vendors
as well. */
@ -3419,7 +3426,7 @@ error:
"status number %d for wr_id %llu opcode %d vendor error %d qp_idx %d",
cq_name[cq], btl_openib_component_status_to_string(wc->status),
wc->status, wc->wr_id,
wc->opcode, wc->vendor_err, qp);
wc->opcode, wc->vendor_err, qp);
}
if (IBV_WC_RNR_RETRY_EXC_ERR == wc->status ||
@ -3588,7 +3595,7 @@ static int progress_one_device(mca_btl_openib_device_t *device)
OPAL_THREAD_UNLOCK(&endpoint->eager_rdma_local.lock);
frag->hdr = (mca_btl_openib_header_t*)(((char*)frag->ftr) -
size + sizeof(mca_btl_openib_footer_t));
size - BTL_OPENIB_FTR_PADDING(size) + sizeof(mca_btl_openib_footer_t));
to_base_frag(frag)->segment.seg_addr.pval =
((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t);

Просмотреть файл

@ -14,7 +14,7 @@
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -493,18 +493,18 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep,
BTL_OPENIB_HEADER_HTON(*frag->hdr);
if(rdma) {
int32_t head;
int32_t head;
mca_btl_openib_footer_t* ftr =
(mca_btl_openib_footer_t*)(((char*)frag->hdr) + sg->length -
sizeof(mca_btl_openib_footer_t));
(mca_btl_openib_footer_t*)(((char*)frag->hdr) + sg->length +
BTL_OPENIB_FTR_PADDING(sg->length) - sizeof(mca_btl_openib_footer_t));
sr_desc->opcode = IBV_WR_RDMA_WRITE;
MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(ftr, sg->length);
MCA_BTL_OPENIB_RDMA_MAKE_LOCAL(ftr);
#if OPAL_ENABLE_DEBUG
do {
ftr->seq = ep->eager_rdma_remote.seq;
} while (!OPAL_ATOMIC_CMPSET_32((int32_t*) &ep->eager_rdma_remote.seq,
(int32_t) ftr->seq,
do {
ftr->seq = ep->eager_rdma_remote.seq;
} while (!OPAL_ATOMIC_CMPSET_32((int32_t*) &ep->eager_rdma_remote.seq,
(int32_t) ftr->seq,
(int32_t) (ftr->seq+1)));
#endif
if(ep->nbo)
@ -525,7 +525,7 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep,
sizeof(mca_btl_openib_header_t) +
mca_btl_openib_component.eager_limit +
sizeof(mca_btl_openib_footer_t);
sr_desc->wr.rdma.remote_addr -= sg->length;
sr_desc->wr.rdma.remote_addr -= sg->length + BTL_OPENIB_FTR_PADDING(sg->length);
} else {
if(BTL_OPENIB_QP_TYPE_PP(qp)) {
sr_desc->opcode = IBV_WR_SEND;

Просмотреть файл

@ -12,6 +12,7 @@
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -126,6 +127,10 @@ static void recv_constructor(mca_btl_openib_recv_frag_t *frag)
/*
 * Set up a send-control fragment: mark the base fragment as a control
 * fragment and re-point the header-related fields at frag->chdr.
 *
 * frag: the send-control fragment being initialized; it is modified in place.
 *
 * NOTE(review): presumably this is registered as the object constructor for
 * mca_btl_openib_send_control_frag_t -- confirm at the class declaration.
 */
static void send_control_constructor(mca_btl_openib_send_control_frag_t *frag)
{
to_base_frag(frag)->type = MCA_BTL_OPENIB_FRAG_CONTROL;
/* Control messages carry no coalesce header, so the header pointer is
 * moved to frag->chdr.  This must happen before the two assignments
 * below, which both derive their values from frag->hdr. */
frag->hdr = frag->chdr;
/* The segment address is placed one header-sized element past hdr. */
to_base_frag(frag)->segment.seg_addr.pval = frag->hdr + 1;
/* The scatter/gather entry address is the header address, stored as a
 * 64-bit integer. */
to_com_frag(frag)->sg_entry.addr = (uint64_t)(uintptr_t)frag->hdr;
}
static void put_constructor(mca_btl_openib_put_frag_t *frag)

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2006-2009 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -25,6 +25,7 @@
#define MCA_BTL_IB_FRAG_H
#include "ompi_config.h"
#include "opal/align.h"
#include <infiniband/verbs.h>
#include "ompi/mca/btl/btl.h"
@ -37,6 +38,9 @@ struct mca_btl_openib_header_t {
mca_btl_base_tag_t tag;
uint8_t cm_seen;
uint16_t credits;
#if OMPI_OPENIB_PAD_HDR
uint8_t padding[4];
#endif
};
typedef struct mca_btl_openib_header_t mca_btl_openib_header_t;
#define BTL_OPENIB_RDMA_CREDITS_FLAG (1<<15)
@ -57,6 +61,9 @@ typedef struct mca_btl_openib_header_coalesced_t {
mca_btl_base_tag_t tag;
uint32_t size;
uint32_t alloc_size;
#if OMPI_OPENIB_PAD_HDR
uint8_t padding[4];
#endif
} mca_btl_openib_header_coalesced_t;
#define BTL_OPENIB_HEADER_COALESCED_NTOH(h) \
@ -71,6 +78,59 @@ typedef struct mca_btl_openib_header_coalesced_t {
(h).alloc_size = htonl((h).alloc_size); \
} while(0)
#if OMPI_OPENIB_PAD_HDR
/* BTL_OPENIB_FTR_PADDING
 * This macro is used to keep the pointer to openib footers aligned for
 * systems like SPARC64 that take a big performance hit when addresses
 * are not aligned (and by default raise SIGBUS instead of coercing the
 * type on an unaligned address).
 *
 * We assure alignment of a packet's structures when OMPI_OPENIB_PAD_HDR
 * is set to 1.  When this is the case several structures are padded
 * to assure alignment, and the location of the mca_btl_openib_footer_t
 * pointer is shifted by BTL_OPENIB_FTR_PADDING() to assure proper
 * alignment after the PML header and data.
 * For example, when sending a 1 byte data packet the memory layout
 * without footer alignment would look something like the following:
 *
 * 0x00 : mca_btl_openib_header_coalesced_t (12 bytes + 4 byte pad)
 * 0x10 : mca_btl_openib_control_header_t (1 byte + 7 byte pad)
 * 0x18 : mca_btl_openib_header_t (4 bytes + 4 byte pad)
 * 0x20 : PML Header and data (16 bytes PML + 1 byte data)
 * 0x29 : mca_btl_openib_footer_t (4 bytes + 4 byte pad)
 * 0x31 : end of packet
 *
 * By applying BTL_OPENIB_FTR_PADDING() in the progress_one_device
 * and post_send routines we adjust the pointer to mca_btl_openib_footer_t
 * from 0x29 to 0x2C, thus correctly aligning the start of the
 * footer pointer.  This adjustment will cause the padding field of
 * mca_btl_openib_footer_t to overlap with the neighboring memory, but since
 * we never use the padding we do not end up inadvertently overwriting
 * memory that does not belong to the fragment.
 *
 * NOTE(review): the offsets in the example are illustrative and do not
 * add up exactly (0x20 plus 17 bytes is 0x31, not 0x29).  The pad amount
 * is computed against sizeof(uint64_t), so the adjusted footer address
 * would presumably be 8-byte aligned rather than 0x2C -- confirm against
 * the definition of OPAL_ALIGN_PAD_AMOUNT.
 */
#define BTL_OPENIB_FTR_PADDING(size) \
    OPAL_ALIGN_PAD_AMOUNT(size, sizeof(uint64_t))

/* BTL_OPENIB_ALIGN_COALESCE_HDR
 * This macro is used in btl_openib.c, while creating a coalesce fragment,
 * to align the coalesce headers.
 */
#define BTL_OPENIB_ALIGN_COALESCE_HDR(ptr) \
    OPAL_ALIGN_PTR(ptr, sizeof(uint32_t), unsigned char*)

/* BTL_OPENIB_COALESCE_HDR_PADDING
 * This macro is used in btl_openib_component.c, while parsing an incoming
 * coalesce fragment, to determine the padding amount used to align the
 * mca_btl_openib_header_coalesced_t.
 */
#define BTL_OPENIB_COALESCE_HDR_PADDING(ptr) \
    OPAL_ALIGN_PAD_AMOUNT(ptr, sizeof(uint32_t))
#else
/* Without header padding no alignment adjustment is made: the pad amounts
 * are zero and the coalesce header pointer is passed through unchanged.
 * The pass-through expansion is parenthesized so that an expression
 * argument such as `p + 1` keeps its grouping at the use site.
 */
#define BTL_OPENIB_FTR_PADDING(size) 0
#define BTL_OPENIB_ALIGN_COALESCE_HDR(ptr) (ptr)
#define BTL_OPENIB_COALESCE_HDR_PADDING(ptr) 0
#endif
struct mca_btl_openib_footer_t {
#if OPAL_ENABLE_DEBUG
uint32_t seq;
@ -79,6 +139,18 @@ struct mca_btl_openib_footer_t {
uint32_t size;
uint8_t buf[4];
} u;
#if OMPI_OPENIB_PAD_HDR
#if OPAL_ENABLE_DEBUG
/* The size of this footer must be a multiple of 8 bytes. Adding the
* seq field throws that off, and the padding cannot simply be removed
* because it is needed to adjust the alignment without overwriting
* other packets.
*/
uint8_t padding[12];
#else
uint8_t padding[8];
#endif
#endif
};
typedef struct mca_btl_openib_footer_t mca_btl_openib_footer_t;
@ -125,14 +197,13 @@ typedef struct mca_btl_openib_footer_t mca_btl_openib_footer_t;
struct mca_btl_openib_control_header_t {
uint8_t type;
#if OMPI_OPENIB_PAD_HDR
uint8_t padding[15];
uint8_t padding[7];
#endif
};
typedef struct mca_btl_openib_control_header_t mca_btl_openib_control_header_t;
struct mca_btl_openib_eager_rdma_header_t {
mca_btl_openib_control_header_t control;
uint8_t padding[3];
uint32_t rkey;
ompi_ptr_t rdma_start;
};
@ -153,6 +224,9 @@ typedef struct mca_btl_openib_eager_rdma_header_t mca_btl_openib_eager_rdma_head
/*
 * Header for an RDMA-credits control message.  It starts with the common
 * control header, followed by a one-byte qpn field and a 16-bit
 * rdma_credits field.
 *
 * Field order and the explicit padding determine the in-memory layout, so
 * members must not be reordered.
 */
struct mca_btl_openib_rdma_credits_header_t {
/* Common control header; must remain the first member. */
mca_btl_openib_control_header_t control;
#if OMPI_OPENIB_PAD_HDR
/* One explicit pad byte, present only when header padding is enabled. */
uint8_t padding[1];
#endif
/* NOTE(review): presumably the index of the queue pair the credits
 * refer to -- confirm against the code that fills this header. */
uint8_t qpn;
/* NOTE(review): presumably the number of RDMA credits being returned
 * to the peer -- confirm against the sender. */
uint16_t rdma_credits;
};
@ -171,7 +245,6 @@ do { \
#if BTL_OPENIB_FAILOVER_ENABLED
struct mca_btl_openib_broken_connection_header_t {
mca_btl_openib_control_header_t control;
uint8_t padding[3];
uint32_t lid;
uint64_t subnet_id;
uint32_t vpid;