1
1

Merge in changes from the bwb-heterogeneous temp branch (r12491 - r12714)
for supporting compilers / architectures with different padding rules.

This commit was SVN r12749.

The following SVN revisions from the original message are invalid or
inconsistent and therefore were not cross-referenced:
  r12491
  r12714
Этот коммит содержится в:
Brian Barrett 2006-12-04 20:11:42 +00:00
родитель d64fa194f1
Коммит 441432950f
11 изменённых файлов: 138 добавлений и 16 удалений

Просмотреть файл

@ -166,7 +166,7 @@ ompi_convertor_find_or_create_master( uint32_t remote_arch )
uint64_t hetero_mask = 0;
for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) {
if( remote_sizes[i] > 2 )
if( remote_sizes[i] > 1 )
hetero_mask |= (((uint64_t)1) << i);
}
hetero_mask &= ~((((uint64_t)1) << DT_LOGIC) | (((uint64_t)1) << DT_CXX_BOOL));

Просмотреть файл

@ -70,19 +70,21 @@
uint32_t ompi_ddt_external32_arch_id = OMPI_ARCH_LDEXPSIZEIS15 | OMPI_ARCH_LDMANTDIGIS113 |
OMPI_ARCH_LONGDOUBLEIS128 | OMPI_ARCH_ISBIGENDIAN |
OMPI_ARCH_HEADERMASK | OMPI_ARCH_HEADERMASK2;
OMPI_ARCH_HEADERMASK | OMPI_ARCH_HEADERMASK2 |
OMPI_ARCH_BOOLIS8 | OMPI_ARCH_LOGICALIS8;
ompi_convertor_t* ompi_mpi_external32_convertor = NULL;
ompi_convertor_t* ompi_mpi_local_convertor = NULL;
uint32_t ompi_mpi_local_arch = 0xFFFFFFFF;
int32_t ompi_ddt_default_convertors_init( void )
{
/* create the extern32 convertor */
{
ompi_arch_compute_local_id( &ompi_mpi_local_arch );
/* create the extern32 convertor */
ompi_mpi_external32_convertor = ompi_convertor_create( ompi_ddt_external32_arch_id, 0 );
/* create the local convertor */
ompi_arch_compute_local_id( &ompi_mpi_local_arch );
ompi_mpi_local_convertor = ompi_convertor_create( ompi_mpi_local_arch, 0 );
return OMPI_SUCCESS;

Просмотреть файл

@ -182,6 +182,9 @@ typedef void (*mca_btl_base_completion_fn_t)(
struct mca_btl_base_segment_t {
ompi_ptr_t seg_addr;
uint32_t seg_len;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t seg_padding[4];
#endif
union {
uint32_t key32[2];
uint64_t key64;

Просмотреть файл

@ -145,6 +145,9 @@ typedef mca_btl_base_recv_reg_t mca_btl_openib_recv_reg_t;
/**
 * Per-port information record of the openib BTL, holding the fields
 * mtu and subnet.
 *
 * The layout is pinned explicitly when heterogeneous support is enabled,
 * so that it does not depend on the compiler's padding rules.
 * NOTE(review): presumably this record is exchanged between peers built
 * with different compilers / architectures -- confirm against its users.
 */
struct mca_btl_openib_port_info_t {
uint32_t mtu;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
/* Explicit filler: 4 bytes of mtu + 4 bytes of padding place subnet at
   offset 8 regardless of how the compiler aligns uint64_t. */
uint8_t padding[4];
#endif
uint64_t subnet;
};
typedef struct mca_btl_openib_port_info_t mca_btl_openib_port_info_t;

Просмотреть файл

@ -64,7 +64,7 @@
#include "btl_tcp_endpoint.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/datatype/convertor.h"
#include "btl_tcp_hdr.h"
mca_btl_tcp_component_t mca_btl_tcp_component = {
{
@ -226,6 +226,17 @@ int mca_btl_tcp_component_open(void)
MCA_BTL_FLAGS_NEED_CSUM |
MCA_BTL_FLAGS_NEED_ACK |
MCA_BTL_FLAGS_FAKE_RDMA);
#if 0
{
mca_btl_tcp_hdr_t header;
opal_output(0, "mca_btl_tcp_hdr_t:");
opal_output(0, "\tsizeof() = %ld", sizeof(mca_btl_tcp_hdr_t));
opal_output(0, "\t.base = %ld", (char*) &header.base - (char*) &header);
opal_output(0, "\t.type = %ld", (char*) &header.type - (char*) &header);
opal_output(0, "\t.count = %ld", (char*) &header.count - (char*) &header);
opal_output(0, "\t.size = %ld", (char*) &header.size - (char*) &header);
}
#endif
return OMPI_SUCCESS;
}

Просмотреть файл

@ -42,6 +42,10 @@ struct mca_btl_tcp_hdr_t {
mca_btl_base_header_t base;
uint8_t type;
uint16_t count;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
/* uint64_t may be required to be 8 byte aligned. */
uint8_t padding[4];
#endif
uint64_t size;
};
typedef struct mca_btl_tcp_hdr_t mca_btl_tcp_hdr_t;

Просмотреть файл

@ -307,13 +307,13 @@ void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl)
case MCA_PML_OB1_HDR_TYPE_ACK:
rc = mca_pml_ob1_recv_request_ack_send_btl(pckt->proc,
send_dst,
pckt->hdr.hdr_ack.hdr_src_req.pval,
pckt->hdr.hdr_ack.hdr_src_req.lval,
pckt->hdr.hdr_ack.hdr_dst_req.pval,
pckt->hdr.hdr_ack.hdr_rdma_offset);
MCA_PML_OB1_PCKT_PENDING_RETURN(pckt);
if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
MCA_PML_OB1_ADD_ACK_TO_PENDING(pckt->proc,
pckt->hdr.hdr_ack.hdr_src_req.pval,
pckt->hdr.hdr_ack.hdr_src_req.lval,
pckt->hdr.hdr_ack.hdr_dst_req.pval,
pckt->hdr.hdr_ack.hdr_rdma_offset);
return;

Просмотреть файл

@ -182,6 +182,80 @@ int mca_pml_ob1_component_open(void)
opal_output(0, "WARNING: Cannot set both mpi_leave_pinned and mpi_leave_pinned_pipeline, defaulting to mpi_leave_pinned ONLY\n");
}
mca_pml_ob1.enabled = false;
#if 0
{
mca_pml_ob1_common_hdr_t header;
opal_output(0, "mca_pml_ob1_common_hdr_t:");
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_common_hdr_t));
opal_output(0, "\t.hdr_type = %ld", (char*) &header.hdr_type - (char*) &header);
opal_output(0, "\t.hdr_flags = %ld", (char*) &header.hdr_flags - (char*) &header);
}
{
mca_pml_ob1_match_hdr_t header;
opal_output(0, "mca_pml_ob1_match_hdr_t:");
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_match_hdr_t));
opal_output(0, "\t.hdr_common = %ld", (char*) &header.hdr_common - (char*) &header);
opal_output(0, "\t.hdr_ctx = %ld", (char*) &header.hdr_ctx - (char*) &header);
opal_output(0, "\t.hdr_src = %ld", (char*) &header.hdr_src - (char*) &header);
opal_output(0, "\t.hdr_tag = %ld", (char*) &header.hdr_tag - (char*) &header);
opal_output(0, "\t.hdr_seq = %ld", (char*) &header.hdr_seq - (char*) &header);
}
{
mca_pml_ob1_rendezvous_hdr_t header;
opal_output(0, "mca_pml_ob1_rendezvous_hdr_t:");
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_rendezvous_hdr_t));
opal_output(0, "\t.hdr_match = %ld", (char*) &header.hdr_match - (char*) &header);
opal_output(0, "\t.hdr_msg_length = %ld", (char*) &header.hdr_msg_length - (char*) &header);
opal_output(0, "\t.hdr_src_req = %ld", (char*) &header.hdr_src_req - (char*) &header);
}
{
mca_pml_ob1_rget_hdr_t header;
opal_output(0, "mca_pml_ob1_rget_hdr_t:");
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_rget_hdr_t));
opal_output(0, "\t.hdr_rndv = %ld", (char*) &header.hdr_rndv - (char*) &header);
opal_output(0, "\t.hdr_seg_cnt = %ld", (char*) &header.hdr_seg_cnt - (char*) &header);
opal_output(0, "\t.hdr_des = %ld", (char*) &header.hdr_des - (char*) &header);
opal_output(0, "\t.hdr_segs = %ld", (char*) &header.hdr_segs - (char*) &header);
}
{
mca_pml_ob1_frag_hdr_t header;
opal_output(0, "mca_pml_ob1_frag_hdr_t:");
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_frag_hdr_t));
opal_output(0, "\t.hdr_common = %ld", (char*) &header.hdr_common - (char*) &header);
opal_output(0, "\t.hdr_frag_offset = %ld", (char*) &header.hdr_frag_offset - (char*) &header);
opal_output(0, "\t.hdr_src_req = %ld", (char*) &header.hdr_src_req - (char*) &header);
opal_output(0, "\t.hdr_dst_req = %ld", (char*) &header.hdr_dst_req - (char*) &header);
}
{
mca_pml_ob1_ack_hdr_t header;
opal_output(0, "mca_pml_ob1_ack_hdr_t:");
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_ack_hdr_t));
opal_output(0, "\t.hdr_common = %ld", (char*) &header.hdr_common - (char*) &header);
opal_output(0, "\t.hdr_src_req = %ld", (char*) &header.hdr_src_req - (char*) &header);
opal_output(0, "\t.hdr_dst_req = %ld", (char*) &header.hdr_dst_req - (char*) &header);
opal_output(0, "\t.hdr_rdma_offset = %ld", (char*) &header.hdr_rdma_offset - (char*) &header);
}
{
mca_pml_ob1_rdma_hdr_t header;
opal_output(0, "mca_pml_ob1_rdma_hdr_t:");
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_rdma_hdr_t));
opal_output(0, "\t.hdr_common = %ld", (char*) &header.hdr_common - (char*) &header);
opal_output(0, "\t.hdr_seg_cnt = %ld", (char*) &header.hdr_seg_cnt - (char*) &header);
opal_output(0, "\t.hdr_req = %ld", (char*) &header.hdr_req - (char*) &header);
opal_output(0, "\t.hdr_des = %ld", (char*) &header.hdr_des - (char*) &header);
opal_output(0, "\t.hdr_rdma_offset = %ld", (char*) &header.hdr_rdma_offset - (char*) &header);
opal_output(0, "\t.hdr_segs = %ld", (char*) &header.hdr_segs - (char*) &header);
}
{
mca_pml_ob1_fin_hdr_t header;
opal_output(0, "mca_pml_ob1_fin_hdr_t:");
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_fin_hdr_t));
opal_output(0, "\t.hdr_common = %ld", (char*) &header.hdr_common - (char*) &header);
opal_output(0, "\t.hdr_des = %ld", (char*) &header.hdr_des - (char*) &header);
}
#endif
return mca_bml_base_open();
}

Просмотреть файл

@ -69,6 +69,9 @@ struct mca_pml_ob1_match_hdr_t {
int32_t hdr_src; /**< source rank */
int32_t hdr_tag; /**< user tag */
uint16_t hdr_seq; /**< message sequence number */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t hdr_padding[2]; /**< explicitly pad to 16 bytes. Compilers seem to already prefer to do this, but make it explicit just in case */
#endif
};
typedef struct mca_pml_ob1_match_hdr_t mca_pml_ob1_match_hdr_t;
@ -102,6 +105,9 @@ struct mca_pml_ob1_rendezvous_hdr_t {
};
typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
/* Note that hdr_src_req is not put in network byte order because it
is never processed by the receiver, other than being copied into
the ack header */
#define MCA_PML_OB1_RNDV_HDR_NTOH(h) \
do { \
MCA_PML_OB1_MATCH_HDR_NTOH((h).hdr_match); \
@ -119,8 +125,11 @@ typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
*/
struct mca_pml_ob1_rget_hdr_t {
mca_pml_ob1_rendezvous_hdr_t hdr_rndv;
ompi_ptr_t hdr_des; /**< source descriptor */
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t hdr_padding[4];
#endif
ompi_ptr_t hdr_des; /**< source descriptor */
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
};
typedef struct mca_pml_ob1_rget_hdr_t mca_pml_ob1_rget_hdr_t;
@ -130,6 +139,9 @@ typedef struct mca_pml_ob1_rget_hdr_t mca_pml_ob1_rget_hdr_t;
*/
struct mca_pml_ob1_frag_hdr_t {
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t hdr_padding[6];
#endif
uint64_t hdr_frag_offset; /**< offset into message */
ompi_ptr_t hdr_src_req; /**< pointer to source request */
ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */
@ -155,12 +167,19 @@ typedef struct mca_pml_ob1_frag_hdr_t mca_pml_ob1_frag_hdr_t;
/**
 * ACK header of the ob1 PML: the common header followed by the source
 * request, the matched receive request and the rdma starting offset.
 */
struct mca_pml_ob1_ack_hdr_t {
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
/* Explicit filler between the common header and the first 8-byte member,
   so the layout does not depend on compiler-inserted padding.
   NOTE(review): assumes mca_pml_ob1_common_hdr_t is 2 bytes, which would
   put hdr_src_req at offset 8 -- confirm against its definition. */
uint8_t hdr_padding[6];
#endif
ompi_ptr_t hdr_src_req; /**< source request */
ompi_ptr_t hdr_dst_req; /**< matched receive request */
uint64_t hdr_rdma_offset; /**< starting point rdma protocol */
};
typedef struct mca_pml_ob1_ack_hdr_t mca_pml_ob1_ack_hdr_t;
/* Note that the request headers are not put in NBO because the
src_req is already in receiver's byte order and the dst_req is not
used by the receiver for anything other than backpointers in return
headers */
#define MCA_PML_OB1_ACK_HDR_NTOH(h) \
do { \
MCA_PML_OB1_COMMON_HDR_NTOH(h.hdr_common); \
@ -179,10 +198,13 @@ typedef struct mca_pml_ob1_ack_hdr_t mca_pml_ob1_ack_hdr_t;
struct mca_pml_ob1_rdma_hdr_t {
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t hdr_padding[2]; /** two to pad out the hdr to a 4 byte alignment. hdr_req will then be 8 byte aligned after 4 for hdr_seg_cnt */
#endif
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
ompi_ptr_t hdr_req; /**< destination request */
ompi_ptr_t hdr_des; /**< source descriptor */
uint64_t hdr_rdma_offset; /**< current offset into user buffer */
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
};
typedef struct mca_pml_ob1_rdma_hdr_t mca_pml_ob1_rdma_hdr_t;
@ -193,6 +215,9 @@ typedef struct mca_pml_ob1_rdma_hdr_t mca_pml_ob1_rdma_hdr_t;
/**
 * FIN header of the ob1 PML: the common header followed by the descriptor
 * that has completed.
 */
struct mca_pml_ob1_fin_hdr_t {
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
/* Explicit filler between the common header and hdr_des, so the layout
   does not depend on compiler-inserted padding.
   NOTE(review): assumes mca_pml_ob1_common_hdr_t is 2 bytes, which would
   put hdr_des at offset 8 -- confirm against its definition. */
uint8_t hdr_padding[6];
#endif
ompi_ptr_t hdr_des; /**< completed descriptor */
};
typedef struct mca_pml_ob1_fin_hdr_t mca_pml_ob1_fin_hdr_t;

Просмотреть файл

@ -179,7 +179,7 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
int mca_pml_ob1_recv_request_ack_send_btl(
ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
void *hdr_src_req, void *hdr_dst_req, uint64_t hdr_rdma_offset)
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_rdma_offset)
{
mca_btl_base_descriptor_t* des;
mca_pml_ob1_ack_hdr_t* ack;
@ -195,7 +195,7 @@ int mca_pml_ob1_recv_request_ack_send_btl(
ack = (mca_pml_ob1_ack_hdr_t*)des->des_src->seg_addr.pval;
ack->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_ACK;
ack->hdr_common.hdr_flags = 0;
ack->hdr_src_req.pval = hdr_src_req;
ack->hdr_src_req.lval = hdr_src_req;
ack->hdr_dst_req.pval = hdr_dst_req;
ack->hdr_rdma_offset = hdr_rdma_offset;
@ -303,7 +303,7 @@ static int mca_pml_ob1_recv_request_ack(
}
/* let the schedule function know there is no need to set the ACK flag */
recvreq->req_ack_sent = true;
return mca_pml_ob1_recv_request_ack_send(proc, hdr->hdr_src_req.pval,
return mca_pml_ob1_recv_request_ack_send(proc, hdr->hdr_src_req.lval,
recvreq, recvreq->req_rdma_offset);
}

Просмотреть файл

@ -348,7 +348,7 @@ static inline void mca_pml_ob1_recv_request_schedule(
\
MCA_PML_OB1_PCKT_PENDING_ALLOC(_pckt,_rc); \
_pckt->hdr.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_ACK; \
_pckt->hdr.hdr_ack.hdr_src_req.pval = (S); \
_pckt->hdr.hdr_ack.hdr_src_req.lval = (S); \
_pckt->hdr.hdr_ack.hdr_dst_req.pval = (D); \
_pckt->hdr.hdr_ack.hdr_rdma_offset = (O); \
_pckt->proc = (P); \
@ -359,11 +359,11 @@ static inline void mca_pml_ob1_recv_request_schedule(
} while(0)
int mca_pml_ob1_recv_request_ack_send_btl(ompi_proc_t* proc,
mca_bml_base_btl_t* bml_btl, void *hdr_src_req, void *hdr_dst_req,
mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req,
uint64_t hdr_rdma_offset);
static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
void *hdr_src_req, void *hdr_dst_req, uint64_t hdr_rdma_offset)
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_rdma_offset)
{
size_t i;
mca_bml_base_btl_t* bml_btl;