Merge in changes from the bwb-heterogeneous temp branch (r12491 - r12714) to support compilers / architectures with different padding rules.
This commit was SVN r12749. The following SVN revisions from the original message are invalid or inconsistent and therefore were not cross-referenced: r12491 r12714
Этот коммит содержится в:
родитель
d64fa194f1
Коммит
441432950f
@ -166,7 +166,7 @@ ompi_convertor_find_or_create_master( uint32_t remote_arch )
|
||||
uint64_t hetero_mask = 0;
|
||||
|
||||
for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) {
|
||||
if( remote_sizes[i] > 2 )
|
||||
if( remote_sizes[i] > 1 )
|
||||
hetero_mask |= (((uint64_t)1) << i);
|
||||
}
|
||||
hetero_mask &= ~((((uint64_t)1) << DT_LOGIC) | (((uint64_t)1) << DT_CXX_BOOL));
|
||||
|
@ -70,7 +70,8 @@
|
||||
|
||||
uint32_t ompi_ddt_external32_arch_id = OMPI_ARCH_LDEXPSIZEIS15 | OMPI_ARCH_LDMANTDIGIS113 |
|
||||
OMPI_ARCH_LONGDOUBLEIS128 | OMPI_ARCH_ISBIGENDIAN |
|
||||
OMPI_ARCH_HEADERMASK | OMPI_ARCH_HEADERMASK2;
|
||||
OMPI_ARCH_HEADERMASK | OMPI_ARCH_HEADERMASK2 |
|
||||
OMPI_ARCH_BOOLIS8 | OMPI_ARCH_LOGICALIS8;
|
||||
|
||||
ompi_convertor_t* ompi_mpi_external32_convertor = NULL;
|
||||
ompi_convertor_t* ompi_mpi_local_convertor = NULL;
|
||||
@ -78,11 +79,12 @@ uint32_t ompi_mpi_local_arch = 0xFFFFFFFF;
|
||||
|
||||
int32_t ompi_ddt_default_convertors_init( void )
|
||||
{
|
||||
ompi_arch_compute_local_id( &ompi_mpi_local_arch );
|
||||
|
||||
/* create the extern32 convertor */
|
||||
ompi_mpi_external32_convertor = ompi_convertor_create( ompi_ddt_external32_arch_id, 0 );
|
||||
|
||||
/* create the local convertor */
|
||||
ompi_arch_compute_local_id( &ompi_mpi_local_arch );
|
||||
ompi_mpi_local_convertor = ompi_convertor_create( ompi_mpi_local_arch, 0 );
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
|
@ -182,6 +182,9 @@ typedef void (*mca_btl_base_completion_fn_t)(
|
||||
struct mca_btl_base_segment_t {
|
||||
ompi_ptr_t seg_addr;
|
||||
uint32_t seg_len;
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t seg_padding[4];
|
||||
#endif
|
||||
union {
|
||||
uint32_t key32[2];
|
||||
uint64_t key64;
|
||||
|
@ -145,6 +145,9 @@ typedef mca_btl_base_recv_reg_t mca_btl_openib_recv_reg_t;
|
||||
|
||||
struct mca_btl_openib_port_info_t {
|
||||
uint32_t mtu;
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t padding[4];
|
||||
#endif
|
||||
uint64_t subnet;
|
||||
};
|
||||
typedef struct mca_btl_openib_port_info_t mca_btl_openib_port_info_t;
|
||||
|
@ -64,7 +64,7 @@
|
||||
#include "btl_tcp_endpoint.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/datatype/convertor.h"
|
||||
|
||||
#include "btl_tcp_hdr.h"
|
||||
|
||||
mca_btl_tcp_component_t mca_btl_tcp_component = {
|
||||
{
|
||||
@ -226,6 +226,17 @@ int mca_btl_tcp_component_open(void)
|
||||
MCA_BTL_FLAGS_NEED_CSUM |
|
||||
MCA_BTL_FLAGS_NEED_ACK |
|
||||
MCA_BTL_FLAGS_FAKE_RDMA);
|
||||
#if 0
|
||||
{
|
||||
mca_btl_tcp_hdr_t header;
|
||||
opal_output(0, "mca_btl_tcp_hdr_t:");
|
||||
opal_output(0, "\tsizeof() = %ld", sizeof(mca_btl_tcp_hdr_t));
|
||||
opal_output(0, "\t.base = %ld", (char*) &header.base - (char*) &header);
|
||||
opal_output(0, "\t.type = %ld", (char*) &header.type - (char*) &header);
|
||||
opal_output(0, "\t.count = %ld", (char*) &header.count - (char*) &header);
|
||||
opal_output(0, "\t.size = %ld", (char*) &header.size - (char*) &header);
|
||||
}
|
||||
#endif
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -42,6 +42,10 @@ struct mca_btl_tcp_hdr_t {
|
||||
mca_btl_base_header_t base;
|
||||
uint8_t type;
|
||||
uint16_t count;
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
/* uint64_t may be required to be 8 byte aligned. */
|
||||
uint8_t padding[4];
|
||||
#endif
|
||||
uint64_t size;
|
||||
};
|
||||
typedef struct mca_btl_tcp_hdr_t mca_btl_tcp_hdr_t;
|
||||
|
@ -307,13 +307,13 @@ void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl)
|
||||
case MCA_PML_OB1_HDR_TYPE_ACK:
|
||||
rc = mca_pml_ob1_recv_request_ack_send_btl(pckt->proc,
|
||||
send_dst,
|
||||
pckt->hdr.hdr_ack.hdr_src_req.pval,
|
||||
pckt->hdr.hdr_ack.hdr_src_req.lval,
|
||||
pckt->hdr.hdr_ack.hdr_dst_req.pval,
|
||||
pckt->hdr.hdr_ack.hdr_rdma_offset);
|
||||
MCA_PML_OB1_PCKT_PENDING_RETURN(pckt);
|
||||
if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
||||
MCA_PML_OB1_ADD_ACK_TO_PENDING(pckt->proc,
|
||||
pckt->hdr.hdr_ack.hdr_src_req.pval,
|
||||
pckt->hdr.hdr_ack.hdr_src_req.lval,
|
||||
pckt->hdr.hdr_ack.hdr_dst_req.pval,
|
||||
pckt->hdr.hdr_ack.hdr_rdma_offset);
|
||||
return;
|
||||
|
@ -182,6 +182,80 @@ int mca_pml_ob1_component_open(void)
|
||||
opal_output(0, "WARNING: Cannot set both mpi_leave_pinned and mpi_leave_pinned_pipeline, defaulting to mpi_leave_pinned ONLY\n");
|
||||
}
|
||||
mca_pml_ob1.enabled = false;
|
||||
|
||||
#if 0
|
||||
{
|
||||
mca_pml_ob1_common_hdr_t header;
|
||||
opal_output(0, "mca_pml_ob1_common_hdr_t:");
|
||||
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_common_hdr_t));
|
||||
opal_output(0, "\t.hdr_type = %ld", (char*) &header.hdr_type - (char*) &header);
|
||||
opal_output(0, "\t.hdr_flags = %ld", (char*) &header.hdr_flags - (char*) &header);
|
||||
}
|
||||
{
|
||||
mca_pml_ob1_match_hdr_t header;
|
||||
opal_output(0, "mca_pml_ob1_match_hdr_t:");
|
||||
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_match_hdr_t));
|
||||
opal_output(0, "\t.hdr_common = %ld", (char*) &header.hdr_common - (char*) &header);
|
||||
opal_output(0, "\t.hdr_ctx = %ld", (char*) &header.hdr_ctx - (char*) &header);
|
||||
opal_output(0, "\t.hdr_src = %ld", (char*) &header.hdr_src - (char*) &header);
|
||||
opal_output(0, "\t.hdr_tag = %ld", (char*) &header.hdr_tag - (char*) &header);
|
||||
opal_output(0, "\t.hdr_seq = %ld", (char*) &header.hdr_seq - (char*) &header);
|
||||
}
|
||||
{
|
||||
mca_pml_ob1_rendezvous_hdr_t header;
|
||||
opal_output(0, "mca_pml_ob1_rendezvous_hdr_t:");
|
||||
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_rendezvous_hdr_t));
|
||||
opal_output(0, "\t.hdr_match = %ld", (char*) &header.hdr_match - (char*) &header);
|
||||
opal_output(0, "\t.hdr_msg_length = %ld", (char*) &header.hdr_msg_length - (char*) &header);
|
||||
opal_output(0, "\t.hdr_src_req = %ld", (char*) &header.hdr_src_req - (char*) &header);
|
||||
}
|
||||
{
|
||||
mca_pml_ob1_rget_hdr_t header;
|
||||
opal_output(0, "mca_pml_ob1_rget_hdr_t:");
|
||||
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_rget_hdr_t));
|
||||
opal_output(0, "\t.hdr_rndv = %ld", (char*) &header.hdr_rndv - (char*) &header);
|
||||
opal_output(0, "\t.hdr_seg_cnt = %ld", (char*) &header.hdr_seg_cnt - (char*) &header);
|
||||
opal_output(0, "\t.hdr_des = %ld", (char*) &header.hdr_des - (char*) &header);
|
||||
opal_output(0, "\t.hdr_segs = %ld", (char*) &header.hdr_segs - (char*) &header);
|
||||
}
|
||||
{
|
||||
mca_pml_ob1_frag_hdr_t header;
|
||||
opal_output(0, "mca_pml_ob1_frag_hdr_t:");
|
||||
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_frag_hdr_t));
|
||||
opal_output(0, "\t.hdr_common = %ld", (char*) &header.hdr_common - (char*) &header);
|
||||
opal_output(0, "\t.hdr_frag_offset = %ld", (char*) &header.hdr_frag_offset - (char*) &header);
|
||||
opal_output(0, "\t.hdr_src_req = %ld", (char*) &header.hdr_src_req - (char*) &header);
|
||||
opal_output(0, "\t.hdr_dst_req = %ld", (char*) &header.hdr_dst_req - (char*) &header);
|
||||
}
|
||||
{
|
||||
mca_pml_ob1_ack_hdr_t header;
|
||||
opal_output(0, "mca_pml_ob1_ack_hdr_t:");
|
||||
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_ack_hdr_t));
|
||||
opal_output(0, "\t.hdr_common = %ld", (char*) &header.hdr_common - (char*) &header);
|
||||
opal_output(0, "\t.hdr_src_req = %ld", (char*) &header.hdr_src_req - (char*) &header);
|
||||
opal_output(0, "\t.hdr_dst_req = %ld", (char*) &header.hdr_dst_req - (char*) &header);
|
||||
opal_output(0, "\t.hdr_rdma_offset = %ld", (char*) &header.hdr_rdma_offset - (char*) &header);
|
||||
}
|
||||
{
|
||||
mca_pml_ob1_rdma_hdr_t header;
|
||||
opal_output(0, "mca_pml_ob1_rdma_hdr_t:");
|
||||
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_rdma_hdr_t));
|
||||
opal_output(0, "\t.hdr_common = %ld", (char*) &header.hdr_common - (char*) &header);
|
||||
opal_output(0, "\t.hdr_seg_cnt = %ld", (char*) &header.hdr_seg_cnt - (char*) &header);
|
||||
opal_output(0, "\t.hdr_req = %ld", (char*) &header.hdr_req - (char*) &header);
|
||||
opal_output(0, "\t.hdr_des = %ld", (char*) &header.hdr_des - (char*) &header);
|
||||
opal_output(0, "\t.hdr_rdma_offset = %ld", (char*) &header.hdr_rdma_offset - (char*) &header);
|
||||
opal_output(0, "\t.hdr_segs = %ld", (char*) &header.hdr_segs - (char*) &header);
|
||||
}
|
||||
{
|
||||
mca_pml_ob1_fin_hdr_t header;
|
||||
opal_output(0, "mca_pml_ob1_fin_hdr_t:");
|
||||
opal_output(0, "\tsizeof() = %ld", sizeof(mca_pml_ob1_fin_hdr_t));
|
||||
opal_output(0, "\t.hdr_common = %ld", (char*) &header.hdr_common - (char*) &header);
|
||||
opal_output(0, "\t.hdr_des = %ld", (char*) &header.hdr_des - (char*) &header);
|
||||
}
|
||||
#endif
|
||||
|
||||
return mca_bml_base_open();
|
||||
|
||||
}
|
||||
|
@ -69,6 +69,9 @@ struct mca_pml_ob1_match_hdr_t {
|
||||
int32_t hdr_src; /**< source rank */
|
||||
int32_t hdr_tag; /**< user tag */
|
||||
uint16_t hdr_seq; /**< message sequence number */
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[2]; /**< explicitly pad to 16 bytes. Compilers seem to already prefer to do this, but make it explicit just in case */
|
||||
#endif
|
||||
};
|
||||
typedef struct mca_pml_ob1_match_hdr_t mca_pml_ob1_match_hdr_t;
|
||||
|
||||
@ -102,6 +105,9 @@ struct mca_pml_ob1_rendezvous_hdr_t {
|
||||
};
|
||||
typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
|
||||
|
||||
/* Note that hdr_src_req is not put in network byte order because it
|
||||
is never processed by the receiver, other than being copied into
|
||||
the ack header */
|
||||
#define MCA_PML_OB1_RNDV_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_MATCH_HDR_NTOH((h).hdr_match); \
|
||||
@ -119,8 +125,11 @@ typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
|
||||
*/
|
||||
struct mca_pml_ob1_rget_hdr_t {
|
||||
mca_pml_ob1_rendezvous_hdr_t hdr_rndv;
|
||||
ompi_ptr_t hdr_des; /**< source descriptor */
|
||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[4];
|
||||
#endif
|
||||
ompi_ptr_t hdr_des; /**< source descriptor */
|
||||
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
|
||||
};
|
||||
typedef struct mca_pml_ob1_rget_hdr_t mca_pml_ob1_rget_hdr_t;
|
||||
@ -130,6 +139,9 @@ typedef struct mca_pml_ob1_rget_hdr_t mca_pml_ob1_rget_hdr_t;
|
||||
*/
|
||||
struct mca_pml_ob1_frag_hdr_t {
|
||||
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[6];
|
||||
#endif
|
||||
uint64_t hdr_frag_offset; /**< offset into message */
|
||||
ompi_ptr_t hdr_src_req; /**< pointer to source request */
|
||||
ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */
|
||||
@ -155,12 +167,19 @@ typedef struct mca_pml_ob1_frag_hdr_t mca_pml_ob1_frag_hdr_t;
|
||||
|
||||
struct mca_pml_ob1_ack_hdr_t {
|
||||
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[6];
|
||||
#endif
|
||||
ompi_ptr_t hdr_src_req; /**< source request */
|
||||
ompi_ptr_t hdr_dst_req; /**< matched receive request */
|
||||
uint64_t hdr_rdma_offset; /**< starting point rdma protocol */
|
||||
};
|
||||
typedef struct mca_pml_ob1_ack_hdr_t mca_pml_ob1_ack_hdr_t;
|
||||
|
||||
/* Note that the request headers are not put in NBO because the
|
||||
src_req is already in receiver's byte order and the dst_req is not
|
||||
used by the receiver for anything other than backpointers in return
|
||||
headers */
|
||||
#define MCA_PML_OB1_ACK_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_NTOH(h.hdr_common); \
|
||||
@ -179,10 +198,13 @@ typedef struct mca_pml_ob1_ack_hdr_t mca_pml_ob1_ack_hdr_t;
|
||||
|
||||
struct mca_pml_ob1_rdma_hdr_t {
|
||||
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[2]; /** two to pad out the hdr to a 4 byte alignment. hdr_req will then be 8 byte aligned after 4 for hdr_seg_cnt */
|
||||
#endif
|
||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||
ompi_ptr_t hdr_req; /**< destination request */
|
||||
ompi_ptr_t hdr_des; /**< source descriptor */
|
||||
uint64_t hdr_rdma_offset; /**< current offset into user buffer */
|
||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
|
||||
};
|
||||
typedef struct mca_pml_ob1_rdma_hdr_t mca_pml_ob1_rdma_hdr_t;
|
||||
@ -193,6 +215,9 @@ typedef struct mca_pml_ob1_rdma_hdr_t mca_pml_ob1_rdma_hdr_t;
|
||||
|
||||
struct mca_pml_ob1_fin_hdr_t {
|
||||
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[6];
|
||||
#endif
|
||||
ompi_ptr_t hdr_des; /**< completed descriptor */
|
||||
};
|
||||
typedef struct mca_pml_ob1_fin_hdr_t mca_pml_ob1_fin_hdr_t;
|
||||
|
@ -179,7 +179,7 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
|
||||
|
||||
int mca_pml_ob1_recv_request_ack_send_btl(
|
||||
ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
|
||||
void *hdr_src_req, void *hdr_dst_req, uint64_t hdr_rdma_offset)
|
||||
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_rdma_offset)
|
||||
{
|
||||
mca_btl_base_descriptor_t* des;
|
||||
mca_pml_ob1_ack_hdr_t* ack;
|
||||
@ -195,7 +195,7 @@ int mca_pml_ob1_recv_request_ack_send_btl(
|
||||
ack = (mca_pml_ob1_ack_hdr_t*)des->des_src->seg_addr.pval;
|
||||
ack->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_ACK;
|
||||
ack->hdr_common.hdr_flags = 0;
|
||||
ack->hdr_src_req.pval = hdr_src_req;
|
||||
ack->hdr_src_req.lval = hdr_src_req;
|
||||
ack->hdr_dst_req.pval = hdr_dst_req;
|
||||
ack->hdr_rdma_offset = hdr_rdma_offset;
|
||||
|
||||
@ -303,7 +303,7 @@ static int mca_pml_ob1_recv_request_ack(
|
||||
}
|
||||
/* let the schedule function know that there is no need to set the ACK flag */
|
||||
recvreq->req_ack_sent = true;
|
||||
return mca_pml_ob1_recv_request_ack_send(proc, hdr->hdr_src_req.pval,
|
||||
return mca_pml_ob1_recv_request_ack_send(proc, hdr->hdr_src_req.lval,
|
||||
recvreq, recvreq->req_rdma_offset);
|
||||
}
|
||||
|
||||
|
@ -348,7 +348,7 @@ static inline void mca_pml_ob1_recv_request_schedule(
|
||||
\
|
||||
MCA_PML_OB1_PCKT_PENDING_ALLOC(_pckt,_rc); \
|
||||
_pckt->hdr.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_ACK; \
|
||||
_pckt->hdr.hdr_ack.hdr_src_req.pval = (S); \
|
||||
_pckt->hdr.hdr_ack.hdr_src_req.lval = (S); \
|
||||
_pckt->hdr.hdr_ack.hdr_dst_req.pval = (D); \
|
||||
_pckt->hdr.hdr_ack.hdr_rdma_offset = (O); \
|
||||
_pckt->proc = (P); \
|
||||
@ -359,11 +359,11 @@ static inline void mca_pml_ob1_recv_request_schedule(
|
||||
} while(0)
|
||||
|
||||
int mca_pml_ob1_recv_request_ack_send_btl(ompi_proc_t* proc,
|
||||
mca_bml_base_btl_t* bml_btl, void *hdr_src_req, void *hdr_dst_req,
|
||||
mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req,
|
||||
uint64_t hdr_rdma_offset);
|
||||
|
||||
static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
|
||||
void *hdr_src_req, void *hdr_dst_req, uint64_t hdr_rdma_offset)
|
||||
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_rdma_offset)
|
||||
{
|
||||
size_t i;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user