diff --git a/ompi/mca/btl/btl.h b/ompi/mca/btl/btl.h index 2278db8794..90d8e957e1 100644 --- a/ompi/mca/btl/btl.h +++ b/ompi/mca/btl/btl.h @@ -219,6 +219,9 @@ struct mca_btl_base_header_t{ }; typedef struct mca_btl_base_header_t mca_btl_base_header_t; +#define MCA_BTL_BASE_HEADER_HTON(hdr) +#define MCA_BTL_BASE_HEADER_NTOH(hdr) + /* * BTL component interface functions and datatype. */ diff --git a/ompi/mca/btl/tcp/btl_tcp.c b/ompi/mca/btl/tcp/btl_tcp.c index f6609061ca..6a500af1d6 100644 --- a/ompi/mca/btl/tcp/btl_tcp.c +++ b/ompi/mca/btl/tcp/btl_tcp.c @@ -390,6 +390,7 @@ int mca_btl_tcp_send( frag->hdr.base.tag = tag; frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_SEND; frag->hdr.count = 0; + if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr); return mca_btl_tcp_endpoint_send(endpoint,frag); } @@ -431,6 +432,7 @@ int mca_btl_tcp_put( frag->hdr.base.tag = MCA_BTL_TAG_BTL; frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_PUT; frag->hdr.count = frag->base.des_dst_cnt; + if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr); return mca_btl_tcp_endpoint_send(endpoint,frag); } @@ -466,6 +468,7 @@ int mca_btl_tcp_get( frag->hdr.base.tag = MCA_BTL_TAG_BTL; frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_GET; frag->hdr.count = frag->base.des_src_cnt; + if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr); return mca_btl_tcp_endpoint_send(endpoint,frag); } diff --git a/ompi/mca/btl/tcp/btl_tcp_component.c b/ompi/mca/btl/tcp/btl_tcp_component.c index f1499f61fa..afa322bcd4 100644 --- a/ompi/mca/btl/tcp/btl_tcp_component.c +++ b/ompi/mca/btl/tcp/btl_tcp_component.c @@ -601,6 +601,7 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user) close(sd); return; } + OMPI_PROCESS_NAME_NTOH(guid); /* now set socket up to be non-blocking */ if((flags = fcntl(sd, F_GETFL, 0)) < 0) { diff --git a/ompi/mca/btl/tcp/btl_tcp_endpoint.c b/ompi/mca/btl/tcp/btl_tcp_endpoint.c index 3775f37ce6..50820050b0 100644 --- a/ompi/mca/btl/tcp/btl_tcp_endpoint.c +++ 
b/ompi/mca/btl/tcp/btl_tcp_endpoint.c @@ -277,8 +277,11 @@ static int mca_btl_tcp_endpoint_send_connect_ack(mca_btl_base_endpoint_t* btl_en { /* send process identifier to remote endpoint */ mca_btl_tcp_proc_t* btl_proc = mca_btl_tcp_proc_local(); - if(mca_btl_tcp_endpoint_send_blocking(btl_endpoint, &btl_proc->proc_name, sizeof(btl_proc->proc_name)) != - sizeof(btl_proc->proc_name)) { + orte_process_name_t guid = btl_proc->proc_name; + + OMPI_PROCESS_NAME_HTON(guid); + if(mca_btl_tcp_endpoint_send_blocking(btl_endpoint, &guid, sizeof(guid)) != + sizeof(guid)) { return OMPI_ERR_UNREACH; } return OMPI_SUCCESS; @@ -438,6 +441,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en if((mca_btl_tcp_endpoint_recv_blocking(btl_endpoint, &guid, sizeof(orte_process_name_t))) != sizeof(orte_process_name_t)) { return OMPI_ERR_UNREACH; } + OMPI_PROCESS_NAME_NTOH(guid); /* compare this to the expected values */ if(memcmp(&btl_proc->proc_name, &guid, sizeof(orte_process_name_t)) != 0) { diff --git a/ompi/mca/btl/tcp/btl_tcp_frag.c b/ompi/mca/btl/tcp/btl_tcp_frag.c index 70abf5c813..4a973b6c45 100644 --- a/ompi/mca/btl/tcp/btl_tcp_frag.c +++ b/ompi/mca/btl/tcp/btl_tcp_frag.c @@ -206,6 +206,7 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) /* read header */ if(frag->iov_cnt == 0) { + if (btl_endpoint->endpoint_nbo && frag->iov_idx == 1) MCA_BTL_TCP_HDR_NTOH(frag->hdr); /* convert only at iov_idx == 1, the state in which the SEND case below first consumes hdr.size; NOTE(review): presumably this branch is re-entered with a larger iov_idx, where a second swap would corrupt the header - confirm against the full function */ switch(frag->hdr.type) { case MCA_BTL_TCP_HDR_TYPE_SEND: if(frag->iov_idx == 1 && frag->hdr.size) { diff --git a/ompi/mca/btl/tcp/btl_tcp_hdr.h b/ompi/mca/btl/tcp/btl_tcp_hdr.h index 89836d1de0..55b2a69e8c 100644 --- a/ompi/mca/btl/tcp/btl_tcp_hdr.h +++ b/ompi/mca/btl/tcp/btl_tcp_hdr.h @@ -23,6 +23,7 @@ #include "ompi_config.h" #include "ompi/mca/btl/base/base.h" #include "btl_tcp.h" +#include "opal/types.h" #if defined(c_plusplus) || defined(__cplusplus) extern "C" { @@ -45,6 +46,18 @@ struct mca_btl_tcp_hdr_t { }; typedef struct mca_btl_tcp_hdr_t mca_btl_tcp_hdr_t; +#define 
MCA_BTL_TCP_HDR_HTON(hdr) \ + do { /* host -> network: count via htons, size via hton64 */ \ + (hdr).count = htons((hdr).count); \ + (hdr).size = hton64((hdr).size); \ + } while (0) + +#define MCA_BTL_TCP_HDR_NTOH(hdr) \ + do { /* network -> host: count via ntohs, size via ntoh64 */ \ + (hdr).count = ntohs((hdr).count); \ + (hdr).size = ntoh64((hdr).size); \ + } while (0) + #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/ompi/mca/btl/tcp/btl_tcp_proc.c b/ompi/mca/btl/tcp/btl_tcp_proc.c index 1c17aedb25..1d445738c6 100644 --- a/ompi/mca/btl/tcp/btl_tcp_proc.c +++ b/ompi/mca/btl/tcp/btl_tcp_proc.c @@ -28,6 +28,7 @@ #include "orte/class/orte_proc_table.h" #include "ompi/mca/btl/base/btl_base_error.h" #include "ompi/mca/pml/base/pml_base_module_exchange.h" +#include "ompi/datatype/dt_arch.h" #include "btl_tcp.h" #include "btl_tcp_proc.h" @@ -172,6 +173,20 @@ int mca_btl_tcp_proc_insert( size_t i; unsigned long net1; +#ifndef WORDS_BIGENDIAN + /* if we are little endian and our peer is not so lucky, then we + need to put all information sent to him in big endian (aka + Network Byte Order) and expect all information received to + be in NBO. Since big endian machines always send and receive + in NBO, we don't care so much about that case. 
*/ + if (btl_proc->proc_ompi->proc_arch & OMPI_ARCH_ISBIGENDIAN) { + /* big endian peer: BTL headers are exchanged in network byte order */ + btl_endpoint->endpoint_nbo = true; + } else { + /* peer is not flagged big endian: endpoint_nbo is left untouched */ + } +#endif + /* insert into endpoint array */ btl_endpoint->endpoint_proc = btl_proc; btl_proc->proc_endpoints[btl_proc->proc_endpoint_count++] = btl_endpoint; diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_header.h b/ompi/mca/osc/pt2pt/osc_pt2pt_header.h index 657a7cf7cb..5363d2d584 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_header.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_header.h @@ -21,6 +21,7 @@ #include #endif +#include "opal/types.h" #define OMPI_OSC_PT2PT_HDR_PUT 0x0001 #define OMPI_OSC_PT2PT_HDR_ACC 0x0002 @@ -134,43 +135,4 @@ typedef struct ompi_osc_pt2pt_control_header_t ompi_osc_pt2pt_control_header_t; (hdr).hdr_value[1] = ntohl((hdr).hdr_value[1]); \ } while (0) - -/* - * Convert a 64 bit value to network byte order. - */ -static inline uint64_t hton64(uint64_t val) -{ - union { uint64_t ll; - uint32_t l[2]; - } w, r; - - /* platform already in network byte order? */ - if(htonl(1) == 1L) - return val; - w.ll = val; - r.l[0] = htonl(w.l[1]); - r.l[1] = htonl(w.l[0]); - return r.ll; -} - - -/* - * Convert a 64 bit value from network to host byte order. - */ -static inline uint64_t ntoh64(uint64_t val) -{ - union { uint64_t ll; - uint32_t l[2]; - } w, r; - - /* platform already in network byte order? 
*/ - if(htonl(1) == 1L) - return val; - w.ll = val; - r.l[0] = ntohl(w.l[1]); - r.l[1] = ntohl(w.l[0]); - return r.ll; -} - - #endif /* OMPI_MCA_OSC_PT2PT_HDR_H */ diff --git a/ompi/mca/pml/dr/pml_dr_hdr.h b/ompi/mca/pml/dr/pml_dr_hdr.h index f45e9ce9b2..5438076ca7 100644 --- a/ompi/mca/pml/dr/pml_dr_hdr.h +++ b/ompi/mca/pml/dr/pml_dr_hdr.h @@ -29,6 +29,8 @@ #include #endif +#include "opal/types.h" + #define MCA_PML_DR_HDR_TYPE_MATCH 1 #define MCA_PML_DR_HDR_TYPE_RNDV 2 #define MCA_PML_DR_HDR_TYPE_ACK 3 @@ -39,46 +41,6 @@ #define MCA_PML_DR_HDR_FLAGS_MATCH 4 /* is the ack in response to a match */ -/* - * Convert a 64 bit value to network byte order. - */ - -static inline uint64_t hton64(uint64_t val) -{ - union { uint64_t ll; - uint32_t l[2]; - } w, r; - - /* platform already in network byte order? */ - if(htonl(1) == 1L) - return val; - w.ll = val; - r.l[0] = htonl(w.l[1]); - r.l[1] = htonl(w.l[0]); - return r.ll; -} - - -/* - * Convert a 64 bit value from network to host byte order. - */ - -static inline uint64_t ntoh64(uint64_t val) -{ - union { uint64_t ll; - uint32_t l[2]; - } w, r; - - /* platform already in network byte order? */ - if(htonl(1) == 1L) - return val; - w.ll = val; - r.l[0] = ntohl(w.l[1]); - r.l[1] = ntohl(w.l[0]); - return r.ll; -} - - /** * Common hdr attributes - must be first element in each hdr type */ diff --git a/ompi/mca/pml/ob1/pml_ob1_hdr.h b/ompi/mca/pml/ob1/pml_ob1_hdr.h index bd54288944..b2993e6238 100644 --- a/ompi/mca/pml/ob1/pml_ob1_hdr.h +++ b/ompi/mca/pml/ob1/pml_ob1_hdr.h @@ -29,6 +29,8 @@ #include #endif +#include "opal/types.h" + #define MCA_PML_OB1_HDR_TYPE_MATCH 1 #define MCA_PML_OB1_HDR_TYPE_RNDV 2 #define MCA_PML_OB1_HDR_TYPE_RGET 3 @@ -45,47 +47,6 @@ #define MCA_PML_OB1_HDR_FLAGS_PIN 4 /* is user buffer pinned */ #define MCA_PML_OB1_HDR_FLAGS_CONTIG 8 /* is user buffer contiguous */ - -/* - * Convert a 64 bit value to network byte order. 
- */ - -static inline uint64_t hton64(uint64_t val) -{ - union { uint64_t ll; - uint32_t l[2]; - } w, r; - - /* platform already in network byte order? */ - if(htonl(1) == 1L) - return val; - w.ll = val; - r.l[0] = htonl(w.l[1]); - r.l[1] = htonl(w.l[0]); - return r.ll; -} - - -/* - * Convert a 64 bit value from network to host byte order. - */ - -static inline uint64_t ntoh64(uint64_t val) -{ - union { uint64_t ll; - uint32_t l[2]; - } w, r; - - /* platform already in network byte order? */ - if(htonl(1) == 1L) - return val; - w.ll = val; - r.l[0] = ntohl(w.l[1]); - r.l[1] = ntohl(w.l[0]); - return r.ll; -} - - /** * Common hdr attributes - must be first element in each hdr type */ @@ -203,13 +164,13 @@ typedef struct mca_pml_ob1_ack_hdr_t mca_pml_ob1_ack_hdr_t; #define MCA_PML_OB1_ACK_HDR_NTOH(h) \ do { \ MCA_PML_OB1_COMMON_HDR_NTOH(h.hdr_common); \ - (h).hdr_dst_size = ntoh64((h).hdr_dst_size); \ + (h).hdr_rdma_offset = ntoh64((h).hdr_rdma_offset); \ } while (0) #define MCA_PML_OB1_ACK_HDR_HTON(h) \ do { \ MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \ - (h).hdr_dst_size = hton64((h).hdr_dst_size); \ + (h).hdr_rdma_offset = hton64((h).hdr_rdma_offset); \ } while (0) /** @@ -236,6 +197,16 @@ struct mca_pml_ob1_fin_hdr_t { }; typedef struct mca_pml_ob1_fin_hdr_t mca_pml_ob1_fin_hdr_t; +#define MCA_PML_OB1_FIN_HDR_NTOH(h) \ + do { /* only the common header is converted for FIN */ \ + MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \ + } while (0) + +#define MCA_PML_OB1_FIN_HDR_HTON(h) \ + do { /* only the common header is converted for FIN */ \ + MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \ + } while (0) + /** * Union of defined hdr types. 
*/ diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 7c0e07221c..8192237542 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -33,6 +33,7 @@ #include "pml_ob1_recvreq.h" #include "pml_ob1_sendreq.h" #include "pml_ob1_hdr.h" +#include "ompi/datatype/dt_arch.h" @@ -68,17 +69,45 @@ void mca_pml_ob1_recv_frag_callback( return; } + /* hdr_type and hdr_flags are uint8_t, so no endian problems */ switch(hdr->hdr_common.hdr_type) { case MCA_PML_OB1_HDR_TYPE_MATCH: + { +#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT + if (hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NBO) { + MCA_PML_OB1_MATCH_HDR_NTOH(hdr->hdr_match); + } +#endif + mca_pml_ob1_recv_frag_match(btl, &hdr->hdr_match, segments,des->des_dst_cnt); + break; + } case MCA_PML_OB1_HDR_TYPE_RNDV: + { +#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT + if (hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NBO) { + MCA_PML_OB1_RNDV_HDR_NTOH(hdr->hdr_rndv); + } +#endif + mca_pml_ob1_recv_frag_match(btl, &hdr->hdr_match, segments,des->des_dst_cnt); + break; + } case MCA_PML_OB1_HDR_TYPE_RGET: { +#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* BWB - FIX ME - Tim, what do I do with rget? 
*/ +#endif mca_pml_ob1_recv_frag_match(btl, &hdr->hdr_match, segments,des->des_dst_cnt); break; } case MCA_PML_OB1_HDR_TYPE_ACK: { - mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*) + mca_pml_ob1_send_request_t* sendreq; +#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT + if (hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NBO) { + MCA_PML_OB1_ACK_HDR_NTOH(hdr->hdr_ack); + } +#endif + sendreq = (mca_pml_ob1_send_request_t*) hdr->hdr_ack.hdr_src_req.pval; sendreq->req_recv = hdr->hdr_ack.hdr_dst_req; sendreq->req_rdma_offset = hdr->hdr_ack.hdr_rdma_offset; @@ -87,21 +116,37 @@ void mca_pml_ob1_recv_frag_callback( } case MCA_PML_OB1_HDR_TYPE_FRAG: { - mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*) + mca_pml_ob1_recv_request_t* recvreq; +#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT + if (hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NBO) { + MCA_PML_OB1_FRAG_HDR_NTOH(hdr->hdr_frag); + } +#endif + recvreq = (mca_pml_ob1_recv_request_t*) hdr->hdr_frag.hdr_dst_req.pval; mca_pml_ob1_recv_request_progress(recvreq,btl,segments,des->des_dst_cnt); break; } case MCA_PML_OB1_HDR_TYPE_PUT: { - mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*) + mca_pml_ob1_send_request_t* sendreq; +#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* BWB - FIX ME - Tim, what do I do with rdma headers? 
*/ +#endif + sendreq = (mca_pml_ob1_send_request_t*) hdr->hdr_rdma.hdr_req.pval; mca_pml_ob1_send_request_put(sendreq,btl,&hdr->hdr_rdma); break; } case MCA_PML_OB1_HDR_TYPE_FIN: { - mca_btl_base_descriptor_t* rdma = (mca_btl_base_descriptor_t*) + mca_btl_base_descriptor_t* rdma; +#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT + if (hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NBO) { + MCA_PML_OB1_FIN_HDR_NTOH(hdr->hdr_fin); + } +#endif + rdma = (mca_btl_base_descriptor_t*) hdr->hdr_fin.hdr_des.pval; rdma->des_cbfunc(btl, NULL, rdma, OMPI_SUCCESS); break; diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index f4c3a09fa9..4cb1796c1b 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -29,7 +29,8 @@ #include "pml_ob1_rdmafrag.h" #include "ompi/mca/bml/base/base.h" #include "orte/mca/errmgr/errmgr.h" - +#include "ompi/datatype/dt_arch.h" + static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc( mca_pml_ob1_recv_request_t* request, mca_pml_ob1_comm_proc_t* proc); @@ -264,6 +265,18 @@ static void mca_pml_ob1_recv_request_ack( ack->hdr_dst_req.pval = recvreq; ack->hdr_rdma_offset = recvreq->req_rdma_offset; +#ifdef WORDS_BIGENDIAN + ack->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; +#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* if we are little endian and the remote side is big endian, + we're responsible for making sure the data is in network byte + order */ + if (recvreq->req_recv.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) { + ack->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; + MCA_PML_OB1_ACK_HDR_HTON(*ack); + } +#endif + /* initialize descriptor */ des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY; des->des_cbfunc = mca_pml_ob1_ctl_completion; @@ -350,6 +363,18 @@ static void mca_pml_ob1_rget_completion( hdr->hdr_common.hdr_flags = 0; hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN; hdr->hdr_des = 
frag->rdma_hdr.hdr_rget.hdr_des; + +#ifdef WORDS_BIGENDIAN + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; +#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* if we are little endian and the remote side is big endian, + we're responsible for making sure the data is in network byte + order */ + if (recvreq->req_recv.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) { + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; + MCA_PML_OB1_FIN_HDR_HTON(*hdr); + } +#endif /* queue request */ rc = mca_bml_base_send( @@ -716,6 +741,17 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq) hdr->hdr_seg_cnt = dst->des_dst_cnt; memcpy(hdr->hdr_segs, dst->des_dst, dst->des_dst_cnt * sizeof(mca_btl_base_segment_t)); +#ifdef WORDS_BIGENDIAN + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; +#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* if we are little endian and the remote side is big endian, + we're responsible for making sure the data is in network byte + order */ + if (recvreq->req_recv.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) { + /* BWB - FIX ME - TIM, what do we do here? 
*/ + } +#endif + /* update request state */ recvreq->req_rdma_offset += size; OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,1); diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index 3b926e5386..c36e1b15a4 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -33,6 +33,7 @@ #include "pml_ob1_recvreq.h" #include "pml_ob1_endpoint.h" #include "ompi/mca/bml/base/base.h" +#include "ompi/datatype/dt_arch.h" static int mca_pml_ob1_send_request_fini(struct ompi_request_t** request) @@ -336,6 +337,18 @@ int mca_pml_ob1_send_request_start_buffered( hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; hdr->hdr_rndv.hdr_src_req.pval = sendreq; +#ifdef WORDS_BIGENDIAN + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; +#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* if we are little endian and the remote side is big endian, + we're responsible for making sure the data is in network byte + order */ + if (sendreq->req_send.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) { + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; + MCA_PML_OB1_RNDV_HDR_HTON(hdr->hdr_rndv); + } +#endif + /* update lengths */ segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t) + max_data; sendreq->req_send_offset = max_data; @@ -437,6 +450,18 @@ int mca_pml_ob1_send_request_start_copy( hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence; +#ifdef WORDS_BIGENDIAN + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; +#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* if we are little endian and the remote side is big endian, + we're responsible for making sure the data is in network byte + order */ + if (sendreq->req_send.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) { + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; + MCA_PML_OB1_MATCH_HDR_HTON(hdr->hdr_match); + } +#endif + /* update lengths */ 
segment->seg_len = sizeof(mca_pml_ob1_match_hdr_t) + max_data; sendreq->req_send_offset = max_data; @@ -497,6 +522,18 @@ int mca_pml_ob1_send_request_start_prepare( hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence; +#ifdef WORDS_BIGENDIAN + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; +#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* if we are little endian and the remote side is big endian, + we're responsible for making sure the data is in network byte + order */ + if (sendreq->req_send.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) { + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; + MCA_PML_OB1_MATCH_HDR_HTON(hdr->hdr_match); + } +#endif + /* short message */ descriptor->des_cbfunc = mca_pml_ob1_match_completion_free; @@ -578,6 +615,19 @@ int mca_pml_ob1_send_request_start_rdma( hdr->hdr_rndv.hdr_src_req.pval = sendreq; hdr->hdr_rget.hdr_des.pval = src; hdr->hdr_rget.hdr_seg_cnt = src->des_src_cnt; + +#ifdef WORDS_BIGENDIAN + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; +#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* if we are little endian and the remote side is big endian, + we're responsible for making sure the data is in network byte + order */ + if (sendreq->req_send.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) { + /* BWB - FIX ME - Ask Tim what we are supposed to do in this case */ + return OMPI_ERR_NOT_SUPPORTED; + } +#endif + for(i=0; ides_src_cnt; i++) hdr->hdr_rget.hdr_segs[i] = src->des_src[i]; des->des_cbfunc = mca_pml_ob1_ctl_completion; @@ -605,6 +655,18 @@ int mca_pml_ob1_send_request_start_rdma( hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; hdr->hdr_rndv.hdr_src_req.pval = sendreq; +#ifdef WORDS_BIGENDIAN + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; +#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* if we are little endian and the remote side is big endian, + we're responsible for making sure the data 
is in network byte + order */ + if (sendreq->req_send.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) { + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; + MCA_PML_OB1_RNDV_HDR_HTON(hdr->hdr_rndv); + } +#endif + /* update lengths with number of bytes actually packed */ segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t); sendreq->req_send_offset = 0; @@ -674,6 +736,18 @@ int mca_pml_ob1_send_request_start_rndv( hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; hdr->hdr_rndv.hdr_src_req.pval = sendreq; +#ifdef WORDS_BIGENDIAN + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; +#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* if we are little endian and the remote side is big endian, + we're responsible for making sure the data is in network byte + order */ + if (sendreq->req_send.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) { + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; + MCA_PML_OB1_RNDV_HDR_HTON(hdr->hdr_rndv); + } +#endif + /* first fragment of a long message */ des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY; des->des_cbdata = sendreq; @@ -796,6 +870,18 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq) hdr->hdr_src_req.pval = sendreq; hdr->hdr_dst_req = sendreq->req_recv; +#ifdef WORDS_BIGENDIAN + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; +#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* if we are little endian and the remote side is big endian, + we're responsible for making sure the data is in network byte + order */ + if (sendreq->req_send.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) { + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; + MCA_PML_OB1_FRAG_HDR_HTON(*hdr); + } +#endif + /* update state */ sendreq->req_send_offset += size; OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,1); @@ -897,6 +983,18 @@ static void mca_pml_ob1_put_completion( hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN; hdr->hdr_des = frag->rdma_hdr.hdr_rdma.hdr_des; 
+#ifdef WORDS_BIGENDIAN + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; +#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT + /* if we are little endian and the remote side is big endian, + we're responsible for making sure the data is in network byte + order */ + if (sendreq->req_send.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) { + hdr->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; + MCA_PML_OB1_FIN_HDR_HTON(*hdr); + } +#endif + /* queue request */ rc = mca_bml_base_send( bml_btl, diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 302a2d0330..0f813a1548 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -29,6 +29,7 @@ #include "pml_ob1_hdr.h" #include "pml_ob1_rdma.h" #include "ompi/datatype/convertor.h" +#include "ompi/datatype/dt_arch.h" #include "ompi/mca/bml/bml.h" #if defined(c_plusplus) || defined(__cplusplus) @@ -108,6 +109,22 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t); * Start a send request. 
*/ +#ifdef WORDS_BIGENDIAN +#define MCA_PML_OB1_SEND_REQUEST_START_MATCH_FIXUP(sendreq, hdr) \ + do { (hdr)->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; } while (0) +#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT +#define MCA_PML_OB1_SEND_REQUEST_START_MATCH_FIXUP(sendreq, hdr) \ + do { \ + if ((sendreq)->req_send.req_base.req_proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) { \ + (hdr)->hdr_common.hdr_flags |= MCA_PML_OB1_HDR_FLAGS_NBO; \ + MCA_PML_OB1_MATCH_HDR_HTON((hdr)->hdr_match); \ + } \ + } while (0) +#else +#define MCA_PML_OB1_SEND_REQUEST_START_MATCH_FIXUP(sendreq, hdr) +#endif + + #define MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc) \ do { \ mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \ @@ -173,6 +190,7 @@ do { hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; \ hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; \ hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence; \ + MCA_PML_OB1_SEND_REQUEST_START_MATCH_FIXUP(sendreq, hdr); \ \ /* short message */ \ descriptor->des_cbfunc = mca_pml_ob1_match_completion_cache; \ diff --git a/opal/include/opal/types.h b/opal/include/opal/types.h index cb9304975e..9cf1a6dc39 100644 --- a/opal/include/opal/types.h +++ b/opal/include/opal/types.h @@ -103,4 +103,42 @@ typedef socklen_t ompi_socklen_t; typedef int ompi_socklen_t; #endif + +/* + * Convert a 64 bit value to network byte order. + */ +static inline uint64_t hton64(uint64_t val) +{ + union { uint64_t ll; + uint32_t l[2]; + } w, r; + + /* platform already in network byte order? */ + if(htonl(1) == 1L) + return val; + w.ll = val; + r.l[0] = htonl(w.l[1]); + r.l[1] = htonl(w.l[0]); + return r.ll; +} + +/* + * Convert a 64 bit value from network to host byte order. + */ + +static inline uint64_t ntoh64(uint64_t val) +{ + union { uint64_t ll; + uint32_t l[2]; + } w, r; + + /* platform already in network byte order? 
*/ + if(htonl(1) == 1L) + return val; + w.ll = val; + r.l[0] = ntohl(w.l[1]); + r.l[1] = ntohl(w.l[0]); + return r.ll; +} + #endif diff --git a/orte/mca/ns/ns_types.h b/orte/mca/ns/ns_types.h index 4d97ea29b2..704f9435cf 100644 --- a/orte/mca/ns/ns_types.h +++ b/orte/mca/ns/ns_types.h @@ -89,6 +89,52 @@ typedef struct orte_process_name_t orte_process_name_t; extern orte_process_name_t orte_name_all; #define ORTE_NAME_ALL &orte_name_all +#if SIZEOF_SIZE_T == 8 + +/** + * Convert process name from host to network byte order. + * + * @param name + */ +#define OMPI_PROCESS_NAME_HTON(n) \ + n.cellid = hton64(n.cellid); \ + n.jobid = hton64(n.jobid); \ + n.vpid = hton64(n.vpid); + +/** + * Convert process name from network to host byte order. + * + * @param name + */ +#define OMPI_PROCESS_NAME_NTOH(n) \ + n.cellid = ntoh64(n.cellid); \ + n.jobid = ntoh64(n.jobid); \ + n.vpid = ntoh64(n.vpid); + +#else + +/** + * Convert process name from host to network byte order. + * + * @param name + */ +#define OMPI_PROCESS_NAME_HTON(n) \ + n.cellid = htonl(n.cellid); \ + n.jobid = htonl(n.jobid); \ + n.vpid = htonl(n.vpid); + +/** + * Convert process name from network to host byte order. + * + * @param name + */ +#define OMPI_PROCESS_NAME_NTOH(n) \ + n.cellid = ntohl(n.cellid); \ + n.jobid = ntohl(n.jobid); \ + n.vpid = ntohl(n.vpid); + +#endif + /** List of names for general use */ diff --git a/orte/mca/oob/tcp/oob_tcp.h b/orte/mca/oob/tcp/oob_tcp.h index 1d27af8e76..82b0ad66f7 100644 --- a/orte/mca/oob/tcp/oob_tcp.h +++ b/orte/mca/oob/tcp/oob_tcp.h @@ -61,88 +61,6 @@ int mca_oob_tcp_init(void); */ int mca_oob_tcp_fini(void); -#if SIZEOF_SIZE_T == 8 -/* - * Convert a 64 bit value to network byte order. - */ -static inline uint64_t hton64(uint64_t val) -{ - union { uint64_t ll; - uint32_t l[2]; - } w, r; - - /* platform already in network byte order? 
*/ - if(htonl(1) == 1L) - return val; - w.ll = val; - r.l[0] = htonl(w.l[1]); - r.l[1] = htonl(w.l[0]); - return r.ll; -} - -/* - * Convert a 64 bit value from network to host byte order. - */ - -static inline uint64_t ntoh64(uint64_t val) -{ - union { uint64_t ll; - uint32_t l[2]; - } w, r; - - /* platform already in network byte order? */ - if(htonl(1) == 1L) - return val; - w.ll = val; - r.l[0] = ntohl(w.l[1]); - r.l[1] = ntohl(w.l[0]); - return r.ll; -} - -/** - * Convert process name from host to network byte order. - * - * @param name - */ -#define OMPI_PROCESS_NAME_HTON(n) \ - n.cellid = hton64(n.cellid); \ - n.jobid = hton64(n.jobid); \ - n.vpid = hton64(n.vpid); - -/** - * Convert process name from network to host byte order. - * - * @param name - */ -#define OMPI_PROCESS_NAME_NTOH(n) \ - n.cellid = ntoh64(n.cellid); \ - n.jobid = ntoh64(n.jobid); \ - n.vpid = ntoh64(n.vpid); - -#else - -/** - * Convert process name from host to network byte order. - * - * @param name - */ -#define OMPI_PROCESS_NAME_HTON(n) \ - n.cellid = htonl(n.cellid); \ - n.jobid = htonl(n.jobid); \ - n.vpid = htonl(n.vpid); - -/** - * Convert process name from network to host byte order. - * - * @param name - */ -#define OMPI_PROCESS_NAME_NTOH(n) \ - n.cellid = ntohl(n.cellid); \ - n.jobid = ntohl(n.jobid); \ - n.vpid = ntohl(n.vpid); - -#endif - /** * Compare two process names for equality. *