1
1

Continue to clean up the csum pml module. Some minor corrections and debug output added.

This commit was SVN r20894.
Этот коммит содержится в:
Ralph Castain 2009-03-29 23:27:06 +00:00
родитель 5498f93a50
Коммит d5e6104035
9 изменённых файлов: 141 добавлений и 233 удалений

Просмотреть файл

@ -28,8 +28,13 @@
#define MEMCPY_CSUM( DST, SRC, BLENGTH, CONVERTOR ) \ #define MEMCPY_CSUM( DST, SRC, BLENGTH, CONVERTOR ) \
do { \ do { \
volatile uint32_t __csum; \ volatile uint32_t __csum; \
volatile uint32_t __ui1, __ui2; \
__csum = (CONVERTOR)->checksum; \ __csum = (CONVERTOR)->checksum; \
__ui1 = (CONVERTOR)->csum_ui1; \
__ui2 = (CONVERTOR)->csum_ui2; \
(CONVERTOR)->checksum += OPAL_CSUM_BCOPY_PARTIAL( (SRC), (DST), (BLENGTH), (BLENGTH), &(CONVERTOR)->csum_ui1, &(CONVERTOR)->csum_ui2 ); \ (CONVERTOR)->checksum += OPAL_CSUM_BCOPY_PARTIAL( (SRC), (DST), (BLENGTH), (BLENGTH), &(CONVERTOR)->csum_ui1, &(CONVERTOR)->csum_ui2 ); \
(CONVERTOR)->csum_ui1 = __ui1; \
(CONVERTOR)->csum_ui2 = __ui2; \
__csum += OPAL_CSUM_PARTIAL( (DST), (BLENGTH), &(CONVERTOR)->csum_ui1, &(CONVERTOR)->csum_ui2); \ __csum += OPAL_CSUM_PARTIAL( (DST), (BLENGTH), &(CONVERTOR)->csum_ui1, &(CONVERTOR)->csum_ui2); \
if (__csum != (CONVERTOR)->checksum) { \ if (__csum != (CONVERTOR)->checksum) { \
opal_output(0, "%s:%d:csum2: Invalid \'MEMCPY_CSUM check\' - dst csum:0x%04x != src csum:0x%04x\n", __FILE__, __LINE__, __csum, (CONVERTOR)->checksum); \ opal_output(0, "%s:%d:csum2: Invalid \'MEMCPY_CSUM check\' - dst csum:0x%04x != src csum:0x%04x\n", __FILE__, __LINE__, __csum, (CONVERTOR)->checksum); \

Просмотреть файл

@ -30,6 +30,7 @@
#include "opal/class/opal_bitmap.h" #include "opal/class/opal_bitmap.h"
#include "opal/util/crc.h" #include "opal/util/crc.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "opal/mca/paffinity/paffinity.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/grpcomm/grpcomm.h"
@ -289,10 +290,14 @@ int mca_pml_csum_add_procs(ompi_proc_t** procs, size_t nprocs)
if(nprocs == 0) if(nprocs == 0)
return OMPI_SUCCESS; return OMPI_SUCCESS;
/* we don't have any endpoint data we need to cache on the
ompi_proc_t, so set proc_pml to NULL */
for (i = 0 ; i < nprocs ; ++i) { for (i = 0 ; i < nprocs ; ++i) {
/* we don't have any endpoint data we need to cache on the
ompi_proc_t, so set proc_pml to NULL */
procs[i]->proc_pml = NULL; procs[i]->proc_pml = NULL;
/* if the proc isn't local, tell the convertor to
* checksum the data
*/
procs[i]->proc_convertor->flags |= CONVERTOR_WITH_CHECKSUM;
} }
OBJ_CONSTRUCT(&reachable, opal_bitmap_t); OBJ_CONSTRUCT(&reachable, opal_bitmap_t);
@ -371,6 +376,10 @@ int mca_pml_csum_add_procs(ompi_proc_t** procs, size_t nprocs)
cleanup_and_return: cleanup_and_return:
OBJ_DESTRUCT(&reachable); OBJ_DESTRUCT(&reachable);
for (i=0; i < nprocs; i++) {
opal_output(0, "procs[%lu]->cflags = %04x", (unsigned long) i,
procs[i]->proc_convertor->flags);
}
return rc; return rc;
} }
@ -432,7 +441,6 @@ int mca_pml_csum_send_fin( ompi_proc_t* proc,
mca_btl_base_descriptor_t* fin; mca_btl_base_descriptor_t* fin;
mca_pml_csum_fin_hdr_t* hdr; mca_pml_csum_fin_hdr_t* hdr;
int rc; int rc;
bool do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_csum_fin_hdr_t), mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_csum_fin_hdr_t),
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
@ -450,14 +458,12 @@ int mca_pml_csum_send_fin( ompi_proc_t* proc,
hdr->hdr_common.hdr_csum = 0; hdr->hdr_common.hdr_csum = 0;
hdr->hdr_des.pval = hdr_des; hdr->hdr_des.pval = hdr_des;
hdr->hdr_fail = status; hdr->hdr_fail = status;
hdr->hdr_common.hdr_csum = (do_csum ? hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_fin_hdr_t));
opal_csum16(hdr, sizeof(mca_pml_csum_fin_hdr_t)) : OPAL_CSUM_ZERO);
if(do_csum) { OPAL_OUTPUT_VERBOSE((0, mca_pml_base_output,
OPAL_OUTPUT_VERBOSE((0, mca_pml_base_output, "%s: Sending \'FIN\' with header csum:0x%04x\n",
"%s: Sending \'FIN\' with header csum:0x%04x\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hdr->hdr_common.hdr_csum));
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hdr->hdr_common.hdr_csum));
}
csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_FIN, proc); csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_FIN, proc);
/* queue request */ /* queue request */

Просмотреть файл

@ -168,6 +168,8 @@ mca_pml_csum_component_init( int* priority,
{ {
opal_output_verbose( 10, 0, "in csum, my priority is 0\n"); opal_output_verbose( 10, 0, "in csum, my priority is 0\n");
opal_output(0, "hdr size %d", (int)sizeof(mca_pml_csum_match_hdr_t));
/* select us only if we are specified */ /* select us only if we are specified */
if((*priority) > 0) { if((*priority) > 0) {
*priority = 0; *priority = 0;

Просмотреть файл

@ -73,42 +73,15 @@ typedef struct mca_pml_csum_common_hdr_t mca_pml_csum_common_hdr_t;
struct mca_pml_csum_match_hdr_t { struct mca_pml_csum_match_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */ mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
uint16_t hdr_ctx; /**< communicator index */ uint16_t hdr_ctx; /**< communicator index */
uint16_t hdr_seq; /**< message sequence number */
int32_t hdr_src; /**< source rank */ int32_t hdr_src; /**< source rank */
int32_t hdr_tag; /**< user tag */ int32_t hdr_tag; /**< user tag */
uint16_t hdr_seq; /**< message sequence number */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t hdr_padding[12]; /**< explicitly pad to 16-byte boundary. Compilers seem to already prefer to do this, but make it explicit just in case */
#endif
uint32_t hdr_csum; /**< checksum over data */ uint32_t hdr_csum; /**< checksum over data */
}; };
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
#define OMPI_PML_CSUM_MATCH_HDR_LEN 32
#else
#define OMPI_PML_CSUM_MATCH_HDR_LEN 20 #define OMPI_PML_CSUM_MATCH_HDR_LEN 20
#endif
typedef struct mca_pml_csum_match_hdr_t mca_pml_csum_match_hdr_t; typedef struct mca_pml_csum_match_hdr_t mca_pml_csum_match_hdr_t;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG
#define MCA_PML_CSUM_MATCH_HDR_FILL(h) \
do { \
(h).hdr_padding[0] = 0; \
(h).hdr_padding[1] = 0; \
(h).hdr_padding[2] = 0; \
(h).hdr_padding[3] = 0; \
(h).hdr_padding[4] = 0; \
(h).hdr_padding[5] = 0; \
(h).hdr_padding[6] = 0; \
(h).hdr_padding[7] = 0; \
(h).hdr_padding[8] = 0; \
(h).hdr_padding[9] = 0; \
(h).hdr_padding[10] = 0; \
(h).hdr_padding[11] = 0; \
} while(0)
#else
#define MCA_PML_CSUM_MATCH_HDR_FILL(h)
#endif /* OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG */
#define MCA_PML_CSUM_MATCH_HDR_NTOH(h) \ #define MCA_PML_CSUM_MATCH_HDR_NTOH(h) \
do { \ do { \
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \ MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
@ -122,7 +95,6 @@ do { \
#define MCA_PML_CSUM_MATCH_HDR_HTON(h) \ #define MCA_PML_CSUM_MATCH_HDR_HTON(h) \
do { \ do { \
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \ MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
MCA_PML_CSUM_MATCH_HDR_FILL(h); \
(h).hdr_ctx = htons((h).hdr_ctx); \ (h).hdr_ctx = htons((h).hdr_ctx); \
(h).hdr_src = htonl((h).hdr_src); \ (h).hdr_src = htonl((h).hdr_src); \
(h).hdr_tag = htonl((h).hdr_tag); \ (h).hdr_tag = htonl((h).hdr_tag); \
@ -142,13 +114,6 @@ struct mca_pml_csum_rendezvous_hdr_t {
}; };
typedef struct mca_pml_csum_rendezvous_hdr_t mca_pml_csum_rendezvous_hdr_t; typedef struct mca_pml_csum_rendezvous_hdr_t mca_pml_csum_rendezvous_hdr_t;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG
#define MCA_PML_CSUM_RNDV_HDR_FILL(h) \
MCA_PML_CSUM_MATCH_HDR_FILL((h).hdr_match)
#else
#define MCA_PML_CSUM_RNDV_HDR_FILL(h)
#endif /* OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG */
/* Note that hdr_src_req is not put in network byte order because it /* Note that hdr_src_req is not put in network byte order because it
is never processed by the receiver, other than being copied into is never processed by the receiver, other than being copied into
the ack header */ the ack header */
@ -161,7 +126,6 @@ typedef struct mca_pml_csum_rendezvous_hdr_t mca_pml_csum_rendezvous_hdr_t;
#define MCA_PML_CSUM_RNDV_HDR_HTON(h) \ #define MCA_PML_CSUM_RNDV_HDR_HTON(h) \
do { \ do { \
MCA_PML_CSUM_MATCH_HDR_HTON((h).hdr_match); \ MCA_PML_CSUM_MATCH_HDR_HTON((h).hdr_match); \
MCA_PML_CSUM_RNDV_HDR_FILL(h); \
(h).hdr_msg_length = hton64((h).hdr_msg_length); \ (h).hdr_msg_length = hton64((h).hdr_msg_length); \
} while (0) } while (0)
@ -182,7 +146,6 @@ typedef struct mca_pml_csum_rget_hdr_t mca_pml_csum_rget_hdr_t;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG #if OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG
#define MCA_PML_CSUM_RGET_HDR_FILL(h) \ #define MCA_PML_CSUM_RGET_HDR_FILL(h) \
do { \ do { \
MCA_PML_CSUM_RNDV_HDR_FILL((h).hdr_rndv); \
(h).hdr_padding[0] = 0; \ (h).hdr_padding[0] = 0; \
(h).hdr_padding[1] = 0; \ (h).hdr_padding[1] = 0; \
(h).hdr_padding[2] = 0; \ (h).hdr_padding[2] = 0; \
@ -210,26 +173,13 @@ do { \
*/ */
struct mca_pml_csum_frag_hdr_t { struct mca_pml_csum_frag_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */ mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT uint32_t hdr_csum;
uint8_t hdr_padding[2]; /**< align to 16-byte boundary */
#endif
uint64_t hdr_frag_offset; /**< offset into message */ uint64_t hdr_frag_offset; /**< offset into message */
ompi_ptr_t hdr_src_req; /**< pointer to source request */ ompi_ptr_t hdr_src_req; /**< pointer to source request */
ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */ ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */
uint32_t hdr_csum;
}; };
typedef struct mca_pml_csum_frag_hdr_t mca_pml_csum_frag_hdr_t; typedef struct mca_pml_csum_frag_hdr_t mca_pml_csum_frag_hdr_t;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG
#define MCA_PML_CSUM_FRAG_HDR_FILL(h) \
do { \
(h).hdr_padding[0] = 0; \
(h).hdr_padding[1] = 0; \
} while(0)
#else
#define MCA_PML_CSUM_FRAG_HDR_FILL(h)
#endif /* OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG */
#define MCA_PML_CSUM_FRAG_HDR_NTOH(h) \ #define MCA_PML_CSUM_FRAG_HDR_NTOH(h) \
do { \ do { \
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \ MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
@ -241,7 +191,6 @@ do { \
do { \ do { \
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \ MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
(h).hdr_csum = htonl((h).hdr_csum); \ (h).hdr_csum = htonl((h).hdr_csum); \
MCA_PML_CSUM_FRAG_HDR_FILL(h); \
(h).hdr_frag_offset = hton64((h).hdr_frag_offset); \ (h).hdr_frag_offset = hton64((h).hdr_frag_offset); \
} while (0) } while (0)
@ -252,7 +201,7 @@ do { \
struct mca_pml_csum_ack_hdr_t { struct mca_pml_csum_ack_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */ mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT #if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t hdr_padding[6]; uint8_t hdr_padding[4];
#endif #endif
ompi_ptr_t hdr_src_req; /**< source request */ ompi_ptr_t hdr_src_req; /**< source request */
ompi_ptr_t hdr_dst_req; /**< matched receive request */ ompi_ptr_t hdr_dst_req; /**< matched receive request */
@ -267,8 +216,6 @@ do { \
(h).hdr_padding[1] = 0; \ (h).hdr_padding[1] = 0; \
(h).hdr_padding[2] = 0; \ (h).hdr_padding[2] = 0; \
(h).hdr_padding[3] = 0; \ (h).hdr_padding[3] = 0; \
(h).hdr_padding[4] = 0; \
(h).hdr_padding[5] = 0; \
} while (0) } while (0)
#else #else
#define MCA_PML_CSUM_ACK_HDR_FILL(h) #define MCA_PML_CSUM_ACK_HDR_FILL(h)
@ -297,9 +244,6 @@ do { \
struct mca_pml_csum_rdma_hdr_t { struct mca_pml_csum_rdma_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */ mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t hdr_padding[2]; /** two to pad out the hdr to a 4 byte alignment. hdr_req will then be 8 byte aligned after 4 for hdr_seg_cnt */
#endif
uint32_t hdr_seg_cnt; /**< number of segments for rdma */ uint32_t hdr_seg_cnt; /**< number of segments for rdma */
ompi_ptr_t hdr_req; /**< destination request */ ompi_ptr_t hdr_req; /**< destination request */
ompi_ptr_t hdr_des; /**< source descriptor */ ompi_ptr_t hdr_des; /**< source descriptor */
@ -308,16 +252,6 @@ struct mca_pml_csum_rdma_hdr_t {
}; };
typedef struct mca_pml_csum_rdma_hdr_t mca_pml_csum_rdma_hdr_t; typedef struct mca_pml_csum_rdma_hdr_t mca_pml_csum_rdma_hdr_t;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG
#define MCA_PML_CSUM_RDMA_HDR_FILL(h) \
do { \
(h).hdr_padding[0] = 0; \
(h).hdr_padding[1] = 0; \
} while(0)
#else
#define MCA_PML_CSUM_RDMA_HDR_FILL(h)
#endif /* OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG */
#define MCA_PML_CSUM_RDMA_HDR_NTOH(h) \ #define MCA_PML_CSUM_RDMA_HDR_NTOH(h) \
do { \ do { \
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \ MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
@ -328,7 +262,6 @@ do { \
#define MCA_PML_CSUM_RDMA_HDR_HTON(h) \ #define MCA_PML_CSUM_RDMA_HDR_HTON(h) \
do { \ do { \
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \ MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
MCA_PML_CSUM_RDMA_HDR_FILL(h); \
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \ (h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
(h).hdr_rdma_offset = hton64((h).hdr_rdma_offset); \ (h).hdr_rdma_offset = hton64((h).hdr_rdma_offset); \
} while (0) } while (0)
@ -339,37 +272,24 @@ do { \
struct mca_pml_csum_fin_hdr_t { struct mca_pml_csum_fin_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */ mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT uint32_t hdr_csum;
uint8_t hdr_padding[6];
#endif
ompi_ptr_t hdr_des; /**< completed descriptor */ ompi_ptr_t hdr_des; /**< completed descriptor */
uint32_t hdr_fail; /**< RDMA operation failed */ uint32_t hdr_fail; /**< RDMA operation failed */
}; };
typedef struct mca_pml_csum_fin_hdr_t mca_pml_csum_fin_hdr_t; typedef struct mca_pml_csum_fin_hdr_t mca_pml_csum_fin_hdr_t;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG
#define MCA_PML_CSUM_FIN_HDR_FILL(h) \
do { \
(h).hdr_padding[0] = 0; \
(h).hdr_padding[1] = 0; \
(h).hdr_padding[2] = 0; \
(h).hdr_padding[3] = 0; \
(h).hdr_padding[4] = 0; \
(h).hdr_padding[5] = 0; \
} while (0)
#else
#define MCA_PML_CSUM_FIN_HDR_FILL(h)
#endif /* OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG */
#define MCA_PML_CSUM_FIN_HDR_NTOH(h) \ #define MCA_PML_CSUM_FIN_HDR_NTOH(h) \
do { \ do { \
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \ MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
(h).hdr_csum = ntohl((h).hdr_csum); \
(h).hdr_fail = ntohl((h).hdr_fail); \
} while (0) } while (0)
#define MCA_PML_CSUM_FIN_HDR_HTON(h) \ #define MCA_PML_CSUM_FIN_HDR_HTON(h) \
do { \ do { \
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \ MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
MCA_PML_CSUM_FIN_HDR_FILL(h); \ (h).hdr_csum = htonl((h).hdr_csum); \
(h).hdr_fail = htonl((h).hdr_fail); \
} while (0) } while (0)
/** /**

Просмотреть файл

@ -121,26 +121,28 @@ void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl,
size_t bytes_received = 0; size_t bytes_received = 0;
uint16_t csum_received, csum=0; uint16_t csum_received, csum=0;
uint32_t csum_data; uint32_t csum_data;
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_CSUM_MATCH_HDR_LEN) ) { if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_CSUM_MATCH_HDR_LEN) ) {
return; return;
} }
csum_hdr_ntoh(((mca_pml_csum_hdr_t*) hdr), MCA_PML_CSUM_HDR_TYPE_MATCH); csum_hdr_ntoh(((mca_pml_csum_hdr_t*) hdr), MCA_PML_CSUM_HDR_TYPE_MATCH);
if (do_csum) { csum_received = hdr->hdr_common.hdr_csum;
csum_received = hdr->hdr_common.hdr_csum; hdr->hdr_common.hdr_csum = 0;
hdr->hdr_common.hdr_csum = 0; csum = opal_csum16(hdr, sizeof(mca_pml_csum_match_hdr_t));
csum = opal_csum16(hdr, sizeof(mca_pml_csum_match_hdr_t)); hdr->hdr_common.hdr_csum = csum_received;
hdr->hdr_common.hdr_csum = csum_received;
opal_output(0, "%s:%s:%d common_hdr: %02x:%02x:%04x match_hdr: %04x:%04x:%08x:%08x:%08x",
if (csum_received != csum) { ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__,
opal_output(0, "%s:%s:%d: Invalid \'match header\' - received csum:0x%04x != computed csum:0x%04x\n", hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags, hdr->hdr_common.hdr_csum,
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum); hdr->hdr_ctx, hdr->hdr_seq, hdr->hdr_src, hdr->hdr_tag, hdr->hdr_csum);
orte_errmgr.abort(-1,NULL);
} if (csum_received != csum) {
opal_output(0, "%s:%s:%d: Invalid \'match header\' - received csum:0x%04x != computed csum:0x%04x\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
orte_errmgr.abort(-1,NULL);
} }
/* communicator pointer */ /* communicator pointer */
comm_ptr = ompi_comm_lookup(hdr->hdr_ctx); comm_ptr = ompi_comm_lookup(hdr->hdr_ctx);
if(OPAL_UNLIKELY(NULL == comm_ptr)) { if(OPAL_UNLIKELY(NULL == comm_ptr)) {
@ -254,12 +256,12 @@ void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl,
match->req_recv.req_base.req_datatype); match->req_recv.req_base.req_datatype);
); );
} }
if (do_csum && bytes_received > 0) { if (bytes_received > 0) {
csum_data = match->req_recv.req_base.req_convertor.checksum; csum_data = match->req_recv.req_base.req_convertor.checksum;
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Received \'match\' with data csum:0x%x, header csum:0x%04x, size:%lu\n", "%s Received \'match\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hdr->hdr_csum, csum, (unsigned long)bytes_received)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hdr->hdr_csum, csum_received, (unsigned long)bytes_received));
if (csum_data != hdr->hdr_csum) { if (csum_data != hdr->hdr_csum) {
opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n", opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n",
@ -292,24 +294,22 @@ void mca_pml_csum_recv_frag_callback_rndv(mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments = des->des_dst; mca_btl_base_segment_t* segments = des->des_dst;
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
uint16_t csum_received, csum; uint16_t csum_received, csum;
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) { if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return; return;
} }
csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_RNDV); csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_RNDV);
if (do_csum) { csum_received = hdr->hdr_common.hdr_csum;
csum_received = hdr->hdr_common.hdr_csum; hdr->hdr_common.hdr_csum = 0;
hdr->hdr_common.hdr_csum = 0; csum = opal_csum16(hdr, sizeof(mca_pml_csum_rendezvous_hdr_t));
csum = opal_csum16(hdr, sizeof(mca_pml_csum_rendezvous_hdr_t)); hdr->hdr_common.hdr_csum = csum_received;
hdr->hdr_common.hdr_csum = csum_received; if (csum_received != csum) {
if (csum_received != csum) { opal_output(0, "%s:%s:%d: Invalid \'rndv header\' - received csum:0x%04x != computed csum:0x%04x\n",
opal_output(0, "%s:%s:%d: Invalid \'rndv header\' - received csum:0x%04x != computed csum:0x%04x\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum); orte_errmgr.abort(-1,NULL);
orte_errmgr.abort(-1,NULL);
}
} }
mca_pml_csum_recv_frag_match(btl, &hdr->hdr_match, segments, mca_pml_csum_recv_frag_match(btl, &hdr->hdr_match, segments,
des->des_dst_cnt, MCA_PML_CSUM_HDR_TYPE_RNDV); des->des_dst_cnt, MCA_PML_CSUM_HDR_TYPE_RNDV);
return; return;
@ -341,7 +341,6 @@ void mca_pml_csum_recv_frag_callback_ack(mca_btl_base_module_t* btl,
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
mca_pml_csum_send_request_t* sendreq; mca_pml_csum_send_request_t* sendreq;
uint16_t csum_received, csum; uint16_t csum_received, csum;
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) { if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return; return;
@ -349,19 +348,18 @@ void mca_pml_csum_recv_frag_callback_ack(mca_btl_base_module_t* btl,
csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_ACK); csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_ACK);
if (do_csum) { csum_received = hdr->hdr_common.hdr_csum;
csum_received = hdr->hdr_common.hdr_csum; hdr->hdr_common.hdr_csum = 0;
hdr->hdr_common.hdr_csum = 0; csum = opal_csum16(hdr, sizeof(mca_pml_csum_ack_hdr_t));
csum = opal_csum16(hdr, sizeof(mca_pml_csum_ack_hdr_t)); hdr->hdr_common.hdr_csum = csum_received;
hdr->hdr_common.hdr_csum = csum_received; OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
OPAL_OUTPUT_VERBOSE((0, mca_pml_base_output, "%s Received \'ACK\' with header csum:0x%04x\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum));
"%s Received \'ACK\' with header csum:0x%04x\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum)); if (csum_received != csum) {
if (csum_received != csum) { opal_output(0, "%s:%s:%d: Invalid \'ACK header\' - received csum:0x%04x != computed csum:0x%04x\n",
opal_output(0, "%s:%s:%d: Invalid \'ACK header\' - received csum:0x%04x != computed csum:0x%04x\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum); orte_errmgr.abort(-1,NULL);
orte_errmgr.abort(-1,NULL);
}
} }
sendreq = (mca_pml_csum_send_request_t*)hdr->hdr_ack.hdr_src_req.pval; sendreq = (mca_pml_csum_send_request_t*)hdr->hdr_ack.hdr_src_req.pval;
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req; sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;
@ -391,25 +389,23 @@ void mca_pml_csum_recv_frag_callback_frag(mca_btl_base_module_t* btl,
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
mca_pml_csum_recv_request_t* recvreq; mca_pml_csum_recv_request_t* recvreq;
uint16_t csum_received, csum; uint16_t csum_received, csum;
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) { if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return; return;
} }
csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_FRAG); csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_FRAG);
if(do_csum) { csum_received = hdr->hdr_common.hdr_csum;
csum_received = hdr->hdr_common.hdr_csum; hdr->hdr_common.hdr_csum = 0;
hdr->hdr_common.hdr_csum = 0; csum = opal_csum16(hdr, sizeof(mca_pml_csum_frag_hdr_t));
csum = opal_csum16(hdr, sizeof(mca_pml_csum_frag_hdr_t)); hdr->hdr_common.hdr_csum = csum_received;
hdr->hdr_common.hdr_csum = csum_received; if(csum_received != csum) {
if(csum_received != csum) { opal_output(0, "%s:%s:%d: Invalid \'frag header\' - received csum:0x%04x != computed csum:0x%04x\n",
opal_output(0, "%s:%s:%d: Invalid \'frag header\' - received csum:0x%04x != computed csum:0x%04x\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum); orte_errmgr.abort(-1,NULL);
orte_errmgr.abort(-1,NULL); }
}
} recvreq = (mca_pml_csum_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval;
recvreq = (mca_pml_csum_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval;
mca_pml_csum_recv_request_progress_frag(recvreq,btl,segments,des->des_dst_cnt); mca_pml_csum_recv_request_progress_frag(recvreq,btl,segments,des->des_dst_cnt);
return; return;
@ -424,7 +420,6 @@ void mca_pml_csum_recv_frag_callback_put(mca_btl_base_module_t* btl,
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
mca_pml_csum_send_request_t* sendreq; mca_pml_csum_send_request_t* sendreq;
uint16_t csum_received, csum; uint16_t csum_received, csum;
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) { if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return; return;
@ -432,19 +427,18 @@ void mca_pml_csum_recv_frag_callback_put(mca_btl_base_module_t* btl,
csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_PUT); csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_PUT);
if(do_csum) { csum_received = hdr->hdr_common.hdr_csum;
csum_received = hdr->hdr_common.hdr_csum; hdr->hdr_common.hdr_csum = 0;
hdr->hdr_common.hdr_csum = 0; csum = opal_csum16(hdr, sizeof(mca_pml_csum_rdma_hdr_t));
csum = opal_csum16(hdr, sizeof(mca_pml_csum_rdma_hdr_t)); hdr->hdr_common.hdr_csum = csum_received;
hdr->hdr_common.hdr_csum = csum_received; OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
OPAL_OUTPUT_VERBOSE((0, mca_pml_base_output, "%s Received \'PUT\' with header csum:0x%04x\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum));
"%s Received \'PUT\' with header csum:0x%04x\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum)); if(csum_received != csum) {
if(csum_received != csum) { opal_output(0, "%s:%s:%d: Invalid \'PUT header\' - received csum:0x%04x != computed csum:0x%04x\n",
opal_output(0, "%s:%s:%d: Invalid \'PUT header\' - received csum:0x%04x != computed csum:0x%04x\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum); orte_errmgr.abort(-1,NULL);
orte_errmgr.abort(-1,NULL);
}
} }
sendreq = (mca_pml_csum_send_request_t*)hdr->hdr_rdma.hdr_req.pval; sendreq = (mca_pml_csum_send_request_t*)hdr->hdr_rdma.hdr_req.pval;
mca_pml_csum_send_request_put(sendreq,btl,&hdr->hdr_rdma); mca_pml_csum_send_request_put(sendreq,btl,&hdr->hdr_rdma);
@ -460,7 +454,6 @@ void mca_pml_csum_recv_frag_callback_fin(mca_btl_base_module_t* btl,
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
mca_btl_base_descriptor_t* rdma; mca_btl_base_descriptor_t* rdma;
uint16_t csum_received, csum; uint16_t csum_received, csum;
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) { if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return; return;
@ -468,19 +461,18 @@ void mca_pml_csum_recv_frag_callback_fin(mca_btl_base_module_t* btl,
csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_FIN); csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_FIN);
if(do_csum) { csum_received = hdr->hdr_common.hdr_csum;
csum_received = hdr->hdr_common.hdr_csum; hdr->hdr_common.hdr_csum = 0;
hdr->hdr_common.hdr_csum = 0; csum = opal_csum16(hdr, sizeof(mca_pml_csum_fin_hdr_t));
csum = opal_csum16(hdr, sizeof(mca_pml_csum_fin_hdr_t)); hdr->hdr_common.hdr_csum = csum_received;
hdr->hdr_common.hdr_csum = csum_received; OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
OPAL_OUTPUT_VERBOSE((0, mca_pml_base_output, "%s Received \'FIN\' with header csum:0x%04x\n",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),csum));
"%s Received \'FIN\' with header csum:0x%04x\n",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),csum)); if(csum_received != csum) {
if(csum_received != csum) { opal_output(0, "%s:%s:%d: Invalid \'FIN header\' - received csum:0x%04x != computed csum:0x%04x\n",
opal_output(0, "%s:%s:%d: Invalid \'FIN header\' - received csum:0x%04x != computed csum:0x%04x\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum); orte_errmgr.abort(-1,NULL);
orte_errmgr.abort(-1,NULL);
}
} }
rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval; rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval;
rdma->des_cbfunc(btl, NULL, rdma, rdma->des_cbfunc(btl, NULL, rdma,
hdr->hdr_fin.hdr_fail ? OMPI_ERROR : OMPI_SUCCESS); hdr->hdr_fin.hdr_fail ? OMPI_ERROR : OMPI_SUCCESS);

Просмотреть файл

@ -223,7 +223,6 @@ int mca_pml_csum_recv_request_ack_send_btl(
mca_btl_base_descriptor_t* des; mca_btl_base_descriptor_t* des;
mca_pml_csum_ack_hdr_t* ack; mca_pml_csum_ack_hdr_t* ack;
int rc; int rc;
bool do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
/* allocate descriptor */ /* allocate descriptor */
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER,
@ -241,8 +240,7 @@ int mca_pml_csum_recv_request_ack_send_btl(
ack->hdr_src_req.lval = hdr_src_req; ack->hdr_src_req.lval = hdr_src_req;
ack->hdr_dst_req.pval = hdr_dst_req; ack->hdr_dst_req.pval = hdr_dst_req;
ack->hdr_send_offset = hdr_send_offset; ack->hdr_send_offset = hdr_send_offset;
ack->hdr_common.hdr_csum = (do_csum ? ack->hdr_common.hdr_csum = opal_csum16(ack, sizeof(mca_pml_csum_ack_hdr_t));
opal_csum16(ack, sizeof(mca_pml_csum_ack_hdr_t)) : OPAL_CSUM_ZERO);
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Sending \'ACK\' with header csum:0x%04x\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ack->hdr_common.hdr_csum)); "%s Sending \'ACK\' with header csum:0x%04x\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ack->hdr_common.hdr_csum));
@ -439,7 +437,6 @@ void mca_pml_csum_recv_request_progress_frag( mca_pml_csum_recv_request_t* recvr
size_t data_offset = 0; size_t data_offset = 0;
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
uint32_t csum = OPAL_CSUM_ZERO; uint32_t csum = OPAL_CSUM_ZERO;
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( segments, num_segments, MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received ); 0, bytes_received );
@ -471,11 +468,11 @@ void mca_pml_csum_recv_request_progress_frag( mca_pml_csum_recv_request_t* recvr
recvreq->req_recv.req_base.req_datatype); recvreq->req_recv.req_base.req_datatype);
); );
if (do_csum && bytes_received > 0) { if (bytes_received > 0) {
csum = recvreq->req_recv.req_base.req_convertor.checksum; csum = recvreq->req_recv.req_base.req_convertor.checksum;
OPAL_OUTPUT_VERBOSE((0, mca_pml_base_output, OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Received \'frag\' with data csum:0x%x, header csum:0x%04x, size:%lu\n", "%s Received \'frag\' with data csum:0x%x, frag csum:0x%04x, size:%lu\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum, hdr->hdr_common.hdr_csum, (unsigned long)bytes_received)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum, hdr->hdr_frag.hdr_csum, (unsigned long)bytes_received));
if(csum != hdr->hdr_frag.hdr_csum) { if(csum != hdr->hdr_frag.hdr_csum) {
opal_output(0, "%s:%s:%d: Invalid \'frag data\' - received csum:0x%x != computed csum:0x%x\n", opal_output(0, "%s:%s:%d: Invalid \'frag data\' - received csum:0x%x != computed csum:0x%x\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_frag.hdr_csum, csum); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_frag.hdr_csum, csum);
@ -577,7 +574,6 @@ void mca_pml_csum_recv_request_progress_rndv( mca_pml_csum_recv_request_t* recvr
size_t data_offset = 0; size_t data_offset = 0;
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
uint32_t csum = OPAL_CSUM_ZERO; uint32_t csum = OPAL_CSUM_ZERO;
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( segments, num_segments, MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received ); 0, bytes_received );
@ -614,11 +610,11 @@ void mca_pml_csum_recv_request_progress_rndv( mca_pml_csum_recv_request_t* recvr
recvreq->req_recv.req_base.req_datatype); recvreq->req_recv.req_base.req_datatype);
); );
} }
if (do_csum && bytes_received > 0) { if (bytes_received > 0) {
csum = recvreq->req_recv.req_base.req_convertor.checksum; csum = recvreq->req_recv.req_base.req_convertor.checksum;
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Received \'rndv\' with data csum:0x%x, header csum:0x%04x, size:%lu\n", "%s Received \'rndv\' with csum:0x%x, header csum:0x%04x, size:%lu\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum, hdr->hdr_common.hdr_csum, (unsigned long)bytes_received)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum, hdr->hdr_match.hdr_csum, (unsigned long)bytes_received));
if (csum != hdr->hdr_match.hdr_csum) { if (csum != hdr->hdr_match.hdr_csum) {
opal_output(0, "%s:%s:%d: Invalid \'rndv data\' - received csum:0x%x != computed csum:0x%x\n", opal_output(0, "%s:%s:%d: Invalid \'rndv data\' - received csum:0x%x != computed csum:0x%x\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_match.hdr_csum, csum); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_match.hdr_csum, csum);
@ -648,7 +644,6 @@ void mca_pml_csum_recv_request_progress_match( mca_pml_csum_recv_request_t* recv
size_t data_offset = 0; size_t data_offset = 0;
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
uint32_t csum = OPAL_CSUM_ZERO; uint32_t csum = OPAL_CSUM_ZERO;
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( segments, num_segments, MCA_PML_CSUM_COMPUTE_SEGMENT_LENGTH( segments, num_segments,
0, bytes_received ); 0, bytes_received );
@ -672,11 +667,11 @@ void mca_pml_csum_recv_request_progress_match( mca_pml_csum_recv_request_t* recv
data_offset, data_offset,
bytes_received, bytes_received,
bytes_delivered); bytes_delivered);
if (do_csum && bytes_received > 0) { if (bytes_received > 0) {
csum = recvreq->req_recv.req_base.req_convertor.checksum; csum = recvreq->req_recv.req_base.req_convertor.checksum;
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Received \'match\' with data csum:0x%x, header csum:0x%04x, size:%lu\n", "%s Received \'match\' with csum:0x%x, header csum:0x%04x, size:%lu\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum, hdr->hdr_common.hdr_csum, (unsigned long)bytes_received)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum, hdr->hdr_match.hdr_csum, (unsigned long)bytes_received));
if (csum != hdr->hdr_match.hdr_csum) { if (csum != hdr->hdr_match.hdr_csum) {
opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n", opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_match.hdr_csum, csum); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_match.hdr_csum, csum);
@ -752,7 +747,6 @@ int mca_pml_csum_recv_request_schedule_once( mca_pml_csum_recv_request_t* recvre
size_t i, prev_bytes_remaining = 0; size_t i, prev_bytes_remaining = 0;
size_t bytes_remaining = recvreq->req_send_offset - size_t bytes_remaining = recvreq->req_send_offset -
recvreq->req_rdma_offset; recvreq->req_rdma_offset;
bool do_csum;
/* if starting bml_btl is provided schedule next fragment on it first */ /* if starting bml_btl is provided schedule next fragment on it first */
if(start_bml_btl != NULL) { if(start_bml_btl != NULL) {
@ -865,9 +859,7 @@ int mca_pml_csum_recv_request_schedule_once( mca_pml_csum_recv_request_t* recvre
if(!recvreq->req_ack_sent) if(!recvreq->req_ack_sent)
recvreq->req_ack_sent = true; recvreq->req_ack_sent = true;
do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM; hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_rdma_hdr_t));
hdr->hdr_common.hdr_csum = (do_csum ?
opal_csum16(hdr, sizeof(mca_pml_csum_rdma_hdr_t)) : OPAL_CSUM_ZERO);
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Sending \'PUT\' with header csum:0x%04x\n", "%s Sending \'PUT\' with header csum:0x%04x\n",

Просмотреть файл

@ -197,17 +197,13 @@ extern void mca_pml_csum_recv_req_start(mca_pml_csum_recv_request_t *req);
static inline void prepare_recv_req_converter(mca_pml_csum_recv_request_t *req) static inline void prepare_recv_req_converter(mca_pml_csum_recv_request_t *req)
{ {
mca_bml_base_endpoint_t* endpoint =
req->req_recv.req_base.req_proc->proc_bml;
bool do_csum = endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM;
if( req->req_recv.req_base.req_datatype->size | req->req_recv.req_base.req_count ) { if( req->req_recv.req_base.req_datatype->size | req->req_recv.req_base.req_count ) {
ompi_convertor_copy_and_prepare_for_recv( ompi_convertor_copy_and_prepare_for_recv(
req->req_recv.req_base.req_proc->proc_convertor, req->req_recv.req_base.req_proc->proc_convertor,
req->req_recv.req_base.req_datatype, req->req_recv.req_base.req_datatype,
req->req_recv.req_base.req_count, req->req_recv.req_base.req_count,
req->req_recv.req_base.req_addr, req->req_recv.req_base.req_addr,
(do_csum ? CONVERTOR_WITH_CHECKSUM: 0), CONVERTOR_WITH_CHECKSUM,
&req->req_recv.req_base.req_convertor); &req->req_recv.req_base.req_convertor);
ompi_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor, ompi_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor,
&req->req_bytes_delivered); &req->req_bytes_delivered);

Просмотреть файл

@ -485,9 +485,8 @@ int mca_pml_csum_send_request_start_copy( mca_pml_csum_send_request_t* sendreq,
unsigned int iov_count; unsigned int iov_count;
size_t max_data = size; size_t max_data = size;
int rc; int rc;
bool do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
if(NULL != bml_btl->btl->btl_sendi) { if(NULL != bml_btl->btl->btl_sendi && 0) {
mca_pml_csum_match_hdr_t match; mca_pml_csum_match_hdr_t match;
match.hdr_common.hdr_flags = 0; match.hdr_common.hdr_flags = 0;
match.hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_MATCH; match.hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_MATCH;
@ -496,11 +495,15 @@ int mca_pml_csum_send_request_start_copy( mca_pml_csum_send_request_t* sendreq,
match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
match.hdr_tag = sendreq->req_send.req_base.req_tag; match.hdr_tag = sendreq->req_send.req_base.req_tag;
match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
match.hdr_csum = (size > 0 && do_csum ? match.hdr_csum = (size > 0 ?
sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO); sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO);
match.hdr_common.hdr_csum = (do_csum ? match.hdr_common.hdr_csum = opal_csum16(&match, sizeof(mca_pml_csum_match_hdr_t));
opal_csum16(&match, sizeof(mca_pml_csum_match_hdr_t)) : OPAL_CSUM_ZERO);
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s:%s:%d Sending \'match\' with data csum:0x%x, header csum:0x%x, size:%lu \n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__,
hdr->hdr_match.hdr_csum, hdr->hdr_common.hdr_csum, (unsigned long)max_data));
csum_hdr_hton(&match, MCA_PML_CSUM_HDR_TYPE_MATCH, csum_hdr_hton(&match, MCA_PML_CSUM_HDR_TYPE_MATCH,
sendreq->req_send.req_base.req_proc); sendreq->req_send.req_base.req_proc);
@ -571,15 +574,20 @@ int mca_pml_csum_send_request_start_copy( mca_pml_csum_send_request_t* sendreq,
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
hdr->hdr_match.hdr_csum = (size > 0 && do_csum ? hdr->hdr_match.hdr_csum = (size > 0 ?
sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO); sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO);
hdr->hdr_common.hdr_csum = (do_csum ? hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_match_hdr_t));
opal_csum16(hdr, sizeof(mca_pml_csum_match_hdr_t)) : OPAL_CSUM_ZERO);
opal_output(0, "%s:%s:%d common_hdr: %02x:%02x:%04x match_hdr: %04x:%04x:%08x:%08x:%08x",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__,
hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags, hdr->hdr_common.hdr_csum,
hdr->hdr_match.hdr_ctx, hdr->hdr_match.hdr_seq, hdr->hdr_match.hdr_src, hdr->hdr_match.hdr_tag, hdr->hdr_match.hdr_csum);
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Sending \'match\' with data csum:0x%x, header csum:0x%04x, size:%lu \n", "%s:%s:%d Sending \'match\' with data csum:0x%x, header csum:0x%x, size:%lu \n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hdr->hdr_match.hdr_csum, hdr->hdr_common.hdr_csum, (unsigned long)max_data)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__,
hdr->hdr_match.hdr_csum, hdr->hdr_common.hdr_csum, (unsigned long)max_data));
csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_MATCH, csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_MATCH,
sendreq->req_send.req_base.req_proc); sendreq->req_send.req_base.req_proc);
@ -688,12 +696,9 @@ int mca_pml_csum_send_request_start_rdma( mca_pml_csum_send_request_t* sendreq,
mca_pml_csum_hdr_t* hdr; mca_pml_csum_hdr_t* hdr;
bool need_local_cb = false; bool need_local_cb = false;
int rc; int rc;
bool do_csum;
bml_btl = sendreq->req_rdma[0].bml_btl; bml_btl = sendreq->req_rdma[0].bml_btl;
do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & MCA_BTL_FLAGS_GET)) { if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & MCA_BTL_FLAGS_GET)) {
mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg; mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg;
mca_btl_base_descriptor_t* src; mca_btl_base_descriptor_t* src;
@ -805,8 +810,7 @@ int mca_pml_csum_send_request_start_rdma( mca_pml_csum_send_request_t* sendreq,
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_rndv.hdr_src_req.pval = sendreq; hdr->hdr_rndv.hdr_src_req.pval = sendreq;
hdr->hdr_match.hdr_csum = OPAL_CSUM_ZERO; hdr->hdr_match.hdr_csum = OPAL_CSUM_ZERO;
hdr->hdr_common.hdr_csum = (do_csum ? hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_rendezvous_hdr_t));
opal_csum16(hdr, sizeof(mca_pml_csum_rendezvous_hdr_t)) : OPAL_CSUM_ZERO);
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Sending \'rndv\'(initiate RDMA PUT) with data csum:0x%x, header csum:0x%04x, size:%lu\n", "%s Sending \'rndv\'(initiate RDMA PUT) with data csum:0x%x, header csum:0x%04x, size:%lu\n",
@ -855,7 +859,6 @@ int mca_pml_csum_send_request_start_rndv( mca_pml_csum_send_request_t* sendreq,
mca_btl_base_segment_t* segment; mca_btl_base_segment_t* segment;
mca_pml_csum_hdr_t* hdr; mca_pml_csum_hdr_t* hdr;
int rc; int rc;
bool do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
/* prepare descriptor */ /* prepare descriptor */
if(size == 0) { if(size == 0) {
@ -903,10 +906,9 @@ int mca_pml_csum_send_request_start_rndv( mca_pml_csum_send_request_t* sendreq,
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_rndv.hdr_src_req.pval = sendreq; hdr->hdr_rndv.hdr_src_req.pval = sendreq;
hdr->hdr_match.hdr_csum = (size > 0 && do_csum ? hdr->hdr_match.hdr_csum = (size > 0 ?
sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO); sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO);
hdr->hdr_common.hdr_csum = (do_csum ? hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_rendezvous_hdr_t));
opal_csum16(hdr, sizeof(mca_pml_csum_rendezvous_hdr_t)) : OPAL_CSUM_ZERO);
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Sending \'rndv\' with data csum:0x%x, header csum:0x%04x, size:%lu\n", "%s Sending \'rndv\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",
@ -1023,7 +1025,6 @@ mca_pml_csum_send_request_schedule_once(mca_pml_csum_send_request_t* sendreq)
size_t prev_bytes_remaining = 0; size_t prev_bytes_remaining = 0;
mca_pml_csum_send_range_t *range; mca_pml_csum_send_range_t *range;
int num_fail = 0; int num_fail = 0;
bool do_csum;
/* check pipeline_depth here before attempting to get any locks */ /* check pipeline_depth here before attempting to get any locks */
if(true == sendreq->req_throttle_sends && if(true == sendreq->req_throttle_sends &&
@ -1121,8 +1122,6 @@ cannot_pack:
des->des_cbfunc = mca_pml_csum_frag_completion; des->des_cbfunc = mca_pml_csum_frag_completion;
des->des_cbdata = sendreq; des->des_cbdata = sendreq;
do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
/* setup header */ /* setup header */
hdr = (mca_pml_csum_frag_hdr_t*)des->des_src->seg_addr.pval; hdr = (mca_pml_csum_frag_hdr_t*)des->des_src->seg_addr.pval;
hdr->hdr_common.hdr_flags = 0; hdr->hdr_common.hdr_flags = 0;
@ -1131,10 +1130,9 @@ cannot_pack:
hdr->hdr_frag_offset = range->range_send_offset; hdr->hdr_frag_offset = range->range_send_offset;
hdr->hdr_src_req.pval = sendreq; hdr->hdr_src_req.pval = sendreq;
hdr->hdr_dst_req = sendreq->req_recv; hdr->hdr_dst_req = sendreq->req_recv;
hdr->hdr_csum = (size > 0 && do_csum ? hdr->hdr_csum = (size > 0 ?
sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO); sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO);
hdr->hdr_common.hdr_csum = (do_csum ? hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_frag_hdr_t));
opal_csum16(hdr, sizeof(mca_pml_csum_frag_hdr_t)) : OPAL_CSUM_ZERO);
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Sending \'frag\' with data csum:0x%x, header csum:0x%04x, size:%lu\n", "%s Sending \'frag\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",

Просмотреть файл

@ -142,9 +142,6 @@ get_request_from_send_pending(mca_pml_csum_send_pending_t *type)
sendmode, \ sendmode, \
persistent) \ persistent) \
{ \ { \
mca_bml_base_endpoint_t* endpoint = \
sendreq->req_send.req_base.req_proc->proc_bml; \
bool do_csum = endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM; \
MCA_PML_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \ MCA_PML_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \
buf, \ buf, \
count, \ count, \
@ -154,7 +151,7 @@ get_request_from_send_pending(mca_pml_csum_send_pending_t *type)
comm, \ comm, \
sendmode, \ sendmode, \
persistent, \ persistent, \
do_csum ? CONVERTOR_WITH_CHECKSUM: 0); \ CONVERTOR_WITH_CHECKSUM); \
(sendreq)->req_recv.pval = NULL; \ (sendreq)->req_recv.pval = NULL; \
} }