diff --git a/ompi/mca/mtl/portals4/mtl_portals4.c b/ompi/mca/mtl/portals4/mtl_portals4.c index 944ed0b52e..d13d9e84c9 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.c +++ b/ompi/mca/mtl/portals4/mtl_portals4.c @@ -127,67 +127,22 @@ portals4_init_interface(void) goto error; } - /* Bind MD/MDs across all memory. We prefer (for obvious reasons) - to have a single MD across all of memory */ -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - { - int i; - int num_mds = ompi_mtl_portals4_get_num_mds(); - ptl_size_t size = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) - 1; - ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2; + /* Bind MD across all memory */ + md.start = 0; + md.length = PTL_SIZE_MAX; + md.options = 0; + md.eq_handle = ompi_mtl_portals4.send_eq_h; + md.ct_handle = PTL_CT_NONE; - ompi_mtl_portals4.send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds); - if (NULL == ompi_mtl_portals4.send_md_hs) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: Error allocating MD array", - __FILE__, __LINE__); - ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; - goto error; - } - - for (i = 0 ; i < num_mds ; ++i) { - ompi_mtl_portals4.send_md_hs[i] = PTL_INVALID_HANDLE; - } - - for (i = 0 ; i < num_mds ; ++i) { - md.start = (char*) (offset_unit * i); - md.length = (i - 1 == num_mds) ? 
size / 2 : size; - md.options = 0; - md.eq_handle = ompi_mtl_portals4.send_eq_h; - md.ct_handle = PTL_CT_NONE; - - opal_output_verbose(50, ompi_mtl_base_framework.framework_output, - "Binding md from %p of length %lx", - md.start, md.length); - - ret = PtlMDBind(ompi_mtl_portals4.ni_h, - &md, - &ompi_mtl_portals4.send_md_hs[i]); - if (PTL_OK != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlMDBind failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - } + ret = PtlMDBind(ompi_mtl_portals4.ni_h, + &md, + &ompi_mtl_portals4.send_md_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: PtlMDBind failed: %d\n", + __FILE__, __LINE__, ret); + goto error; } -#else - md.start = 0; - md.length = PTL_SIZE_MAX; - md.options = 0; - md.eq_handle = ompi_mtl_portals4.send_eq_h; - md.ct_handle = PTL_CT_NONE; - - ret = PtlMDBind(ompi_mtl_portals4.ni_h, - &md, - &ompi_mtl_portals4.send_md_h); - if (PTL_OK != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlMDBind failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } -#endif /* Handle long overflows */ me.start = NULL; @@ -255,24 +210,9 @@ portals4_init_interface(void) if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) { PtlMDRelease(ompi_mtl_portals4.zero_md_h); } -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - if (NULL != ompi_mtl_portals4.send_md_hs) { - int i; - int num_mds = ompi_mtl_portals4_get_num_mds(); - - for (i = 0 ; i < num_mds ; ++i) { - if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_hs[i], PTL_INVALID_HANDLE)) { - PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]); - } - } - - free(ompi_mtl_portals4.send_md_hs); - } -#else if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) { PtlMDRelease(ompi_mtl_portals4.send_md_h); } -#endif if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) { PtlPTFree(ompi_mtl_portals4.ni_h, 
ompi_mtl_portals4.read_idx); } @@ -457,20 +397,7 @@ ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl) PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h); PtlMDRelease(ompi_mtl_portals4.zero_md_h); -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - { - int i; - int num_mds = ompi_mtl_portals4_get_num_mds(); - - for (i = 0 ; i < num_mds ; ++i) { - PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]); - } - - free(ompi_mtl_portals4.send_md_hs); - } -#else PtlMDRelease(ompi_mtl_portals4.send_md_h); -#endif PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx); PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx); diff --git a/ompi/mca/mtl/portals4/mtl_portals4.h b/ompi/mca/mtl/portals4/mtl_portals4.h index 9f9245d89c..8cb60566df 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.h +++ b/ompi/mca/mtl/portals4/mtl_portals4.h @@ -76,12 +76,8 @@ struct mca_mtl_portals4_module_t { /** MD handle for sending ACKS */ ptl_handle_md_t zero_md_h; - /** Send MD handle(s). Use ompi_mtl_portals4_get_md() to get the right md */ -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - ptl_handle_md_t *send_md_hs; -#else + /** Send MD handle */ ptl_handle_md_t send_md_h; -#endif /** long message receive overflow ME. Persistent ME, first in overflow list on the recv_idx portal table. */ @@ -212,64 +208,6 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4; #define MTL_PORTALS4_IS_SYNC_MSG(hdr_data) \ (0 != (MTL_PORTALS4_SYNC_MSG & hdr_data)) - -/* - * Not all implementations of Portals 4 support binding a memory - * descriptor which covers all of memory, but all support covering a - * large fraction of memory. Therefore, rather than working around - * the issue by pinning per message, we use a number of memory - * descriptors to cover all of memory. As long as the maximum memory - * descriptor is a large fraction of the user virtual address space - * (like 46 bit MDs on a platform with 47 bits of user virtual address - * space), this works fine. 
- * - * Our scheme is to create N memory descriptors which contiguously - * cover the entire user address space, then another N-1 contiguous - * memory descriptors offset by 1/2 the size of the MD, then a final - * memory descriptor of 1/2 the size of the other MDs covering the top - * of the memory space, to avoid if statements in the critical path. This - * scheme allows for a maximum message size of 1/2 the size of the MD - * without ever crossing an MD boundary. Also, because MD sizes are - * always on a power of 2 in this scheme, computing the offsets and MD - * selection are quick, using only bit shift and mask.q - * - * ompi_mtl_portals4_get_md() relies heavily on compiler constant folding. - * "mask" can be constant folded into a constant. "which" compiler folds - * into a bit shift of a register a constant number of times, then masked - * by a constant (the input is, unfortunately, not constant). - * - * In the case where an MD can cover all of memory, - * ompi_mtl_portals4_get_md() will be compiled into two assignments. - * Assuming the function inlines (and it certainly should be), the two - * assignments should be optimized into register assignments for the - * Portals call relatively easily. 
- */ -static inline void -ompi_mtl_portals4_get_md(const void *ptr, ptl_handle_md_t *md_h, void **base_ptr) -{ -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - int mask = (1ULL << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1)) - 1; - int which = (((uintptr_t) ptr) >> (OPAL_PORTALS4_MAX_MD_SIZE - 1)) & mask; - *md_h = ompi_mtl_portals4.send_md_hs[which]; - *base_ptr = (void*) (which * (1ULL << (OPAL_PORTALS4_MAX_MD_SIZE - 1))); -#else - *md_h = ompi_mtl_portals4.send_md_h; - *base_ptr = 0; -#endif -} - - -static inline int -ompi_mtl_portals4_get_num_mds(void) -{ -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - return (1 << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1)); -#else - return 1; -#endif -} - - /* MTL interface functions */ extern int ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl); diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index a9d2e57315..1c797d19db 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -225,12 +225,7 @@ ompi_mtl_portals4_component_open(void) ompi_mtl_portals4.recv_eq_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE; -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - ompi_mtl_portals4.send_md_hs = NULL; -#else ompi_mtl_portals4.send_md_h = PTL_INVALID_HANDLE; -#endif - ompi_mtl_portals4.long_overflow_me_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL; ompi_mtl_portals4.read_idx = (ptl_pt_index_t) ~0UL; @@ -485,3 +480,4 @@ ompi_mtl_portals4_progress(void) return count; } + diff --git a/ompi/mca/mtl/portals4/mtl_portals4_send.c b/ompi/mca/mtl/portals4/mtl_portals4_send.c index f00a750f9f..7d6ac1f238 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_send.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_send.c @@ -184,8 +184,6 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, 
ptl_match_bits_t match_bits; ptl_me_t me; ptl_hdr_data_t hdr_data; - ptl_handle_md_t md_h; - void *base; MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, MTL_PORTALS4_SHORT_MSG); @@ -233,23 +231,20 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, ptl_request->opcount, hdr_data, match_bits)); } - ompi_mtl_portals4_get_md(start, &md_h, &base); - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, - "Send %lu, start: %p, base: %p, offset: %lx", - ptl_request->opcount, start, base, - (ptl_size_t) ((char*) start - (char*) base))); + "Send %lu, start: %p", + ptl_request->opcount, start)); - ret = PtlPut(md_h, - (ptl_size_t) ((char*) start - (char*) base), + ret = PtlPut(ompi_mtl_portals4.send_md_h, + (ptl_size_t) start, length, - PTL_ACK_REQ, - ptl_proc, - ompi_mtl_portals4.recv_idx, - match_bits, - 0, + PTL_ACK_REQ, + ptl_proc, + ompi_mtl_portals4.recv_idx, + match_bits, + 0, ptl_request, - hdr_data); + hdr_data); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPut failed: %d", @@ -274,8 +269,6 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag, ptl_me_t me; ptl_hdr_data_t hdr_data; ptl_size_t put_length; - ptl_handle_md_t md_h; - void *base; MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, MTL_PORTALS4_LONG_MSG); @@ -316,10 +309,8 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag, put_length = (rndv == ompi_mtl_portals4.protocol) ? 
(ptl_size_t) ompi_mtl_portals4.eager_limit : (ptl_size_t) length; - ompi_mtl_portals4_get_md(start, &md_h, &base); - - ret = PtlPut(md_h, - (ptl_size_t) ((char*) start - (char*) base), + ret = PtlPut(ompi_mtl_portals4.send_md_h, + (ptl_size_t) start, put_length, PTL_ACK_REQ, ptl_proc, diff --git a/ompi/mca/osc/portals4/osc_portals4.h b/ompi/mca/osc/portals4/osc_portals4.h index 61fb04541f..c0f93accb0 100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -76,13 +76,8 @@ struct ompi_osc_portals4_module_t { ptl_handle_ni_t ni_h; /* network interface used by this window */ ptl_pt_index_t pt_idx; /* portal table index used by this window (this will be same across window) */ ptl_handle_ct_t ct_h; /* Counting event handle used for completion in this window */ -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - ptl_handle_md_t *md_h; /* memory descriptor describing all of memory used by this window */ - ptl_handle_md_t *req_md_h; /* memory descriptor with event completion used by this window */ -#else - ptl_handle_md_t md_h[1]; /* memory descriptor describing all of memory used by this window */ - ptl_handle_md_t req_md_h[1]; /* memory descriptor with event completion used by this window */ -#endif + ptl_handle_md_t md_h; /* memory descriptor describing all of memory used by this window */ + ptl_handle_md_t req_md_h; /* memory descriptor with event completion used by this window */ ptl_handle_me_t data_me_h; /* data match list entry (MB are CID | OSC_PORTALS4_MB_DATA) */ ptl_handle_me_t control_me_h; /* match list entry for control data (node_state_t). Match bits are (CID | OSC_PORTALS4_MB_CONTROL). */ int64_t opcount; @@ -120,39 +115,6 @@ get_displacement(ompi_osc_portals4_module_t *module, } -/* - * See note in ompi/mtl/portals4/mtl_portals4.h for how we deal with - * platforms that don't allow us to crate an MD that covers all of - * memory. 
- */ -static inline void -ompi_osc_portals4_get_md(const void *ptr, const ptl_handle_md_t *array, - ptl_handle_md_t *md_h, void **base_ptr) -{ -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - int mask = (1ULL << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1)) - 1; - int which = (((uintptr_t) ptr) >> (OPAL_PORTALS4_MAX_MD_SIZE - 1)) & mask; - *md_h = array[which]; - *base_ptr = (void*) (which * (1ULL << (OPAL_PORTALS4_MAX_MD_SIZE - 1))); -#else - *md_h = array[0]; - *base_ptr = 0; -#endif -} - - -static inline int -ompi_osc_portals4_get_num_mds(void) -{ -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - return (1 << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1)); -#else - return 1; -#endif -} - - - int ompi_osc_portals4_attach(struct ompi_win_t *win, void *base, size_t len); int ompi_osc_portals4_detach(struct ompi_win_t *win, void *base); diff --git a/ompi/mca/osc/portals4/osc_portals4_active_target.c b/ompi/mca/osc/portals4/osc_portals4_active_target.c index 83de0f49d3..656aa990b0 100644 --- a/ompi/mca/osc/portals4/osc_portals4_active_target.c +++ b/ompi/mca/osc/portals4/osc_portals4_active_target.c @@ -74,8 +74,6 @@ ompi_osc_portals4_complete(struct ompi_win_t *win) ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; int ret, i, size; - ptl_handle_md_t md_h; - void *base; ret = ompi_osc_portals4_complete_all(module); if (ret != OMPI_SUCCESS) return ret; @@ -84,13 +82,11 @@ ompi_osc_portals4_complete(struct ompi_win_t *win) module->state.post_count = 0; PtlAtomicSync(); - ompi_osc_portals4_get_md(&module->one, module->md_h, &md_h, &base); - size = ompi_group_size(module->start_group); for (i = 0 ; i < size ; ++i) { - ret = PtlAtomic(md_h, - (ptl_size_t) ((char*) &module->one - (char*) base), + ret = PtlAtomic(module->md_h, + (ptl_size_t) &module->one, sizeof(module->one), PTL_ACK_REQ, ompi_osc_portals4_get_peer_group(module->start_group, i), @@ -124,8 +120,6 @@ 
ompi_osc_portals4_post(struct ompi_group_t *group, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; int ret, i, size; - ptl_handle_md_t md_h; - void *base; if (0 == (assert & MPI_MODE_NOCHECK)) { OBJ_RETAIN(group); @@ -134,12 +128,10 @@ ompi_osc_portals4_post(struct ompi_group_t *group, module->state.complete_count = 0; PtlAtomicSync(); - ompi_osc_portals4_get_md(&module->one, module->md_h, &md_h, &base); - size = ompi_group_size(module->post_group); for (i = 0 ; i < size ; ++i) { - ret = PtlAtomic(md_h, - (ptl_size_t) ((char*) &module->one - (char*) base), + ret = PtlAtomic(module->md_h, + (ptl_size_t) &module->one, sizeof(module->one), PTL_ACK_REQ, ompi_osc_portals4_get_peer_group(module->post_group, i), diff --git a/ompi/mca/osc/portals4/osc_portals4_comm.c b/ompi/mca/osc/portals4/osc_portals4_comm.c index 5bacfea7b4..4a096b4103 100644 --- a/ompi/mca/osc/portals4/osc_portals4_comm.c +++ b/ompi/mca/osc/portals4/osc_portals4_comm.c @@ -197,8 +197,6 @@ ompi_osc_portals4_rput(void *origin_addr, ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); size_t length; size_t offset; - ptl_handle_md_t md_h; - void *md_base; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "rput: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", @@ -228,9 +226,8 @@ ompi_osc_portals4_rput(void *origin_addr, return ret; } length *= origin_count; - ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base); - ret = PtlPut(md_h, - (ptl_size_t) ((char*) origin_addr - (char*) md_base), + ret = PtlPut(module->req_md_h, + (ptl_size_t) origin_addr, length, PTL_ACK_REQ, peer, @@ -267,8 +264,6 @@ ompi_osc_portals4_rget(void *origin_addr, ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); size_t length; size_t offset; - ptl_handle_md_t md_h; - void *md_base; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "rget: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", @@ -298,9 +293,8 @@ ompi_osc_portals4_rget(void 
*origin_addr, return ret; } length *= origin_count; - ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base); - ret = PtlGet(md_h, - (ptl_size_t) ((char*) origin_addr - (char*) md_base), + ret = PtlGet(module->req_md_h, + (ptl_size_t) origin_addr, length, peer, module->pt_idx, @@ -338,8 +332,6 @@ ompi_osc_portals4_raccumulate(void *origin_addr, size_t offset; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; - ptl_handle_md_t md_h; - void *md_base; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "raccumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s 0x%lx", @@ -372,8 +364,7 @@ ompi_osc_portals4_raccumulate(void *origin_addr, length *= origin_count; sent = 0; - ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base); - md_offset = ((char*) origin_addr - (char*) md_base); + md_offset = (ptl_size_t) origin_addr; do { size_t msg_length = MIN(module->atomic_max, length - sent); @@ -381,7 +372,7 @@ ompi_osc_portals4_raccumulate(void *origin_addr, request->ops_expected++; if (MPI_REPLACE == op) { - ret = PtlPut(md_h, + ret = PtlPut(module->req_md_h, md_offset + sent, msg_length, PTL_ACK_REQ, @@ -398,7 +389,7 @@ ompi_osc_portals4_raccumulate(void *origin_addr, ret = ompi_osc_portals4_get_op(op, &ptl_op); if (OMPI_SUCCESS != ret) return ret; - ret = PtlAtomic(md_h, - offset + sent, + ret = PtlAtomic(module->req_md_h, + md_offset + sent, msg_length, PTL_ACK_REQ, @@ -475,8 +466,6 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr, sent = 0; if (MPI_REPLACE == op) { - ptl_handle_md_t result_md_h, origin_md_h; - void *result_md_base, *origin_md_base; ptl_size_t result_md_offset, origin_md_offset; ret = ompi_datatype_type_size(origin_dt, &length); @@ -486,10 +475,8 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr, } length *= origin_count; - ompi_osc_portals4_get_md(result_addr, module->req_md_h, &result_md_h, &result_md_base); - result_md_offset = ((char*) result_addr - (char*) result_md_base); - ompi_osc_portals4_get_md(origin_addr, module->md_h, 
&origin_md_h, &origin_md_base); - origin_md_offset = ((char*) origin_addr - (char*) origin_md_base); + result_md_offset = (ptl_size_t) result_addr; + origin_md_offset = (ptl_size_t) origin_addr; do { size_t msg_length = MIN(module->fetch_atomic_max, length - sent); @@ -497,9 +484,9 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); request->ops_expected++; - ret = PtlSwap(result_md_h, + ret = PtlSwap(module->req_md_h, result_md_offset + sent, - origin_md_h, + module->md_h, origin_md_offset + sent, msg_length, peer, @@ -514,8 +501,6 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr, sent += msg_length; } while (sent < length); } else if (MPI_NO_OP == op) { - ptl_handle_md_t md_h; - void *md_base; ptl_size_t md_offset; ret = ompi_datatype_type_size(target_dt, &length); @@ -525,8 +510,7 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr, } length *= target_count; - ompi_osc_portals4_get_md(result_addr, module->req_md_h, &md_h, &md_base); - md_offset = ((char*) result_addr - (char*) md_base); + md_offset = (ptl_size_t) result_addr; do { size_t msg_length = MIN(module->fetch_atomic_max, length - sent); @@ -534,7 +518,7 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); request->ops_expected++; - ret = PtlGet(md_h, + ret = PtlGet(module->req_md_h, md_offset + sent, msg_length, peer, @@ -545,8 +529,6 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr, sent += msg_length; } while (sent < length); } else { - ptl_handle_md_t result_md_h, origin_md_h; - void *result_md_base, *origin_md_base; ptl_size_t result_md_offset, origin_md_offset; ret = ompi_datatype_type_size(origin_dt, &length); @@ -556,10 +538,8 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr, } length *= origin_count; - ompi_osc_portals4_get_md(result_addr, module->req_md_h, &result_md_h, &result_md_base); - result_md_offset = ((char*) result_addr - (char*) result_md_base); - 
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base); - origin_md_offset = ((char*) origin_addr - (char*) origin_md_base); + result_md_offset = (ptl_size_t) result_addr; + origin_md_offset = (ptl_size_t) origin_addr; ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); if (OMPI_SUCCESS != ret) return ret; @@ -573,9 +553,9 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); request->ops_expected++; - ret = PtlFetchAtomic(result_md_h, + ret = PtlFetchAtomic(module->req_md_h, result_md_offset + sent, - origin_md_h, + module->md_h, origin_md_offset + sent, msg_length, peer, @@ -615,8 +595,6 @@ ompi_osc_portals4_put(void *origin_addr, ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); size_t length; size_t offset; - ptl_handle_md_t md_h; - void *md_base; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "put: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", @@ -639,9 +617,8 @@ ompi_osc_portals4_put(void *origin_addr, return ret; } length *= origin_count; - ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base); - ret = PtlPut(md_h, - (ptl_size_t) ((char*) origin_addr - (char*) md_base), + ret = PtlPut(module->md_h, + (ptl_size_t) origin_addr, length, PTL_ACK_REQ, peer, @@ -675,8 +652,6 @@ ompi_osc_portals4_get(void *origin_addr, ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); size_t length; size_t offset; - ptl_handle_md_t md_h; - void *md_base; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "get: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", @@ -699,9 +674,8 @@ ompi_osc_portals4_get(void *origin_addr, return ret; } length *= origin_count; - ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base); - ret = PtlGet(md_h, - (ptl_size_t) ((char*) origin_addr - (char*) md_base), + ret = PtlGet(module->md_h, + (ptl_size_t) origin_addr, length, peer, module->pt_idx, @@ -736,8 +710,6 @@ ompi_osc_portals4_accumulate(void 
*origin_addr, size_t offset; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; - ptl_handle_md_t md_h; - void *md_base; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx", @@ -764,15 +736,14 @@ ompi_osc_portals4_accumulate(void *origin_addr, length *= origin_count; sent = 0; - ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base); - md_offset = ((char*) origin_addr - (char*) md_base); + md_offset = (ptl_size_t) origin_addr; do { size_t msg_length = MIN(module->atomic_max, length - sent); (void)opal_atomic_add_64(&module->opcount, 1); if (MPI_REPLACE == op) { - ret = PtlPut(md_h, + ret = PtlPut(module->md_h, md_offset + sent, msg_length, PTL_ACK_REQ, @@ -789,7 +760,7 @@ ompi_osc_portals4_accumulate(void *origin_addr, ret = ompi_osc_portals4_get_op(op, &ptl_op); if (OMPI_SUCCESS != ret) return ret; - ret = PtlAtomic(md_h, + ret = PtlAtomic(module->md_h, md_offset + sent, msg_length, PTL_ACK_REQ, @@ -858,8 +829,6 @@ ompi_osc_portals4_get_accumulate(void *origin_addr, } else { sent = 0; if (MPI_REPLACE == op) { - ptl_handle_md_t result_md_h, origin_md_h; - void *result_md_base, *origin_md_base; ptl_size_t result_md_offset, origin_md_offset; ret = ompi_datatype_type_size(origin_dt, &length); @@ -868,19 +837,17 @@ ompi_osc_portals4_get_accumulate(void *origin_addr, } length *= origin_count; - ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base); - result_md_offset = ((char*) result_addr - (char*) result_md_base); - ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base); - origin_md_offset = ((char*) origin_addr - (char*) origin_md_base); + result_md_offset = (ptl_size_t) result_addr; + origin_md_offset = (ptl_size_t) origin_addr; do { size_t msg_length = MIN(module->fetch_atomic_max, length - sent); (void)opal_atomic_add_64(&module->opcount, 1); - ret = PtlSwap(result_md_h, + ret = PtlSwap(module->md_h, result_md_offset + sent, - 
origin_md_h, + module->md_h, origin_md_offset + sent, msg_length, peer, @@ -895,8 +862,6 @@ ompi_osc_portals4_get_accumulate(void *origin_addr, sent += msg_length; } while (sent < length); } else if (MPI_NO_OP == op) { - ptl_handle_md_t md_h; - void *md_base; ptl_size_t md_offset; ret = ompi_datatype_type_size(target_dt, &length); @@ -905,15 +870,14 @@ ompi_osc_portals4_get_accumulate(void *origin_addr, } length *= target_count; - ompi_osc_portals4_get_md(result_addr, module->md_h, &md_h, &md_base); - md_offset = ((char*) result_addr - (char*) md_base); + md_offset = (ptl_size_t) result_addr; do { size_t msg_length = MIN(module->fetch_atomic_max, length - sent); (void)opal_atomic_add_64(&module->opcount, 1); - ret = PtlGet(md_h, + ret = PtlGet(module->md_h, md_offset + sent, msg_length, peer, @@ -924,8 +888,6 @@ ompi_osc_portals4_get_accumulate(void *origin_addr, sent += msg_length; } while (sent < length); } else { - ptl_handle_md_t result_md_h, origin_md_h; - void *result_md_base, *origin_md_base; ptl_size_t result_md_offset, origin_md_offset; ret = ompi_datatype_type_size(origin_dt, &length); @@ -934,10 +896,8 @@ ompi_osc_portals4_get_accumulate(void *origin_addr, } length *= origin_count; - ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base); - result_md_offset = ((char*) result_addr - (char*) result_md_base); - ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base); - origin_md_offset = ((char*) origin_addr - (char*) origin_md_base); + result_md_offset = (ptl_size_t) result_addr; + origin_md_offset = (ptl_size_t) origin_addr; ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); if (OMPI_SUCCESS != ret) return ret; @@ -951,9 +911,9 @@ ompi_osc_portals4_get_accumulate(void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); - ret = PtlFetchAtomic(result_md_h, + ret = PtlFetchAtomic(module->md_h, result_md_offset + sent, - origin_md_h, + module->md_h, origin_md_offset + sent, msg_length, peer, @@ 
-992,8 +952,6 @@ ompi_osc_portals4_compare_and_swap(void *origin_addr, size_t length; size_t offset; ptl_datatype_t ptl_dt; - ptl_handle_md_t result_md_h, origin_md_h; - void *result_md_base, *origin_md_base; ptl_size_t result_md_offset, origin_md_offset; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, @@ -1014,16 +972,14 @@ ompi_osc_portals4_compare_and_swap(void *origin_addr, assert(length < module->fetch_atomic_max); - ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base); - result_md_offset = ((char*) result_addr - (char*) result_md_base); - ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base); - origin_md_offset = ((char*) origin_addr - (char*) origin_md_base); + result_md_offset = (ptl_size_t) result_addr; + origin_md_offset = (ptl_size_t) origin_addr; (void)opal_atomic_add_64(&module->opcount, 1); - ret = PtlSwap(result_md_h, + ret = PtlSwap(module->md_h, result_md_offset, - origin_md_h, + module->md_h, origin_md_offset, length, peer, @@ -1082,18 +1038,14 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr, (void)opal_atomic_add_64(&module->opcount, 1); if (MPI_REPLACE == op) { - ptl_handle_md_t result_md_h, origin_md_h; - void *result_md_base, *origin_md_base; ptl_size_t result_md_offset, origin_md_offset; - ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base); - result_md_offset = ((char*) result_addr - (char*) result_md_base); - ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base); - origin_md_offset = ((char*) origin_addr - (char*) origin_md_base); + result_md_offset = (ptl_size_t) result_addr; + origin_md_offset = (ptl_size_t) origin_addr; - ret = PtlSwap(result_md_h, + ret = PtlSwap(module->md_h, result_md_offset, - origin_md_h, + module->md_h, origin_md_offset, length, peer, @@ -1106,14 +1058,11 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr, PTL_SWAP, ptl_dt); } else if (MPI_NO_OP == op) { - ptl_handle_md_t 
md_h; - void *md_base; ptl_size_t md_offset; - ompi_osc_portals4_get_md(result_addr, module->md_h, &md_h, &md_base); - md_offset = ((char*) result_addr - (char*) md_base); + md_offset = (ptl_size_t) result_addr; - ret = PtlGet(md_h, + ret = PtlGet(module->md_h, md_offset, length, peer, @@ -1122,21 +1071,17 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr, offset, NULL); } else { - ptl_handle_md_t result_md_h, origin_md_h; - void *result_md_base, *origin_md_base; ptl_size_t result_md_offset, origin_md_offset; ret = ompi_osc_portals4_get_op(op, &ptl_op); if (OMPI_SUCCESS != ret) return ret; - ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base); - result_md_offset = ((char*) result_addr - (char*) result_md_base); - ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base); - origin_md_offset = ((char*) origin_addr - (char*) origin_md_base); + result_md_offset = (ptl_size_t) result_addr; + origin_md_offset = (ptl_size_t) origin_addr; - ret = PtlFetchAtomic(result_md_h, + ret = PtlFetchAtomic(module->md_h, result_md_offset, - origin_md_h, + module->md_h, origin_md_offset, length, peer, diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index 4cb9f81c85..5f30d4e4b5 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -441,64 +441,12 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit goto error; } -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - { - int i; - int num_mds = ompi_mtl_portals4_get_num_mds(); - ptl_size_t size = 1ULL << OPAL_PORTALS4_MAX_MD_SIZE; - ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2; - - module->md_h = malloc(sizeof(ptl_handle_md_t) * num_mds); - if (NULL == module->md_h) { - ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; - goto error; - } - for (i = 0 ; i < num_mds ; ++i) { - module->md_h[i] = PTL_INVALID_HANDLE; - } - 
- module->req_md_h = malloc(sizeof(ptl_handle_md_t) * num_mds); - if (NULL == module->req_md_h) { - ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; - goto error; - } - for (i = 0 ; i < num_mds ; ++i) { - module->req_md_h[i] = PTL_INVALID_HANDLE; - } - - for (i = 0 ; i < num_mds ; ++i) { - md.start = (char*) (offset_unit * i); - md.length = (i - 1 == num_mds) ? size / 2 : size; - - md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; - md.eq_handle = mca_osc_portals4_component.matching_eq_h; - md.ct_handle = module->ct_h; - ret = PtlMDBind(module->ni_h, &md, &module->md_h); - if (PTL_OK != ret) { - opal_output_verbose(1, ompi_osc_base_framework.framework_output, - "%s:%d: PtlMDBind failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - - md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; - md.eq_handle = mca_osc_portals4_component.matching_eq_h; - md.ct_handle = module->ct_h; - ret = PtlMDBind(module->ni_h, &md, &module->req_md_h); - if (PTL_OK != ret) { - opal_output_verbose(1, ompi_osc_base_framework.framework_output, - "%s:%d: PtlMDBind failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - } -#else md.start = 0; md.length = PTL_SIZE_MAX; md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; md.eq_handle = mca_osc_portals4_component.matching_eq_h; md.ct_handle = module->ct_h; - ret = PtlMDBind(module->ni_h, &md, &module->md_h[0]); + ret = PtlMDBind(module->ni_h, &md, &module->md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", @@ -511,18 +459,17 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; md.eq_handle = mca_osc_portals4_component.matching_eq_h; md.ct_handle = module->ct_h; - ret = PtlMDBind(module->ni_h, &md, &module->req_md_h[0]); + ret = PtlMDBind(module->ni_h, &md, &module->req_md_h); if (PTL_OK != ret) { 
opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); goto error; } -#endif if (MPI_WIN_FLAVOR_DYNAMIC == flavor) { me.start = 0; - me.length = SIZE_MAX; + me.length = PTL_SIZE_MAX; } else { me.start = *base; me.length = size; @@ -619,12 +566,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit /* BWB: FIX ME: This is all wrong... */ if (0 != module->ct_h) PtlCTFree(module->ct_h); if (0 != module->data_me_h) PtlMEUnlink(module->data_me_h); -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - /* BWB: FIX ME */ -#else - if (0 != module->req_md_h) PtlMDRelease(module->req_md_h[0]); - if (0 != module->md_h) PtlMDRelease(module->md_h[0]); -#endif + if (0 != module->req_md_h) PtlMDRelease(module->req_md_h); + if (0 != module->md_h) PtlMDRelease(module->md_h); if (NULL != module->comm) ompi_comm_free(&module->comm); if (NULL != module) free(module); @@ -659,12 +602,8 @@ ompi_osc_portals4_free(struct ompi_win_t *win) /* cleanup */ PtlMEUnlink(module->data_me_h); -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - /* BWB: FIX ME */ -#else - PtlMDRelease(module->md_h[0]); - PtlMDRelease(module->req_md_h[0]); -#endif + PtlMDRelease(module->md_h); + PtlMDRelease(module->req_md_h); PtlCTFree(module->ct_h); if (NULL != module->disp_units) free(module->disp_units); ompi_comm_free(&module->comm); diff --git a/ompi/mca/osc/portals4/osc_portals4_passive_target.c b/ompi/mca/osc/portals4/osc_portals4_passive_target.c index e867d273e6..2a3a55110e 100644 --- a/ompi/mca/osc/portals4/osc_portals4_passive_target.c +++ b/ompi/mca/osc/portals4/osc_portals4_passive_target.c @@ -44,18 +44,13 @@ lk_cas64(ompi_osc_portals4_module_t *module, { int ret; size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock); - ptl_handle_md_t result_md_h, write_md_h; - void *result_base, *write_base; (void)opal_atomic_add_64(&module->opcount, 1); - ompi_osc_portals4_get_md(result_val, 
module->md_h, &result_md_h, &result_base); - ompi_osc_portals4_get_md(&write_val, module->md_h, &write_md_h, &write_base); - - ret = PtlSwap(result_md_h, - (char*) result_val - (char*) result_base, - write_md_h, - (char*) &write_val - (char*) write_base, + ret = PtlSwap(module->md_h, + (ptl_size_t) result_val, + module->md_h, + (ptl_size_t) &write_val, sizeof(int64_t), ompi_osc_portals4_get_peer(module, target), module->pt_idx, @@ -82,15 +77,11 @@ lk_write64(ompi_osc_portals4_module_t *module, { int ret; size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock); - ptl_handle_md_t md_h; - void *base; (void)opal_atomic_add_64(&module->opcount, 1); - ompi_osc_portals4_get_md(&write_val, module->md_h, &md_h, &base); - - ret = PtlPut(md_h, - (char*) &write_val - (char*) base, + ret = PtlPut(module->md_h, + (ptl_size_t) &write_val, sizeof(int64_t), PTL_ACK_REQ, ompi_osc_portals4_get_peer(module, target), @@ -116,18 +107,13 @@ lk_add64(ompi_osc_portals4_module_t *module, { int ret; size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock); - ptl_handle_md_t result_md_h, write_md_h; - void *result_base, *write_base; (void)opal_atomic_add_64(&module->opcount, 1); - ompi_osc_portals4_get_md(result_val, module->md_h, &result_md_h, &result_base); - ompi_osc_portals4_get_md(&write_val, module->md_h, &write_md_h, &write_base); - - ret = PtlFetchAtomic(result_md_h, - (char*) result_val - (char*) result_base, - write_md_h, - (char*) &write_val - (char*) write_base, + ret = PtlFetchAtomic(module->md_h, + (ptl_size_t) result_val, + module->md_h, + (ptl_size_t) &write_val, sizeof(int64_t), ompi_osc_portals4_get_peer(module, target), module->pt_idx, diff --git a/opal/mca/btl/portals4/btl_portals4.c b/opal/mca/btl/portals4/btl_portals4.c index 4d65e4d869..5b7b70eb5f 100644 --- a/opal/mca/btl/portals4/btl_portals4.c +++ b/opal/mca/btl/portals4/btl_portals4.c @@ -139,52 +139,7 @@ btl_portals4_init_interface(void) OPAL_OUTPUT_VERBOSE((90, 
opal_btl_base_framework.framework_output, "PtlMDBind (zero-length md=%d) OK for NI %d", portals4_btl->zero_md_h, interface)); - /* Bind MD/MDs across all memory. We prefer (for obvious reasons) - to have a single MD across all of memory */ -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - { - int i; - int num_mds = mca_btl_portals4_get_num_mds(); - ptl_size_t size = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) - 1; - ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2; - - portals4_btl->send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds); - if (NULL == portals4_btl->send_md_hs) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "%s:%d: Error allocating MD array", - __FILE__, __LINE__); - ret = OPAL_ERR_TEMP_OUT_OF_RESOURCE; - goto error; - } - - for (i = 0 ; i < num_mds ; ++i) { - portals4_btl->send_md_hs[i] = PTL_INVALID_HANDLE; - } - - for (i = 0 ; i < num_mds ; ++i) { - md.start = (char*) (offset_unit * i); - md.length = (i - 1 == num_mds) ? size / 2 : size; - md.options = 0; - md.eq_handle = portals4_btl->recv_eq_h; - md.ct_handle = PTL_CT_NONE; - - opal_output_verbose(50, opal_btl_base_framework.framework_output, - "Binding md from %p of length %lx", - md.start, md.length); - - ret = PtlMDBind(portals4_btl->portals_ni_h, - &md, - &portals4_btl->send_md_hs[i]); - if (PTL_OK != ret) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "%s:%d: PtlMDBind failed for NI %d: %d\n", - __FILE__, __LINE__, interface, ret); - goto error; - } - } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlMDBind (all memory) OK for NI %d\n", interface)); - } -#else + /* Bind MD across all memory */ md.start = 0; md.length = PTL_SIZE_MAX; md.options = 0; @@ -200,7 +155,6 @@ btl_portals4_init_interface(void) __FILE__, __LINE__, interface, ret); goto error; } -#endif /* Handle long overflows */ me.start = NULL; @@ -653,27 +607,10 @@ void mca_btl_portals4_free_module(mca_btl_portals4_module_t *portals4_btl) 
mca_btl_portals4_component_progress(); } -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - if (NULL != portals4_btl->send_md_hs) { - int i; - int num_mds = mca_btl_portals4_get_num_mds(); - - for (i = 0 ; i < num_mds ; ++i) { - if (!PtlHandleIsEqual(portals4_btl->send_md_hs[i], PTL_INVALID_HANDLE)) { - PtlMDRelease(portals4_btl->send_md_hs[i]); - portals4_btl->send_md_hs[i] = PTL_INVALID_HANDLE; - } - } - - free(portals4_btl->send_md_hs); - portals4_btl->send_md_hs = NULL; - } -#else if (!PtlHandleIsEqual(portals4_btl->send_md_h, PTL_INVALID_HANDLE)) { PtlMDRelease(portals4_btl->send_md_h); portals4_btl->send_md_h = PTL_INVALID_HANDLE; } -#endif if (!PtlHandleIsEqual(portals4_btl->zero_md_h, PTL_INVALID_HANDLE)) { PtlMDRelease(portals4_btl->zero_md_h); portals4_btl->zero_md_h = PTL_INVALID_HANDLE; diff --git a/opal/mca/btl/portals4/btl_portals4.h b/opal/mca/btl/portals4/btl_portals4.h index 83c31c43fc..9af415b580 100644 --- a/opal/mca/btl/portals4/btl_portals4.h +++ b/opal/mca/btl/portals4/btl_portals4.h @@ -120,12 +120,8 @@ struct mca_btl_portals4_module_t { /** MD handle for sending ACKS */ ptl_handle_md_t zero_md_h; - /** Send MD handle(s). Use opal_mtl_portals4_get_md() to get the right md */ -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - ptl_handle_md_t *send_md_hs; -#else + /** Send MD handle */ ptl_handle_md_t send_md_h; -#endif /** long message receive overflow ME. Persistent ME, first in overflow list on the recv_idx portal table. */ @@ -177,36 +173,6 @@ typedef struct mca_btl_portals4_module_t mca_btl_portals4_module_t; #define REQ_BTL_TABLE_ID 2 -/* - * See note in ompi/mtl/portals4/mtl_portals4.h for how we deal with - * platforms that don't allow us to crate an MD that covers all of - * memory. 
- */ -static inline void -opal_btl_portals4_get_md(const void *ptr, ptl_handle_md_t *md_h, void **base_ptr, mca_btl_portals4_module_t *portals4_btl) -{ -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - int mask = (1ULL << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1)) - 1; - int which = (((uintptr_t) ptr) >> (OPAL_PORTALS4_MAX_MD_SIZE - 1)) & mask; - *md_h = portals4_btl->send_md_hs[which]; - *base_ptr = (void*) (which * (1ULL << (OPAL_PORTALS4_MAX_MD_SIZE - 1))); -#else - *md_h = portals4_btl->send_md_h; - *base_ptr = 0; -#endif -} - - -static inline int -mca_btl_portals4_get_num_mds(void) -{ -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - return (1 << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1)); -#else - return 1; -#endif -} - int mca_btl_portals4_component_progress(void); void mca_btl_portals4_free_module(mca_btl_portals4_module_t *portals4_btl); diff --git a/opal/mca/btl/portals4/btl_portals4_component.c b/opal/mca/btl/portals4/btl_portals4_component.c index f73248d6c3..33086c24e2 100644 --- a/opal/mca/btl/portals4/btl_portals4_component.c +++ b/opal/mca/btl/portals4/btl_portals4_component.c @@ -242,11 +242,7 @@ mca_btl_portals4_component_open(void) mca_btl_portals4_module.recv_eq_h = PTL_EQ_NONE; -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - mca_btl_portals4_module.send_md_hs = NULL; -#else mca_btl_portals4_module.send_md_h = PTL_INVALID_HANDLE; -#endif mca_btl_portals4_module.portals_ni_h = PTL_INVALID_HANDLE; mca_btl_portals4_module.zero_md_h = PTL_INVALID_HANDLE; diff --git a/opal/mca/btl/portals4/btl_portals4_send.c b/opal/mca/btl/portals4/btl_portals4_send.c index fa3814cddd..0b50a20337 100644 --- a/opal/mca/btl/portals4/btl_portals4_send.c +++ b/opal/mca/btl/portals4/btl_portals4_send.c @@ -36,9 +36,6 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base, mca_btl_portals4_frag_t *frag = (mca_btl_portals4_frag_t*) descriptor; ptl_match_bits_t match_bits, msglen_type; 
ptl_size_t put_length; - int64_t offset; - ptl_handle_md_t md_h; - void *base; int ret; frag->endpoint = endpoint; @@ -51,9 +48,6 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base, BTL_PORTALS4_SET_SEND_BITS(match_bits, 0, 0, tag, msglen_type); - opal_btl_portals4_get_md(frag->segments[0].base.seg_addr.pval, &md_h, &base, portals4_btl); - offset = (ptl_size_t) ((char*) frag->segments[0].base.seg_addr.pval - (char*) base); - /* reserve space in the event queue for rdma operations immediately */ while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) > portals4_btl->portals_max_outstanding_ops) { @@ -71,8 +65,8 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base, (void*)frag, endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag, put_length, (uint64_t)match_bits)); - ret = PtlPut(md_h, - (ptl_size_t) offset, + ret = PtlPut(portals4_btl->send_md_h, + (ptl_size_t) frag->segments[0].base.seg_addr.pval, put_length, /* fragment length */ (mca_btl_portals4_component.portals_need_ack ? PTL_ACK_REQ : PTL_NO_ACK_REQ), endpoint->ptl_proc, @@ -85,8 +79,10 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base, opal_output(opal_btl_base_framework.framework_output, "mca_btl_portals4_send: PtlPut failed with error %d", ret); return OPAL_ERROR; } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlPut frag=%p rank=%x pid=%x tag=%x addr=%p len=%ld match_bits=%lx\n", - (void*)frag, endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag, (void *)offset, put_length, (uint64_t)match_bits)); + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, + "PtlPut frag=%p rank=%x pid=%x tag=%x addr=%p len=%ld match_bits=%lx", + (void*)frag, endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag, + (void *)frag->segments[0].base.seg_addr.pval, put_length, (uint64_t)match_bits)); return OPAL_SUCCESS; }