portals4: use a single Memory Descriptor to cover all of memory
In days past, some implementations of Portals4 could not cover all of memory with a single Memory Descriptor, so multiple large overlapping Memory Descriptors were created. Because none of the current implementations have this limitation (and no future implementations should either), this commit removes the code that created and managed the overlapping Memory Descriptors.
Этот коммит содержится в:
родитель
8497a6a140
Коммит
9df163f116
@ -127,67 +127,22 @@ portals4_init_interface(void)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
|
||||
to have a single MD across all of memory */
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
{
|
||||
int i;
|
||||
int num_mds = ompi_mtl_portals4_get_num_mds();
|
||||
ptl_size_t size = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) - 1;
|
||||
ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2;
|
||||
/* Bind MD across all memory */
|
||||
md.start = 0;
|
||||
md.length = PTL_SIZE_MAX;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ompi_mtl_portals4.send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
|
||||
if (NULL == ompi_mtl_portals4.send_md_hs) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: Error allocating MD array",
|
||||
__FILE__, __LINE__);
|
||||
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
goto error;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
ompi_mtl_portals4.send_md_hs[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
md.start = (char*) (offset_unit * i);
|
||||
md.length = (i - 1 == num_mds) ? size / 2 : size;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
|
||||
"Binding md from %p of length %lx",
|
||||
md.start, md.length);
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.send_md_hs[i]);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.send_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
#else
|
||||
md.start = 0;
|
||||
md.length = PTL_SIZE_MAX;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.send_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Handle long overflows */
|
||||
me.start = NULL;
|
||||
@ -255,24 +210,9 @@ portals4_init_interface(void)
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
|
||||
}
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
if (NULL != ompi_mtl_portals4.send_md_hs) {
|
||||
int i;
|
||||
int num_mds = ompi_mtl_portals4_get_num_mds();
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_hs[i], PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
free(ompi_mtl_portals4.send_md_hs);
|
||||
}
|
||||
#else
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.send_md_h);
|
||||
}
|
||||
#endif
|
||||
if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
|
||||
}
|
||||
@ -457,20 +397,7 @@ ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl)
|
||||
|
||||
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
|
||||
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
{
|
||||
int i;
|
||||
int num_mds = ompi_mtl_portals4_get_num_mds();
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
|
||||
}
|
||||
|
||||
free(ompi_mtl_portals4.send_md_hs);
|
||||
}
|
||||
#else
|
||||
PtlMDRelease(ompi_mtl_portals4.send_md_h);
|
||||
#endif
|
||||
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
|
||||
|
@ -76,12 +76,8 @@ struct mca_mtl_portals4_module_t {
|
||||
/** MD handle for sending ACKS */
|
||||
ptl_handle_md_t zero_md_h;
|
||||
|
||||
/** Send MD handle(s). Use ompi_mtl_portals4_get_md() to get the right md */
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
ptl_handle_md_t *send_md_hs;
|
||||
#else
|
||||
/** Send MD handle */
|
||||
ptl_handle_md_t send_md_h;
|
||||
#endif
|
||||
|
||||
/** long message receive overflow ME. Persistent ME, first in
|
||||
overflow list on the recv_idx portal table. */
|
||||
@ -212,64 +208,6 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4;
|
||||
#define MTL_PORTALS4_IS_SYNC_MSG(hdr_data) \
|
||||
(0 != (MTL_PORTALS4_SYNC_MSG & hdr_data))
|
||||
|
||||
|
||||
/*
|
||||
* Not all implementations of Portals 4 support binding a memory
|
||||
* descriptor which covers all of memory, but all support covering a
|
||||
* large fraction of memory. Therefore, rather than working around
|
||||
* the issue by pinning per message, we use a number of memory
|
||||
* descriptors to cover all of memory. As long as the maximum memory
|
||||
* descriptor is a large fraction of the user virtual address space
|
||||
* (like 46 bit MDs on a platform with 47 bits of user virtual address
|
||||
* space), this works fine.
|
||||
*
|
||||
* Our scheme is to create N memory descriptors which contiguously
|
||||
* cover the entire user address space, then another N-1 contiguous
|
||||
* memory descriptors offset by 1/2 the size of the MD, then a final
|
||||
* memory descriptor of 1/2 the size of the other MDs covering the top
|
||||
* of the memory space, to avoid if statements in the critical path. This
|
||||
* scheme allows for a maximum message size of 1/2 the size of the MD
|
||||
* without ever crossing an MD boundary. Also, because MD sizes are
|
||||
* always on a power of 2 in this scheme, computing the offsets and MD
|
||||
* selection are quick, using only bit shift and mask.
|
||||
*
|
||||
* ompi_mtl_portals4_get_md() relies heavily on compiler constant folding.
|
||||
* "mask" can be constant folded into a constant. "which" compiler folds
|
||||
* into a bit shift of a register a constant number of times, then masked
|
||||
* by a constant (the input is, unfortunately, not constant).
|
||||
*
|
||||
* In the case where an MD can cover all of memory,
|
||||
* ompi_mtl_portals4_get_md() will be compiled into two assignments.
|
||||
* Assuming the function is inlined (and it certainly should be), the two
|
||||
* assignments should be optimized into register assignments for the
|
||||
* Portals call relatively easily.
|
||||
*/
|
||||
static inline void
|
||||
ompi_mtl_portals4_get_md(const void *ptr, ptl_handle_md_t *md_h, void **base_ptr)
|
||||
{
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
int mask = (1ULL << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1)) - 1;
|
||||
int which = (((uintptr_t) ptr) >> (OPAL_PORTALS4_MAX_MD_SIZE - 1)) & mask;
|
||||
*md_h = ompi_mtl_portals4.send_md_hs[which];
|
||||
*base_ptr = (void*) (which * (1ULL << (OPAL_PORTALS4_MAX_MD_SIZE - 1)));
|
||||
#else
|
||||
*md_h = ompi_mtl_portals4.send_md_h;
|
||||
*base_ptr = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static inline int
|
||||
ompi_mtl_portals4_get_num_mds(void)
|
||||
{
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
return (1 << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1));
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* MTL interface functions */
|
||||
extern int ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl);
|
||||
|
||||
|
@ -225,12 +225,6 @@ ompi_mtl_portals4_component_open(void)
|
||||
ompi_mtl_portals4.recv_eq_h = PTL_INVALID_HANDLE;
|
||||
ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE;
|
||||
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
ompi_mtl_portals4.send_md_hs = NULL;
|
||||
#else
|
||||
ompi_mtl_portals4.send_md_h = PTL_INVALID_HANDLE;
|
||||
#endif
|
||||
|
||||
ompi_mtl_portals4.long_overflow_me_h = PTL_INVALID_HANDLE;
|
||||
ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL;
|
||||
ompi_mtl_portals4.read_idx = (ptl_pt_index_t) ~0UL;
|
||||
@ -485,3 +479,4 @@ ompi_mtl_portals4_progress(void)
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
@ -184,8 +184,6 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
||||
ptl_match_bits_t match_bits;
|
||||
ptl_me_t me;
|
||||
ptl_hdr_data_t hdr_data;
|
||||
ptl_handle_md_t md_h;
|
||||
void *base;
|
||||
|
||||
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
|
||||
MTL_PORTALS4_SHORT_MSG);
|
||||
@ -233,23 +231,20 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
||||
ptl_request->opcount, hdr_data, match_bits));
|
||||
}
|
||||
|
||||
ompi_mtl_portals4_get_md(start, &md_h, &base);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
||||
"Send %lu, start: %p, base: %p, offset: %lx",
|
||||
ptl_request->opcount, start, base,
|
||||
(ptl_size_t) ((char*) start - (char*) base)));
|
||||
"Send %lu, start: %p",
|
||||
ptl_request->opcount, start));
|
||||
|
||||
ret = PtlPut(md_h,
|
||||
(ptl_size_t) ((char*) start - (char*) base),
|
||||
ret = PtlPut(ompi_mtl_portals4.send_md_h,
|
||||
(ptl_size_t) start,
|
||||
length,
|
||||
PTL_ACK_REQ,
|
||||
ptl_proc,
|
||||
ompi_mtl_portals4.recv_idx,
|
||||
match_bits,
|
||||
0,
|
||||
PTL_ACK_REQ,
|
||||
ptl_proc,
|
||||
ompi_mtl_portals4.recv_idx,
|
||||
match_bits,
|
||||
0,
|
||||
ptl_request,
|
||||
hdr_data);
|
||||
hdr_data);
|
||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlPut failed: %d",
|
||||
@ -274,8 +269,6 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag,
|
||||
ptl_me_t me;
|
||||
ptl_hdr_data_t hdr_data;
|
||||
ptl_size_t put_length;
|
||||
ptl_handle_md_t md_h;
|
||||
void *base;
|
||||
|
||||
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
|
||||
MTL_PORTALS4_LONG_MSG);
|
||||
@ -316,10 +309,8 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag,
|
||||
put_length = (rndv == ompi_mtl_portals4.protocol) ?
|
||||
(ptl_size_t) ompi_mtl_portals4.eager_limit : (ptl_size_t) length;
|
||||
|
||||
ompi_mtl_portals4_get_md(start, &md_h, &base);
|
||||
|
||||
ret = PtlPut(md_h,
|
||||
(ptl_size_t) ((char*) start - (char*) base),
|
||||
ret = PtlPut(ompi_mtl_portals4.send_md_h,
|
||||
(ptl_size_t) start,
|
||||
put_length,
|
||||
PTL_ACK_REQ,
|
||||
ptl_proc,
|
||||
|
@ -76,13 +76,8 @@ struct ompi_osc_portals4_module_t {
|
||||
ptl_handle_ni_t ni_h; /* network interface used by this window */
|
||||
ptl_pt_index_t pt_idx; /* portal table index used by this window (this will be same across window) */
|
||||
ptl_handle_ct_t ct_h; /* Counting event handle used for completion in this window */
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
ptl_handle_md_t *md_h; /* memory descriptor describing all of memory used by this window */
|
||||
ptl_handle_md_t *req_md_h; /* memory descriptor with event completion used by this window */
|
||||
#else
|
||||
ptl_handle_md_t md_h[1]; /* memory descriptor describing all of memory used by this window */
|
||||
ptl_handle_md_t req_md_h[1]; /* memory descriptor with event completion used by this window */
|
||||
#endif
|
||||
ptl_handle_md_t md_h; /* memory descriptor describing all of memory used by this window */
|
||||
ptl_handle_md_t req_md_h; /* memory descriptor with event completion used by this window */
|
||||
ptl_handle_me_t data_me_h; /* data match list entry (MB are CID | OSC_PORTALS4_MB_DATA) */
|
||||
ptl_handle_me_t control_me_h; /* match list entry for control data (node_state_t). Match bits are (CID | OSC_PORTALS4_MB_CONTROL). */
|
||||
int64_t opcount;
|
||||
@ -120,39 +115,6 @@ get_displacement(ompi_osc_portals4_module_t *module,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* See note in ompi/mtl/portals4/mtl_portals4.h for how we deal with
|
||||
* platforms that don't allow us to create an MD that covers all of
|
||||
* memory.
|
||||
*/
|
||||
static inline void
|
||||
ompi_osc_portals4_get_md(const void *ptr, const ptl_handle_md_t *array,
|
||||
ptl_handle_md_t *md_h, void **base_ptr)
|
||||
{
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
int mask = (1ULL << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1)) - 1;
|
||||
int which = (((uintptr_t) ptr) >> (OPAL_PORTALS4_MAX_MD_SIZE - 1)) & mask;
|
||||
*md_h = array[which];
|
||||
*base_ptr = (void*) (which * (1ULL << (OPAL_PORTALS4_MAX_MD_SIZE - 1)));
|
||||
#else
|
||||
*md_h = array[0];
|
||||
*base_ptr = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static inline int
|
||||
ompi_osc_portals4_get_num_mds(void)
|
||||
{
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
return (1 << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1));
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
int ompi_osc_portals4_attach(struct ompi_win_t *win, void *base, size_t len);
|
||||
int ompi_osc_portals4_detach(struct ompi_win_t *win, void *base);
|
||||
|
||||
|
@ -74,8 +74,6 @@ ompi_osc_portals4_complete(struct ompi_win_t *win)
|
||||
ompi_osc_portals4_module_t *module =
|
||||
(ompi_osc_portals4_module_t*) win->w_osc_module;
|
||||
int ret, i, size;
|
||||
ptl_handle_md_t md_h;
|
||||
void *base;
|
||||
|
||||
ret = ompi_osc_portals4_complete_all(module);
|
||||
if (ret != OMPI_SUCCESS) return ret;
|
||||
@ -84,13 +82,11 @@ ompi_osc_portals4_complete(struct ompi_win_t *win)
|
||||
module->state.post_count = 0;
|
||||
PtlAtomicSync();
|
||||
|
||||
ompi_osc_portals4_get_md(&module->one, module->md_h, &md_h, &base);
|
||||
|
||||
size = ompi_group_size(module->start_group);
|
||||
for (i = 0 ; i < size ; ++i) {
|
||||
|
||||
ret = PtlAtomic(md_h,
|
||||
(ptl_size_t) ((char*) &module->one - (char*) base),
|
||||
ret = PtlAtomic(module->md_h,
|
||||
(ptl_size_t) &module->one,
|
||||
sizeof(module->one),
|
||||
PTL_ACK_REQ,
|
||||
ompi_osc_portals4_get_peer_group(module->start_group, i),
|
||||
@ -124,8 +120,6 @@ ompi_osc_portals4_post(struct ompi_group_t *group,
|
||||
ompi_osc_portals4_module_t *module =
|
||||
(ompi_osc_portals4_module_t*) win->w_osc_module;
|
||||
int ret, i, size;
|
||||
ptl_handle_md_t md_h;
|
||||
void *base;
|
||||
|
||||
if (0 == (assert & MPI_MODE_NOCHECK)) {
|
||||
OBJ_RETAIN(group);
|
||||
@ -134,12 +128,10 @@ ompi_osc_portals4_post(struct ompi_group_t *group,
|
||||
module->state.complete_count = 0;
|
||||
PtlAtomicSync();
|
||||
|
||||
ompi_osc_portals4_get_md(&module->one, module->md_h, &md_h, &base);
|
||||
|
||||
size = ompi_group_size(module->post_group);
|
||||
for (i = 0 ; i < size ; ++i) {
|
||||
ret = PtlAtomic(md_h,
|
||||
(ptl_size_t) ((char*) &module->one - (char*) base),
|
||||
ret = PtlAtomic(module->md_h,
|
||||
(ptl_size_t) &module->one,
|
||||
sizeof(module->one),
|
||||
PTL_ACK_REQ,
|
||||
ompi_osc_portals4_get_peer_group(module->post_group, i),
|
||||
|
@ -197,8 +197,6 @@ ompi_osc_portals4_rput(void *origin_addr,
|
||||
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
|
||||
size_t length;
|
||||
size_t offset;
|
||||
ptl_handle_md_t md_h;
|
||||
void *md_base;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"rput: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
|
||||
@ -228,9 +226,8 @@ ompi_osc_portals4_rput(void *origin_addr,
|
||||
return ret;
|
||||
}
|
||||
length *= origin_count;
|
||||
ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base);
|
||||
ret = PtlPut(md_h,
|
||||
(ptl_size_t) ((char*) origin_addr - (char*) md_base),
|
||||
ret = PtlPut(module->req_md_h,
|
||||
(ptl_size_t) origin_addr,
|
||||
length,
|
||||
PTL_ACK_REQ,
|
||||
peer,
|
||||
@ -267,8 +264,6 @@ ompi_osc_portals4_rget(void *origin_addr,
|
||||
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
|
||||
size_t length;
|
||||
size_t offset;
|
||||
ptl_handle_md_t md_h;
|
||||
void *md_base;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"rget: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
|
||||
@ -298,9 +293,8 @@ ompi_osc_portals4_rget(void *origin_addr,
|
||||
return ret;
|
||||
}
|
||||
length *= origin_count;
|
||||
ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base);
|
||||
ret = PtlGet(md_h,
|
||||
(ptl_size_t) ((char*) origin_addr - (char*) md_base),
|
||||
ret = PtlGet(module->req_md_h,
|
||||
(ptl_size_t) origin_addr,
|
||||
length,
|
||||
peer,
|
||||
module->pt_idx,
|
||||
@ -338,8 +332,6 @@ ompi_osc_portals4_raccumulate(void *origin_addr,
|
||||
size_t offset;
|
||||
ptl_op_t ptl_op;
|
||||
ptl_datatype_t ptl_dt;
|
||||
ptl_handle_md_t md_h;
|
||||
void *md_base;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"raccumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s 0x%lx",
|
||||
@ -372,8 +364,7 @@ ompi_osc_portals4_raccumulate(void *origin_addr,
|
||||
length *= origin_count;
|
||||
sent = 0;
|
||||
|
||||
ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base);
|
||||
md_offset = ((char*) origin_addr - (char*) md_base);
|
||||
md_offset = (ptl_size_t) origin_addr;
|
||||
|
||||
do {
|
||||
size_t msg_length = MIN(module->atomic_max, length - sent);
|
||||
@ -381,7 +372,7 @@ ompi_osc_portals4_raccumulate(void *origin_addr,
|
||||
request->ops_expected++;
|
||||
|
||||
if (MPI_REPLACE == op) {
|
||||
ret = PtlPut(md_h,
|
||||
ret = PtlPut(module->req_md_h,
|
||||
md_offset + sent,
|
||||
msg_length,
|
||||
PTL_ACK_REQ,
|
||||
@ -398,7 +389,7 @@ ompi_osc_portals4_raccumulate(void *origin_addr,
|
||||
ret = ompi_osc_portals4_get_op(op, &ptl_op);
|
||||
if (OMPI_SUCCESS != ret) return ret;
|
||||
|
||||
ret = PtlAtomic(md_h,
|
||||
ret = PtlAtomic(module->req_md_h,
|
||||
offset + sent,
|
||||
msg_length,
|
||||
PTL_ACK_REQ,
|
||||
@ -475,8 +466,6 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
|
||||
sent = 0;
|
||||
|
||||
if (MPI_REPLACE == op) {
|
||||
ptl_handle_md_t result_md_h, origin_md_h;
|
||||
void *result_md_base, *origin_md_base;
|
||||
ptl_size_t result_md_offset, origin_md_offset;
|
||||
|
||||
ret = ompi_datatype_type_size(origin_dt, &length);
|
||||
@ -486,10 +475,8 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
|
||||
}
|
||||
length *= origin_count;
|
||||
|
||||
ompi_osc_portals4_get_md(result_addr, module->req_md_h, &result_md_h, &result_md_base);
|
||||
result_md_offset = ((char*) result_addr - (char*) result_md_base);
|
||||
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
|
||||
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
|
||||
result_md_offset = (ptl_size_t) result_addr;
|
||||
origin_md_offset = (ptl_size_t) origin_addr;
|
||||
|
||||
do {
|
||||
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
|
||||
@ -497,9 +484,9 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
request->ops_expected++;
|
||||
|
||||
ret = PtlSwap(result_md_h,
|
||||
ret = PtlSwap(module->req_md_h,
|
||||
result_md_offset + sent,
|
||||
origin_md_h,
|
||||
module->md_h,
|
||||
origin_md_offset + sent,
|
||||
msg_length,
|
||||
peer,
|
||||
@ -514,8 +501,6 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
|
||||
sent += msg_length;
|
||||
} while (sent < length);
|
||||
} else if (MPI_NO_OP == op) {
|
||||
ptl_handle_md_t md_h;
|
||||
void *md_base;
|
||||
ptl_size_t md_offset;
|
||||
|
||||
ret = ompi_datatype_type_size(target_dt, &length);
|
||||
@ -525,8 +510,7 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
|
||||
}
|
||||
length *= target_count;
|
||||
|
||||
ompi_osc_portals4_get_md(result_addr, module->req_md_h, &md_h, &md_base);
|
||||
md_offset = ((char*) result_addr - (char*) md_base);
|
||||
md_offset = (ptl_size_t) result_addr;
|
||||
|
||||
do {
|
||||
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
|
||||
@ -534,7 +518,7 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
request->ops_expected++;
|
||||
|
||||
ret = PtlGet(md_h,
|
||||
ret = PtlGet(module->req_md_h,
|
||||
md_offset + sent,
|
||||
msg_length,
|
||||
peer,
|
||||
@ -545,8 +529,6 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
|
||||
sent += msg_length;
|
||||
} while (sent < length);
|
||||
} else {
|
||||
ptl_handle_md_t result_md_h, origin_md_h;
|
||||
void *result_md_base, *origin_md_base;
|
||||
ptl_size_t result_md_offset, origin_md_offset;
|
||||
|
||||
ret = ompi_datatype_type_size(origin_dt, &length);
|
||||
@ -556,10 +538,8 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
|
||||
}
|
||||
length *= origin_count;
|
||||
|
||||
ompi_osc_portals4_get_md(result_addr, module->req_md_h, &result_md_h, &result_md_base);
|
||||
result_md_offset = ((char*) result_addr - (char*) result_md_base);
|
||||
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
|
||||
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
|
||||
result_md_offset = (ptl_size_t) result_addr;
|
||||
origin_md_offset = (ptl_size_t) origin_addr;
|
||||
|
||||
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
|
||||
if (OMPI_SUCCESS != ret) return ret;
|
||||
@ -573,9 +553,9 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
request->ops_expected++;
|
||||
|
||||
ret = PtlFetchAtomic(result_md_h,
|
||||
ret = PtlFetchAtomic(module->req_md_h,
|
||||
result_md_offset + sent,
|
||||
origin_md_h,
|
||||
module->md_h,
|
||||
origin_md_offset + sent,
|
||||
msg_length,
|
||||
peer,
|
||||
@ -615,8 +595,6 @@ ompi_osc_portals4_put(void *origin_addr,
|
||||
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
|
||||
size_t length;
|
||||
size_t offset;
|
||||
ptl_handle_md_t md_h;
|
||||
void *md_base;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"put: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
|
||||
@ -639,9 +617,8 @@ ompi_osc_portals4_put(void *origin_addr,
|
||||
return ret;
|
||||
}
|
||||
length *= origin_count;
|
||||
ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base);
|
||||
ret = PtlPut(md_h,
|
||||
(ptl_size_t) ((char*) origin_addr - (char*) md_base),
|
||||
ret = PtlPut(module->md_h,
|
||||
(ptl_size_t) origin_addr,
|
||||
length,
|
||||
PTL_ACK_REQ,
|
||||
peer,
|
||||
@ -675,8 +652,6 @@ ompi_osc_portals4_get(void *origin_addr,
|
||||
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
|
||||
size_t length;
|
||||
size_t offset;
|
||||
ptl_handle_md_t md_h;
|
||||
void *md_base;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"get: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
|
||||
@ -699,9 +674,8 @@ ompi_osc_portals4_get(void *origin_addr,
|
||||
return ret;
|
||||
}
|
||||
length *= origin_count;
|
||||
ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base);
|
||||
ret = PtlGet(md_h,
|
||||
(ptl_size_t) ((char*) origin_addr - (char*) md_base),
|
||||
ret = PtlGet(module->md_h,
|
||||
(ptl_size_t) origin_addr,
|
||||
length,
|
||||
peer,
|
||||
module->pt_idx,
|
||||
@ -736,8 +710,6 @@ ompi_osc_portals4_accumulate(void *origin_addr,
|
||||
size_t offset;
|
||||
ptl_op_t ptl_op;
|
||||
ptl_datatype_t ptl_dt;
|
||||
ptl_handle_md_t md_h;
|
||||
void *md_base;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
"accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
|
||||
@ -764,15 +736,14 @@ ompi_osc_portals4_accumulate(void *origin_addr,
|
||||
length *= origin_count;
|
||||
sent = 0;
|
||||
|
||||
ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base);
|
||||
md_offset = ((char*) origin_addr - (char*) md_base);
|
||||
md_offset = (ptl_size_t) origin_addr;
|
||||
|
||||
do {
|
||||
size_t msg_length = MIN(module->atomic_max, length - sent);
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
|
||||
if (MPI_REPLACE == op) {
|
||||
ret = PtlPut(md_h,
|
||||
ret = PtlPut(module->md_h,
|
||||
md_offset + sent,
|
||||
msg_length,
|
||||
PTL_ACK_REQ,
|
||||
@ -789,7 +760,7 @@ ompi_osc_portals4_accumulate(void *origin_addr,
|
||||
ret = ompi_osc_portals4_get_op(op, &ptl_op);
|
||||
if (OMPI_SUCCESS != ret) return ret;
|
||||
|
||||
ret = PtlAtomic(md_h,
|
||||
ret = PtlAtomic(module->md_h,
|
||||
md_offset + sent,
|
||||
msg_length,
|
||||
PTL_ACK_REQ,
|
||||
@ -858,8 +829,6 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
|
||||
} else {
|
||||
sent = 0;
|
||||
if (MPI_REPLACE == op) {
|
||||
ptl_handle_md_t result_md_h, origin_md_h;
|
||||
void *result_md_base, *origin_md_base;
|
||||
ptl_size_t result_md_offset, origin_md_offset;
|
||||
|
||||
ret = ompi_datatype_type_size(origin_dt, &length);
|
||||
@ -868,19 +837,17 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
|
||||
}
|
||||
length *= origin_count;
|
||||
|
||||
ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
|
||||
result_md_offset = ((char*) result_addr - (char*) result_md_base);
|
||||
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
|
||||
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
|
||||
result_md_offset = (ptl_size_t) result_addr;
|
||||
origin_md_offset = (ptl_size_t) origin_addr;
|
||||
|
||||
do {
|
||||
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
|
||||
ret = PtlSwap(result_md_h,
|
||||
ret = PtlSwap(module->md_h,
|
||||
result_md_offset + sent,
|
||||
origin_md_h,
|
||||
module->md_h,
|
||||
origin_md_offset + sent,
|
||||
msg_length,
|
||||
peer,
|
||||
@ -895,8 +862,6 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
|
||||
sent += msg_length;
|
||||
} while (sent < length);
|
||||
} else if (MPI_NO_OP == op) {
|
||||
ptl_handle_md_t md_h;
|
||||
void *md_base;
|
||||
ptl_size_t md_offset;
|
||||
|
||||
ret = ompi_datatype_type_size(target_dt, &length);
|
||||
@ -905,15 +870,14 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
|
||||
}
|
||||
length *= target_count;
|
||||
|
||||
ompi_osc_portals4_get_md(result_addr, module->md_h, &md_h, &md_base);
|
||||
md_offset = ((char*) result_addr - (char*) md_base);
|
||||
md_offset = (ptl_size_t) result_addr;
|
||||
|
||||
do {
|
||||
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
|
||||
ret = PtlGet(md_h,
|
||||
ret = PtlGet(module->md_h,
|
||||
md_offset + sent,
|
||||
msg_length,
|
||||
peer,
|
||||
@ -924,8 +888,6 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
|
||||
sent += msg_length;
|
||||
} while (sent < length);
|
||||
} else {
|
||||
ptl_handle_md_t result_md_h, origin_md_h;
|
||||
void *result_md_base, *origin_md_base;
|
||||
ptl_size_t result_md_offset, origin_md_offset;
|
||||
|
||||
ret = ompi_datatype_type_size(origin_dt, &length);
|
||||
@ -934,10 +896,8 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
|
||||
}
|
||||
length *= origin_count;
|
||||
|
||||
ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
|
||||
result_md_offset = ((char*) result_addr - (char*) result_md_base);
|
||||
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
|
||||
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
|
||||
result_md_offset = (ptl_size_t) result_addr;
|
||||
origin_md_offset = (ptl_size_t) origin_addr;
|
||||
|
||||
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
|
||||
if (OMPI_SUCCESS != ret) return ret;
|
||||
@ -951,9 +911,9 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
|
||||
ret = PtlFetchAtomic(result_md_h,
|
||||
ret = PtlFetchAtomic(module->md_h,
|
||||
result_md_offset + sent,
|
||||
origin_md_h,
|
||||
module->md_h,
|
||||
origin_md_offset + sent,
|
||||
msg_length,
|
||||
peer,
|
||||
@ -992,8 +952,6 @@ ompi_osc_portals4_compare_and_swap(void *origin_addr,
|
||||
size_t length;
|
||||
size_t offset;
|
||||
ptl_datatype_t ptl_dt;
|
||||
ptl_handle_md_t result_md_h, origin_md_h;
|
||||
void *result_md_base, *origin_md_base;
|
||||
ptl_size_t result_md_offset, origin_md_offset;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
@ -1014,16 +972,14 @@ ompi_osc_portals4_compare_and_swap(void *origin_addr,
|
||||
|
||||
assert(length < module->fetch_atomic_max);
|
||||
|
||||
ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
|
||||
result_md_offset = ((char*) result_addr - (char*) result_md_base);
|
||||
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
|
||||
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
|
||||
result_md_offset = (ptl_size_t) result_addr;
|
||||
origin_md_offset = (ptl_size_t) origin_addr;
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
|
||||
ret = PtlSwap(result_md_h,
|
||||
ret = PtlSwap(module->md_h,
|
||||
result_md_offset,
|
||||
origin_md_h,
|
||||
module->md_h,
|
||||
origin_md_offset,
|
||||
length,
|
||||
peer,
|
||||
@ -1082,18 +1038,14 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr,
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
|
||||
if (MPI_REPLACE == op) {
|
||||
ptl_handle_md_t result_md_h, origin_md_h;
|
||||
void *result_md_base, *origin_md_base;
|
||||
ptl_size_t result_md_offset, origin_md_offset;
|
||||
|
||||
ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
|
||||
result_md_offset = ((char*) result_addr - (char*) result_md_base);
|
||||
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
|
||||
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
|
||||
result_md_offset = (ptl_size_t) result_addr;
|
||||
origin_md_offset = (ptl_size_t) origin_addr;
|
||||
|
||||
ret = PtlSwap(result_md_h,
|
||||
ret = PtlSwap(module->md_h,
|
||||
result_md_offset,
|
||||
origin_md_h,
|
||||
module->md_h,
|
||||
origin_md_offset,
|
||||
length,
|
||||
peer,
|
||||
@ -1106,14 +1058,11 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr,
|
||||
PTL_SWAP,
|
||||
ptl_dt);
|
||||
} else if (MPI_NO_OP == op) {
|
||||
ptl_handle_md_t md_h;
|
||||
void *md_base;
|
||||
ptl_size_t md_offset;
|
||||
|
||||
ompi_osc_portals4_get_md(result_addr, module->md_h, &md_h, &md_base);
|
||||
md_offset = ((char*) result_addr - (char*) md_base);
|
||||
md_offset = (ptl_size_t) result_addr;
|
||||
|
||||
ret = PtlGet(md_h,
|
||||
ret = PtlGet(module->md_h,
|
||||
md_offset,
|
||||
length,
|
||||
peer,
|
||||
@ -1122,21 +1071,17 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr,
|
||||
offset,
|
||||
NULL);
|
||||
} else {
|
||||
ptl_handle_md_t result_md_h, origin_md_h;
|
||||
void *result_md_base, *origin_md_base;
|
||||
ptl_size_t result_md_offset, origin_md_offset;
|
||||
|
||||
ret = ompi_osc_portals4_get_op(op, &ptl_op);
|
||||
if (OMPI_SUCCESS != ret) return ret;
|
||||
|
||||
ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
|
||||
result_md_offset = ((char*) result_addr - (char*) result_md_base);
|
||||
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
|
||||
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
|
||||
result_md_offset = (ptl_size_t) result_addr;
|
||||
origin_md_offset = (ptl_size_t) origin_addr;
|
||||
|
||||
ret = PtlFetchAtomic(result_md_h,
|
||||
ret = PtlFetchAtomic(module->md_h,
|
||||
result_md_offset,
|
||||
origin_md_h,
|
||||
module->md_h,
|
||||
origin_md_offset,
|
||||
length,
|
||||
peer,
|
||||
|
@ -441,64 +441,12 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
goto error;
|
||||
}
|
||||
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
{
|
||||
int i;
|
||||
int num_mds = ompi_mtl_portals4_get_num_mds();
|
||||
ptl_size_t size = 1ULL << OPAL_PORTALS4_MAX_MD_SIZE;
|
||||
ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2;
|
||||
|
||||
module->md_h = malloc(sizeof(ptl_handle_md_t) * num_mds);
|
||||
if (NULL == module->md_h) {
|
||||
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
goto error;
|
||||
}
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
module->md_h[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
module->req_md_h = malloc(sizeof(ptl_handle_md_t) * num_mds);
|
||||
if (NULL == module->req_md_h) {
|
||||
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
goto error;
|
||||
}
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
module->req_md_h[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
md.start = (char*) (offset_unit * i);
|
||||
md.length = (i - 1 == num_mds) ? size / 2 : size;
|
||||
|
||||
md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
|
||||
md.eq_handle = mca_osc_portals4_component.matching_eq_h;
|
||||
md.ct_handle = module->ct_h;
|
||||
ret = PtlMDBind(module->ni_h, &md, &module->md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
|
||||
md.eq_handle = mca_osc_portals4_component.matching_eq_h;
|
||||
md.ct_handle = module->ct_h;
|
||||
ret = PtlMDBind(module->ni_h, &md, &module->req_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
#else
|
||||
md.start = 0;
|
||||
md.length = PTL_SIZE_MAX;
|
||||
md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
|
||||
md.eq_handle = mca_osc_portals4_component.matching_eq_h;
|
||||
md.ct_handle = module->ct_h;
|
||||
ret = PtlMDBind(module->ni_h, &md, &module->md_h[0]);
|
||||
ret = PtlMDBind(module->ni_h, &md, &module->md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
@ -511,18 +459,17 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
|
||||
md.eq_handle = mca_osc_portals4_component.matching_eq_h;
|
||||
md.ct_handle = module->ct_h;
|
||||
ret = PtlMDBind(module->ni_h, &md, &module->req_md_h[0]);
|
||||
ret = PtlMDBind(module->ni_h, &md, &module->req_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (MPI_WIN_FLAVOR_DYNAMIC == flavor) {
|
||||
me.start = 0;
|
||||
me.length = SIZE_MAX;
|
||||
me.length = PTL_SIZE_MAX;
|
||||
} else {
|
||||
me.start = *base;
|
||||
me.length = size;
|
||||
@ -619,12 +566,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
/* BWB: FIX ME: This is all wrong... */
|
||||
if (0 != module->ct_h) PtlCTFree(module->ct_h);
|
||||
if (0 != module->data_me_h) PtlMEUnlink(module->data_me_h);
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
/* BWB: FIX ME */
|
||||
#else
|
||||
if (0 != module->req_md_h) PtlMDRelease(module->req_md_h[0]);
|
||||
if (0 != module->md_h) PtlMDRelease(module->md_h[0]);
|
||||
#endif
|
||||
if (0 != module->req_md_h) PtlMDRelease(module->req_md_h);
|
||||
if (0 != module->md_h) PtlMDRelease(module->md_h);
|
||||
if (NULL != module->comm) ompi_comm_free(&module->comm);
|
||||
if (NULL != module) free(module);
|
||||
|
||||
@ -659,12 +602,8 @@ ompi_osc_portals4_free(struct ompi_win_t *win)
|
||||
|
||||
/* cleanup */
|
||||
PtlMEUnlink(module->data_me_h);
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
/* BWB: FIX ME */
|
||||
#else
|
||||
PtlMDRelease(module->md_h[0]);
|
||||
PtlMDRelease(module->req_md_h[0]);
|
||||
#endif
|
||||
PtlMDRelease(module->md_h);
|
||||
PtlMDRelease(module->req_md_h);
|
||||
PtlCTFree(module->ct_h);
|
||||
if (NULL != module->disp_units) free(module->disp_units);
|
||||
ompi_comm_free(&module->comm);
|
||||
|
@ -44,18 +44,13 @@ lk_cas64(ompi_osc_portals4_module_t *module,
|
||||
{
|
||||
int ret;
|
||||
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
|
||||
ptl_handle_md_t result_md_h, write_md_h;
|
||||
void *result_base, *write_base;
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
|
||||
ompi_osc_portals4_get_md(result_val, module->md_h, &result_md_h, &result_base);
|
||||
ompi_osc_portals4_get_md(&write_val, module->md_h, &write_md_h, &write_base);
|
||||
|
||||
ret = PtlSwap(result_md_h,
|
||||
(char*) result_val - (char*) result_base,
|
||||
write_md_h,
|
||||
(char*) &write_val - (char*) write_base,
|
||||
ret = PtlSwap(module->md_h,
|
||||
(ptl_size_t) result_val,
|
||||
module->md_h,
|
||||
(ptl_size_t) &write_val,
|
||||
sizeof(int64_t),
|
||||
ompi_osc_portals4_get_peer(module, target),
|
||||
module->pt_idx,
|
||||
@ -82,15 +77,11 @@ lk_write64(ompi_osc_portals4_module_t *module,
|
||||
{
|
||||
int ret;
|
||||
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
|
||||
ptl_handle_md_t md_h;
|
||||
void *base;
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
|
||||
ompi_osc_portals4_get_md(&write_val, module->md_h, &md_h, &base);
|
||||
|
||||
ret = PtlPut(md_h,
|
||||
(char*) &write_val - (char*) base,
|
||||
ret = PtlPut(module->md_h,
|
||||
(ptl_size_t) &write_val,
|
||||
sizeof(int64_t),
|
||||
PTL_ACK_REQ,
|
||||
ompi_osc_portals4_get_peer(module, target),
|
||||
@ -116,18 +107,13 @@ lk_add64(ompi_osc_portals4_module_t *module,
|
||||
{
|
||||
int ret;
|
||||
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
|
||||
ptl_handle_md_t result_md_h, write_md_h;
|
||||
void *result_base, *write_base;
|
||||
|
||||
(void)opal_atomic_add_64(&module->opcount, 1);
|
||||
|
||||
ompi_osc_portals4_get_md(result_val, module->md_h, &result_md_h, &result_base);
|
||||
ompi_osc_portals4_get_md(&write_val, module->md_h, &write_md_h, &write_base);
|
||||
|
||||
ret = PtlFetchAtomic(result_md_h,
|
||||
(char*) result_val - (char*) result_base,
|
||||
write_md_h,
|
||||
(char*) &write_val - (char*) write_base,
|
||||
ret = PtlFetchAtomic(module->md_h,
|
||||
(ptl_size_t) result_val,
|
||||
module->md_h,
|
||||
(ptl_size_t) &write_val,
|
||||
sizeof(int64_t),
|
||||
ompi_osc_portals4_get_peer(module, target),
|
||||
module->pt_idx,
|
||||
|
@ -139,52 +139,7 @@ btl_portals4_init_interface(void)
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
"PtlMDBind (zero-length md=%d) OK for NI %d", portals4_btl->zero_md_h, interface));
|
||||
|
||||
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
|
||||
to have a single MD across all of memory */
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
{
|
||||
int i;
|
||||
int num_mds = mca_btl_portals4_get_num_mds();
|
||||
ptl_size_t size = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) - 1;
|
||||
ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2;
|
||||
|
||||
portals4_btl->send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
|
||||
if (NULL == portals4_btl->send_md_hs) {
|
||||
opal_output_verbose(1, opal_btl_base_framework.framework_output,
|
||||
"%s:%d: Error allocating MD array",
|
||||
__FILE__, __LINE__);
|
||||
ret = OPAL_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
goto error;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
portals4_btl->send_md_hs[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
md.start = (char*) (offset_unit * i);
|
||||
md.length = (i - 1 == num_mds) ? size / 2 : size;
|
||||
md.options = 0;
|
||||
md.eq_handle = portals4_btl->recv_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
opal_output_verbose(50, opal_btl_base_framework.framework_output,
|
||||
"Binding md from %p of length %lx",
|
||||
md.start, md.length);
|
||||
|
||||
ret = PtlMDBind(portals4_btl->portals_ni_h,
|
||||
&md,
|
||||
&portals4_btl->send_md_hs[i]);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, opal_btl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed for NI %d: %d\n",
|
||||
__FILE__, __LINE__, interface, ret);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlMDBind (all memory) OK for NI %d\n", interface));
|
||||
}
|
||||
#else
|
||||
/* Bind MD across all memory */
|
||||
md.start = 0;
|
||||
md.length = PTL_SIZE_MAX;
|
||||
md.options = 0;
|
||||
@ -200,7 +155,6 @@ btl_portals4_init_interface(void)
|
||||
__FILE__, __LINE__, interface, ret);
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Handle long overflows */
|
||||
me.start = NULL;
|
||||
@ -653,27 +607,10 @@ void mca_btl_portals4_free_module(mca_btl_portals4_module_t *portals4_btl)
|
||||
mca_btl_portals4_component_progress();
|
||||
}
|
||||
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
if (NULL != portals4_btl->send_md_hs) {
|
||||
int i;
|
||||
int num_mds = mca_btl_portals4_get_num_mds();
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
if (!PtlHandleIsEqual(portals4_btl->send_md_hs[i], PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(portals4_btl->send_md_hs[i]);
|
||||
portals4_btl->send_md_hs[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
free(portals4_btl->send_md_hs);
|
||||
portals4_btl->send_md_hs = NULL;
|
||||
}
|
||||
#else
|
||||
if (!PtlHandleIsEqual(portals4_btl->send_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(portals4_btl->send_md_h);
|
||||
portals4_btl->send_md_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
#endif
|
||||
if (!PtlHandleIsEqual(portals4_btl->zero_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(portals4_btl->zero_md_h);
|
||||
portals4_btl->zero_md_h = PTL_INVALID_HANDLE;
|
||||
|
@ -120,12 +120,8 @@ struct mca_btl_portals4_module_t {
|
||||
/** MD handle for sending ACKS */
|
||||
ptl_handle_md_t zero_md_h;
|
||||
|
||||
/** Send MD handle(s). Use opal_mtl_portals4_get_md() to get the right md */
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
ptl_handle_md_t *send_md_hs;
|
||||
#else
|
||||
/** Send MD handle */
|
||||
ptl_handle_md_t send_md_h;
|
||||
#endif
|
||||
|
||||
/** long message receive overflow ME. Persistent ME, first in
|
||||
overflow list on the recv_idx portal table. */
|
||||
@ -177,36 +173,6 @@ typedef struct mca_btl_portals4_module_t mca_btl_portals4_module_t;
|
||||
|
||||
#define REQ_BTL_TABLE_ID 2
|
||||
|
||||
/*
|
||||
* See note in ompi/mtl/portals4/mtl_portals4.h for how we deal with
|
||||
* platforms that don't allow us to create an MD that covers all of
|
||||
* memory.
|
||||
*/
|
||||
static inline void
|
||||
opal_btl_portals4_get_md(const void *ptr, ptl_handle_md_t *md_h, void **base_ptr, mca_btl_portals4_module_t *portals4_btl)
|
||||
{
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
int mask = (1ULL << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1)) - 1;
|
||||
int which = (((uintptr_t) ptr) >> (OPAL_PORTALS4_MAX_MD_SIZE - 1)) & mask;
|
||||
*md_h = portals4_btl->send_md_hs[which];
|
||||
*base_ptr = (void*) (which * (1ULL << (OPAL_PORTALS4_MAX_MD_SIZE - 1)));
|
||||
#else
|
||||
*md_h = portals4_btl->send_md_h;
|
||||
*base_ptr = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static inline int
|
||||
mca_btl_portals4_get_num_mds(void)
|
||||
{
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
return (1 << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1));
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
int mca_btl_portals4_component_progress(void);
|
||||
void mca_btl_portals4_free_module(mca_btl_portals4_module_t *portals4_btl);
|
||||
|
||||
|
@ -242,11 +242,7 @@ mca_btl_portals4_component_open(void)
|
||||
|
||||
mca_btl_portals4_module.recv_eq_h = PTL_EQ_NONE;
|
||||
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
mca_btl_portals4_module.send_md_hs = NULL;
|
||||
#else
|
||||
mca_btl_portals4_module.send_md_h = PTL_INVALID_HANDLE;
|
||||
#endif
|
||||
|
||||
mca_btl_portals4_module.portals_ni_h = PTL_INVALID_HANDLE;
|
||||
mca_btl_portals4_module.zero_md_h = PTL_INVALID_HANDLE;
|
||||
|
@ -36,9 +36,6 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
|
||||
mca_btl_portals4_frag_t *frag = (mca_btl_portals4_frag_t*) descriptor;
|
||||
ptl_match_bits_t match_bits, msglen_type;
|
||||
ptl_size_t put_length;
|
||||
int64_t offset;
|
||||
ptl_handle_md_t md_h;
|
||||
void *base;
|
||||
int ret;
|
||||
|
||||
frag->endpoint = endpoint;
|
||||
@ -51,9 +48,6 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
|
||||
|
||||
BTL_PORTALS4_SET_SEND_BITS(match_bits, 0, 0, tag, msglen_type);
|
||||
|
||||
opal_btl_portals4_get_md(frag->segments[0].base.seg_addr.pval, &md_h, &base, portals4_btl);
|
||||
offset = (ptl_size_t) ((char*) frag->segments[0].base.seg_addr.pval - (char*) base);
|
||||
|
||||
/* reserve space in the event queue for rdma operations immediately */
|
||||
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
|
||||
portals4_btl->portals_max_outstanding_ops) {
|
||||
@ -71,8 +65,8 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
|
||||
(void*)frag, endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag,
|
||||
put_length, (uint64_t)match_bits));
|
||||
|
||||
ret = PtlPut(md_h,
|
||||
(ptl_size_t) offset,
|
||||
ret = PtlPut(portals4_btl->send_md_h,
|
||||
(ptl_size_t) frag->segments[0].base.seg_addr.pval,
|
||||
put_length, /* fragment length */
|
||||
(mca_btl_portals4_component.portals_need_ack ? PTL_ACK_REQ : PTL_NO_ACK_REQ),
|
||||
endpoint->ptl_proc,
|
||||
@ -85,8 +79,10 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
|
||||
opal_output(opal_btl_base_framework.framework_output, "mca_btl_portals4_send: PtlPut failed with error %d", ret);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlPut frag=%p rank=%x pid=%x tag=%x addr=%p len=%ld match_bits=%lx\n",
|
||||
(void*)frag, endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag, (void *)offset, put_length, (uint64_t)match_bits));
|
||||
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
|
||||
"PtlPut frag=%p rank=%x pid=%x tag=%x addr=%p len=%ld match_bits=%lx",
|
||||
(void*)frag, endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag,
|
||||
(void *)frag->segments[0].base.seg_addr.pval, put_length, (uint64_t)match_bits));
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user