1
1

portals4: use a single Memory Descriptor to cover all of memory

In days past, some implementations of Portals4 could not cover all
of memory with a single Memory Descriptor, so multiple large
overlapping Memory Descriptors were created.  Because none of the
current implementations has this limitation (and no future
implementation should either), this commit removes the overlapping
Memory Descriptor code.
Этот коммит содержится в:
Todd Kordenbrock 2015-05-11 11:25:57 -05:00
родитель 8497a6a140
Коммит 9df163f116
13 изменённых файлов: 107 добавлений и 537 удалений

Просмотреть файл

@ -127,67 +127,22 @@ portals4_init_interface(void)
goto error;
}
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
to have a single MD across all of memory */
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
{
int i;
int num_mds = ompi_mtl_portals4_get_num_mds();
ptl_size_t size = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) - 1;
ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2;
/* Bind MD across all memory */
md.start = 0;
md.length = PTL_SIZE_MAX;
md.options = 0;
md.eq_handle = ompi_mtl_portals4.send_eq_h;
md.ct_handle = PTL_CT_NONE;
ompi_mtl_portals4.send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
if (NULL == ompi_mtl_portals4.send_md_hs) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: Error allocating MD array",
__FILE__, __LINE__);
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
goto error;
}
for (i = 0 ; i < num_mds ; ++i) {
ompi_mtl_portals4.send_md_hs[i] = PTL_INVALID_HANDLE;
}
for (i = 0 ; i < num_mds ; ++i) {
md.start = (char*) (offset_unit * i);
md.length = (i - 1 == num_mds) ? size / 2 : size;
md.options = 0;
md.eq_handle = ompi_mtl_portals4.send_eq_h;
md.ct_handle = PTL_CT_NONE;
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
"Binding md from %p of length %lx",
md.start, md.length);
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ompi_mtl_portals4.send_md_hs[i]);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
}
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ompi_mtl_portals4.send_md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
#else
md.start = 0;
md.length = PTL_SIZE_MAX;
md.options = 0;
md.eq_handle = ompi_mtl_portals4.send_eq_h;
md.ct_handle = PTL_CT_NONE;
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ompi_mtl_portals4.send_md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
#endif
/* Handle long overflows */
me.start = NULL;
@ -255,24 +210,9 @@ portals4_init_interface(void)
if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
}
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
if (NULL != ompi_mtl_portals4.send_md_hs) {
int i;
int num_mds = ompi_mtl_portals4_get_num_mds();
for (i = 0 ; i < num_mds ; ++i) {
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_hs[i], PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
}
}
free(ompi_mtl_portals4.send_md_hs);
}
#else
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.send_md_h);
}
#endif
if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
}
@ -457,20 +397,7 @@ ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl)
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
{
int i;
int num_mds = ompi_mtl_portals4_get_num_mds();
for (i = 0 ; i < num_mds ; ++i) {
PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
}
free(ompi_mtl_portals4.send_md_hs);
}
#else
PtlMDRelease(ompi_mtl_portals4.send_md_h);
#endif
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);

Просмотреть файл

@ -76,12 +76,8 @@ struct mca_mtl_portals4_module_t {
/** MD handle for sending ACKS */
ptl_handle_md_t zero_md_h;
/** Send MD handle(s). Use ompi_mtl_portals4_get_md() to get the right md */
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
ptl_handle_md_t *send_md_hs;
#else
/** Send MD handle */
ptl_handle_md_t send_md_h;
#endif
/** long message receive overflow ME. Persistent ME, first in
overflow list on the recv_idx portal table. */
@ -212,64 +208,6 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4;
#define MTL_PORTALS4_IS_SYNC_MSG(hdr_data) \
(0 != (MTL_PORTALS4_SYNC_MSG & hdr_data))
/*
* Not all implementations of Portals 4 support binding a memory
* descriptor which covers all of memory, but all support covering a
* large fraction of memory. Therefore, rather than working around
* the issue by pinning per message, we use a number of memory
* descriptors to cover all of memory. As long as the maximum memory
* descriptor is a large fraction of the user virtual address space
* (like 46 bit MDs on a platform with 47 bits of user virtual address
* space), this works fine.
*
* Our scheme is to create N memory descriptors which contiguously
* cover the entire user address space, then another N-1 contiguous
* memory descriptors offset by 1/2 the size of the MD, then a final
* memory descriptor of 1/2 the size of the other MDs covering the top
* of the memory space, to avoid if statements in the critical path. This
* scheme allows for a maximum message size of 1/2 the size of the MD
* without ever crossing an MD boundary. Also, because MD sizes are
* always on a power of 2 in this scheme, computing the offsets and MD
* selection are quick, using only bit shift and mask.
*
* ompi_mtl_portals4_get_md() relies heavily on compiler constant folding.
* "mask" can be folded into a constant. "which" folds into a shift of a
* register by a constant number of bits, followed by a mask with a
* constant (the input is, unfortunately, not constant).
*
* In the case where an MD can cover all of memory,
* ompi_mtl_portals4_get_md() will be compiled into two assignments.
* Assuming the function is inlined (and it certainly should be), the two
* assignments should be optimized into register assignments for the
* Portals call relatively easily.
*/
static inline void
ompi_mtl_portals4_get_md(const void *ptr, ptl_handle_md_t *md_h, void **base_ptr)
{
    /* Hands back, through md_h, the send MD to use for ptr and, through
     * base_ptr, the address that offsets into that MD are measured from. */
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
    /* Consecutive slots begin half an MD size apart, so the slot index is
     * the address shifted down by (MD size - 1) bits, limited to the
     * number of slots by the mask. */
    int slot_mask = (1ULL << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1)) - 1;
    uintptr_t addr = (uintptr_t) ptr;
    int slot = (addr >> (OPAL_PORTALS4_MAX_MD_SIZE - 1)) & slot_mask;

    *md_h = ompi_mtl_portals4.send_md_hs[slot];
    *base_ptr = (void*) (slot * (1ULL << (OPAL_PORTALS4_MAX_MD_SIZE - 1)));
#else
    /* A single MD is used for every address, with a zero base, so the
     * offset is simply the address itself. */
    *md_h = ompi_mtl_portals4.send_md_h;
    *base_ptr = 0;
#endif
}
/*
 * Number of send memory descriptors the MTL uses: a power of two when
 * the maximum MD size is smaller than the virtual address size,
 * otherwise exactly one.
 */
static inline int
ompi_mtl_portals4_get_num_mds(void)
{
    int md_count = 1;

#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
    md_count = (1 << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1));
#endif

    return md_count;
}
/* MTL interface functions */
extern int ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl);

Просмотреть файл

@ -225,12 +225,6 @@ ompi_mtl_portals4_component_open(void)
ompi_mtl_portals4.recv_eq_h = PTL_INVALID_HANDLE;
ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE;
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
ompi_mtl_portals4.send_md_hs = NULL;
#else
ompi_mtl_portals4.send_md_h = PTL_INVALID_HANDLE;
#endif
ompi_mtl_portals4.long_overflow_me_h = PTL_INVALID_HANDLE;
ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL;
ompi_mtl_portals4.read_idx = (ptl_pt_index_t) ~0UL;
@ -485,3 +479,4 @@ ompi_mtl_portals4_progress(void)
return count;
}

Просмотреть файл

@ -184,8 +184,6 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
ptl_match_bits_t match_bits;
ptl_me_t me;
ptl_hdr_data_t hdr_data;
ptl_handle_md_t md_h;
void *base;
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
MTL_PORTALS4_SHORT_MSG);
@ -233,23 +231,20 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
ptl_request->opcount, hdr_data, match_bits));
}
ompi_mtl_portals4_get_md(start, &md_h, &base);
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Send %lu, start: %p, base: %p, offset: %lx",
ptl_request->opcount, start, base,
(ptl_size_t) ((char*) start - (char*) base)));
"Send %lu, start: %p",
ptl_request->opcount, start));
ret = PtlPut(md_h,
(ptl_size_t) ((char*) start - (char*) base),
ret = PtlPut(ompi_mtl_portals4.send_md_h,
(ptl_size_t) start,
length,
PTL_ACK_REQ,
ptl_proc,
ompi_mtl_portals4.recv_idx,
match_bits,
0,
PTL_ACK_REQ,
ptl_proc,
ompi_mtl_portals4.recv_idx,
match_bits,
0,
ptl_request,
hdr_data);
hdr_data);
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlPut failed: %d",
@ -274,8 +269,6 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag,
ptl_me_t me;
ptl_hdr_data_t hdr_data;
ptl_size_t put_length;
ptl_handle_md_t md_h;
void *base;
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
MTL_PORTALS4_LONG_MSG);
@ -316,10 +309,8 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag,
put_length = (rndv == ompi_mtl_portals4.protocol) ?
(ptl_size_t) ompi_mtl_portals4.eager_limit : (ptl_size_t) length;
ompi_mtl_portals4_get_md(start, &md_h, &base);
ret = PtlPut(md_h,
(ptl_size_t) ((char*) start - (char*) base),
ret = PtlPut(ompi_mtl_portals4.send_md_h,
(ptl_size_t) start,
put_length,
PTL_ACK_REQ,
ptl_proc,

Просмотреть файл

@ -76,13 +76,8 @@ struct ompi_osc_portals4_module_t {
ptl_handle_ni_t ni_h; /* network interface used by this window */
ptl_pt_index_t pt_idx; /* portal table index used by this window (this will be same across window) */
ptl_handle_ct_t ct_h; /* Counting event handle used for completion in this window */
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
ptl_handle_md_t *md_h; /* memory descriptor describing all of memory used by this window */
ptl_handle_md_t *req_md_h; /* memory descriptor with event completion used by this window */
#else
ptl_handle_md_t md_h[1]; /* memory descriptor describing all of memory used by this window */
ptl_handle_md_t req_md_h[1]; /* memory descriptor with event completion used by this window */
#endif
ptl_handle_md_t md_h; /* memory descriptor describing all of memory used by this window */
ptl_handle_md_t req_md_h; /* memory descriptor with event completion used by this window */
ptl_handle_me_t data_me_h; /* data match list entry (MB are CID | OSC_PORTALS4_MB_DATA) */
ptl_handle_me_t control_me_h; /* match list entry for control data (node_state_t). Match bits are (CID | OSC_PORTALS4_MB_CONTROL). */
int64_t opcount;
@ -120,39 +115,6 @@ get_displacement(ompi_osc_portals4_module_t *module,
}
/*
* See note in ompi/mtl/portals4/mtl_portals4.h for how we deal with
* platforms that don't allow us to create an MD that covers all of
* memory.
*/
static inline void
ompi_osc_portals4_get_md(const void *ptr, const ptl_handle_md_t *array,
                         ptl_handle_md_t *md_h, void **base_ptr)
{
    /* Picks, from the caller-supplied handle array, the MD to use for ptr
     * (returned via md_h) and reports via base_ptr the address that
     * offsets into that MD are measured from. */
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
    /* The array index is the address shifted down by (MD size - 1) bits,
     * limited to the number of entries by the mask. */
    int index_mask = (1ULL << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1)) - 1;
    uintptr_t addr = (uintptr_t) ptr;
    int index = (addr >> (OPAL_PORTALS4_MAX_MD_SIZE - 1)) & index_mask;

    *md_h = array[index];
    *base_ptr = (void*) (index * (1ULL << (OPAL_PORTALS4_MAX_MD_SIZE - 1)));
#else
    /* Only the first entry is used and the base is zero, so the offset
     * is simply the address itself. */
    *md_h = array[0];
    *base_ptr = 0;
#endif
}
/*
 * Number of entries in each per-window MD handle array: a power of two
 * when the maximum MD size is smaller than the virtual address size,
 * otherwise exactly one.
 */
static inline int
ompi_osc_portals4_get_num_mds(void)
{
    int entries = 1;

#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
    entries = (1 << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1));
#endif

    return entries;
}
int ompi_osc_portals4_attach(struct ompi_win_t *win, void *base, size_t len);
int ompi_osc_portals4_detach(struct ompi_win_t *win, void *base);

Просмотреть файл

@ -74,8 +74,6 @@ ompi_osc_portals4_complete(struct ompi_win_t *win)
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
int ret, i, size;
ptl_handle_md_t md_h;
void *base;
ret = ompi_osc_portals4_complete_all(module);
if (ret != OMPI_SUCCESS) return ret;
@ -84,13 +82,11 @@ ompi_osc_portals4_complete(struct ompi_win_t *win)
module->state.post_count = 0;
PtlAtomicSync();
ompi_osc_portals4_get_md(&module->one, module->md_h, &md_h, &base);
size = ompi_group_size(module->start_group);
for (i = 0 ; i < size ; ++i) {
ret = PtlAtomic(md_h,
(ptl_size_t) ((char*) &module->one - (char*) base),
ret = PtlAtomic(module->md_h,
(ptl_size_t) &module->one,
sizeof(module->one),
PTL_ACK_REQ,
ompi_osc_portals4_get_peer_group(module->start_group, i),
@ -124,8 +120,6 @@ ompi_osc_portals4_post(struct ompi_group_t *group,
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
int ret, i, size;
ptl_handle_md_t md_h;
void *base;
if (0 == (assert & MPI_MODE_NOCHECK)) {
OBJ_RETAIN(group);
@ -134,12 +128,10 @@ ompi_osc_portals4_post(struct ompi_group_t *group,
module->state.complete_count = 0;
PtlAtomicSync();
ompi_osc_portals4_get_md(&module->one, module->md_h, &md_h, &base);
size = ompi_group_size(module->post_group);
for (i = 0 ; i < size ; ++i) {
ret = PtlAtomic(md_h,
(ptl_size_t) ((char*) &module->one - (char*) base),
ret = PtlAtomic(module->md_h,
(ptl_size_t) &module->one,
sizeof(module->one),
PTL_ACK_REQ,
ompi_osc_portals4_get_peer_group(module->post_group, i),

Просмотреть файл

@ -197,8 +197,6 @@ ompi_osc_portals4_rput(void *origin_addr,
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length;
size_t offset;
ptl_handle_md_t md_h;
void *md_base;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"rput: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
@ -228,9 +226,8 @@ ompi_osc_portals4_rput(void *origin_addr,
return ret;
}
length *= origin_count;
ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base);
ret = PtlPut(md_h,
(ptl_size_t) ((char*) origin_addr - (char*) md_base),
ret = PtlPut(module->req_md_h,
(ptl_size_t) origin_addr,
length,
PTL_ACK_REQ,
peer,
@ -267,8 +264,6 @@ ompi_osc_portals4_rget(void *origin_addr,
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length;
size_t offset;
ptl_handle_md_t md_h;
void *md_base;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"rget: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
@ -298,9 +293,8 @@ ompi_osc_portals4_rget(void *origin_addr,
return ret;
}
length *= origin_count;
ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base);
ret = PtlGet(md_h,
(ptl_size_t) ((char*) origin_addr - (char*) md_base),
ret = PtlGet(module->req_md_h,
(ptl_size_t) origin_addr,
length,
peer,
module->pt_idx,
@ -338,8 +332,6 @@ ompi_osc_portals4_raccumulate(void *origin_addr,
size_t offset;
ptl_op_t ptl_op;
ptl_datatype_t ptl_dt;
ptl_handle_md_t md_h;
void *md_base;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"raccumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s 0x%lx",
@ -372,8 +364,7 @@ ompi_osc_portals4_raccumulate(void *origin_addr,
length *= origin_count;
sent = 0;
ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base);
md_offset = ((char*) origin_addr - (char*) md_base);
md_offset = (ptl_size_t) origin_addr;
do {
size_t msg_length = MIN(module->atomic_max, length - sent);
@ -381,7 +372,7 @@ ompi_osc_portals4_raccumulate(void *origin_addr,
request->ops_expected++;
if (MPI_REPLACE == op) {
ret = PtlPut(md_h,
ret = PtlPut(module->req_md_h,
md_offset + sent,
msg_length,
PTL_ACK_REQ,
@ -398,7 +389,7 @@ ompi_osc_portals4_raccumulate(void *origin_addr,
ret = ompi_osc_portals4_get_op(op, &ptl_op);
if (OMPI_SUCCESS != ret) return ret;
ret = PtlAtomic(md_h,
ret = PtlAtomic(module->req_md_h,
offset + sent,
msg_length,
PTL_ACK_REQ,
@ -475,8 +466,6 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
sent = 0;
if (MPI_REPLACE == op) {
ptl_handle_md_t result_md_h, origin_md_h;
void *result_md_base, *origin_md_base;
ptl_size_t result_md_offset, origin_md_offset;
ret = ompi_datatype_type_size(origin_dt, &length);
@ -486,10 +475,8 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
}
length *= origin_count;
ompi_osc_portals4_get_md(result_addr, module->req_md_h, &result_md_h, &result_md_base);
result_md_offset = ((char*) result_addr - (char*) result_md_base);
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr;
do {
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
@ -497,9 +484,9 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
(void)opal_atomic_add_64(&module->opcount, 1);
request->ops_expected++;
ret = PtlSwap(result_md_h,
ret = PtlSwap(module->req_md_h,
result_md_offset + sent,
origin_md_h,
module->md_h,
origin_md_offset + sent,
msg_length,
peer,
@ -514,8 +501,6 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
sent += msg_length;
} while (sent < length);
} else if (MPI_NO_OP == op) {
ptl_handle_md_t md_h;
void *md_base;
ptl_size_t md_offset;
ret = ompi_datatype_type_size(target_dt, &length);
@ -525,8 +510,7 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
}
length *= target_count;
ompi_osc_portals4_get_md(result_addr, module->req_md_h, &md_h, &md_base);
md_offset = ((char*) result_addr - (char*) md_base);
md_offset = (ptl_size_t) result_addr;
do {
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
@ -534,7 +518,7 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
(void)opal_atomic_add_64(&module->opcount, 1);
request->ops_expected++;
ret = PtlGet(md_h,
ret = PtlGet(module->req_md_h,
md_offset + sent,
msg_length,
peer,
@ -545,8 +529,6 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
sent += msg_length;
} while (sent < length);
} else {
ptl_handle_md_t result_md_h, origin_md_h;
void *result_md_base, *origin_md_base;
ptl_size_t result_md_offset, origin_md_offset;
ret = ompi_datatype_type_size(origin_dt, &length);
@ -556,10 +538,8 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
}
length *= origin_count;
ompi_osc_portals4_get_md(result_addr, module->req_md_h, &result_md_h, &result_md_base);
result_md_offset = ((char*) result_addr - (char*) result_md_base);
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr;
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
if (OMPI_SUCCESS != ret) return ret;
@ -573,9 +553,9 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr,
(void)opal_atomic_add_64(&module->opcount, 1);
request->ops_expected++;
ret = PtlFetchAtomic(result_md_h,
ret = PtlFetchAtomic(module->req_md_h,
result_md_offset + sent,
origin_md_h,
module->md_h,
origin_md_offset + sent,
msg_length,
peer,
@ -615,8 +595,6 @@ ompi_osc_portals4_put(void *origin_addr,
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length;
size_t offset;
ptl_handle_md_t md_h;
void *md_base;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"put: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
@ -639,9 +617,8 @@ ompi_osc_portals4_put(void *origin_addr,
return ret;
}
length *= origin_count;
ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base);
ret = PtlPut(md_h,
(ptl_size_t) ((char*) origin_addr - (char*) md_base),
ret = PtlPut(module->md_h,
(ptl_size_t) origin_addr,
length,
PTL_ACK_REQ,
peer,
@ -675,8 +652,6 @@ ompi_osc_portals4_get(void *origin_addr,
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length;
size_t offset;
ptl_handle_md_t md_h;
void *md_base;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"get: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
@ -699,9 +674,8 @@ ompi_osc_portals4_get(void *origin_addr,
return ret;
}
length *= origin_count;
ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base);
ret = PtlGet(md_h,
(ptl_size_t) ((char*) origin_addr - (char*) md_base),
ret = PtlGet(module->md_h,
(ptl_size_t) origin_addr,
length,
peer,
module->pt_idx,
@ -736,8 +710,6 @@ ompi_osc_portals4_accumulate(void *origin_addr,
size_t offset;
ptl_op_t ptl_op;
ptl_datatype_t ptl_dt;
ptl_handle_md_t md_h;
void *md_base;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
@ -764,15 +736,14 @@ ompi_osc_portals4_accumulate(void *origin_addr,
length *= origin_count;
sent = 0;
ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base);
md_offset = ((char*) origin_addr - (char*) md_base);
md_offset = (ptl_size_t) origin_addr;
do {
size_t msg_length = MIN(module->atomic_max, length - sent);
(void)opal_atomic_add_64(&module->opcount, 1);
if (MPI_REPLACE == op) {
ret = PtlPut(md_h,
ret = PtlPut(module->md_h,
md_offset + sent,
msg_length,
PTL_ACK_REQ,
@ -789,7 +760,7 @@ ompi_osc_portals4_accumulate(void *origin_addr,
ret = ompi_osc_portals4_get_op(op, &ptl_op);
if (OMPI_SUCCESS != ret) return ret;
ret = PtlAtomic(md_h,
ret = PtlAtomic(module->md_h,
md_offset + sent,
msg_length,
PTL_ACK_REQ,
@ -858,8 +829,6 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
} else {
sent = 0;
if (MPI_REPLACE == op) {
ptl_handle_md_t result_md_h, origin_md_h;
void *result_md_base, *origin_md_base;
ptl_size_t result_md_offset, origin_md_offset;
ret = ompi_datatype_type_size(origin_dt, &length);
@ -868,19 +837,17 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
}
length *= origin_count;
ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
result_md_offset = ((char*) result_addr - (char*) result_md_base);
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr;
do {
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
(void)opal_atomic_add_64(&module->opcount, 1);
ret = PtlSwap(result_md_h,
ret = PtlSwap(module->md_h,
result_md_offset + sent,
origin_md_h,
module->md_h,
origin_md_offset + sent,
msg_length,
peer,
@ -895,8 +862,6 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
sent += msg_length;
} while (sent < length);
} else if (MPI_NO_OP == op) {
ptl_handle_md_t md_h;
void *md_base;
ptl_size_t md_offset;
ret = ompi_datatype_type_size(target_dt, &length);
@ -905,15 +870,14 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
}
length *= target_count;
ompi_osc_portals4_get_md(result_addr, module->md_h, &md_h, &md_base);
md_offset = ((char*) result_addr - (char*) md_base);
md_offset = (ptl_size_t) result_addr;
do {
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
(void)opal_atomic_add_64(&module->opcount, 1);
ret = PtlGet(md_h,
ret = PtlGet(module->md_h,
md_offset + sent,
msg_length,
peer,
@ -924,8 +888,6 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
sent += msg_length;
} while (sent < length);
} else {
ptl_handle_md_t result_md_h, origin_md_h;
void *result_md_base, *origin_md_base;
ptl_size_t result_md_offset, origin_md_offset;
ret = ompi_datatype_type_size(origin_dt, &length);
@ -934,10 +896,8 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
}
length *= origin_count;
ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
result_md_offset = ((char*) result_addr - (char*) result_md_base);
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr;
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
if (OMPI_SUCCESS != ret) return ret;
@ -951,9 +911,9 @@ ompi_osc_portals4_get_accumulate(void *origin_addr,
(void)opal_atomic_add_64(&module->opcount, 1);
ret = PtlFetchAtomic(result_md_h,
ret = PtlFetchAtomic(module->md_h,
result_md_offset + sent,
origin_md_h,
module->md_h,
origin_md_offset + sent,
msg_length,
peer,
@ -992,8 +952,6 @@ ompi_osc_portals4_compare_and_swap(void *origin_addr,
size_t length;
size_t offset;
ptl_datatype_t ptl_dt;
ptl_handle_md_t result_md_h, origin_md_h;
void *result_md_base, *origin_md_base;
ptl_size_t result_md_offset, origin_md_offset;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
@ -1014,16 +972,14 @@ ompi_osc_portals4_compare_and_swap(void *origin_addr,
assert(length < module->fetch_atomic_max);
ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
result_md_offset = ((char*) result_addr - (char*) result_md_base);
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr;
(void)opal_atomic_add_64(&module->opcount, 1);
ret = PtlSwap(result_md_h,
ret = PtlSwap(module->md_h,
result_md_offset,
origin_md_h,
module->md_h,
origin_md_offset,
length,
peer,
@ -1082,18 +1038,14 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr,
(void)opal_atomic_add_64(&module->opcount, 1);
if (MPI_REPLACE == op) {
ptl_handle_md_t result_md_h, origin_md_h;
void *result_md_base, *origin_md_base;
ptl_size_t result_md_offset, origin_md_offset;
ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
result_md_offset = ((char*) result_addr - (char*) result_md_base);
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr;
ret = PtlSwap(result_md_h,
ret = PtlSwap(module->md_h,
result_md_offset,
origin_md_h,
module->md_h,
origin_md_offset,
length,
peer,
@ -1106,14 +1058,11 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr,
PTL_SWAP,
ptl_dt);
} else if (MPI_NO_OP == op) {
ptl_handle_md_t md_h;
void *md_base;
ptl_size_t md_offset;
ompi_osc_portals4_get_md(result_addr, module->md_h, &md_h, &md_base);
md_offset = ((char*) result_addr - (char*) md_base);
md_offset = (ptl_size_t) result_addr;
ret = PtlGet(md_h,
ret = PtlGet(module->md_h,
md_offset,
length,
peer,
@ -1122,21 +1071,17 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr,
offset,
NULL);
} else {
ptl_handle_md_t result_md_h, origin_md_h;
void *result_md_base, *origin_md_base;
ptl_size_t result_md_offset, origin_md_offset;
ret = ompi_osc_portals4_get_op(op, &ptl_op);
if (OMPI_SUCCESS != ret) return ret;
ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
result_md_offset = ((char*) result_addr - (char*) result_md_base);
ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);
result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr;
ret = PtlFetchAtomic(result_md_h,
ret = PtlFetchAtomic(module->md_h,
result_md_offset,
origin_md_h,
module->md_h,
origin_md_offset,
length,
peer,

Просмотреть файл

@ -441,64 +441,12 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
goto error;
}
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
{
int i;
int num_mds = ompi_mtl_portals4_get_num_mds();
ptl_size_t size = 1ULL << OPAL_PORTALS4_MAX_MD_SIZE;
ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2;
module->md_h = malloc(sizeof(ptl_handle_md_t) * num_mds);
if (NULL == module->md_h) {
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
goto error;
}
for (i = 0 ; i < num_mds ; ++i) {
module->md_h[i] = PTL_INVALID_HANDLE;
}
module->req_md_h = malloc(sizeof(ptl_handle_md_t) * num_mds);
if (NULL == module->req_md_h) {
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
goto error;
}
for (i = 0 ; i < num_mds ; ++i) {
module->req_md_h[i] = PTL_INVALID_HANDLE;
}
for (i = 0 ; i < num_mds ; ++i) {
md.start = (char*) (offset_unit * i);
md.length = (i - 1 == num_mds) ? size / 2 : size;
md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
md.eq_handle = mca_osc_portals4_component.matching_eq_h;
md.ct_handle = module->ct_h;
ret = PtlMDBind(module->ni_h, &md, &module->md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
md.eq_handle = mca_osc_portals4_component.matching_eq_h;
md.ct_handle = module->ct_h;
ret = PtlMDBind(module->ni_h, &md, &module->req_md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
}
#else
md.start = 0;
md.length = PTL_SIZE_MAX;
md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
md.eq_handle = mca_osc_portals4_component.matching_eq_h;
md.ct_handle = module->ct_h;
ret = PtlMDBind(module->ni_h, &md, &module->md_h[0]);
ret = PtlMDBind(module->ni_h, &md, &module->md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
@ -511,18 +459,17 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
md.eq_handle = mca_osc_portals4_component.matching_eq_h;
md.ct_handle = module->ct_h;
ret = PtlMDBind(module->ni_h, &md, &module->req_md_h[0]);
ret = PtlMDBind(module->ni_h, &md, &module->req_md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
#endif
if (MPI_WIN_FLAVOR_DYNAMIC == flavor) {
me.start = 0;
me.length = SIZE_MAX;
me.length = PTL_SIZE_MAX;
} else {
me.start = *base;
me.length = size;
@ -619,12 +566,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
/* BWB: FIX ME: This is all wrong... */
if (0 != module->ct_h) PtlCTFree(module->ct_h);
if (0 != module->data_me_h) PtlMEUnlink(module->data_me_h);
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
/* BWB: FIX ME */
#else
if (0 != module->req_md_h) PtlMDRelease(module->req_md_h[0]);
if (0 != module->md_h) PtlMDRelease(module->md_h[0]);
#endif
if (0 != module->req_md_h) PtlMDRelease(module->req_md_h);
if (0 != module->md_h) PtlMDRelease(module->md_h);
if (NULL != module->comm) ompi_comm_free(&module->comm);
if (NULL != module) free(module);
@ -659,12 +602,8 @@ ompi_osc_portals4_free(struct ompi_win_t *win)
/* cleanup */
PtlMEUnlink(module->data_me_h);
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
/* BWB: FIX ME */
#else
PtlMDRelease(module->md_h[0]);
PtlMDRelease(module->req_md_h[0]);
#endif
PtlMDRelease(module->md_h);
PtlMDRelease(module->req_md_h);
PtlCTFree(module->ct_h);
if (NULL != module->disp_units) free(module->disp_units);
ompi_comm_free(&module->comm);

Просмотреть файл

@ -44,18 +44,13 @@ lk_cas64(ompi_osc_portals4_module_t *module,
{
int ret;
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
ptl_handle_md_t result_md_h, write_md_h;
void *result_base, *write_base;
(void)opal_atomic_add_64(&module->opcount, 1);
ompi_osc_portals4_get_md(result_val, module->md_h, &result_md_h, &result_base);
ompi_osc_portals4_get_md(&write_val, module->md_h, &write_md_h, &write_base);
ret = PtlSwap(result_md_h,
(char*) result_val - (char*) result_base,
write_md_h,
(char*) &write_val - (char*) write_base,
ret = PtlSwap(module->md_h,
(ptl_size_t) result_val,
module->md_h,
(ptl_size_t) &write_val,
sizeof(int64_t),
ompi_osc_portals4_get_peer(module, target),
module->pt_idx,
@ -82,15 +77,11 @@ lk_write64(ompi_osc_portals4_module_t *module,
{
int ret;
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
ptl_handle_md_t md_h;
void *base;
(void)opal_atomic_add_64(&module->opcount, 1);
ompi_osc_portals4_get_md(&write_val, module->md_h, &md_h, &base);
ret = PtlPut(md_h,
(char*) &write_val - (char*) base,
ret = PtlPut(module->md_h,
(ptl_size_t) &write_val,
sizeof(int64_t),
PTL_ACK_REQ,
ompi_osc_portals4_get_peer(module, target),
@ -116,18 +107,13 @@ lk_add64(ompi_osc_portals4_module_t *module,
{
int ret;
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
ptl_handle_md_t result_md_h, write_md_h;
void *result_base, *write_base;
(void)opal_atomic_add_64(&module->opcount, 1);
ompi_osc_portals4_get_md(result_val, module->md_h, &result_md_h, &result_base);
ompi_osc_portals4_get_md(&write_val, module->md_h, &write_md_h, &write_base);
ret = PtlFetchAtomic(result_md_h,
(char*) result_val - (char*) result_base,
write_md_h,
(char*) &write_val - (char*) write_base,
ret = PtlFetchAtomic(module->md_h,
(ptl_size_t) result_val,
module->md_h,
(ptl_size_t) &write_val,
sizeof(int64_t),
ompi_osc_portals4_get_peer(module, target),
module->pt_idx,

Просмотреть файл

@ -139,52 +139,7 @@ btl_portals4_init_interface(void)
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PtlMDBind (zero-length md=%d) OK for NI %d", portals4_btl->zero_md_h, interface));
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
to have a single MD across all of memory */
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
{
int i;
int num_mds = mca_btl_portals4_get_num_mds();
ptl_size_t size = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) - 1;
ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2;
portals4_btl->send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
if (NULL == portals4_btl->send_md_hs) {
opal_output_verbose(1, opal_btl_base_framework.framework_output,
"%s:%d: Error allocating MD array",
__FILE__, __LINE__);
ret = OPAL_ERR_TEMP_OUT_OF_RESOURCE;
goto error;
}
for (i = 0 ; i < num_mds ; ++i) {
portals4_btl->send_md_hs[i] = PTL_INVALID_HANDLE;
}
for (i = 0 ; i < num_mds ; ++i) {
md.start = (char*) (offset_unit * i);
md.length = (i - 1 == num_mds) ? size / 2 : size;
md.options = 0;
md.eq_handle = portals4_btl->recv_eq_h;
md.ct_handle = PTL_CT_NONE;
opal_output_verbose(50, opal_btl_base_framework.framework_output,
"Binding md from %p of length %lx",
md.start, md.length);
ret = PtlMDBind(portals4_btl->portals_ni_h,
&md,
&portals4_btl->send_md_hs[i]);
if (PTL_OK != ret) {
opal_output_verbose(1, opal_btl_base_framework.framework_output,
"%s:%d: PtlMDBind failed for NI %d: %d\n",
__FILE__, __LINE__, interface, ret);
goto error;
}
}
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlMDBind (all memory) OK for NI %d\n", interface));
}
#else
/* Bind MD across all memory */
md.start = 0;
md.length = PTL_SIZE_MAX;
md.options = 0;
@ -200,7 +155,6 @@ btl_portals4_init_interface(void)
__FILE__, __LINE__, interface, ret);
goto error;
}
#endif
/* Handle long overflows */
me.start = NULL;
@ -653,27 +607,10 @@ void mca_btl_portals4_free_module(mca_btl_portals4_module_t *portals4_btl)
mca_btl_portals4_component_progress();
}
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
if (NULL != portals4_btl->send_md_hs) {
int i;
int num_mds = mca_btl_portals4_get_num_mds();
for (i = 0 ; i < num_mds ; ++i) {
if (!PtlHandleIsEqual(portals4_btl->send_md_hs[i], PTL_INVALID_HANDLE)) {
PtlMDRelease(portals4_btl->send_md_hs[i]);
portals4_btl->send_md_hs[i] = PTL_INVALID_HANDLE;
}
}
free(portals4_btl->send_md_hs);
portals4_btl->send_md_hs = NULL;
}
#else
if (!PtlHandleIsEqual(portals4_btl->send_md_h, PTL_INVALID_HANDLE)) {
PtlMDRelease(portals4_btl->send_md_h);
portals4_btl->send_md_h = PTL_INVALID_HANDLE;
}
#endif
if (!PtlHandleIsEqual(portals4_btl->zero_md_h, PTL_INVALID_HANDLE)) {
PtlMDRelease(portals4_btl->zero_md_h);
portals4_btl->zero_md_h = PTL_INVALID_HANDLE;

Просмотреть файл

@ -120,12 +120,8 @@ struct mca_btl_portals4_module_t {
/** MD handle for sending ACKS */
ptl_handle_md_t zero_md_h;
/** Send MD handle(s). Use opal_btl_portals4_get_md() to get the right md */
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
ptl_handle_md_t *send_md_hs;
#else
/** Send MD handle */
ptl_handle_md_t send_md_h;
#endif
/** long message receive overflow ME. Persistent ME, first in
overflow list on the recv_idx portal table. */
@ -177,36 +173,6 @@ typedef struct mca_btl_portals4_module_t mca_btl_portals4_module_t;
#define REQ_BTL_TABLE_ID 2
/*
 * Select the send MD handle and the base address to use for the
 * buffer at ptr.
 *
 * See the note in ompi/mtl/portals4/mtl_portals4.h for how we deal with
 * platforms that don't allow us to create an MD that covers all of
 * memory.
 *
 * When OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE, the index
 * into send_md_hs is taken from the address bits starting at bit
 * (OPAL_PORTALS4_MAX_MD_SIZE - 1), and the base is that index times
 * 2^(OPAL_PORTALS4_MAX_MD_SIZE - 1).  Otherwise the single send_md_h is
 * returned with a base of 0.
 *
 * ptr          - address of the buffer
 * md_h         - out: MD handle selected for ptr
 * base_ptr     - out: base address that goes with *md_h
 * portals4_btl - module holding the send MD handle(s)
 */
static inline void
opal_btl_portals4_get_md(const void *ptr, ptl_handle_md_t *md_h, void **base_ptr, mca_btl_portals4_module_t *portals4_btl)
{
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
    /* low bit position of the MD index within an address */
    const int md_shift = OPAL_PORTALS4_MAX_MD_SIZE - 1;
    int index_mask = (1ULL << (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1)) - 1;
    int md_index = (((uintptr_t) ptr) >> md_shift) & index_mask;

    *md_h = portals4_btl->send_md_hs[md_index];
    *base_ptr = (void*) (md_index * (1ULL << md_shift));
#else
    /* one MD handle is used for every address, so the base is 0 */
    *md_h = portals4_btl->send_md_h;
    *base_ptr = 0;
#endif
}
/*
 * Return the number of send MDs: 1 unless
 * OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE, in which case
 * it is 2^(OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1).
 */
static inline int
mca_btl_portals4_get_num_mds(void)
{
    int md_count = 1;

#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
    md_count <<= (OPAL_PORTALS4_MAX_VA_SIZE - OPAL_PORTALS4_MAX_MD_SIZE + 1);
#endif

    return md_count;
}
int mca_btl_portals4_component_progress(void);
void mca_btl_portals4_free_module(mca_btl_portals4_module_t *portals4_btl);

Просмотреть файл

@ -242,11 +242,7 @@ mca_btl_portals4_component_open(void)
mca_btl_portals4_module.recv_eq_h = PTL_EQ_NONE;
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
mca_btl_portals4_module.send_md_hs = NULL;
#else
mca_btl_portals4_module.send_md_h = PTL_INVALID_HANDLE;
#endif
mca_btl_portals4_module.portals_ni_h = PTL_INVALID_HANDLE;
mca_btl_portals4_module.zero_md_h = PTL_INVALID_HANDLE;

Просмотреть файл

@ -36,9 +36,6 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
mca_btl_portals4_frag_t *frag = (mca_btl_portals4_frag_t*) descriptor;
ptl_match_bits_t match_bits, msglen_type;
ptl_size_t put_length;
int64_t offset;
ptl_handle_md_t md_h;
void *base;
int ret;
frag->endpoint = endpoint;
@ -51,9 +48,6 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
BTL_PORTALS4_SET_SEND_BITS(match_bits, 0, 0, tag, msglen_type);
opal_btl_portals4_get_md(frag->segments[0].base.seg_addr.pval, &md_h, &base, portals4_btl);
offset = (ptl_size_t) ((char*) frag->segments[0].base.seg_addr.pval - (char*) base);
/* reserve space in the event queue for rdma operations immediately */
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
portals4_btl->portals_max_outstanding_ops) {
@ -71,8 +65,8 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
(void*)frag, endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag,
put_length, (uint64_t)match_bits));
ret = PtlPut(md_h,
(ptl_size_t) offset,
ret = PtlPut(portals4_btl->send_md_h,
(ptl_size_t) frag->segments[0].base.seg_addr.pval,
put_length, /* fragment length */
(mca_btl_portals4_component.portals_need_ack ? PTL_ACK_REQ : PTL_NO_ACK_REQ),
endpoint->ptl_proc,
@ -85,8 +79,10 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
opal_output(opal_btl_base_framework.framework_output, "mca_btl_portals4_send: PtlPut failed with error %d", ret);
return OPAL_ERROR;
}
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlPut frag=%p rank=%x pid=%x tag=%x addr=%p len=%ld match_bits=%lx\n",
(void*)frag, endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag, (void *)offset, put_length, (uint64_t)match_bits));
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PtlPut frag=%p rank=%x pid=%x tag=%x addr=%p len=%ld match_bits=%lx",
(void*)frag, endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag,
(void *)frag->segments[0].base.seg_addr.pval, put_length, (uint64_t)match_bits));
return OPAL_SUCCESS;
}