* Update Portals 4 MTL's multi-md code to be a bit cleaner (no if statements
in the path) and not create MDs due to boundary crossing * Add the same logic to the Coll component This commit was SVN r28733.
Этот коммит содержится в:
родитель
315da8125d
Коммит
ecbbf888d3
@ -59,6 +59,36 @@ AC_DEFUN([OMPI_CHECK_PORTALS4],[
|
||||
LDFLAGS="$ompi_check_portals4_$1_save_LDFLAGS"
|
||||
LIBS="$ompi_check_portals4_$1_save_LIBS"
|
||||
|
||||
max_md_size=0
|
||||
AC_ARG_WITH([portals4-max-md-size],
|
||||
[AC_HELP_STRING([--with-portals4-max-md-size=SIZE],
|
||||
[Log base 2 of the maximum size in bytes of a memory descriptor. Should only be set for implementations which do not support binding all of virtual address space.])])
|
||||
AS_IF([test "$with_portals4_max_md_size" = "yes" -o "$with_portals4_max_md_size" = "no"],
|
||||
[AC_MSG_ERROR([--with-portals4-max-md-size requires an integer argument])],
|
||||
[AS_IF([test -n "$with_portals4_max_md_size"],
|
||||
[max_md_size="$with_portals4_max_md_size"])])
|
||||
AC_DEFINE_UNQUOTED([OMPI_PORTALS4_MAX_MD_SIZE], [$max_md_size],
|
||||
[Log base 2 of the maximum size in bytes of a memory descriptor. Set to 0 if MD can bind all of memory.])
|
||||
|
||||
max_va_size=0
|
||||
AC_ARG_WITH([portals4-max-va-size],
|
||||
[AC_HELP_STRING([--with-portals4-max-va-size=SIZE],
|
||||
[Log base 2 of the maximum size in bytes of the user virtual address space. Should only be set for implementations which do not support binding all of virtual address space.])])
|
||||
AS_IF([test "$with_portals4_max_va_size" = "yes" -o "$with_portals4_max_va_size" = "no"],
|
||||
[AC_MSG_ERROR([--with-portals4-max-va-size requires an integer argument])],
|
||||
[AS_IF([test -n "$with_portals4_max_va_size"],
|
||||
[max_va_size="$with_portals4_max_va_size"])])
|
||||
AC_DEFINE_UNQUOTED([OMPI_PORTALS4_MAX_VA_SIZE], [$max_va_size],
|
||||
[Log base 2 of the maximum size in bytes of the user virtual address space. Set to 0 if MD can bind all of memory.])
|
||||
|
||||
AS_IF([test \( $max_md_size -eq 0 -a $max_va_size -ne 0 \) -o \( $max_md_size -ne 0 -a $max_va_size -eq 0 \)],
|
||||
[AC_ERROR([If either --with-portals4-max-md-size or --with-portals4-max-va-size is set, both must be set.])])
|
||||
AS_IF([test $max_md_size -ge $max_va_size],
|
||||
[max_md_size=0
|
||||
max_va_size=0])
|
||||
AS_IF([test $max_md_size -ne 0 -a $max_va_size -ne 0],
|
||||
[AC_MSG_NOTICE([Portals 4 address space size: $max_md_size, $max_va_size])])
|
||||
|
||||
AS_IF([test "$ompi_check_portals4_happy" = "yes"],
|
||||
[$2],
|
||||
[AS_IF([test ! -z "$with_portals4" -a "$with_portals4" != "no"],
|
||||
|
@ -35,7 +35,12 @@ struct mca_coll_portals4_component_t {
|
||||
ptl_handle_eq_t eq_h;
|
||||
ptl_handle_me_t barrier_unex_me_h;
|
||||
ptl_handle_me_t finish_me_h;
|
||||
/** Send MD handle(s). Use ompi_coll_portals4_get_md() to get the right md */
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
ptl_handle_md_t *md_hs;
|
||||
#else
|
||||
ptl_handle_md_t md_h;
|
||||
#endif
|
||||
|
||||
ompi_free_list_t requests; /* request free list for the i collectives */
|
||||
};
|
||||
@ -66,7 +71,7 @@ struct ompi_coll_portals4_request_t;
|
||||
|
||||
#define COLL_PORTALS4_BARRIER 0x01
|
||||
|
||||
#define MTL_PORTALS4_SET_BITS(match_bits, contextid, eager, type, op_count) \
|
||||
#define COLL_PORTALS4_SET_BITS(match_bits, contextid, eager, type, op_count) \
|
||||
{ \
|
||||
match_bits = contextid; \
|
||||
match_bits = (match_bits << 1); \
|
||||
@ -103,6 +108,34 @@ ompi_coll_portals4_get_nchildren(int cube_dim, int hibit, int rank, int size)
|
||||
return guess;
|
||||
}
|
||||
|
||||
/*
|
||||
* See note in mtl/portals4/mtl_portals4.h for why this exists.
|
||||
*/
|
||||
static inline void
|
||||
ompi_coll_portals4_get_md(const void *ptr, ptl_handle_md_t *md_h, void **base_ptr)
|
||||
{
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
int mask = (1ULL << (OMPI_PORTALS4_MAX_VA_SIZE - OMPI_PORTALS4_MAX_MD_SIZE + 1)) - 1;
|
||||
int which = (((uintptr_t) ptr) >> (OMPI_PORTALS4_MAX_MD_SIZE - 1)) & mask;
|
||||
*md_h = mca_coll_portals4_component.md_hs[which];
|
||||
*base_ptr = (void*) (which * (1ULL << (OMPI_PORTALS4_MAX_MD_SIZE - 1)));
|
||||
#else
|
||||
*md_h = mca_coll_portals4_component.md_h;
|
||||
*base_ptr = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*
 * Return the number of entries in the component's send MD table:
 * 2^(MAX_VA_SIZE - MAX_MD_SIZE + 1) when the maximum MD size is smaller
 * than the virtual address space, otherwise 1 (a single MD handle).
 */
static inline int
ompi_coll_portals4_get_num_mds(void)
{
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
    return (1 << (OMPI_PORTALS4_MAX_VA_SIZE - OMPI_PORTALS4_MAX_MD_SIZE + 1));
#else
    return 1;
#endif
}
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
|
@ -35,6 +35,10 @@ ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
|
||||
ptl_me_t me;
|
||||
size_t count;
|
||||
ptl_match_bits_t match_bits;
|
||||
ptl_handle_md_t md_h;
|
||||
void *base;
|
||||
|
||||
ompi_coll_portals4_get_md(0, &md_h, &base);
|
||||
|
||||
count = opal_atomic_add_size_t(&portals4_module->barrier_count, 1);
|
||||
|
||||
@ -47,7 +51,7 @@ ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
|
||||
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
MTL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm),
|
||||
COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm),
|
||||
0, COLL_PORTALS4_BARRIER, count);
|
||||
|
||||
/* Build "tree" out of hypercube */
|
||||
@ -87,18 +91,18 @@ ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
|
||||
/* send to parent when children have sent to us */
|
||||
if (rank > 0) {
|
||||
int parent = rank & ~(1 << hibit);
|
||||
PtlTriggeredPut(mca_coll_portals4_component.md_h,
|
||||
0,
|
||||
0,
|
||||
PTL_NO_ACK_REQ,
|
||||
ompi_coll_portals4_get_peer(comm, parent),
|
||||
mca_coll_portals4_component.pt_idx,
|
||||
match_bits,
|
||||
0,
|
||||
NULL,
|
||||
0,
|
||||
ct_h,
|
||||
num_msgs);
|
||||
ret = PtlTriggeredPut(md_h,
|
||||
0,
|
||||
0,
|
||||
PTL_NO_ACK_REQ,
|
||||
ompi_coll_portals4_get_peer(comm, parent),
|
||||
mca_coll_portals4_component.pt_idx,
|
||||
match_bits,
|
||||
0,
|
||||
NULL,
|
||||
0,
|
||||
ct_h,
|
||||
num_msgs);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
|
||||
"%s:%d: PtlTriggeredPut failed: %d\n",
|
||||
@ -114,18 +118,18 @@ ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
|
||||
for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {
|
||||
int peer = rank | mask;
|
||||
if (peer < size) {
|
||||
PtlTriggeredPut(mca_coll_portals4_component.md_h,
|
||||
0,
|
||||
0,
|
||||
PTL_NO_ACK_REQ,
|
||||
ompi_coll_portals4_get_peer(comm, peer),
|
||||
mca_coll_portals4_component.pt_idx,
|
||||
match_bits,
|
||||
0,
|
||||
NULL,
|
||||
0,
|
||||
ct_h,
|
||||
num_msgs);
|
||||
ret = PtlTriggeredPut(md_h,
|
||||
0,
|
||||
0,
|
||||
PTL_NO_ACK_REQ,
|
||||
ompi_coll_portals4_get_peer(comm, peer),
|
||||
mca_coll_portals4_component.pt_idx,
|
||||
match_bits,
|
||||
0,
|
||||
NULL,
|
||||
0,
|
||||
ct_h,
|
||||
num_msgs);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
|
||||
"%s:%d: PtlTriggeredPut failed: %d\n",
|
||||
@ -177,6 +181,10 @@ ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
|
||||
size_t count;
|
||||
ptl_match_bits_t match_bits;
|
||||
ompi_coll_portals4_request_t *request;
|
||||
ptl_handle_md_t md_h;
|
||||
void *base;
|
||||
|
||||
ompi_coll_portals4_get_md(0, &md_h, &base);
|
||||
|
||||
OMPI_COLL_PORTALS4_REQUEST_ALLOC(comm, request);
|
||||
if (NULL == request) {
|
||||
@ -199,7 +207,7 @@ ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
|
||||
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
MTL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm),
|
||||
COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm),
|
||||
0, COLL_PORTALS4_BARRIER, count);
|
||||
|
||||
/* Build "tree" out of hypercube */
|
||||
@ -238,18 +246,19 @@ ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
|
||||
/* send to parent when children have sent to us */
|
||||
if (rank > 0) {
|
||||
int parent = rank & ~(1 << hibit);
|
||||
PtlTriggeredPut(mca_coll_portals4_component.md_h,
|
||||
0,
|
||||
0,
|
||||
PTL_NO_ACK_REQ,
|
||||
ompi_coll_portals4_get_peer(comm, parent),
|
||||
mca_coll_portals4_component.pt_idx,
|
||||
match_bits,
|
||||
0,
|
||||
NULL,
|
||||
0,
|
||||
request->ct_h,
|
||||
num_msgs);
|
||||
|
||||
ret = PtlTriggeredPut(md_h,
|
||||
0,
|
||||
0,
|
||||
PTL_NO_ACK_REQ,
|
||||
ompi_coll_portals4_get_peer(comm, parent),
|
||||
mca_coll_portals4_component.pt_idx,
|
||||
match_bits,
|
||||
0,
|
||||
NULL,
|
||||
0,
|
||||
request->ct_h,
|
||||
num_msgs);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
|
||||
"%s:%d: PtlTriggeredPut failed: %d\n",
|
||||
@ -265,18 +274,18 @@ ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
|
||||
for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {
|
||||
int peer = rank | mask;
|
||||
if (peer < size) {
|
||||
PtlTriggeredPut(mca_coll_portals4_component.md_h,
|
||||
0,
|
||||
0,
|
||||
PTL_NO_ACK_REQ,
|
||||
ompi_coll_portals4_get_peer(comm, peer),
|
||||
mca_coll_portals4_component.pt_idx,
|
||||
match_bits,
|
||||
0,
|
||||
NULL,
|
||||
0,
|
||||
request->ct_h,
|
||||
num_msgs);
|
||||
ret = PtlTriggeredPut(md_h,
|
||||
0,
|
||||
0,
|
||||
PTL_NO_ACK_REQ,
|
||||
ompi_coll_portals4_get_peer(comm, peer),
|
||||
mca_coll_portals4_component.pt_idx,
|
||||
match_bits,
|
||||
0,
|
||||
NULL,
|
||||
0,
|
||||
request->ct_h,
|
||||
num_msgs);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
|
||||
"%s:%d: PtlTriggeredPut failed: %d\n",
|
||||
@ -287,16 +296,18 @@ ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
|
||||
}
|
||||
|
||||
/* Send a put to self when we've received all our messages... */
|
||||
PtlPut(mca_coll_portals4_component.md_h,
|
||||
0,
|
||||
0,
|
||||
PTL_NO_ACK_REQ,
|
||||
ompi_coll_portals4_get_peer(comm, rank),
|
||||
mca_coll_portals4_component.finish_pt_idx,
|
||||
0,
|
||||
0,
|
||||
NULL,
|
||||
(uintptr_t) request);
|
||||
ret = PtlTriggeredPut(md_h,
|
||||
0,
|
||||
0,
|
||||
PTL_NO_ACK_REQ,
|
||||
ompi_coll_portals4_get_peer(comm, rank),
|
||||
mca_coll_portals4_component.finish_pt_idx,
|
||||
0,
|
||||
0,
|
||||
NULL,
|
||||
(uintptr_t) request,
|
||||
request->ct_h,
|
||||
num_msgs);
|
||||
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
|
||||
|
@ -74,12 +74,6 @@ mca_coll_portals4_component_t mca_coll_portals4_component = {
|
||||
portals4_init_query,
|
||||
portals4_comm_query
|
||||
},
|
||||
PTL_INVALID_HANDLE,
|
||||
-1,
|
||||
-1,
|
||||
PTL_INVALID_HANDLE,
|
||||
PTL_INVALID_HANDLE,
|
||||
PTL_INVALID_HANDLE
|
||||
};
|
||||
|
||||
|
||||
@ -102,6 +96,19 @@ static int
|
||||
portals4_open(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mca_coll_portals4_component.ni_h = PTL_INVALID_HANDLE;
|
||||
mca_coll_portals4_component.uid = PTL_UID_ANY;
|
||||
mca_coll_portals4_component.pt_idx = -1;
|
||||
mca_coll_portals4_component.finish_pt_idx = -1;
|
||||
mca_coll_portals4_component.eq_h = PTL_INVALID_HANDLE;
|
||||
mca_coll_portals4_component.barrier_unex_me_h = PTL_INVALID_HANDLE;
|
||||
mca_coll_portals4_component.finish_me_h = PTL_INVALID_HANDLE;
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
mca_coll_portals4_component.md_hs = NULL;
|
||||
#else
|
||||
mca_coll_portals4_component.md_h = PTL_INVALID_HANDLE;
|
||||
#endif
|
||||
|
||||
OBJ_CONSTRUCT(&mca_coll_portals4_component.requests, ompi_free_list_t);
|
||||
ret = ompi_free_list_init(&mca_coll_portals4_component.requests,
|
||||
@ -129,6 +136,25 @@ portals4_close(void)
|
||||
|
||||
OBJ_DESTRUCT(&mca_coll_portals4_component.requests);
|
||||
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
if (NULL != mca_coll_portals4_component.md_hs) {
|
||||
int i;
|
||||
int num_mds = ompi_coll_portals4_get_num_mds();
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
if (!PtlHandleIsEqual(mca_coll_portals4_component.md_hs[i], PTL_INVALID_HANDLE)) {
|
||||
ret = PtlMDRelease(mca_coll_portals4_component.md_hs[i]);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
|
||||
"%s:%d: PtlMDRelease failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(mca_coll_portals4_component.md_hs);
|
||||
}
|
||||
#else
|
||||
if (!PtlHandleIsEqual(mca_coll_portals4_component.md_h, PTL_INVALID_HANDLE)) {
|
||||
ret = PtlMDRelease(mca_coll_portals4_component.md_h);
|
||||
if (PTL_OK != ret) {
|
||||
@ -137,6 +163,7 @@ portals4_close(void)
|
||||
__FILE__, __LINE__, ret);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (!PtlHandleIsEqual(mca_coll_portals4_component.finish_me_h, PTL_INVALID_HANDLE)) {
|
||||
ret = PtlMEUnlink(mca_coll_portals4_component.finish_me_h);
|
||||
if (PTL_OK != ret) {
|
||||
@ -274,21 +301,62 @@ portals4_init_query(bool enable_progress_threads,
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* send space... */
|
||||
md.start = 0;
|
||||
md.length = PTL_SIZE_MAX;
|
||||
md.options = 0;
|
||||
md.eq_handle = PTL_EQ_NONE;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
ret = PtlMDBind(mca_coll_portals4_component.ni_h,
|
||||
&md,
|
||||
&mca_coll_portals4_component.md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
return OMPI_ERROR;
|
||||
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
|
||||
to have a single MD across all of memory */
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
{
|
||||
int i;
|
||||
int num_mds = ompi_coll_portals4_get_num_mds();
|
||||
ptl_size_t size = OMPI_PORTALS4_MAX_MD_SIZE;
|
||||
ptl_size_t offset_unit = OMPI_PORTALS4_MAX_MD_SIZE / 2;
|
||||
|
||||
mca_coll_portals4_component.md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
|
||||
if (NULL == mca_coll_portals4_component.md_hs) {
|
||||
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
|
||||
"%s:%d: Error allocating MD array",
|
||||
__FILE__, __LINE__);
|
||||
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
mca_coll_portals4_component.md_hs[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
md.start = (char*) (offset_unit * i);
|
||||
md.length = (i - 1 == num_mds) ? size / 2 : size;
|
||||
md.options = 0;
|
||||
md.eq_handle = PTL_EQ_NONE;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(mca_coll_portals4_component.ni_h,
|
||||
&md,
|
||||
&mca_coll_portals4_component.md_hs[i]);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
md.start = 0;
|
||||
md.length = PTL_SIZE_MAX;
|
||||
md.options = 0;
|
||||
md.eq_handle = PTL_EQ_NONE;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(mca_coll_portals4_component.ni_h,
|
||||
&md,
|
||||
&mca_coll_portals4_component.md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* setup finish ack ME */
|
||||
me.start = NULL;
|
||||
@ -326,7 +394,7 @@ portals4_init_query(bool enable_progress_threads,
|
||||
PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
|
||||
me.match_id.phys.nid = PTL_NID_ANY;
|
||||
me.match_id.phys.pid = PTL_PID_ANY;
|
||||
MTL_PORTALS4_SET_BITS(me.match_bits, 0, 0, COLL_PORTALS4_BARRIER, 0);
|
||||
COLL_PORTALS4_SET_BITS(me.match_bits, 0, 0, COLL_PORTALS4_BARRIER, 0);
|
||||
me.ignore_bits = COLL_PORTALS4_CID_MASK | COLL_PORTALS4_OP_COUNT_MASK;
|
||||
|
||||
ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
|
||||
|
@ -155,15 +155,20 @@ ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl)
|
||||
|
||||
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
|
||||
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
{
|
||||
int i;
|
||||
int num_mds = ompi_mtl_portals4_get_num_mds();
|
||||
|
||||
if (0 != ompi_mtl_portals4.fixed_md_h) {
|
||||
int i, fixed_md_nb;
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
|
||||
}
|
||||
|
||||
if (MEMORY_MAX_SIZE > ompi_mtl_portals4.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/ompi_mtl_portals4.fixed_md_distance;
|
||||
else fixed_md_nb = 1;
|
||||
for (i=0; i< fixed_md_nb; i++) PtlMDRelease(ompi_mtl_portals4.fixed_md_h[i]);
|
||||
free(ompi_mtl_portals4.fixed_md_h);
|
||||
free(ompi_mtl_portals4.send_md_hs);
|
||||
}
|
||||
#else
|
||||
PtlMDRelease(ompi_mtl_portals4.send_md_h);
|
||||
#endif
|
||||
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
|
||||
|
@ -32,9 +32,6 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
#define MEMORY_MAX_SIZE ((long int)1<<48)
|
||||
#define EXTENDED_ADDR (0xffff000000000000)
|
||||
|
||||
struct mca_mtl_portals4_send_request_t;
|
||||
|
||||
struct mca_mtl_portals4_module_t {
|
||||
@ -75,9 +72,12 @@ struct mca_mtl_portals4_module_t {
|
||||
/** MD handle for sending ACKS */
|
||||
ptl_handle_md_t zero_md_h;
|
||||
|
||||
/** Fixed MD handles covering all of memory for sending normal messages */
|
||||
ptl_handle_md_t *fixed_md_h;
|
||||
uint64_t fixed_md_distance;
|
||||
/** Send MD handle(s). Use ompi_mtl_portals4_get_md() to get the right md */
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
ptl_handle_md_t *send_md_hs;
|
||||
#else
|
||||
ptl_handle_md_t send_md_h;
|
||||
#endif
|
||||
|
||||
/** long message receive overflow ME. Persistent ME, first in
|
||||
overflow list on the recv_idx portal table. */
|
||||
@ -206,6 +206,64 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4;
|
||||
#define MTL_PORTALS4_IS_SYNC_MSG(hdr_data) \
|
||||
(0 != (MTL_PORTALS4_SYNC_MSG & hdr_data))
|
||||
|
||||
|
||||
/*
|
||||
* Not all implementations of Portals 4 support binding a memory
|
||||
* descriptor which covers all of memory, but all support covering a
|
||||
* large fraction of memory. Therefore, rather than working around
|
||||
* the issue by pinning per message, we use a number of memory
|
||||
* descriptors to cover all of memory. As long as the maximum memory
|
||||
* descriptor is a large fraction of the user virtual address space
|
||||
* (like 46 bit MDs on a platform with 47 bits of user virtual address
|
||||
* space), this works fine.
|
||||
*
|
||||
* Our scheme is to create N memory descriptors which contiguously
|
||||
* cover the entire user address space, then another N-1 contiguous
|
||||
* memory descriptors offset by 1/2 the size of the MD, then a final
|
||||
* memory descriptor of 1/2 the size of the other MDs covering the top
|
||||
* of the memory space, to avoid if statements in the critical path. This
|
||||
* scheme allows for a maximum message size of 1/2 the size of the MD
|
||||
* without ever crossing an MD boundary. Also, because MD sizes are
|
||||
* always on a power of 2 in this scheme, computing the offsets and MD
|
||||
* selection are quick, using only bit shift and mask.
|
||||
*
|
||||
* ompi_mtl_portals4_get_md() relies heavily on compiler constant folding.
|
||||
* "mask" can be constant folded into a constant. "which" compiler folds
|
||||
* into a bit shift of a register a constant number of times, then masked
|
||||
* by a constant (the input is, unfortunately, not constant).
|
||||
*
|
||||
* In the case where an MD can cover all of memory,
|
||||
* ompi_mtl_portals4_get_md() will be compiled into two assignments.
|
||||
* Assuming the function inlines (and it certainly should be), the two
|
||||
* assignments should be optimized into register assignments for the
|
||||
* Portals call relatively easily.
|
||||
*/
|
||||
static inline void
|
||||
ompi_mtl_portals4_get_md(const void *ptr, ptl_handle_md_t *md_h, void **base_ptr)
|
||||
{
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
int mask = (1ULL << (OMPI_PORTALS4_MAX_VA_SIZE - OMPI_PORTALS4_MAX_MD_SIZE + 1)) - 1;
|
||||
int which = (((uintptr_t) ptr) >> (OMPI_PORTALS4_MAX_MD_SIZE - 1)) & mask;
|
||||
*md_h = ompi_mtl_portals4.send_md_hs[which];
|
||||
*base_ptr = (void*) (which * (1ULL << (OMPI_PORTALS4_MAX_MD_SIZE - 1)));
|
||||
#else
|
||||
*md_h = ompi_mtl_portals4.send_md_h;
|
||||
*base_ptr = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*
 * Return the number of entries in the MTL's send MD table:
 * 2^(MAX_VA_SIZE - MAX_MD_SIZE + 1) when the maximum MD size is smaller
 * than the virtual address space, otherwise 1 (a single MD handle).
 */
static inline int
ompi_mtl_portals4_get_num_mds(void)
{
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
    return (1 << (OMPI_PORTALS4_MAX_VA_SIZE - OMPI_PORTALS4_MAX_MD_SIZE + 1));
#else
    return 1;
#endif
}
|
||||
|
||||
|
||||
/* MTL interface functions */
|
||||
extern int ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl);
|
||||
|
||||
|
@ -37,8 +37,6 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
|
||||
|
||||
OMPI_MODULE_DECLSPEC extern mca_mtl_base_component_2_0_0_t mca_mtl_portals4_component;
|
||||
|
||||
static unsigned int ompi_mtl_portals4_md_size_bit_width;
|
||||
|
||||
mca_mtl_base_component_2_0_0_t mca_mtl_portals4_component = {
|
||||
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
@ -136,18 +134,6 @@ ompi_mtl_portals4_component_register(void)
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_mtl_portals4.recv_queue_size);
|
||||
|
||||
ompi_mtl_portals4_md_size_bit_width = 48;
|
||||
(void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version,
|
||||
"md_size_bit_width",
|
||||
"Number of bits used to specify the length of an MD to the portals4 library",
|
||||
MCA_BASE_VAR_TYPE_UNSIGNED_INT,
|
||||
NULL,
|
||||
0,
|
||||
0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_mtl_portals4_md_size_bit_width);
|
||||
|
||||
ompi_mtl_portals4.protocol = eager;
|
||||
mca_base_var_enum_create("mtl_portals4_long_protocol", long_protocol_values, &new_enum);
|
||||
ret = mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version,
|
||||
@ -171,17 +157,10 @@ ompi_mtl_portals4_component_register(void)
|
||||
static int
|
||||
ompi_mtl_portals4_component_open(void)
|
||||
{
|
||||
unsigned int i;
|
||||
uint64_t fixed_md_nb;
|
||||
|
||||
ompi_mtl_portals4.base.mtl_request_size =
|
||||
sizeof(ompi_mtl_portals4_request_t) -
|
||||
sizeof(struct mca_mtl_request_t);
|
||||
|
||||
if (48 < ompi_mtl_portals4_md_size_bit_width) ompi_mtl_portals4_md_size_bit_width = 48;
|
||||
ompi_mtl_portals4.fixed_md_distance = (unsigned long int) 1<<ompi_mtl_portals4_md_size_bit_width;
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"fixed_md_distance=%16.16lx\n", ompi_mtl_portals4.fixed_md_distance);
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"Flow control: "
|
||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||
@ -218,14 +197,11 @@ ompi_mtl_portals4_component_open(void)
|
||||
ompi_mtl_portals4.recv_eq_h = PTL_INVALID_HANDLE;
|
||||
ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE;
|
||||
|
||||
if (MEMORY_MAX_SIZE > ompi_mtl_portals4.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/ompi_mtl_portals4.fixed_md_distance;
|
||||
else fixed_md_nb = 1;
|
||||
if (fixed_md_nb > 32) ompi_mtl_portals4.fixed_md_distance = 0;
|
||||
else {
|
||||
/* Allocate the md_h table */
|
||||
ompi_mtl_portals4.fixed_md_h = malloc(fixed_md_nb * sizeof(ptl_handle_md_t));
|
||||
for (i=0; i<fixed_md_nb; i++) ompi_mtl_portals4.fixed_md_h[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
ompi_mtl_portals4.send_md_hs = NULL;
|
||||
#else
|
||||
ompi_mtl_portals4.send_md_h = PTL_INVALID_HANDLE;
|
||||
#endif
|
||||
|
||||
ompi_mtl_portals4.long_overflow_me_h = PTL_INVALID_HANDLE;
|
||||
ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL;
|
||||
@ -369,35 +345,42 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* bind fixed md across all of memory */
|
||||
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
|
||||
to have a single MD across all of memory */
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
{
|
||||
int i;
|
||||
int num_mds = ompi_mtl_portals4_get_num_mds();
|
||||
ptl_size_t size = 1ULL << OMPI_PORTALS4_MAX_MD_SIZE;
|
||||
ptl_size_t offset_unit = (1ULL << OMPI_PORTALS4_MAX_MD_SIZE) / 2;
|
||||
|
||||
if (ompi_mtl_portals4.fixed_md_distance) {
|
||||
unsigned int i;
|
||||
uint64_t fixed_md_nb, fixed_md_distance;
|
||||
ompi_mtl_portals4.send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
|
||||
if (NULL == ompi_mtl_portals4.send_md_hs) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: Error allocating MD array",
|
||||
__FILE__, __LINE__);
|
||||
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
goto error;
|
||||
}
|
||||
|
||||
fixed_md_distance = ompi_mtl_portals4.fixed_md_distance;
|
||||
if (MEMORY_MAX_SIZE > fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/fixed_md_distance;
|
||||
else fixed_md_nb = 1;
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
ompi_mtl_portals4.send_md_hs[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "Fixed MDs :\n");
|
||||
|
||||
/* Bind the fixed MDs */
|
||||
for (i=0; i<fixed_md_nb; i++) {
|
||||
uint64_t offset = i * fixed_md_distance;
|
||||
/* if the most significant bit of the address space is set, set the extended address bits */
|
||||
if (offset & (MEMORY_MAX_SIZE >> 1)) offset += EXTENDED_ADDR;
|
||||
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output, " %2d: [ %16lx - %16lx ]\n", i, offset, offset + fixed_md_distance - 2);
|
||||
|
||||
md.start = (char *) offset;
|
||||
md.length = fixed_md_distance - 1;
|
||||
md.options = 0;
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
md.start = (char*) (offset_unit * i);
|
||||
md.length = (i - 1 == num_mds) ? size / 2 : size;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
|
||||
"Binding md from %p of length %lx",
|
||||
md.start, md.length);
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.fixed_md_h[i]);
|
||||
&ompi_mtl_portals4.send_md_hs[i]);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
@ -406,7 +389,23 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
|
||||
}
|
||||
}
|
||||
}
|
||||
else opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "No fixed MD\n");
|
||||
#else
|
||||
md.start = 0;
|
||||
md.length = PTL_SIZE_MAX;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.send_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Handle long overflows */
|
||||
me.start = NULL;
|
||||
@ -479,18 +478,24 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
|
||||
}
|
||||
if (ompi_mtl_portals4.fixed_md_distance) {
|
||||
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
|
||||
if (NULL != ompi_mtl_portals4.send_md_hs) {
|
||||
int i;
|
||||
int fixed_md_nb;
|
||||
if (MEMORY_MAX_SIZE > ompi_mtl_portals4.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/ompi_mtl_portals4.fixed_md_distance;
|
||||
else fixed_md_nb = 1;
|
||||
int num_mds = ompi_mtl_portals4_get_num_mds();
|
||||
|
||||
for (i=0; i<fixed_md_nb; i++) {
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.fixed_md_h[i], PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.fixed_md_h[i]);
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_hs[i], PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
free(ompi_mtl_portals4.send_md_hs);
|
||||
}
|
||||
#else
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.send_md_h);
|
||||
}
|
||||
#endif
|
||||
if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
|
||||
}
|
||||
|
@ -80,6 +80,9 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
||||
if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) {
|
||||
ptl_md_t md;
|
||||
|
||||
/* FIX ME: This needs to fit into the send eq somehow;
|
||||
this won't trigger flow control, which could cause
|
||||
badness... */
|
||||
md.start = (char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit;
|
||||
md.length = ((msg_length > ptl_request->delivery_len) ?
|
||||
ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit;
|
||||
|
@ -48,7 +48,6 @@ struct ompi_mtl_portals4_isend_request_t {
|
||||
ompi_mtl_portals4_base_request_t super;
|
||||
void *buffer_ptr;
|
||||
ptl_handle_me_t me_h;
|
||||
ptl_handle_md_t md_h;
|
||||
uint64_t opcount;
|
||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||
struct ompi_mtl_portals4_pending_request_t *pending;
|
||||
|
@ -32,12 +32,6 @@
|
||||
#endif
|
||||
|
||||
|
||||
static int ompi_mtl_portals4_try_to_use_fixed_md(void *start,
|
||||
int length,
|
||||
ptl_handle_md_t *md_h,
|
||||
int64_t *offset,
|
||||
ompi_mtl_portals4_isend_request_t *ptl_request,
|
||||
bool unlink_me);
|
||||
static inline int
|
||||
ompi_mtl_portals4_callback(ptl_event_t *ev,
|
||||
ompi_mtl_portals4_base_request_t* ptl_base_request,
|
||||
@ -62,8 +56,6 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
||||
}
|
||||
pending->fc_notified = 1;
|
||||
|
||||
if (PTL_INVALID_HANDLE != ptl_request->md_h) PtlMDRelease(ptl_request->md_h);
|
||||
|
||||
if (!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE)) {
|
||||
ret = PtlMEUnlink(ptl_request->me_h);
|
||||
if (PTL_OK != ret) {
|
||||
@ -120,15 +112,6 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
||||
if (NULL != ptl_request->buffer_ptr) {
|
||||
free(ptl_request->buffer_ptr);
|
||||
}
|
||||
if (PTL_INVALID_HANDLE != ptl_request->md_h) {
|
||||
ret = PtlMDRelease(ptl_request->md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: send callback PtlMDRelease returned %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
retval = OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "send %lu completed",
|
||||
ptl_request->opcount));
|
||||
@ -199,8 +182,8 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
||||
ptl_match_bits_t match_bits;
|
||||
ptl_me_t me;
|
||||
ptl_hdr_data_t hdr_data;
|
||||
int64_t offset;
|
||||
ptl_handle_md_t md_h;
|
||||
void *base;
|
||||
|
||||
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
|
||||
MTL_PORTALS4_SHORT_MSG);
|
||||
@ -248,13 +231,15 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
||||
ptl_request->opcount, hdr_data, match_bits));
|
||||
}
|
||||
|
||||
ret = ompi_mtl_portals4_try_to_use_fixed_md(start, length, &md_h, &offset, ptl_request, MCA_PML_BASE_SEND_SYNCHRONOUS == mode ? true : false);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
ompi_mtl_portals4_get_md(start, &md_h, &base);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
||||
"Send %lu, start: %p, base: %p, offset: %lx",
|
||||
ptl_request->opcount, start, base,
|
||||
(ptl_size_t) ((char*) start - (char*) base)));
|
||||
|
||||
ret = PtlPut(md_h,
|
||||
(ptl_size_t) offset,
|
||||
(ptl_size_t) ((char*) start - (char*) base),
|
||||
length,
|
||||
PTL_ACK_REQ,
|
||||
endpoint->ptl_proc,
|
||||
@ -270,7 +255,6 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
||||
if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
|
||||
PtlMEUnlink(ptl_request->me_h);
|
||||
}
|
||||
if (PTL_INVALID_HANDLE != ptl_request->md_h) PtlMDRelease(ptl_request->md_h);
|
||||
return ompi_mtl_portals4_get_error(ret);
|
||||
}
|
||||
|
||||
@ -289,7 +273,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
|
||||
ptl_hdr_data_t hdr_data;
|
||||
ptl_size_t put_length;
|
||||
ptl_handle_md_t md_h;
|
||||
int64_t offset;
|
||||
void *base;
|
||||
|
||||
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
|
||||
MTL_PORTALS4_LONG_MSG);
|
||||
@ -330,13 +314,10 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
|
||||
put_length = (rndv == ompi_mtl_portals4.protocol) ?
|
||||
(ptl_size_t) ompi_mtl_portals4.eager_limit : (ptl_size_t) length;
|
||||
|
||||
ompi_mtl_portals4_try_to_use_fixed_md(start, put_length, &md_h, &offset, ptl_request, true);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
ompi_mtl_portals4_get_md(start, &md_h, &base);
|
||||
|
||||
ret = PtlPut(md_h,
|
||||
(ptl_size_t) offset,
|
||||
(ptl_size_t) ((char*) start - (char*) base),
|
||||
put_length,
|
||||
PTL_ACK_REQ,
|
||||
endpoint->ptl_proc,
|
||||
@ -350,7 +331,6 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
|
||||
"%s:%d: PtlPut failed: %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
PtlMEUnlink(ptl_request->me_h);
|
||||
if (PTL_INVALID_HANDLE != ptl_request->md_h) PtlMDRelease(ptl_request->md_h);
|
||||
return ompi_mtl_portals4_get_error(ret);
|
||||
}
|
||||
|
||||
@ -565,59 +545,3 @@ ompi_mtl_portals4_isend(struct mca_mtl_base_module_t* mtl,
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
ompi_mtl_portals4_try_to_use_fixed_md(void *start,
|
||||
int length,
|
||||
ptl_handle_md_t *md_h,
|
||||
int64_t *offset,
|
||||
ompi_mtl_portals4_isend_request_t *ptl_request,
|
||||
bool unlink_me)
|
||||
{
|
||||
int ret;
|
||||
ptl_md_t md;
|
||||
int64_t addr;
|
||||
|
||||
addr = ((int64_t)start & ~EXTENDED_ADDR);
|
||||
|
||||
/* If fixed_md_distance is defined for MD and if the memory buffer is strictly contained in one of them, then use one */
|
||||
if ((0 != ompi_mtl_portals4.fixed_md_distance) &&
|
||||
(((addr % ompi_mtl_portals4.fixed_md_distance) + length) < ompi_mtl_portals4.fixed_md_distance)) {
|
||||
if (0 == length) OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, " Memory : [ %16lx - (len = 0) ] is in fixed MD number: %d\n\n",
|
||||
start, addr / ompi_mtl_portals4.fixed_md_distance));
|
||||
else OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, " Memory : [ %16lx - %16lx ] is in fixed MD number: %d\n\n",
|
||||
start, (long int)start + length - 1, addr / ompi_mtl_portals4.fixed_md_distance));
|
||||
/* Use the fixed MD */
|
||||
*md_h = ompi_mtl_portals4.fixed_md_h[addr / ompi_mtl_portals4.fixed_md_distance];
|
||||
*offset = (addr % ompi_mtl_portals4.fixed_md_distance);
|
||||
ptl_request->md_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
else {
|
||||
if (0 == ompi_mtl_portals4.fixed_md_distance)
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "\nWARNING: Memory cannot be connected to a fixed MD\n"));
|
||||
else OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "\nWARNING: Memory outside the scope of the fixed MD %x\n\n", addr / ompi_mtl_portals4.fixed_md_distance));
|
||||
|
||||
/* Bind the MD (and unbind it where necessary) */
|
||||
md.start = start;
|
||||
md.length = length;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ptl_request->md_h);
|
||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
if (unlink_me) {
|
||||
PtlMEUnlink(ptl_request->me_h);
|
||||
}
|
||||
return ompi_mtl_portals4_get_error(ret);
|
||||
}
|
||||
*md_h = ptl_request->md_h;
|
||||
*offset = 0;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user