1
1

* Update Portals 4 MTL's multi-md code to be a bit cleaner (no if statements in the path) and not create MDs due to boundary crossing
* Add the same logic to the Coll component

This commit was SVN r28733.
Этот коммит содержится в:
Brian Barrett 2013-07-08 21:27:37 +00:00
родитель 315da8125d
Коммит ecbbf888d3
10 изменённых файлов: 376 добавлений и 240 удалений

Просмотреть файл

@ -59,6 +59,36 @@ AC_DEFUN([OMPI_CHECK_PORTALS4],[
LDFLAGS="$ompi_check_portals4_$1_save_LDFLAGS"
LIBS="$ompi_check_portals4_$1_save_LIBS"
# Maximum memory descriptor size, given as log base 2 of the size in
# bytes.  A value of 0 means a single MD can bind all of memory.
max_md_size=0
AC_ARG_WITH([portals4-max-md-size],
    [AC_HELP_STRING([--with-portals4-max-md-size=SIZE],
        [Log base 2 of the maximum size in bytes of a memory descriptor. Should only be set for implementations which do not support binding all of virtual address space.])])
# A bare --with or --without carries no size, so reject it.
AS_IF([test "$with_portals4_max_md_size" = "yes" -o "$with_portals4_max_md_size" = "no"],
    [AC_MSG_ERROR([--with-portals4-max-md-size requires an integer argument])],
    [AS_IF([test -n "$with_portals4_max_md_size"],
        [max_md_size="$with_portals4_max_md_size"])])
AC_DEFINE_UNQUOTED([OMPI_PORTALS4_MAX_MD_SIZE], [$max_md_size],
    [Log base 2 of the maximum size in bytes of a memory descriptor. Set to 0 if MD can bind all of memory.])

# Size of the user virtual address space, also as log base 2 of bytes.
max_va_size=0
AC_ARG_WITH([portals4-max-va-size],
    [AC_HELP_STRING([--with-portals4-max-va-size=SIZE],
        [Log base 2 of the maximum size in bytes of the user virtual address space. Should only be set for implementations which do not support binding all of virtual address space.])])
AS_IF([test "$with_portals4_max_va_size" = "yes" -o "$with_portals4_max_va_size" = "no"],
    [AC_MSG_ERROR([--with-portals4-max-va-size requires an integer argument])],
    [AS_IF([test -n "$with_portals4_max_va_size"],
        [max_va_size="$with_portals4_max_va_size"])])
AC_DEFINE_UNQUOTED([OMPI_PORTALS4_MAX_VA_SIZE], [$max_va_size],
    [Log base 2 of the maximum size in bytes of the user virtual address space. Set to 0 if MD can bind all of memory.])

# The two options only make sense together: abort if exactly one is set.
AS_IF([test \( $max_md_size -eq 0 -a $max_va_size -ne 0 \) -o \( $max_md_size -ne 0 -a $max_va_size -eq 0 \)],
    [AC_MSG_ERROR([If either --with-portals4-max-md-size or --with-portals4-max-va-size is set, both must be set.])])

# An MD at least as large as the address space needs no splitting.
# This reset only affects the shell variables used for the notice
# below; the values were already recorded by AC_DEFINE_UNQUOTED above.
AS_IF([test $max_md_size -ge $max_va_size],
    [max_md_size=0
     max_va_size=0])
AS_IF([test $max_md_size -ne 0 -a $max_va_size -ne 0],
    [AC_MSG_NOTICE([Portals 4 address space size: $max_md_size, $max_va_size])])
AS_IF([test "$ompi_check_portals4_happy" = "yes"],
[$2],
[AS_IF([test ! -z "$with_portals4" -a "$with_portals4" != "no"],

Просмотреть файл

@ -35,7 +35,12 @@ struct mca_coll_portals4_component_t {
ptl_handle_eq_t eq_h;
ptl_handle_me_t barrier_unex_me_h;
ptl_handle_me_t finish_me_h;
/** Send MD handle(s). Use ompi_coll_portals4_get_md() to get the right md */
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
ptl_handle_md_t *md_hs;
#else
ptl_handle_md_t md_h;
#endif
ompi_free_list_t requests; /* request free list for the i collectives */
};
@ -66,7 +71,7 @@ struct ompi_coll_portals4_request_t;
#define COLL_PORTALS4_BARRIER 0x01
#define MTL_PORTALS4_SET_BITS(match_bits, contextid, eager, type, op_count) \
#define COLL_PORTALS4_SET_BITS(match_bits, contextid, eager, type, op_count) \
{ \
match_bits = contextid; \
match_bits = (match_bits << 1); \
@ -103,6 +108,34 @@ ompi_coll_portals4_get_nchildren(int cube_dim, int hibit, int rank, int size)
return guess;
}
/*
* See note in mtl/portals4/mtl_portals4.h for why this exists.
*/
/*
 * Look up the send MD handle to use for the buffer at ptr.
 *
 * ptr      - address of the buffer that will be sent
 * md_h     - out: MD handle to pass to the Portals call
 * base_ptr - out: base address for the chosen MD; callers compute the
 *            offset to pass to Portals relative to this value
 *
 * When a single MD covers all of memory the component's lone handle
 * and a zero base are returned.
 */
static inline void
ompi_coll_portals4_get_md(const void *ptr, ptl_handle_md_t *md_h, void **base_ptr)
{
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
    /* One slot per half-MD-sized step through the address space. */
    int slot_mask = (1ULL << (OMPI_PORTALS4_MAX_VA_SIZE - OMPI_PORTALS4_MAX_MD_SIZE + 1)) - 1;
    int slot = (((uintptr_t) ptr) >> (OMPI_PORTALS4_MAX_MD_SIZE - 1)) & slot_mask;

    *base_ptr = (void*) (slot * (1ULL << (OMPI_PORTALS4_MAX_MD_SIZE - 1)));
    *md_h = mca_coll_portals4_component.md_hs[slot];
#else
    *base_ptr = 0;
    *md_h = mca_coll_portals4_component.md_h;
#endif
}
/*
 * Number of send MDs the coll component uses: 1 when a single MD can
 * cover all of memory, otherwise 2^(VA_SIZE - MD_SIZE + 1).
 */
static inline int
ompi_coll_portals4_get_num_mds(void)
{
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
    const int md_count = (1 << (OMPI_PORTALS4_MAX_VA_SIZE - OMPI_PORTALS4_MAX_MD_SIZE + 1));
#else
    const int md_count = 1;
#endif

    return md_count;
}
END_C_DECLS

Просмотреть файл

@ -35,6 +35,10 @@ ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
ptl_me_t me;
size_t count;
ptl_match_bits_t match_bits;
ptl_handle_md_t md_h;
void *base;
ompi_coll_portals4_get_md(0, &md_h, &base);
count = opal_atomic_add_size_t(&portals4_module->barrier_count, 1);
@ -47,7 +51,7 @@ ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
MTL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm),
COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm),
0, COLL_PORTALS4_BARRIER, count);
/* Build "tree" out of hypercube */
@ -87,18 +91,18 @@ ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
/* send to parent when children have sent to us */
if (rank > 0) {
int parent = rank & ~(1 << hibit);
PtlTriggeredPut(mca_coll_portals4_component.md_h,
0,
0,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, parent),
mca_coll_portals4_component.pt_idx,
match_bits,
0,
NULL,
0,
ct_h,
num_msgs);
ret = PtlTriggeredPut(md_h,
0,
0,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, parent),
mca_coll_portals4_component.pt_idx,
match_bits,
0,
NULL,
0,
ct_h,
num_msgs);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
"%s:%d: PtlTriggeredPut failed: %d\n",
@ -114,18 +118,18 @@ ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {
int peer = rank | mask;
if (peer < size) {
PtlTriggeredPut(mca_coll_portals4_component.md_h,
0,
0,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, peer),
mca_coll_portals4_component.pt_idx,
match_bits,
0,
NULL,
0,
ct_h,
num_msgs);
ret = PtlTriggeredPut(md_h,
0,
0,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, peer),
mca_coll_portals4_component.pt_idx,
match_bits,
0,
NULL,
0,
ct_h,
num_msgs);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
"%s:%d: PtlTriggeredPut failed: %d\n",
@ -177,6 +181,10 @@ ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
size_t count;
ptl_match_bits_t match_bits;
ompi_coll_portals4_request_t *request;
ptl_handle_md_t md_h;
void *base;
ompi_coll_portals4_get_md(0, &md_h, &base);
OMPI_COLL_PORTALS4_REQUEST_ALLOC(comm, request);
if (NULL == request) {
@ -199,7 +207,7 @@ ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
MTL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm),
COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm),
0, COLL_PORTALS4_BARRIER, count);
/* Build "tree" out of hypercube */
@ -238,18 +246,19 @@ ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
/* send to parent when children have sent to us */
if (rank > 0) {
int parent = rank & ~(1 << hibit);
PtlTriggeredPut(mca_coll_portals4_component.md_h,
0,
0,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, parent),
mca_coll_portals4_component.pt_idx,
match_bits,
0,
NULL,
0,
request->ct_h,
num_msgs);
ret = PtlTriggeredPut(md_h,
0,
0,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, parent),
mca_coll_portals4_component.pt_idx,
match_bits,
0,
NULL,
0,
request->ct_h,
num_msgs);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
"%s:%d: PtlTriggeredPut failed: %d\n",
@ -265,18 +274,18 @@ ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {
int peer = rank | mask;
if (peer < size) {
PtlTriggeredPut(mca_coll_portals4_component.md_h,
0,
0,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, peer),
mca_coll_portals4_component.pt_idx,
match_bits,
0,
NULL,
0,
request->ct_h,
num_msgs);
ret = PtlTriggeredPut(md_h,
0,
0,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, peer),
mca_coll_portals4_component.pt_idx,
match_bits,
0,
NULL,
0,
request->ct_h,
num_msgs);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
"%s:%d: PtlTriggeredPut failed: %d\n",
@ -287,16 +296,18 @@ ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
}
/* Send a put to self when we've received all our messages... */
PtlPut(mca_coll_portals4_component.md_h,
0,
0,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, rank),
mca_coll_portals4_component.finish_pt_idx,
0,
0,
NULL,
(uintptr_t) request);
ret = PtlTriggeredPut(md_h,
0,
0,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, rank),
mca_coll_portals4_component.finish_pt_idx,
0,
0,
NULL,
(uintptr_t) request,
request->ct_h,
num_msgs);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_coll_base_framework.framework_output,

Просмотреть файл

@ -74,12 +74,6 @@ mca_coll_portals4_component_t mca_coll_portals4_component = {
portals4_init_query,
portals4_comm_query
},
PTL_INVALID_HANDLE,
-1,
-1,
PTL_INVALID_HANDLE,
PTL_INVALID_HANDLE,
PTL_INVALID_HANDLE
};
@ -102,6 +96,19 @@ static int
portals4_open(void)
{
int ret;
mca_coll_portals4_component.ni_h = PTL_INVALID_HANDLE;
mca_coll_portals4_component.uid = PTL_UID_ANY;
mca_coll_portals4_component.pt_idx = -1;
mca_coll_portals4_component.finish_pt_idx = -1;
mca_coll_portals4_component.eq_h = PTL_INVALID_HANDLE;
mca_coll_portals4_component.barrier_unex_me_h = PTL_INVALID_HANDLE;
mca_coll_portals4_component.finish_me_h = PTL_INVALID_HANDLE;
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
mca_coll_portals4_component.md_hs = NULL;
#else
mca_coll_portals4_component.md_h = PTL_INVALID_HANDLE;
#endif
OBJ_CONSTRUCT(&mca_coll_portals4_component.requests, ompi_free_list_t);
ret = ompi_free_list_init(&mca_coll_portals4_component.requests,
@ -129,6 +136,25 @@ portals4_close(void)
OBJ_DESTRUCT(&mca_coll_portals4_component.requests);
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
if (NULL != mca_coll_portals4_component.md_hs) {
int i;
int num_mds = ompi_coll_portals4_get_num_mds();
for (i = 0 ; i < num_mds ; ++i) {
if (!PtlHandleIsEqual(mca_coll_portals4_component.md_hs[i], PTL_INVALID_HANDLE)) {
ret = PtlMDRelease(mca_coll_portals4_component.md_hs[i]);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
"%s:%d: PtlMDRelease failed: %d\n",
__FILE__, __LINE__, ret);
}
}
}
free(mca_coll_portals4_component.md_hs);
}
#else
if (!PtlHandleIsEqual(mca_coll_portals4_component.md_h, PTL_INVALID_HANDLE)) {
ret = PtlMDRelease(mca_coll_portals4_component.md_h);
if (PTL_OK != ret) {
@ -137,6 +163,7 @@ portals4_close(void)
__FILE__, __LINE__, ret);
}
}
#endif
if (!PtlHandleIsEqual(mca_coll_portals4_component.finish_me_h, PTL_INVALID_HANDLE)) {
ret = PtlMEUnlink(mca_coll_portals4_component.finish_me_h);
if (PTL_OK != ret) {
@ -274,21 +301,62 @@ portals4_init_query(bool enable_progress_threads,
return OMPI_ERROR;
}
/* send space... */
md.start = 0;
md.length = PTL_SIZE_MAX;
md.options = 0;
md.eq_handle = PTL_EQ_NONE;
md.ct_handle = PTL_CT_NONE;
ret = PtlMDBind(mca_coll_portals4_component.ni_h,
&md,
&mca_coll_portals4_component.md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
return OMPI_ERROR;
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
to have a single MD across all of memory */
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
{
    int i;
    int num_mds = ompi_coll_portals4_get_num_mds();
    /* OMPI_PORTALS4_MAX_MD_SIZE is the log base 2 of the MD size, so
       shift to obtain the size in bytes.  MDs start every half MD. */
    ptl_size_t size = 1ULL << OMPI_PORTALS4_MAX_MD_SIZE;
    ptl_size_t offset_unit = (1ULL << OMPI_PORTALS4_MAX_MD_SIZE) / 2;

    mca_coll_portals4_component.md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
    if (NULL == mca_coll_portals4_component.md_hs) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: Error allocating MD array",
                            __FILE__, __LINE__);
        return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    }

    /* Mark every slot invalid first so that cleanup after a partial
       failure only releases handles that were actually bound. */
    for (i = 0 ; i < num_mds ; ++i) {
        mca_coll_portals4_component.md_hs[i] = PTL_INVALID_HANDLE;
    }

    for (i = 0 ; i < num_mds ; ++i) {
        md.start = (char*) (offset_unit * i);
        /* The last MD begins half an MD below the top of the address
           space, so it is only half as long as the others. */
        md.length = (num_mds - 1 == i) ? size / 2 : size;
        md.options = 0;
        md.eq_handle = PTL_EQ_NONE;
        md.ct_handle = PTL_CT_NONE;

        ret = PtlMDBind(mca_coll_portals4_component.ni_h,
                        &md,
                        &mca_coll_portals4_component.md_hs[i]);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                "%s:%d: PtlMDBind failed: %d\n",
                                __FILE__, __LINE__, ret);
            return OMPI_ERROR;
        }
    }
}
#else
    /* A single MD can cover all of memory. */
    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(mca_coll_portals4_component.ni_h,
                    &md,
                    &mca_coll_portals4_component.md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }
#endif
/* setup finish ack ME */
me.start = NULL;
@ -326,7 +394,7 @@ portals4_init_query(bool enable_progress_threads,
PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
MTL_PORTALS4_SET_BITS(me.match_bits, 0, 0, COLL_PORTALS4_BARRIER, 0);
COLL_PORTALS4_SET_BITS(me.match_bits, 0, 0, COLL_PORTALS4_BARRIER, 0);
me.ignore_bits = COLL_PORTALS4_CID_MASK | COLL_PORTALS4_OP_COUNT_MASK;
ret = PtlMEAppend(mca_coll_portals4_component.ni_h,

Просмотреть файл

@ -155,15 +155,20 @@ ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl)
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
{
int i;
int num_mds = ompi_mtl_portals4_get_num_mds();
if (0 != ompi_mtl_portals4.fixed_md_h) {
int i, fixed_md_nb;
for (i = 0 ; i < num_mds ; ++i) {
PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
}
if (MEMORY_MAX_SIZE > ompi_mtl_portals4.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/ompi_mtl_portals4.fixed_md_distance;
else fixed_md_nb = 1;
for (i=0; i< fixed_md_nb; i++) PtlMDRelease(ompi_mtl_portals4.fixed_md_h[i]);
free(ompi_mtl_portals4.fixed_md_h);
free(ompi_mtl_portals4.send_md_hs);
}
#else
PtlMDRelease(ompi_mtl_portals4.send_md_h);
#endif
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);

Просмотреть файл

@ -32,9 +32,6 @@
BEGIN_C_DECLS
#define MEMORY_MAX_SIZE ((long int)1<<48)
#define EXTENDED_ADDR (0xffff000000000000)
struct mca_mtl_portals4_send_request_t;
struct mca_mtl_portals4_module_t {
@ -75,9 +72,12 @@ struct mca_mtl_portals4_module_t {
/** MD handle for sending ACKS */
ptl_handle_md_t zero_md_h;
/** Fixed MD handles covering all of memory for sending normal messages */
ptl_handle_md_t *fixed_md_h;
uint64_t fixed_md_distance;
/** Send MD handle(s). Use ompi_mtl_portals4_get_md() to get the right md */
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
ptl_handle_md_t *send_md_hs;
#else
ptl_handle_md_t send_md_h;
#endif
/** long message receive overflow ME. Persistent ME, first in
overflow list on the recv_idx portal table. */
@ -206,6 +206,64 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4;
#define MTL_PORTALS4_IS_SYNC_MSG(hdr_data) \
(0 != (MTL_PORTALS4_SYNC_MSG & hdr_data))
/*
* Not all implementations of Portals 4 support binding a memory
* descriptor which covers all of memory, but all support covering a
* large fraction of memory. Therefore, rather than working around
* the issue by pinning per message, we use a number of memory
* descriptors to cover all of memory. As long as the maximum memory
* descriptor is a large fraction of the user virtual address space
* (like 46 bit MDs on a platform with 47 bits of user virtual address
* space), this works fine.
*
* Our scheme is to create N memory descriptors which contiguously
* cover the entire user address space, then another N-1 contiguous
* memory descriptors offset by 1/2 the size of the MD, then a final
* memory descriptor of 1/2 the size of the other MDs covering the top
* of the memory space, to avoid if statements in the critical path. This
* scheme allows for a maximum message size of 1/2 the size of the MD
* without ever crossing an MD boundary. Also, because MD sizes are
* always on a power of 2 in this scheme, computing the offsets and MD
* selection are quick, using only bit shift and mask.
*
* ompi_mtl_portals4_get_md() relies heavily on compiler constant folding.
* "mask" can be constant folded into a constant. "which" compiler folds
* into a bit shift of a register a constant number of times, then masked
* by a constant (the input is, unfortunately, not constant).
*
* In the case where an MD can cover all of memory,
* ompi_mtl_portals4_get_md() will be compiled into two assignments.
* Assuming the function inlines (and it certainly should be), the two
* assignments should be optimized into register assignments for the
* Portals call relatively easily.
*/
/*
 * Look up the send MD handle to use for the buffer at ptr.
 *
 * ptr      - address of the buffer that will be sent
 * md_h     - out: MD handle to pass to the Portals call
 * base_ptr - out: base address for the chosen MD; callers pass
 *            (ptr - *base_ptr) as the offset to Portals
 *
 * When a single MD covers all of memory the module's lone handle and
 * a zero base are returned.
 */
static inline void
ompi_mtl_portals4_get_md(const void *ptr, ptl_handle_md_t *md_h, void **base_ptr)
{
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
    /* One slot per half-MD-sized step through the address space. */
    int slot_mask = (1ULL << (OMPI_PORTALS4_MAX_VA_SIZE - OMPI_PORTALS4_MAX_MD_SIZE + 1)) - 1;
    int slot = (((uintptr_t) ptr) >> (OMPI_PORTALS4_MAX_MD_SIZE - 1)) & slot_mask;

    *base_ptr = (void*) (slot * (1ULL << (OMPI_PORTALS4_MAX_MD_SIZE - 1)));
    *md_h = ompi_mtl_portals4.send_md_hs[slot];
#else
    *base_ptr = 0;
    *md_h = ompi_mtl_portals4.send_md_h;
#endif
}
/*
 * Number of send MDs the MTL uses: 1 when a single MD can cover all
 * of memory, otherwise 2^(VA_SIZE - MD_SIZE + 1).
 */
static inline int
ompi_mtl_portals4_get_num_mds(void)
{
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
    const int md_count = (1 << (OMPI_PORTALS4_MAX_VA_SIZE - OMPI_PORTALS4_MAX_MD_SIZE + 1));
#else
    const int md_count = 1;
#endif

    return md_count;
}
/* MTL interface functions */
extern int ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl);

Просмотреть файл

@ -37,8 +37,6 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
OMPI_MODULE_DECLSPEC extern mca_mtl_base_component_2_0_0_t mca_mtl_portals4_component;
static unsigned int ompi_mtl_portals4_md_size_bit_width;
mca_mtl_base_component_2_0_0_t mca_mtl_portals4_component = {
/* First, the mca_base_component_t struct containing meta
@ -136,18 +134,6 @@ ompi_mtl_portals4_component_register(void)
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_portals4.recv_queue_size);
ompi_mtl_portals4_md_size_bit_width = 48;
(void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version,
"md_size_bit_width",
"Number of bits used to specify the length of an MD to the portals4 library",
MCA_BASE_VAR_TYPE_UNSIGNED_INT,
NULL,
0,
0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_portals4_md_size_bit_width);
ompi_mtl_portals4.protocol = eager;
mca_base_var_enum_create("mtl_portals4_long_protocol", long_protocol_values, &new_enum);
ret = mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version,
@ -171,17 +157,10 @@ ompi_mtl_portals4_component_register(void)
static int
ompi_mtl_portals4_component_open(void)
{
unsigned int i;
uint64_t fixed_md_nb;
ompi_mtl_portals4.base.mtl_request_size =
sizeof(ompi_mtl_portals4_request_t) -
sizeof(struct mca_mtl_request_t);
if (48 < ompi_mtl_portals4_md_size_bit_width) ompi_mtl_portals4_md_size_bit_width = 48;
ompi_mtl_portals4.fixed_md_distance = (unsigned long int) 1<<ompi_mtl_portals4_md_size_bit_width;
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"fixed_md_distance=%16.16lx\n", ompi_mtl_portals4.fixed_md_distance);
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"Flow control: "
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
@ -218,14 +197,11 @@ ompi_mtl_portals4_component_open(void)
ompi_mtl_portals4.recv_eq_h = PTL_INVALID_HANDLE;
ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE;
if (MEMORY_MAX_SIZE > ompi_mtl_portals4.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/ompi_mtl_portals4.fixed_md_distance;
else fixed_md_nb = 1;
if (fixed_md_nb > 32) ompi_mtl_portals4.fixed_md_distance = 0;
else {
/* Allocate the md_h table */
ompi_mtl_portals4.fixed_md_h = malloc(fixed_md_nb * sizeof(ptl_handle_md_t));
for (i=0; i<fixed_md_nb; i++) ompi_mtl_portals4.fixed_md_h[i] = PTL_INVALID_HANDLE;
}
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
ompi_mtl_portals4.send_md_hs = NULL;
#else
ompi_mtl_portals4.send_md_h = PTL_INVALID_HANDLE;
#endif
ompi_mtl_portals4.long_overflow_me_h = PTL_INVALID_HANDLE;
ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL;
@ -369,35 +345,42 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
goto error;
}
/* bind fixed md across all of memory */
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
to have a single MD across all of memory */
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
{
int i;
int num_mds = ompi_mtl_portals4_get_num_mds();
ptl_size_t size = 1ULL << OMPI_PORTALS4_MAX_MD_SIZE;
ptl_size_t offset_unit = (1ULL << OMPI_PORTALS4_MAX_MD_SIZE) / 2;
if (ompi_mtl_portals4.fixed_md_distance) {
unsigned int i;
uint64_t fixed_md_nb, fixed_md_distance;
ompi_mtl_portals4.send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
if (NULL == ompi_mtl_portals4.send_md_hs) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: Error allocating MD array",
__FILE__, __LINE__);
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
goto error;
}
fixed_md_distance = ompi_mtl_portals4.fixed_md_distance;
if (MEMORY_MAX_SIZE > fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/fixed_md_distance;
else fixed_md_nb = 1;
for (i = 0 ; i < num_mds ; ++i) {
ompi_mtl_portals4.send_md_hs[i] = PTL_INVALID_HANDLE;
}
opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "Fixed MDs :\n");
/* Bind the fixed MDs */
for (i=0; i<fixed_md_nb; i++) {
uint64_t offset = i * fixed_md_distance;
/* if the most significant bit of the address space is set, set the extended address bits */
if (offset & (MEMORY_MAX_SIZE >> 1)) offset += EXTENDED_ADDR;
opal_output_verbose(1, ompi_mtl_base_framework.framework_output, " %2d: [ %16lx - %16lx ]\n", i, offset, offset + fixed_md_distance - 2);
md.start = (char *) offset;
md.length = fixed_md_distance - 1;
md.options = 0;
for (i = 0 ; i < num_mds ; ++i) {
md.start = (char*) (offset_unit * i);
md.length = (i - 1 == num_mds) ? size / 2 : size;
md.options = 0;
md.eq_handle = ompi_mtl_portals4.send_eq_h;
md.ct_handle = PTL_CT_NONE;
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
"Binding md from %p of length %lx",
md.start, md.length);
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ompi_mtl_portals4.fixed_md_h[i]);
&ompi_mtl_portals4.send_md_hs[i]);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
@ -406,7 +389,23 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
}
}
}
else opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "No fixed MD\n");
#else
md.start = 0;
md.length = PTL_SIZE_MAX;
md.options = 0;
md.eq_handle = ompi_mtl_portals4.send_eq_h;
md.ct_handle = PTL_CT_NONE;
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ompi_mtl_portals4.send_md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
#endif
/* Handle long overflows */
me.start = NULL;
@ -479,18 +478,24 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
}
if (ompi_mtl_portals4.fixed_md_distance) {
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
if (NULL != ompi_mtl_portals4.send_md_hs) {
int i;
int fixed_md_nb;
if (MEMORY_MAX_SIZE > ompi_mtl_portals4.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/ompi_mtl_portals4.fixed_md_distance;
else fixed_md_nb = 1;
int num_mds = ompi_mtl_portals4_get_num_mds();
for (i=0; i<fixed_md_nb; i++) {
if (!PtlHandleIsEqual(ompi_mtl_portals4.fixed_md_h[i], PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.fixed_md_h[i]);
for (i = 0 ; i < num_mds ; ++i) {
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_hs[i], PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
}
}
free(ompi_mtl_portals4.send_md_hs);
}
#else
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.send_md_h);
}
#endif
if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
}

Просмотреть файл

@ -80,6 +80,9 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) {
ptl_md_t md;
/* FIX ME: This needs to fit into the send eq somehow;
this won't trigger flow control, which could cause
badness... */
md.start = (char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit;
md.length = ((msg_length > ptl_request->delivery_len) ?
ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit;

Просмотреть файл

@ -48,7 +48,6 @@ struct ompi_mtl_portals4_isend_request_t {
ompi_mtl_portals4_base_request_t super;
void *buffer_ptr;
ptl_handle_me_t me_h;
ptl_handle_md_t md_h;
uint64_t opcount;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
struct ompi_mtl_portals4_pending_request_t *pending;

Просмотреть файл

@ -32,12 +32,6 @@
#endif
static int ompi_mtl_portals4_try_to_use_fixed_md(void *start,
int length,
ptl_handle_md_t *md_h,
int64_t *offset,
ompi_mtl_portals4_isend_request_t *ptl_request,
bool unlink_me);
static inline int
ompi_mtl_portals4_callback(ptl_event_t *ev,
ompi_mtl_portals4_base_request_t* ptl_base_request,
@ -62,8 +56,6 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
}
pending->fc_notified = 1;
if (PTL_INVALID_HANDLE != ptl_request->md_h) PtlMDRelease(ptl_request->md_h);
if (!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE)) {
ret = PtlMEUnlink(ptl_request->me_h);
if (PTL_OK != ret) {
@ -120,15 +112,6 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
if (NULL != ptl_request->buffer_ptr) {
free(ptl_request->buffer_ptr);
}
if (PTL_INVALID_HANDLE != ptl_request->md_h) {
ret = PtlMDRelease(ptl_request->md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: send callback PtlMDRelease returned %d",
__FILE__, __LINE__, ret);
retval = OMPI_ERROR;
}
}
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "send %lu completed",
ptl_request->opcount));
@ -199,8 +182,8 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
ptl_match_bits_t match_bits;
ptl_me_t me;
ptl_hdr_data_t hdr_data;
int64_t offset;
ptl_handle_md_t md_h;
void *base;
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
MTL_PORTALS4_SHORT_MSG);
@ -248,13 +231,15 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
ptl_request->opcount, hdr_data, match_bits));
}
ret = ompi_mtl_portals4_try_to_use_fixed_md(start, length, &md_h, &offset, ptl_request, MCA_PML_BASE_SEND_SYNCHRONOUS == mode ? true : false);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
ompi_mtl_portals4_get_md(start, &md_h, &base);
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Send %lu, start: %p, base: %p, offset: %lx",
ptl_request->opcount, start, base,
(ptl_size_t) ((char*) start - (char*) base)));
ret = PtlPut(md_h,
(ptl_size_t) offset,
(ptl_size_t) ((char*) start - (char*) base),
length,
PTL_ACK_REQ,
endpoint->ptl_proc,
@ -270,7 +255,6 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
PtlMEUnlink(ptl_request->me_h);
}
if (PTL_INVALID_HANDLE != ptl_request->md_h) PtlMDRelease(ptl_request->md_h);
return ompi_mtl_portals4_get_error(ret);
}
@ -289,7 +273,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
ptl_hdr_data_t hdr_data;
ptl_size_t put_length;
ptl_handle_md_t md_h;
int64_t offset;
void *base;
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
MTL_PORTALS4_LONG_MSG);
@ -330,13 +314,10 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
put_length = (rndv == ompi_mtl_portals4.protocol) ?
(ptl_size_t) ompi_mtl_portals4.eager_limit : (ptl_size_t) length;
ompi_mtl_portals4_try_to_use_fixed_md(start, put_length, &md_h, &offset, ptl_request, true);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
ompi_mtl_portals4_get_md(start, &md_h, &base);
ret = PtlPut(md_h,
(ptl_size_t) offset,
(ptl_size_t) ((char*) start - (char*) base),
put_length,
PTL_ACK_REQ,
endpoint->ptl_proc,
@ -350,7 +331,6 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
"%s:%d: PtlPut failed: %d",
__FILE__, __LINE__, ret);
PtlMEUnlink(ptl_request->me_h);
if (PTL_INVALID_HANDLE != ptl_request->md_h) PtlMDRelease(ptl_request->md_h);
return ompi_mtl_portals4_get_error(ret);
}
@ -565,59 +545,3 @@ ompi_mtl_portals4_isend(struct mca_mtl_base_module_t* mtl,
return ret;
}
/*
 * Pick the memory descriptor used to send [start, start + length).
 *
 * If fixed MDs are configured (fixed_md_distance != 0) and the buffer
 * lies strictly inside one of them, that fixed MD is returned together
 * with the buffer's offset inside it, and ptl_request->md_h is set to
 * PTL_INVALID_HANDLE.  Otherwise a new MD is bound over exactly the
 * buffer, stored in ptl_request->md_h, and returned with offset 0.
 *
 * start       - first byte of the send buffer
 * length      - number of bytes to send
 * md_h        - out: MD handle to use for the put
 * offset      - out: offset of start within *md_h
 * ptl_request - request that records the per-message MD, if any
 * unlink_me   - when true, ptl_request->me_h is unlinked if PtlMDBind fails
 *
 * Returns OMPI_SUCCESS, or the translated Portals error if PtlMDBind fails.
 */
static int
ompi_mtl_portals4_try_to_use_fixed_md(void *start,
                                      int length,
                                      ptl_handle_md_t *md_h,
                                      int64_t *offset,
                                      ompi_mtl_portals4_isend_request_t *ptl_request,
                                      bool unlink_me)
{
    int ret;
    ptl_md_t md;
    int64_t addr;

    /* Clear the bits selected by EXTENDED_ADDR before indexing the fixed MDs. */
    addr = ((int64_t)start & ~EXTENDED_ADDR);

    /* If fixed_md_distance is defined for MD and if the memory buffer is strictly contained in one of them, then use one */
    if ((0 != ompi_mtl_portals4.fixed_md_distance) &&
        (((addr % ompi_mtl_portals4.fixed_md_distance) + length) < ompi_mtl_portals4.fixed_md_distance)) {
        /* Arguments are cast so that they match the %lx / %d conversions. */
        if (0 == length) {
            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, " Memory : [ %16lx - (len = 0) ] is in fixed MD number: %d\n\n",
                                 (unsigned long) (uintptr_t) start,
                                 (int) (addr / ompi_mtl_portals4.fixed_md_distance)));
        } else {
            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, " Memory : [ %16lx - %16lx ] is in fixed MD number: %d\n\n",
                                 (unsigned long) (uintptr_t) start,
                                 (unsigned long) ((uintptr_t) start + length - 1),
                                 (int) (addr / ompi_mtl_portals4.fixed_md_distance)));
        }

        /* Use the fixed MD */
        *md_h = ompi_mtl_portals4.fixed_md_h[addr / ompi_mtl_portals4.fixed_md_distance];
        *offset = (addr % ompi_mtl_portals4.fixed_md_distance);
        ptl_request->md_h = PTL_INVALID_HANDLE;
    }
    else {
        if (0 == ompi_mtl_portals4.fixed_md_distance) {
            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "\nWARNING: Memory cannot be connected to a fixed MD\n"));
        } else {
            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "\nWARNING: Memory outside the scope of the fixed MD %x\n\n",
                                 (unsigned int) (addr / ompi_mtl_portals4.fixed_md_distance)));
        }

        /* Bind the MD (and unbind it where necessary) */
        md.start = start;
        md.length = length;
        md.options = 0;
        md.eq_handle = ompi_mtl_portals4.send_eq_h;
        md.ct_handle = PTL_CT_NONE;

        ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                        &md,
                        &ptl_request->md_h);
        if (OPAL_UNLIKELY(PTL_OK != ret)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PtlMDBind failed: %d\n",
                                __FILE__, __LINE__, ret);
            if (unlink_me) {
                PtlMEUnlink(ptl_request->me_h);
            }
            return ompi_mtl_portals4_get_error(ret);
        }

        *md_h = ptl_request->md_h;
        *offset = 0;
    }

    return OMPI_SUCCESS;
}