Work around the case where a single MD can't span all of memory (sigh)
This commit was SVN r28132.
Этот коммит содержится в:
родитель
a951fde1ec
Коммит
1370d4569a
@ -155,7 +155,16 @@ ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl)
|
||||
|
||||
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
|
||||
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
|
||||
PtlMDRelease(ompi_mtl_portals4.md_h);
|
||||
|
||||
if (0 != ompi_mtl_portals4.fixed_md_h) {
|
||||
int i, fixed_md_nb;
|
||||
|
||||
if (MEMORY_MAX_SIZE > ompi_mtl_portals4.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/ompi_mtl_portals4.fixed_md_distance;
|
||||
else fixed_md_nb = 1;
|
||||
for (i=0; i< fixed_md_nb; i++) PtlMDRelease(ompi_mtl_portals4.fixed_md_h[i]);
|
||||
free(ompi_mtl_portals4.fixed_md_h);
|
||||
}
|
||||
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
|
||||
PtlEQFree(ompi_mtl_portals4.send_eq_h);
|
||||
|
@ -32,6 +32,9 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
#define MEMORY_MAX_SIZE ((long int)1<<48)
|
||||
#define EXTENDED_ADDR (0xffff000000000000)
|
||||
|
||||
struct mca_mtl_portals4_send_request_t;
|
||||
|
||||
struct mca_mtl_portals4_module_t {
|
||||
@ -70,8 +73,9 @@ struct mca_mtl_portals4_module_t {
|
||||
/** MD handle for sending ACKS */
|
||||
ptl_handle_md_t zero_md_h;
|
||||
|
||||
/** MD handle covering all of memory for sending normal messages */
|
||||
ptl_handle_md_t md_h;
|
||||
/** Fixed MD handles covering all of memory for sending normal messages */
|
||||
ptl_handle_md_t *fixed_md_h;
|
||||
uint64_t fixed_md_distance;
|
||||
|
||||
/** long message receive overflow ME. Persistent ME, first in
|
||||
overflow list on the recv_idx portal table. */
|
||||
|
@ -67,6 +67,8 @@ ompi_mtl_portals4_component_open(void)
|
||||
{
|
||||
int tmp;
|
||||
char *tmp_proto;
|
||||
int i;
|
||||
uint64_t fixed_md_nb;
|
||||
|
||||
ompi_mtl_portals4.base.mtl_request_size =
|
||||
sizeof(ompi_mtl_portals4_request_t) -
|
||||
@ -117,6 +119,18 @@ ompi_mtl_portals4_component_open(void)
|
||||
&tmp);
|
||||
ompi_mtl_portals4.recv_queue_size = tmp;
|
||||
|
||||
mca_base_param_reg_int(&mca_mtl_portals4_component.mtl_version,
|
||||
"md_size_bit_width",
|
||||
"Number of bits used to specify the length of an MD to the portals4 library",
|
||||
false,
|
||||
false,
|
||||
48,
|
||||
&tmp);
|
||||
if (48 < tmp) tmp = 48;
|
||||
ompi_mtl_portals4.fixed_md_distance = (unsigned long int) 1<<tmp;
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"fixed_md_distance=%16.16lx\n", ompi_mtl_portals4.fixed_md_distance);
|
||||
|
||||
mca_base_param_reg_string(&mca_mtl_portals4_component.mtl_version,
|
||||
"long_protocol",
|
||||
"Protocol to use for long messages. Valid entries are eager and rndv",
|
||||
@ -169,7 +183,16 @@ ompi_mtl_portals4_component_open(void)
|
||||
ompi_mtl_portals4.send_eq_h = PTL_INVALID_HANDLE;
|
||||
ompi_mtl_portals4.recv_eq_h = PTL_INVALID_HANDLE;
|
||||
ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE;
|
||||
ompi_mtl_portals4.md_h = PTL_INVALID_HANDLE;
|
||||
|
||||
if (MEMORY_MAX_SIZE > ompi_mtl_portals4.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/ompi_mtl_portals4.fixed_md_distance;
|
||||
else fixed_md_nb = 1;
|
||||
if (fixed_md_nb > 32) ompi_mtl_portals4.fixed_md_distance = 0;
|
||||
else {
|
||||
/* Allocate the md_h table */
|
||||
ompi_mtl_portals4.fixed_md_h = malloc(fixed_md_nb * sizeof(ptl_handle_md_t));
|
||||
for (i=0; i<fixed_md_nb; i++) ompi_mtl_portals4.fixed_md_h[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
ompi_mtl_portals4.long_overflow_me_h = PTL_INVALID_HANDLE;
|
||||
ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL;
|
||||
ompi_mtl_portals4.read_idx = (ptl_pt_index_t) ~0UL;
|
||||
@ -304,22 +327,44 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* bind md across all of memory */
|
||||
md.start = 0;
|
||||
md.length = SIZE_MAX;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
/* bind fixed md across all of memory */
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
if (ompi_mtl_portals4.fixed_md_distance) {
|
||||
int i;
|
||||
uint64_t fixed_md_nb, fixed_md_distance;
|
||||
|
||||
fixed_md_distance = ompi_mtl_portals4.fixed_md_distance;
|
||||
if (MEMORY_MAX_SIZE > fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/fixed_md_distance;
|
||||
else fixed_md_nb = 1;
|
||||
|
||||
opal_output_verbose(1, ompi_mtl_base_output, "Fixed MDs :\n");
|
||||
|
||||
/* Bind the fixed MDs */
|
||||
for (i=0; i<fixed_md_nb; i++) {
|
||||
uint64_t offset = i * fixed_md_distance;
|
||||
/* if the most significant bit of the address space is set, set the extended address bits */
|
||||
if (offset & (MEMORY_MAX_SIZE >> 1)) offset += EXTENDED_ADDR;
|
||||
|
||||
opal_output_verbose(1, ompi_mtl_base_output, " %2d: [ %16lx - %16lx ]\n", i, offset, offset + fixed_md_distance - 2);
|
||||
|
||||
md.start = (char *) offset;
|
||||
md.length = fixed_md_distance - 1;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.fixed_md_h[i]);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
}
|
||||
else opal_output_verbose(1, ompi_mtl_base_output, "No fixed MD\n");
|
||||
|
||||
/* Handle long overflows */
|
||||
me.start = NULL;
|
||||
@ -392,8 +437,17 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
|
||||
}
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.md_h);
|
||||
if (ompi_mtl_portals4.fixed_md_distance) {
|
||||
int i;
|
||||
int fixed_md_nb;
|
||||
if (MEMORY_MAX_SIZE > ompi_mtl_portals4.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/ompi_mtl_portals4.fixed_md_distance;
|
||||
else fixed_md_nb = 1;
|
||||
|
||||
for (i=0; i<fixed_md_nb; i++) {
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.fixed_md_h[i], PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.fixed_md_h[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
|
||||
|
@ -48,6 +48,7 @@ struct ompi_mtl_portals4_isend_request_t {
|
||||
ompi_mtl_portals4_base_request_t super;
|
||||
void *buffer_ptr;
|
||||
ptl_handle_me_t me_h;
|
||||
ptl_handle_md_t md_h;
|
||||
uint64_t opcount;
|
||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||
struct ompi_mtl_portals4_pending_request_t *pending;
|
||||
|
@ -32,6 +32,12 @@
|
||||
#endif
|
||||
|
||||
|
||||
static int ompi_mtl_portals4_try_to_use_fixed_md(void *start,
|
||||
int length,
|
||||
ptl_handle_md_t *md_h,
|
||||
int64_t *offset,
|
||||
ompi_mtl_portals4_isend_request_t *ptl_request,
|
||||
bool unlink_me);
|
||||
static inline int
|
||||
ompi_mtl_portals4_callback(ptl_event_t *ev,
|
||||
ompi_mtl_portals4_base_request_t* ptl_base_request,
|
||||
@ -56,6 +62,8 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
||||
}
|
||||
pending->fc_notified = 1;
|
||||
|
||||
if (PTL_INVALID_HANDLE != ptl_request->md_h) PtlMDRelease(ptl_request->md_h);
|
||||
|
||||
if (!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE)) {
|
||||
ret = PtlMEUnlink(ptl_request->me_h);
|
||||
if (PTL_OK != ret) {
|
||||
@ -112,8 +120,19 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
||||
if (NULL != ptl_request->buffer_ptr) {
|
||||
free(ptl_request->buffer_ptr);
|
||||
}
|
||||
if (PTL_INVALID_HANDLE != ptl_request->md_h) {
|
||||
ret = PtlMDRelease(ptl_request->md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: send callback PtlMDRelease returned %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
retval = OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "send %lu completed",
|
||||
ptl_request->opcount));
|
||||
|
||||
*complete = true;
|
||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||
@ -180,6 +199,8 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
||||
ptl_match_bits_t match_bits;
|
||||
ptl_me_t me;
|
||||
ptl_hdr_data_t hdr_data;
|
||||
int64_t offset;
|
||||
ptl_handle_md_t md_h;
|
||||
|
||||
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
|
||||
MTL_PORTALS4_SHORT_MSG);
|
||||
@ -227,8 +248,13 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
||||
ptl_request->opcount, hdr_data, match_bits));
|
||||
}
|
||||
|
||||
ret = PtlPut(ompi_mtl_portals4.md_h,
|
||||
(ptl_size_t) start,
|
||||
ret = ompi_mtl_portals4_try_to_use_fixed_md(start, length, &md_h, &offset, ptl_request, MCA_PML_BASE_SEND_SYNCHRONOUS == mode ? true : false);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = PtlPut(md_h,
|
||||
(ptl_size_t) offset,
|
||||
length,
|
||||
PTL_ACK_REQ,
|
||||
endpoint->ptl_proc,
|
||||
@ -244,6 +270,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
||||
if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
|
||||
PtlMEUnlink(ptl_request->me_h);
|
||||
}
|
||||
if (PTL_INVALID_HANDLE != ptl_request->md_h) PtlMDRelease(ptl_request->md_h);
|
||||
return ompi_mtl_portals4_get_error(ret);
|
||||
}
|
||||
|
||||
@ -261,6 +288,8 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
|
||||
ptl_me_t me;
|
||||
ptl_hdr_data_t hdr_data;
|
||||
ptl_size_t put_length;
|
||||
ptl_handle_md_t md_h;
|
||||
int64_t offset;
|
||||
|
||||
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
|
||||
MTL_PORTALS4_LONG_MSG);
|
||||
@ -300,8 +329,14 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
|
||||
|
||||
put_length = (rndv == ompi_mtl_portals4.protocol) ?
|
||||
(ptl_size_t) ompi_mtl_portals4.eager_limit : (ptl_size_t) length;
|
||||
ret = PtlPut(ompi_mtl_portals4.md_h,
|
||||
(ptl_size_t) start,
|
||||
|
||||
ompi_mtl_portals4_try_to_use_fixed_md(start, put_length, &md_h, &offset, ptl_request, true);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = PtlPut(md_h,
|
||||
(ptl_size_t) offset,
|
||||
put_length,
|
||||
PTL_ACK_REQ,
|
||||
endpoint->ptl_proc,
|
||||
@ -315,6 +350,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
|
||||
"%s:%d: PtlPut failed: %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
PtlMEUnlink(ptl_request->me_h);
|
||||
if (PTL_INVALID_HANDLE != ptl_request->md_h) PtlMDRelease(ptl_request->md_h);
|
||||
return ompi_mtl_portals4_get_error(ret);
|
||||
}
|
||||
|
||||
@ -530,3 +566,58 @@ ompi_mtl_portals4_isend(struct mca_mtl_base_module_t* mtl,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
ompi_mtl_portals4_try_to_use_fixed_md(void *start,
|
||||
int length,
|
||||
ptl_handle_md_t *md_h,
|
||||
int64_t *offset,
|
||||
ompi_mtl_portals4_isend_request_t *ptl_request,
|
||||
bool unlink_me)
|
||||
{
|
||||
int ret;
|
||||
ptl_md_t md;
|
||||
int64_t addr;
|
||||
|
||||
addr = ((int64_t)start & ~EXTENDED_ADDR);
|
||||
|
||||
/* If fixed_md_distance is defined for MD and if the memory buffer is strictly contained in one of them, then use one */
|
||||
if ((0 != ompi_mtl_portals4.fixed_md_distance) &&
|
||||
(((addr % ompi_mtl_portals4.fixed_md_distance) + length) < ompi_mtl_portals4.fixed_md_distance)) {
|
||||
if (0 == length) OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, " Memory : [ %16lx - (len = 0) ] is in fixed MD number: %d\n\n",
|
||||
start, addr / ompi_mtl_portals4.fixed_md_distance));
|
||||
else OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, " Memory : [ %16lx - %16lx ] is in fixed MD number: %d\n\n",
|
||||
start, (long int)start + length - 1, addr / ompi_mtl_portals4.fixed_md_distance));
|
||||
/* Use the fixed MD */
|
||||
*md_h = ompi_mtl_portals4.fixed_md_h[addr / ompi_mtl_portals4.fixed_md_distance];
|
||||
*offset = (addr % ompi_mtl_portals4.fixed_md_distance);
|
||||
ptl_request->md_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
else {
|
||||
if (0 == ompi_mtl_portals4.fixed_md_distance)
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "\nWARNING: Memory cannot be connected to a fixed MD\n"));
|
||||
else OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "\nWARNING: Memory outside the scope of the fixed MD %x\n\n", addr / ompi_mtl_portals4.fixed_md_distance));
|
||||
|
||||
/* Bind the MD (and unbind it where necessary) */
|
||||
md.start = start;
|
||||
md.length = length;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ptl_request->md_h);
|
||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
if (unlink_me) {
|
||||
PtlMEUnlink(ptl_request->me_h);
|
||||
}
|
||||
return ompi_mtl_portals4_get_error(ret);
|
||||
}
|
||||
*md_h = ptl_request->md_h;
|
||||
*offset = 0;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user