1
1

Work around the case where a single MD cannot span all of memory (sigh)

This commit was SVN r28132.
Этот коммит содержится в:
Brian Barrett 2013-02-27 17:02:45 +00:00
родитель a951fde1ec
Коммит 1370d4569a
5 изменённых файлов: 183 добавлений и 24 удалений

Просмотреть файл

@ -155,7 +155,16 @@ ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl)
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
PtlMDRelease(ompi_mtl_portals4.md_h);
if (0 != ompi_mtl_portals4.fixed_md_h) {
int i, fixed_md_nb;
if (MEMORY_MAX_SIZE > ompi_mtl_portals4.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/ompi_mtl_portals4.fixed_md_distance;
else fixed_md_nb = 1;
for (i=0; i< fixed_md_nb; i++) PtlMDRelease(ompi_mtl_portals4.fixed_md_h[i]);
free(ompi_mtl_portals4.fixed_md_h);
}
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
PtlEQFree(ompi_mtl_portals4.send_eq_h);

Просмотреть файл

@ -32,6 +32,9 @@
BEGIN_C_DECLS
/* Size of the address range the fixed MDs are laid out over: 2^48 bytes.
 * The shift is performed in a 64-bit type so that it stays well defined
 * on platforms where long is only 32 bits wide. */
#define MEMORY_MAX_SIZE ((int64_t)1 << 48)
/* Upper 16 bits of a 64-bit address. They are added to the start address of
 * a fixed MD whose offset has bit 47 set, and masked off a send buffer
 * address before it is matched against the fixed MDs. */
#define EXTENDED_ADDR (0xffff000000000000)
struct mca_mtl_portals4_send_request_t;
struct mca_mtl_portals4_module_t {
@ -70,8 +73,9 @@ struct mca_mtl_portals4_module_t {
/** MD handle for sending ACKS */
ptl_handle_md_t zero_md_h;
/** MD handle covering all of memory for sending normal messages */
ptl_handle_md_t md_h;
/** Fixed MD handles covering all of memory for sending normal messages */
ptl_handle_md_t *fixed_md_h;
uint64_t fixed_md_distance;
/** long message receive overflow ME. Persistent ME, first in
overflow list on the recv_idx portal table. */

Просмотреть файл

@ -67,6 +67,8 @@ ompi_mtl_portals4_component_open(void)
{
int tmp;
char *tmp_proto;
int i;
uint64_t fixed_md_nb;
ompi_mtl_portals4.base.mtl_request_size =
sizeof(ompi_mtl_portals4_request_t) -
@ -117,6 +119,18 @@ ompi_mtl_portals4_component_open(void)
&tmp);
ompi_mtl_portals4.recv_queue_size = tmp;
mca_base_param_reg_int(&mca_mtl_portals4_component.mtl_version,
"md_size_bit_width",
"Number of bits used to specify the length of an MD to the portals4 library",
false,
false,
48,
&tmp);
if (48 < tmp) tmp = 48;
ompi_mtl_portals4.fixed_md_distance = (unsigned long int) 1<<tmp;
opal_output_verbose(1, ompi_mtl_base_output,
"fixed_md_distance=%16.16lx\n", ompi_mtl_portals4.fixed_md_distance);
mca_base_param_reg_string(&mca_mtl_portals4_component.mtl_version,
"long_protocol",
"Protocol to use for long messages. Valid entries are eager and rndv",
@ -169,7 +183,16 @@ ompi_mtl_portals4_component_open(void)
ompi_mtl_portals4.send_eq_h = PTL_INVALID_HANDLE;
ompi_mtl_portals4.recv_eq_h = PTL_INVALID_HANDLE;
ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE;
ompi_mtl_portals4.md_h = PTL_INVALID_HANDLE;
if (MEMORY_MAX_SIZE > ompi_mtl_portals4.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/ompi_mtl_portals4.fixed_md_distance;
else fixed_md_nb = 1;
if (fixed_md_nb > 32) ompi_mtl_portals4.fixed_md_distance = 0;
else {
/* Allocate the md_h table */
ompi_mtl_portals4.fixed_md_h = malloc(fixed_md_nb * sizeof(ptl_handle_md_t));
for (i=0; i<fixed_md_nb; i++) ompi_mtl_portals4.fixed_md_h[i] = PTL_INVALID_HANDLE;
}
ompi_mtl_portals4.long_overflow_me_h = PTL_INVALID_HANDLE;
ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL;
ompi_mtl_portals4.read_idx = (ptl_pt_index_t) ~0UL;
@ -304,22 +327,44 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
goto error;
}
/* bind md across all of memory */
md.start = 0;
md.length = SIZE_MAX;
md.options = 0;
md.eq_handle = ompi_mtl_portals4.send_eq_h;
md.ct_handle = PTL_CT_NONE;
/* bind fixed md across all of memory */
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ompi_mtl_portals4.md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
if (ompi_mtl_portals4.fixed_md_distance) {
int i;
uint64_t fixed_md_nb, fixed_md_distance;
fixed_md_distance = ompi_mtl_portals4.fixed_md_distance;
if (MEMORY_MAX_SIZE > fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/fixed_md_distance;
else fixed_md_nb = 1;
opal_output_verbose(1, ompi_mtl_base_output, "Fixed MDs :\n");
/* Bind the fixed MDs */
for (i=0; i<fixed_md_nb; i++) {
uint64_t offset = i * fixed_md_distance;
/* if the most significant bit of the address space is set, set the extended address bits */
if (offset & (MEMORY_MAX_SIZE >> 1)) offset += EXTENDED_ADDR;
opal_output_verbose(1, ompi_mtl_base_output, " %2d: [ %16lx - %16lx ]\n", i, offset, offset + fixed_md_distance - 2);
md.start = (char *) offset;
md.length = fixed_md_distance - 1;
md.options = 0;
md.eq_handle = ompi_mtl_portals4.send_eq_h;
md.ct_handle = PTL_CT_NONE;
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ompi_mtl_portals4.fixed_md_h[i]);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
}
}
else opal_output_verbose(1, ompi_mtl_base_output, "No fixed MD\n");
/* Handle long overflows */
me.start = NULL;
@ -392,8 +437,17 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
}
if (!PtlHandleIsEqual(ompi_mtl_portals4.md_h, PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.md_h);
if (ompi_mtl_portals4.fixed_md_distance) {
int i;
int fixed_md_nb;
if (MEMORY_MAX_SIZE > ompi_mtl_portals4.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/ompi_mtl_portals4.fixed_md_distance;
else fixed_md_nb = 1;
for (i=0; i<fixed_md_nb; i++) {
if (!PtlHandleIsEqual(ompi_mtl_portals4.fixed_md_h[i], PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.fixed_md_h[i]);
}
}
}
if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);

Просмотреть файл

@ -48,6 +48,7 @@ struct ompi_mtl_portals4_isend_request_t {
ompi_mtl_portals4_base_request_t super;
void *buffer_ptr;
ptl_handle_me_t me_h;
ptl_handle_md_t md_h;
uint64_t opcount;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
struct ompi_mtl_portals4_pending_request_t *pending;

Просмотреть файл

@ -32,6 +32,12 @@
#endif
/* Forward declaration; the function is defined later in this file.
 * It picks the MD handle and offset that the send paths pass to PtlPut:
 * the outputs are written through md_h and offset, and the return value
 * is compared against OMPI_SUCCESS by the callers. */
static int ompi_mtl_portals4_try_to_use_fixed_md(void *start,
int length,
ptl_handle_md_t *md_h,
int64_t *offset,
ompi_mtl_portals4_isend_request_t *ptl_request,
bool unlink_me);
static inline int
ompi_mtl_portals4_callback(ptl_event_t *ev,
ompi_mtl_portals4_base_request_t* ptl_base_request,
@ -56,6 +62,8 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
}
pending->fc_notified = 1;
if (PTL_INVALID_HANDLE != ptl_request->md_h) PtlMDRelease(ptl_request->md_h);
if (!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE)) {
ret = PtlMEUnlink(ptl_request->me_h);
if (PTL_OK != ret) {
@ -112,8 +120,19 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
if (NULL != ptl_request->buffer_ptr) {
free(ptl_request->buffer_ptr);
}
if (PTL_INVALID_HANDLE != ptl_request->md_h) {
ret = PtlMDRelease(ptl_request->md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: send callback PtlMDRelease returned %d",
__FILE__, __LINE__, ret);
retval = OMPI_ERROR;
}
}
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "send %lu completed",
ptl_request->opcount));
*complete = true;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
@ -180,6 +199,8 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
ptl_match_bits_t match_bits;
ptl_me_t me;
ptl_hdr_data_t hdr_data;
int64_t offset;
ptl_handle_md_t md_h;
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
MTL_PORTALS4_SHORT_MSG);
@ -227,8 +248,13 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
ptl_request->opcount, hdr_data, match_bits));
}
ret = PtlPut(ompi_mtl_portals4.md_h,
(ptl_size_t) start,
ret = ompi_mtl_portals4_try_to_use_fixed_md(start, length, &md_h, &offset, ptl_request, MCA_PML_BASE_SEND_SYNCHRONOUS == mode ? true : false);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
ret = PtlPut(md_h,
(ptl_size_t) offset,
length,
PTL_ACK_REQ,
endpoint->ptl_proc,
@ -244,6 +270,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
PtlMEUnlink(ptl_request->me_h);
}
if (PTL_INVALID_HANDLE != ptl_request->md_h) PtlMDRelease(ptl_request->md_h);
return ompi_mtl_portals4_get_error(ret);
}
@ -261,6 +288,8 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
ptl_me_t me;
ptl_hdr_data_t hdr_data;
ptl_size_t put_length;
ptl_handle_md_t md_h;
int64_t offset;
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
MTL_PORTALS4_LONG_MSG);
@ -300,8 +329,14 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
put_length = (rndv == ompi_mtl_portals4.protocol) ?
(ptl_size_t) ompi_mtl_portals4.eager_limit : (ptl_size_t) length;
ret = PtlPut(ompi_mtl_portals4.md_h,
(ptl_size_t) start,
ompi_mtl_portals4_try_to_use_fixed_md(start, put_length, &md_h, &offset, ptl_request, true);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
ret = PtlPut(md_h,
(ptl_size_t) offset,
put_length,
PTL_ACK_REQ,
endpoint->ptl_proc,
@ -315,6 +350,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
"%s:%d: PtlPut failed: %d",
__FILE__, __LINE__, ret);
PtlMEUnlink(ptl_request->me_h);
if (PTL_INVALID_HANDLE != ptl_request->md_h) PtlMDRelease(ptl_request->md_h);
return ompi_mtl_portals4_get_error(ret);
}
@ -530,3 +566,58 @@ ompi_mtl_portals4_isend(struct mca_mtl_base_module_t* mtl,
return ret;
}
/**
 * Choose the memory descriptor (MD) and offset to use for sending a buffer.
 *
 * When fixed MDs are enabled (fixed_md_distance is non-zero) and the buffer
 * lies entirely inside one of them, that fixed MD is returned together with
 * the buffer's offset inside it, and ptl_request->md_h is set to
 * PTL_INVALID_HANDLE. Otherwise a dedicated MD is bound over
 * [start, start + length), stored in ptl_request->md_h, and returned with
 * an offset of 0.
 *
 * @param start       address of the send buffer
 * @param length      number of bytes to send
 * @param md_h        (OUT) MD handle to use for the transfer
 * @param offset      (OUT) offset of the buffer within *md_h
 * @param ptl_request send request; its md_h field receives the per-request
 *                    MD handle, or PTL_INVALID_HANDLE when none was bound
 * @param unlink_me   when true, ptl_request->me_h is unlinked if binding
 *                    the per-request MD fails
 *
 * @return OMPI_SUCCESS on success; otherwise the PtlMDBind error code
 *         translated by ompi_mtl_portals4_get_error(). *md_h and *offset
 *         are left untouched on failure.
 */
static int
ompi_mtl_portals4_try_to_use_fixed_md(void *start,
                                      int length,
                                      ptl_handle_md_t *md_h,
                                      int64_t *offset,
                                      ompi_mtl_portals4_isend_request_t *ptl_request,
                                      bool unlink_me)
{
    int ret;
    ptl_md_t md;
    int64_t addr;
    const uint64_t distance = ompi_mtl_portals4.fixed_md_distance;

    /* Drop the extended address bits so the address can be mapped onto the
       fixed MD table. The conversion goes through uintptr_t rather than a
       signed integer type. */
    addr = (int64_t) ((uintptr_t) start & ~EXTENDED_ADDR);

    if (0 != distance) {
        /* Only divide once the distance is known to be non-zero */
        const uint64_t md_index = (uint64_t) addr / distance;
        const uint64_t md_offset = (uint64_t) addr % distance;

        /* If the memory buffer is strictly contained in one of the fixed
           MDs, then use it */
        if ((md_offset + length) < distance) {
            /* The arguments are cast to exactly the types the format
               strings expect; passing a pointer or a 64-bit value for
               %lx / %d is a varargs type mismatch. */
            if (0 == length) {
                OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, " Memory : [ %16lx - (len = 0) ] is in fixed MD number: %d\n\n",
                                     (unsigned long) (uintptr_t) start, (int) md_index));
            } else {
                OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, " Memory : [ %16lx - %16lx ] is in fixed MD number: %d\n\n",
                                     (unsigned long) (uintptr_t) start,
                                     (unsigned long) ((uintptr_t) start + (uintptr_t) length - 1),
                                     (int) md_index));
            }

            /* Use the fixed MD; no per-request MD is attached */
            *md_h = ompi_mtl_portals4.fixed_md_h[md_index];
            *offset = (int64_t) md_offset;
            ptl_request->md_h = PTL_INVALID_HANDLE;
            return OMPI_SUCCESS;
        }

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "\nWARNING: Memory outside the scope of the fixed MD %x\n\n",
                             (unsigned int) md_index));
    } else {
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "\nWARNING: Memory cannot be connected to a fixed MD\n"));
    }

    /* No fixed MD fits: bind an MD covering exactly this buffer and record
       it in the request */
    md.start = start;
    md.length = length;
    md.options = 0;
    md.eq_handle = ompi_mtl_portals4.send_eq_h;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                    &md,
                    &ptl_request->md_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        /* Do not leave a possibly unspecified handle in the request after
           a failed bind */
        ptl_request->md_h = PTL_INVALID_HANDLE;
        if (unlink_me) {
            PtlMEUnlink(ptl_request->me_h);
        }
        return ompi_mtl_portals4_get_error(ret);
    }

    *md_h = ptl_request->md_h;
    *offset = 0;

    return OMPI_SUCCESS;
}