1
1

mtl-portals4: add the option to use the Portals4 logical to physical table

This commit adds an MCA variable to select Portals4 logical
addressing, populates the logical-to-physical mapping table and
initializes the NI in this mode.
Этот коммит содержится в:
Todd Kordenbrock 2015-04-14 09:48:07 -05:00
родитель 5ea1f1c12b
Коммит 35e5ffd001
9 изменённых файлов: 393 добавлений и 270 удалений

Просмотреть файл

@ -56,6 +56,235 @@ mca_mtl_portals4_module_t ompi_mtl_portals4 = {
}
};
/**
 * Create the Portals4 resources used by the MTL on the network interface
 * stored in ompi_mtl_portals4.ni_h: the send and receive event queues, the
 * receive and read portal table entries, the zero-length MD used for acks,
 * the MD (or array of MDs) covering memory for sends, the long-message
 * overflow ME, the short unexpected receive blocks and, when
 * OMPI_MTL_PORTALS4_FLOW_CONTROL is enabled, the flow-control state.
 *
 * The long-overflow ME matches any peer; the wildcard is expressed as a rank
 * or as a nid/pid pair depending on ompi_mtl_portals4.use_logical.
 *
 * @return OMPI_SUCCESS when every resource was created; OMPI_ERROR otherwise,
 *         after releasing whatever had been created so far.
 *
 * NOTE(review): the cleanup path tests each handle against
 * PTL_INVALID_HANDLE, each portal table index against ~0 and send_md_hs
 * against NULL, so it assumes those fields were preset to these sentinels
 * before this function is called - confirm against the module initializer.
 */
static int
portals4_init_interface(void)
{
    /* Signed: holds both Portals4 return codes and (negative) OMPI error
       codes, and is printed with %d. */
    int ret;
    ptl_md_t md;
    ptl_me_t me;

    /* create event queues */
    ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
                     ompi_mtl_portals4.send_queue_size,
                     &ompi_mtl_portals4.send_eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlEQAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
                     ompi_mtl_portals4.recv_queue_size,
                     &ompi_mtl_portals4.recv_eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlEQAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* Create send and long message (read) portal table entries */
    ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
                     PTL_PT_ONLY_USE_ONCE |
                     PTL_PT_ONLY_TRUNCATE |
                     PTL_PT_FLOWCTRL,
                     ompi_mtl_portals4.recv_eq_h,
                     REQ_RECV_TABLE_ID,
                     &ompi_mtl_portals4.recv_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
                     PTL_PT_ONLY_USE_ONCE |
                     PTL_PT_ONLY_TRUNCATE,
                     ompi_mtl_portals4.send_eq_h,
                     REQ_READ_TABLE_ID,
                     &ompi_mtl_portals4.read_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* bind zero-length md for sending acks */
    md.start = NULL;
    md.length = 0;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;
    ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                    &md,
                    &ompi_mtl_portals4.zero_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* Bind MD/MDs across all memory.  We prefer (for obvious reasons)
       to have a single MD across all of memory */
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
    {
        int i;
        int num_mds = ompi_mtl_portals4_get_num_mds();
        ptl_size_t size = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) - 1;
        ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2;

        ompi_mtl_portals4.send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
        if (NULL == ompi_mtl_portals4.send_md_hs) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: Error allocating MD array",
                                __FILE__, __LINE__);
            ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
            goto error;
        }

        /* Mark every slot invalid first so the error path only releases
           the MDs that were actually bound. */
        for (i = 0 ; i < num_mds ; ++i) {
            ompi_mtl_portals4.send_md_hs[i] = PTL_INVALID_HANDLE;
        }

        /* Consecutive MDs start half an MD apart (offset_unit). */
        for (i = 0 ; i < num_mds ; ++i) {
            md.start = (char*) (offset_unit * i);
            /* NOTE(review): since i < num_mds, (i - 1 == num_mds) is never
               true here and every MD gets the full size; the half-size
               branch looks meant for the last MD (i == num_mds - 1).
               Left unchanged - confirm the intent before altering it. */
            md.length = (i - 1 == num_mds) ? size / 2 : size;
            md.options = 0;
            md.eq_handle = ompi_mtl_portals4.send_eq_h;
            md.ct_handle = PTL_CT_NONE;
            opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
                                "Binding md from %p of length %lx",
                                md.start, md.length);
            ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                            &md,
                            &ompi_mtl_portals4.send_md_hs[i]);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: PtlMDBind failed: %d\n",
                                    __FILE__, __LINE__, ret);
                goto error;
            }
        }
    }
#else
    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = 0;
    md.eq_handle = ompi_mtl_portals4.send_eq_h;
    md.ct_handle = PTL_CT_NONE;
    ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                    &md,
                    &ompi_mtl_portals4.send_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
#endif

    /* Handle long overflows */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = ompi_mtl_portals4.uid;
    me.options = PTL_ME_OP_PUT |
        PTL_ME_EVENT_LINK_DISABLE |
        PTL_ME_EVENT_COMM_DISABLE |
        PTL_ME_EVENT_UNLINK_DISABLE;
    /* Match any sender, using the wildcard form for the selected
       addressing mode. */
    if (ompi_mtl_portals4.use_logical) {
        me.match_id.rank = PTL_RANK_ANY;
    } else {
        me.match_id.phys.nid = PTL_NID_ANY;
        me.match_id.phys.pid = PTL_PID_ANY;
    }
    me.match_bits = MTL_PORTALS4_LONG_MSG;
    me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK |
        MTL_PORTALS4_SOURCE_MASK |
        MTL_PORTALS4_TAG_MASK;
    ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
                      ompi_mtl_portals4.recv_idx,
                      &me,
                      PTL_OVERFLOW_LIST,
                      NULL,
                      &ompi_mtl_portals4.long_overflow_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* attach short unex recv blocks */
    ret = ompi_mtl_portals4_recv_short_init();
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: short receive block initialization failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    ompi_mtl_portals4.opcount = 0;
#if OPAL_ENABLE_DEBUG
    ompi_mtl_portals4.recv_opcount = 0;
#endif

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    ret = ompi_mtl_portals4_flowctl_init();
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: ompi_mtl_portals4_flowctl_init failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
#endif

    /* Everything is set up: return here so the success path does not fall
       through into the cleanup code below and tear the interface down. */
    return OMPI_SUCCESS;

 error:
    /* Release whatever was created before the failure. */
    if (!PtlHandleIsEqual(ompi_mtl_portals4.long_overflow_me_h, PTL_INVALID_HANDLE)) {
        PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
    }
    if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
        PtlMDRelease(ompi_mtl_portals4.zero_md_h);
    }
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
    if (NULL != ompi_mtl_portals4.send_md_hs) {
        int i;
        int num_mds = ompi_mtl_portals4_get_num_mds();

        for (i = 0 ; i < num_mds ; ++i) {
            if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_hs[i], PTL_INVALID_HANDLE)) {
                PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
            }
        }
        free(ompi_mtl_portals4.send_md_hs);
        /* Do not leave a dangling pointer behind in the module state. */
        ompi_mtl_portals4.send_md_hs = NULL;
    }
#else
    if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) {
        PtlMDRelease(ompi_mtl_portals4.send_md_h);
    }
#endif
    if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
        PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
    }
    if (ompi_mtl_portals4.recv_idx != (ptl_pt_index_t) ~0UL) {
        PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
    }
    if (!PtlHandleIsEqual(ompi_mtl_portals4.send_eq_h, PTL_INVALID_HANDLE)) {
        PtlEQFree(ompi_mtl_portals4.send_eq_h);
    }
    if (!PtlHandleIsEqual(ompi_mtl_portals4.recv_eq_h, PTL_INVALID_HANDLE)) {
        PtlEQFree(ompi_mtl_portals4.recv_eq_h);
    }

    return OMPI_ERROR;
}
int
ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
@ -65,6 +294,17 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
int ret, me;
size_t i;
bool new_found = false;
ptl_process_t *maptable;
if (ompi_mtl_portals4.use_logical) {
maptable = malloc(sizeof(ptl_process_t) * nprocs);
if (NULL == maptable) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: malloc failed\n",
__FILE__, __LINE__);
return OMPI_ERR_OUT_OF_RESOURCE;
}
}
/* Get the list of ptl_process_id_t from the runtime and copy into structure */
for (i = 0 ; i < nprocs ; ++i) {
@ -108,14 +348,34 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
__FILE__, __LINE__, ret);
return OMPI_ERR_OUT_OF_RESOURCE;
}
*peer_id = *modex_id;
if (ompi_mtl_portals4.use_logical) {
peer_id->rank = i;
maptable[i].phys.pid = modex_id->phys.pid;
maptable[i].phys.nid = modex_id->phys.nid;
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
"logical: global rank=%d pid=%d nid=%d\n",
(int)i, maptable[i].phys.pid, maptable[i].phys.nid);
} else {
*peer_id = *modex_id;
}
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = peer_id;
new_found = true;
} else {
ptl_process_t *proc = (ptl_process_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
if (proc->phys.nid != modex_id->phys.nid ||
proc->phys.pid != modex_id->phys.pid) {
if (ompi_mtl_portals4.use_logical) {
if ((size_t)proc->rank != i) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: existing peer and rank don't match\n",
__FILE__, __LINE__);
return OMPI_ERROR;
}
maptable[i].phys.pid = modex_id->phys.pid;
maptable[i].phys.nid = modex_id->phys.nid;
}
else if (proc->phys.nid != modex_id->phys.nid ||
proc->phys.pid != modex_id->phys.pid) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: existing peer and modex peer don't match\n",
__FILE__, __LINE__);
@ -124,6 +384,30 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
}
}
if (ompi_mtl_portals4.use_logical) {
ret = PtlSetMap(ompi_mtl_portals4.ni_h, nprocs, maptable);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: logical mapping failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"logical mapping OK\n");
free(maptable);
}
portals4_init_interface();
/* activate progress callback */
ret = opal_progress_register(ompi_mtl_portals4_progress);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: opal_progress_register failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
if (new_found) {
ret = ompi_mtl_portals4_flowctl_add_procs(me, nprocs, procs);

Просмотреть файл

@ -38,6 +38,9 @@ struct mca_mtl_portals4_send_request_t;
struct mca_mtl_portals4_module_t {
mca_mtl_base_module_t base;
/* Use the logical to physical table to accelerate portals4 addressing: 1 (true) : 0 (false) */
int use_logical;
/** Eager limit; messages greater than this use a rendezvous protocol */
unsigned long long eager_limit;
/** Size of short message blocks */

Просмотреть файл

@ -80,6 +80,18 @@ ompi_mtl_portals4_component_register(void)
mca_base_var_enum_t *new_enum;
int ret;
ompi_mtl_portals4.use_logical = 0;
(void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version,
"use_logical",
"Use the logical to physical table to accelerate portals4 adressing: 1 (true) : 0 (false)",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_portals4.use_logical);
param_priority = 10;
(void) mca_base_component_var_register (&mca_mtl_portals4_component.mtl_version,
"priority", "Priority of the Portals4 MTL component",
@ -253,8 +265,6 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
{
int ret;
ptl_process_t id;
ptl_md_t md;
ptl_me_t me;
/* Initialize Portals and create a physical, matching interface */
ret = PtlInit();
@ -265,7 +275,14 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
return NULL;
}
ret = PtlNIInit(PTL_IFACE_DEFAULT,
if (ompi_mtl_portals4.use_logical)
ret = PtlNIInit(PTL_IFACE_DEFAULT,
PTL_NI_LOGICAL | PTL_NI_MATCHING,
PTL_PID_ANY,
NULL,
NULL,
&ompi_mtl_portals4.ni_h);
else ret = PtlNIInit(PTL_IFACE_DEFAULT,
PTL_NI_PHYSICAL | PTL_NI_MATCHING,
PTL_PID_ANY,
NULL,
@ -287,10 +304,10 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
}
/* Publish our NID/PID in the modex */
ret = PtlGetId(ompi_mtl_portals4.ni_h, &id);
ret = PtlGetPhysId(ompi_mtl_portals4.ni_h, &id);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlGetId failed: %d\n",
"%s:%d: PtlGetPhysId failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
@ -309,233 +326,9 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
"My nid,pid = %x,%x",
id.phys.nid, id.phys.pid));
/* create event queues */
ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
ompi_mtl_portals4.send_queue_size,
&ompi_mtl_portals4.send_eq_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlEQAlloc failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
ompi_mtl_portals4.recv_queue_size,
&ompi_mtl_portals4.recv_eq_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlEQAlloc failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
/* Create send and long message (read) portal table entries */
ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
PTL_PT_ONLY_USE_ONCE |
PTL_PT_ONLY_TRUNCATE |
PTL_PT_FLOWCTRL,
ompi_mtl_portals4.recv_eq_h,
REQ_RECV_TABLE_ID,
&ompi_mtl_portals4.recv_idx);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlPTAlloc failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
PTL_PT_ONLY_USE_ONCE |
PTL_PT_ONLY_TRUNCATE,
ompi_mtl_portals4.send_eq_h,
REQ_READ_TABLE_ID,
&ompi_mtl_portals4.read_idx);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlPTAlloc failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
/* bind zero-length md for sending acks */
md.start = NULL;
md.length = 0;
md.options = 0;
md.eq_handle = PTL_EQ_NONE;
md.ct_handle = PTL_CT_NONE;
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ompi_mtl_portals4.zero_md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
to have a single MD across all of memory */
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
{
int i;
int num_mds = ompi_mtl_portals4_get_num_mds();
ptl_size_t size = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) - 1;
ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2;
ompi_mtl_portals4.send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
if (NULL == ompi_mtl_portals4.send_md_hs) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: Error allocating MD array",
__FILE__, __LINE__);
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
goto error;
}
for (i = 0 ; i < num_mds ; ++i) {
ompi_mtl_portals4.send_md_hs[i] = PTL_INVALID_HANDLE;
}
for (i = 0 ; i < num_mds ; ++i) {
md.start = (char*) (offset_unit * i);
md.length = (i - 1 == num_mds) ? size / 2 : size;
md.options = 0;
md.eq_handle = ompi_mtl_portals4.send_eq_h;
md.ct_handle = PTL_CT_NONE;
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
"Binding md from %p of length %lx",
md.start, md.length);
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ompi_mtl_portals4.send_md_hs[i]);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
}
}
#else
md.start = 0;
md.length = PTL_SIZE_MAX;
md.options = 0;
md.eq_handle = ompi_mtl_portals4.send_eq_h;
md.ct_handle = PTL_CT_NONE;
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ompi_mtl_portals4.send_md_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
#endif
/* Handle long overflows */
me.start = NULL;
me.length = 0;
me.ct_handle = PTL_CT_NONE;
me.min_free = 0;
me.uid = ompi_mtl_portals4.uid;
me.options = PTL_ME_OP_PUT |
PTL_ME_EVENT_LINK_DISABLE |
PTL_ME_EVENT_COMM_DISABLE |
PTL_ME_EVENT_UNLINK_DISABLE;
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
me.match_bits = MTL_PORTALS4_LONG_MSG;
me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK |
MTL_PORTALS4_SOURCE_MASK |
MTL_PORTALS4_TAG_MASK;
ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
ompi_mtl_portals4.recv_idx,
&me,
PTL_OVERFLOW_LIST,
NULL,
&ompi_mtl_portals4.long_overflow_me_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlMEAppend failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
/* attach short unex recv blocks */
ret = ompi_mtl_portals4_recv_short_init();
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: short receive block initialization failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
ompi_mtl_portals4.opcount = 0;
#if OPAL_ENABLE_DEBUG
ompi_mtl_portals4.recv_opcount = 0;
#endif
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
ret = ompi_mtl_portals4_flowctl_init();
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: ompi_mtl_portals4_flowctl_init failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
#endif
/* activate progress callback */
ret = opal_progress_register(ompi_mtl_portals4_progress);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: opal_progress_register failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
return &ompi_mtl_portals4.base;
error:
if (!PtlHandleIsEqual(ompi_mtl_portals4.long_overflow_me_h, PTL_INVALID_HANDLE)) {
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
}
if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
}
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
if (NULL != ompi_mtl_portals4.send_md_hs) {
int i;
int num_mds = ompi_mtl_portals4_get_num_mds();
for (i = 0 ; i < num_mds ; ++i) {
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_hs[i], PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
}
}
free(ompi_mtl_portals4.send_md_hs);
}
#else
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) {
PtlMDRelease(ompi_mtl_portals4.send_md_h);
}
#endif
if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
}
if (ompi_mtl_portals4.recv_idx != (ptl_pt_index_t) ~0UL) {
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
}
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_eq_h, PTL_INVALID_HANDLE)) {
PtlEQFree(ompi_mtl_portals4.send_eq_h);
}
if (!PtlHandleIsEqual(ompi_mtl_portals4.recv_eq_h, PTL_INVALID_HANDLE)) {
PtlEQFree(ompi_mtl_portals4.recv_eq_h);
}
return NULL;
}

Просмотреть файл

@ -85,8 +85,12 @@ ompi_mtl_portals4_flowctl_init(void)
me.length = 0;
me.min_free = 0;
me.uid = ompi_mtl_portals4.uid;
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
if (ompi_mtl_portals4.use_logical) {
me.match_id.rank = PTL_RANK_ANY;
} else {
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
}
me.ignore_bits = 0;
me.options = PTL_ME_OP_PUT |
@ -245,24 +249,35 @@ ompi_mtl_portals4_flowctl_add_procs(size_t me,
ompi_mtl_portals4.flowctl.epoch_counter = 0;
ompi_mtl_portals4.flowctl.num_procs = npeers;
ompi_mtl_portals4.flowctl.root =
*((ptl_process_t*) procs[0]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
if (0 == me) {
ompi_mtl_portals4.flowctl.i_am_root = true;
} else {
ompi_mtl_portals4.flowctl.i_am_root = false;
ompi_mtl_portals4.flowctl.parent =
*((ptl_process_t*) procs[(me - 1) / 2]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
if (0 == me) ompi_mtl_portals4.flowctl.i_am_root = true;
else ompi_mtl_portals4.flowctl.i_am_root = false;
if (ompi_mtl_portals4.use_logical) {
ompi_mtl_portals4.flowctl.root.rank = 0;
if (false == ompi_mtl_portals4.flowctl.i_am_root) {
ompi_mtl_portals4.flowctl.parent.rank = (me - 1) / 2;
}
ompi_mtl_portals4.flowctl.me.rank = me;
}
else {
ompi_mtl_portals4.flowctl.root =
*((ptl_process_t*) procs[0]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
if (false == ompi_mtl_portals4.flowctl.i_am_root) {
ompi_mtl_portals4.flowctl.parent =
*((ptl_process_t*) procs[(me - 1) / 2]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
}
ompi_mtl_portals4.flowctl.me =
*((ptl_process_t*) procs[me]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
}
ompi_mtl_portals4.flowctl.me =
*((ptl_process_t*) procs[me]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
for (i = 0 ; i < 2 ; ++i) {
size_t tmp = (2 * me) + i + 1;
if (tmp < npeers) {
ompi_mtl_portals4.flowctl.num_children++;
ompi_mtl_portals4.flowctl.children[i] =
*((ptl_process_t*) procs[tmp]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
if (ompi_mtl_portals4.use_logical)
ompi_mtl_portals4.flowctl.children[i].rank = tmp;
else ompi_mtl_portals4.flowctl.children[i] =
*((ptl_process_t*) procs[tmp]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
}
}

Просмотреть файл

@ -26,7 +26,7 @@ struct ompi_mtl_portals4_pending_request_t {
int tag;
int my_rank;
int fc_notified;
ptl_process_t *proc;
ptl_process_t ptl_proc;
struct ompi_mtl_portals4_isend_request_t *ptl_request;
};
typedef struct ompi_mtl_portals4_pending_request_t ompi_mtl_portals4_pending_request_t;

Просмотреть файл

@ -68,8 +68,14 @@ ompi_mtl_portals4_iprobe(struct mca_mtl_base_module_t* mtl,
int ret;
if (MPI_ANY_SOURCE == src) {
remote_proc.phys.nid = PTL_NID_ANY;
remote_proc.phys.pid = PTL_PID_ANY;
if (ompi_mtl_portals4.use_logical) {
remote_proc.rank = PTL_RANK_ANY;
} else {
remote_proc.phys.nid = PTL_NID_ANY;
remote_proc.phys.pid = PTL_PID_ANY;
}
} else if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
remote_proc.rank = src;
} else {
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
@ -140,8 +146,14 @@ ompi_mtl_portals4_improbe(struct mca_mtl_base_module_t *mtl,
__FILE__, __LINE__, comm->c_contextid, src, tag);
if (MPI_ANY_SOURCE == src) {
remote_proc.phys.nid = PTL_NID_ANY;
remote_proc.phys.pid = PTL_PID_ANY;
if (ompi_mtl_portals4.use_logical) {
remote_proc.rank = PTL_RANK_ANY;
} else {
remote_proc.phys.nid = PTL_NID_ANY;
remote_proc.phys.pid = PTL_PID_ANY;
}
} else if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
remote_proc.rank = src;
} else {
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);

Просмотреть файл

@ -357,8 +357,14 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
ptl_me_t me;
if (MPI_ANY_SOURCE == src) {
remote_proc.phys.nid = PTL_NID_ANY;
remote_proc.phys.pid = PTL_PID_ANY;
if (ompi_mtl_portals4.use_logical) {
remote_proc.rank = PTL_RANK_ANY;
} else {
remote_proc.phys.nid = PTL_NID_ANY;
remote_proc.phys.pid = PTL_PID_ANY;
}
} else if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
remote_proc.rank = src;
} else {
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);

Просмотреть файл

@ -123,8 +123,12 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
PTL_ME_EVENT_LINK_DISABLE |
PTL_ME_MANAGE_LOCAL |
PTL_ME_MAY_ALIGN;
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
if (ompi_mtl_portals4.use_logical) {
me.match_id.rank = PTL_RANK_ANY;
} else {
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
}
me.match_bits = match_bits;
me.ignore_bits = ignore_bits;

Просмотреть файл

@ -177,7 +177,7 @@ static inline int
ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
void *start, int length, int contextid, int tag,
int localrank,
ptl_process_t *proc,
ptl_process_t ptl_proc,
ompi_mtl_portals4_isend_request_t *ptl_request)
{
int ret;
@ -204,7 +204,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
PTL_ME_USE_ONCE |
PTL_ME_EVENT_LINK_DISABLE |
PTL_ME_EVENT_UNLINK_DISABLE;
me.match_id = *proc;
me.match_id = ptl_proc;
me.match_bits = hdr_data;
me.ignore_bits = 0;
@ -244,7 +244,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
(ptl_size_t) ((char*) start - (char*) base),
length,
PTL_ACK_REQ,
*proc,
ptl_proc,
ompi_mtl_portals4.recv_idx,
match_bits,
0,
@ -266,7 +266,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
static inline int
ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
int localrank,
ptl_process_t *proc,
ptl_process_t ptl_proc,
ompi_mtl_portals4_isend_request_t *ptl_request)
{
int ret;
@ -292,7 +292,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
PTL_ME_USE_ONCE |
PTL_ME_EVENT_LINK_DISABLE |
PTL_ME_EVENT_UNLINK_DISABLE;
me.match_id = *proc;
me.match_id = ptl_proc;
me.match_bits = hdr_data;
me.ignore_bits = 0;
@ -322,7 +322,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
(ptl_size_t) ((char*) start - (char*) base),
put_length,
PTL_ACK_REQ,
*proc,
ptl_proc,
ompi_mtl_portals4.recv_idx,
match_bits,
0,
@ -370,7 +370,7 @@ ompi_mtl_portals4_pending_list_progress()
pending->contextid,
pending->tag,
pending->my_rank,
pending->proc,
pending->ptl_proc,
pending->ptl_request);
} else {
ret = ompi_mtl_portals4_long_isend(pending->start,
@ -378,7 +378,7 @@ ompi_mtl_portals4_pending_list_progress()
pending->contextid,
pending->tag,
pending->my_rank,
pending->proc,
pending->ptl_proc,
pending->ptl_request);
}
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
@ -404,13 +404,19 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
void *start;
size_t length;
bool free_after;
ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest);
ptl_process_t *proc = (ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
ptl_process_t ptl_proc;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
opal_free_list_item_t *item;
ompi_mtl_portals4_pending_request_t *pending;
#endif
if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
ptl_proc.rank = dest;
} else {
ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest);
ptl_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
}
ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
if (OMPI_SUCCESS != ret) return ret;
@ -421,8 +427,8 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Send %lu to %x,%x of length %d\n",
ptl_request->opcount,
proc->phys.nid,
proc->phys.pid,
ptl_proc.phys.nid,
ptl_proc.phys.pid,
(int)length));
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
@ -438,7 +444,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
pending->tag = tag;
pending->my_rank = comm->c_my_rank;
pending->fc_notified = 0;
pending->proc = proc;
pending->ptl_proc = ptl_proc;
pending->ptl_request = ptl_request;
if (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
@ -470,7 +476,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
comm->c_contextid,
tag,
comm->c_my_rank,
proc,
ptl_proc,
ptl_request);
} else {
ret = ompi_mtl_portals4_long_isend(start,
@ -478,7 +484,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
comm->c_contextid,
tag,
comm->c_my_rank,
proc,
ptl_proc,
ptl_request);
}