mtl-portals4: add the option to use the Portals4 logical to physical table
This commit adds an MCA variable to select Portals4 logical addressing, populates the logical-to-physical mapping table and initializes the NI in this mode.
Этот коммит содержится в:
родитель
5ea1f1c12b
Коммит
35e5ffd001
@ -56,6 +56,235 @@ mca_mtl_portals4_module_t ompi_mtl_portals4 = {
|
||||
}
|
||||
};
|
||||
|
||||
static int
|
||||
portals4_init_interface(void)
|
||||
{
|
||||
unsigned int ret;
|
||||
ptl_md_t md;
|
||||
ptl_me_t me;
|
||||
|
||||
/* create event queues */
|
||||
ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
|
||||
ompi_mtl_portals4.send_queue_size,
|
||||
&ompi_mtl_portals4.send_eq_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlEQAlloc failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
|
||||
ompi_mtl_portals4.recv_queue_size,
|
||||
&ompi_mtl_portals4.recv_eq_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlEQAlloc failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Create send and long message (read) portal table entries */
|
||||
ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
|
||||
PTL_PT_ONLY_USE_ONCE |
|
||||
PTL_PT_ONLY_TRUNCATE |
|
||||
PTL_PT_FLOWCTRL,
|
||||
ompi_mtl_portals4.recv_eq_h,
|
||||
REQ_RECV_TABLE_ID,
|
||||
&ompi_mtl_portals4.recv_idx);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlPTAlloc failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
|
||||
PTL_PT_ONLY_USE_ONCE |
|
||||
PTL_PT_ONLY_TRUNCATE,
|
||||
ompi_mtl_portals4.send_eq_h,
|
||||
REQ_READ_TABLE_ID,
|
||||
&ompi_mtl_portals4.read_idx);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlPTAlloc failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* bind zero-length md for sending acks */
|
||||
md.start = NULL;
|
||||
md.length = 0;
|
||||
md.options = 0;
|
||||
md.eq_handle = PTL_EQ_NONE;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.zero_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
|
||||
to have a single MD across all of memory */
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
{
|
||||
int i;
|
||||
int num_mds = ompi_mtl_portals4_get_num_mds();
|
||||
ptl_size_t size = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) - 1;
|
||||
ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2;
|
||||
|
||||
ompi_mtl_portals4.send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
|
||||
if (NULL == ompi_mtl_portals4.send_md_hs) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: Error allocating MD array",
|
||||
__FILE__, __LINE__);
|
||||
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
goto error;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
ompi_mtl_portals4.send_md_hs[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
md.start = (char*) (offset_unit * i);
|
||||
md.length = (i - 1 == num_mds) ? size / 2 : size;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
|
||||
"Binding md from %p of length %lx",
|
||||
md.start, md.length);
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.send_md_hs[i]);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
md.start = 0;
|
||||
md.length = PTL_SIZE_MAX;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.send_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Handle long overflows */
|
||||
me.start = NULL;
|
||||
me.length = 0;
|
||||
me.ct_handle = PTL_CT_NONE;
|
||||
me.min_free = 0;
|
||||
me.uid = ompi_mtl_portals4.uid;
|
||||
me.options = PTL_ME_OP_PUT |
|
||||
PTL_ME_EVENT_LINK_DISABLE |
|
||||
PTL_ME_EVENT_COMM_DISABLE |
|
||||
PTL_ME_EVENT_UNLINK_DISABLE;
|
||||
if (ompi_mtl_portals4.use_logical) {
|
||||
me.match_id.rank = PTL_RANK_ANY;
|
||||
} else {
|
||||
me.match_id.phys.nid = PTL_NID_ANY;
|
||||
me.match_id.phys.pid = PTL_PID_ANY;
|
||||
}
|
||||
me.match_bits = MTL_PORTALS4_LONG_MSG;
|
||||
me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK |
|
||||
MTL_PORTALS4_SOURCE_MASK |
|
||||
MTL_PORTALS4_TAG_MASK;
|
||||
ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
|
||||
ompi_mtl_portals4.recv_idx,
|
||||
&me,
|
||||
PTL_OVERFLOW_LIST,
|
||||
NULL,
|
||||
&ompi_mtl_portals4.long_overflow_me_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMEAppend failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* attach short unex recv blocks */
|
||||
ret = ompi_mtl_portals4_recv_short_init();
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: short receive block initialization failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
ompi_mtl_portals4.opcount = 0;
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
ompi_mtl_portals4.recv_opcount = 0;
|
||||
#endif
|
||||
|
||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||
ret = ompi_mtl_portals4_flowctl_init();
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: ompi_mtl_portals4_flowctl_init failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
error:
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.long_overflow_me_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
|
||||
}
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
|
||||
}
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
if (NULL != ompi_mtl_portals4.send_md_hs) {
|
||||
int i;
|
||||
int num_mds = ompi_mtl_portals4_get_num_mds();
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_hs[i], PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
free(ompi_mtl_portals4.send_md_hs);
|
||||
}
|
||||
#else
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.send_md_h);
|
||||
}
|
||||
#endif
|
||||
if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
|
||||
}
|
||||
if (ompi_mtl_portals4.recv_idx != (ptl_pt_index_t) ~0UL) {
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
|
||||
}
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_eq_h, PTL_INVALID_HANDLE)) {
|
||||
PtlEQFree(ompi_mtl_portals4.send_eq_h);
|
||||
}
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.recv_eq_h, PTL_INVALID_HANDLE)) {
|
||||
PtlEQFree(ompi_mtl_portals4.recv_eq_h);
|
||||
}
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
int
|
||||
ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
|
||||
@ -65,6 +294,17 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
|
||||
int ret, me;
|
||||
size_t i;
|
||||
bool new_found = false;
|
||||
ptl_process_t *maptable;
|
||||
|
||||
if (ompi_mtl_portals4.use_logical) {
|
||||
maptable = malloc(sizeof(ptl_process_t) * nprocs);
|
||||
if (NULL == maptable) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: malloc failed\n",
|
||||
__FILE__, __LINE__);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the list of ptl_process_id_t from the runtime and copy into structure */
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
@ -108,14 +348,34 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
|
||||
__FILE__, __LINE__, ret);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
*peer_id = *modex_id;
|
||||
if (ompi_mtl_portals4.use_logical) {
|
||||
peer_id->rank = i;
|
||||
maptable[i].phys.pid = modex_id->phys.pid;
|
||||
maptable[i].phys.nid = modex_id->phys.nid;
|
||||
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
|
||||
"logical: global rank=%d pid=%d nid=%d\n",
|
||||
(int)i, maptable[i].phys.pid, maptable[i].phys.nid);
|
||||
} else {
|
||||
*peer_id = *modex_id;
|
||||
}
|
||||
|
||||
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = peer_id;
|
||||
|
||||
new_found = true;
|
||||
} else {
|
||||
ptl_process_t *proc = (ptl_process_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
|
||||
if (proc->phys.nid != modex_id->phys.nid ||
|
||||
proc->phys.pid != modex_id->phys.pid) {
|
||||
if (ompi_mtl_portals4.use_logical) {
|
||||
if ((size_t)proc->rank != i) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: existing peer and rank don't match\n",
|
||||
__FILE__, __LINE__);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
maptable[i].phys.pid = modex_id->phys.pid;
|
||||
maptable[i].phys.nid = modex_id->phys.nid;
|
||||
}
|
||||
else if (proc->phys.nid != modex_id->phys.nid ||
|
||||
proc->phys.pid != modex_id->phys.pid) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: existing peer and modex peer don't match\n",
|
||||
__FILE__, __LINE__);
|
||||
@ -124,6 +384,30 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
|
||||
}
|
||||
}
|
||||
|
||||
if (ompi_mtl_portals4.use_logical) {
|
||||
ret = PtlSetMap(ompi_mtl_portals4.ni_h, nprocs, maptable);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: logical mapping failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
return ret;
|
||||
}
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"logical mapping OK\n");
|
||||
free(maptable);
|
||||
}
|
||||
|
||||
portals4_init_interface();
|
||||
|
||||
/* activate progress callback */
|
||||
ret = opal_progress_register(ompi_mtl_portals4_progress);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: opal_progress_register failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||
if (new_found) {
|
||||
ret = ompi_mtl_portals4_flowctl_add_procs(me, nprocs, procs);
|
||||
|
@ -38,6 +38,9 @@ struct mca_mtl_portals4_send_request_t;
|
||||
struct mca_mtl_portals4_module_t {
|
||||
mca_mtl_base_module_t base;
|
||||
|
||||
/* Use the logical to physical table to accelerate portals4 addressing: 1 (true) : 0 (false) */
|
||||
int use_logical;
|
||||
|
||||
/** Eager limit; messages greater than this use a rendezvous protocol */
|
||||
unsigned long long eager_limit;
|
||||
/** Size of short message blocks */
|
||||
|
@ -80,6 +80,18 @@ ompi_mtl_portals4_component_register(void)
|
||||
mca_base_var_enum_t *new_enum;
|
||||
int ret;
|
||||
|
||||
ompi_mtl_portals4.use_logical = 0;
|
||||
(void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version,
|
||||
"use_logical",
|
||||
"Use the logical to physical table to accelerate portals4 adressing: 1 (true) : 0 (false)",
|
||||
MCA_BASE_VAR_TYPE_INT,
|
||||
NULL,
|
||||
0,
|
||||
0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_mtl_portals4.use_logical);
|
||||
|
||||
param_priority = 10;
|
||||
(void) mca_base_component_var_register (&mca_mtl_portals4_component.mtl_version,
|
||||
"priority", "Priority of the Portals4 MTL component",
|
||||
@ -253,8 +265,6 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
|
||||
{
|
||||
int ret;
|
||||
ptl_process_t id;
|
||||
ptl_md_t md;
|
||||
ptl_me_t me;
|
||||
|
||||
/* Initialize Portals and create a physical, matching interface */
|
||||
ret = PtlInit();
|
||||
@ -265,7 +275,14 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ret = PtlNIInit(PTL_IFACE_DEFAULT,
|
||||
if (ompi_mtl_portals4.use_logical)
|
||||
ret = PtlNIInit(PTL_IFACE_DEFAULT,
|
||||
PTL_NI_LOGICAL | PTL_NI_MATCHING,
|
||||
PTL_PID_ANY,
|
||||
NULL,
|
||||
NULL,
|
||||
&ompi_mtl_portals4.ni_h);
|
||||
else ret = PtlNIInit(PTL_IFACE_DEFAULT,
|
||||
PTL_NI_PHYSICAL | PTL_NI_MATCHING,
|
||||
PTL_PID_ANY,
|
||||
NULL,
|
||||
@ -287,10 +304,10 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
|
||||
}
|
||||
|
||||
/* Publish our NID/PID in the modex */
|
||||
ret = PtlGetId(ompi_mtl_portals4.ni_h, &id);
|
||||
ret = PtlGetPhysId(ompi_mtl_portals4.ni_h, &id);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlGetId failed: %d\n",
|
||||
"%s:%d: PtlGetPhysId failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
@ -309,233 +326,9 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
|
||||
"My nid,pid = %x,%x",
|
||||
id.phys.nid, id.phys.pid));
|
||||
|
||||
/* create event queues */
|
||||
ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
|
||||
ompi_mtl_portals4.send_queue_size,
|
||||
&ompi_mtl_portals4.send_eq_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlEQAlloc failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
|
||||
ompi_mtl_portals4.recv_queue_size,
|
||||
&ompi_mtl_portals4.recv_eq_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlEQAlloc failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Create send and long message (read) portal table entries */
|
||||
ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
|
||||
PTL_PT_ONLY_USE_ONCE |
|
||||
PTL_PT_ONLY_TRUNCATE |
|
||||
PTL_PT_FLOWCTRL,
|
||||
ompi_mtl_portals4.recv_eq_h,
|
||||
REQ_RECV_TABLE_ID,
|
||||
&ompi_mtl_portals4.recv_idx);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlPTAlloc failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
|
||||
PTL_PT_ONLY_USE_ONCE |
|
||||
PTL_PT_ONLY_TRUNCATE,
|
||||
ompi_mtl_portals4.send_eq_h,
|
||||
REQ_READ_TABLE_ID,
|
||||
&ompi_mtl_portals4.read_idx);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlPTAlloc failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* bind zero-length md for sending acks */
|
||||
md.start = NULL;
|
||||
md.length = 0;
|
||||
md.options = 0;
|
||||
md.eq_handle = PTL_EQ_NONE;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.zero_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Bind MD/MDs across all memory. We prefer (for obvious reasons)
|
||||
to have a single MD across all of memory */
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
{
|
||||
int i;
|
||||
int num_mds = ompi_mtl_portals4_get_num_mds();
|
||||
ptl_size_t size = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) - 1;
|
||||
ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2;
|
||||
|
||||
ompi_mtl_portals4.send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
|
||||
if (NULL == ompi_mtl_portals4.send_md_hs) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: Error allocating MD array",
|
||||
__FILE__, __LINE__);
|
||||
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
goto error;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
ompi_mtl_portals4.send_md_hs[i] = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
md.start = (char*) (offset_unit * i);
|
||||
md.length = (i - 1 == num_mds) ? size / 2 : size;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
|
||||
"Binding md from %p of length %lx",
|
||||
md.start, md.length);
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.send_md_hs[i]);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
md.start = 0;
|
||||
md.length = PTL_SIZE_MAX;
|
||||
md.options = 0;
|
||||
md.eq_handle = ompi_mtl_portals4.send_eq_h;
|
||||
md.ct_handle = PTL_CT_NONE;
|
||||
|
||||
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
|
||||
&md,
|
||||
&ompi_mtl_portals4.send_md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMDBind failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Handle long overflows */
|
||||
me.start = NULL;
|
||||
me.length = 0;
|
||||
me.ct_handle = PTL_CT_NONE;
|
||||
me.min_free = 0;
|
||||
me.uid = ompi_mtl_portals4.uid;
|
||||
me.options = PTL_ME_OP_PUT |
|
||||
PTL_ME_EVENT_LINK_DISABLE |
|
||||
PTL_ME_EVENT_COMM_DISABLE |
|
||||
PTL_ME_EVENT_UNLINK_DISABLE;
|
||||
me.match_id.phys.nid = PTL_NID_ANY;
|
||||
me.match_id.phys.pid = PTL_PID_ANY;
|
||||
me.match_bits = MTL_PORTALS4_LONG_MSG;
|
||||
me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK |
|
||||
MTL_PORTALS4_SOURCE_MASK |
|
||||
MTL_PORTALS4_TAG_MASK;
|
||||
ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
|
||||
ompi_mtl_portals4.recv_idx,
|
||||
&me,
|
||||
PTL_OVERFLOW_LIST,
|
||||
NULL,
|
||||
&ompi_mtl_portals4.long_overflow_me_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: PtlMEAppend failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* attach short unex recv blocks */
|
||||
ret = ompi_mtl_portals4_recv_short_init();
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: short receive block initialization failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
ompi_mtl_portals4.opcount = 0;
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
ompi_mtl_portals4.recv_opcount = 0;
|
||||
#endif
|
||||
|
||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||
ret = ompi_mtl_portals4_flowctl_init();
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: ompi_mtl_portals4_flowctl_init failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* activate progress callback */
|
||||
ret = opal_progress_register(ompi_mtl_portals4_progress);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: opal_progress_register failed: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
return &ompi_mtl_portals4.base;
|
||||
|
||||
error:
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.long_overflow_me_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
|
||||
}
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.zero_md_h);
|
||||
}
|
||||
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
|
||||
if (NULL != ompi_mtl_portals4.send_md_hs) {
|
||||
int i;
|
||||
int num_mds = ompi_mtl_portals4_get_num_mds();
|
||||
|
||||
for (i = 0 ; i < num_mds ; ++i) {
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_hs[i], PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
free(ompi_mtl_portals4.send_md_hs);
|
||||
}
|
||||
#else
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) {
|
||||
PtlMDRelease(ompi_mtl_portals4.send_md_h);
|
||||
}
|
||||
#endif
|
||||
if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
|
||||
}
|
||||
if (ompi_mtl_portals4.recv_idx != (ptl_pt_index_t) ~0UL) {
|
||||
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
|
||||
}
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.send_eq_h, PTL_INVALID_HANDLE)) {
|
||||
PtlEQFree(ompi_mtl_portals4.send_eq_h);
|
||||
}
|
||||
if (!PtlHandleIsEqual(ompi_mtl_portals4.recv_eq_h, PTL_INVALID_HANDLE)) {
|
||||
PtlEQFree(ompi_mtl_portals4.recv_eq_h);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -85,8 +85,12 @@ ompi_mtl_portals4_flowctl_init(void)
|
||||
me.length = 0;
|
||||
me.min_free = 0;
|
||||
me.uid = ompi_mtl_portals4.uid;
|
||||
me.match_id.phys.nid = PTL_NID_ANY;
|
||||
me.match_id.phys.pid = PTL_PID_ANY;
|
||||
if (ompi_mtl_portals4.use_logical) {
|
||||
me.match_id.rank = PTL_RANK_ANY;
|
||||
} else {
|
||||
me.match_id.phys.nid = PTL_NID_ANY;
|
||||
me.match_id.phys.pid = PTL_PID_ANY;
|
||||
}
|
||||
me.ignore_bits = 0;
|
||||
|
||||
me.options = PTL_ME_OP_PUT |
|
||||
@ -245,24 +249,35 @@ ompi_mtl_portals4_flowctl_add_procs(size_t me,
|
||||
ompi_mtl_portals4.flowctl.epoch_counter = 0;
|
||||
|
||||
ompi_mtl_portals4.flowctl.num_procs = npeers;
|
||||
ompi_mtl_portals4.flowctl.root =
|
||||
*((ptl_process_t*) procs[0]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
if (0 == me) {
|
||||
ompi_mtl_portals4.flowctl.i_am_root = true;
|
||||
} else {
|
||||
ompi_mtl_portals4.flowctl.i_am_root = false;
|
||||
ompi_mtl_portals4.flowctl.parent =
|
||||
*((ptl_process_t*) procs[(me - 1) / 2]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
if (0 == me) ompi_mtl_portals4.flowctl.i_am_root = true;
|
||||
else ompi_mtl_portals4.flowctl.i_am_root = false;
|
||||
|
||||
if (ompi_mtl_portals4.use_logical) {
|
||||
ompi_mtl_portals4.flowctl.root.rank = 0;
|
||||
if (false == ompi_mtl_portals4.flowctl.i_am_root) {
|
||||
ompi_mtl_portals4.flowctl.parent.rank = (me - 1) / 2;
|
||||
}
|
||||
ompi_mtl_portals4.flowctl.me.rank = me;
|
||||
}
|
||||
else {
|
||||
ompi_mtl_portals4.flowctl.root =
|
||||
*((ptl_process_t*) procs[0]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
if (false == ompi_mtl_portals4.flowctl.i_am_root) {
|
||||
ompi_mtl_portals4.flowctl.parent =
|
||||
*((ptl_process_t*) procs[(me - 1) / 2]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
}
|
||||
ompi_mtl_portals4.flowctl.me =
|
||||
*((ptl_process_t*) procs[me]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
}
|
||||
ompi_mtl_portals4.flowctl.me =
|
||||
*((ptl_process_t*) procs[me]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
|
||||
for (i = 0 ; i < 2 ; ++i) {
|
||||
size_t tmp = (2 * me) + i + 1;
|
||||
if (tmp < npeers) {
|
||||
ompi_mtl_portals4.flowctl.num_children++;
|
||||
ompi_mtl_portals4.flowctl.children[i] =
|
||||
*((ptl_process_t*) procs[tmp]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
if (ompi_mtl_portals4.use_logical)
|
||||
ompi_mtl_portals4.flowctl.children[i].rank = tmp;
|
||||
else ompi_mtl_portals4.flowctl.children[i] =
|
||||
*((ptl_process_t*) procs[tmp]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -26,7 +26,7 @@ struct ompi_mtl_portals4_pending_request_t {
|
||||
int tag;
|
||||
int my_rank;
|
||||
int fc_notified;
|
||||
ptl_process_t *proc;
|
||||
ptl_process_t ptl_proc;
|
||||
struct ompi_mtl_portals4_isend_request_t *ptl_request;
|
||||
};
|
||||
typedef struct ompi_mtl_portals4_pending_request_t ompi_mtl_portals4_pending_request_t;
|
||||
|
@ -68,8 +68,14 @@ ompi_mtl_portals4_iprobe(struct mca_mtl_base_module_t* mtl,
|
||||
int ret;
|
||||
|
||||
if (MPI_ANY_SOURCE == src) {
|
||||
remote_proc.phys.nid = PTL_NID_ANY;
|
||||
remote_proc.phys.pid = PTL_PID_ANY;
|
||||
if (ompi_mtl_portals4.use_logical) {
|
||||
remote_proc.rank = PTL_RANK_ANY;
|
||||
} else {
|
||||
remote_proc.phys.nid = PTL_NID_ANY;
|
||||
remote_proc.phys.pid = PTL_PID_ANY;
|
||||
}
|
||||
} else if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
|
||||
remote_proc.rank = src;
|
||||
} else {
|
||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
|
||||
remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
@ -140,8 +146,14 @@ ompi_mtl_portals4_improbe(struct mca_mtl_base_module_t *mtl,
|
||||
__FILE__, __LINE__, comm->c_contextid, src, tag);
|
||||
|
||||
if (MPI_ANY_SOURCE == src) {
|
||||
remote_proc.phys.nid = PTL_NID_ANY;
|
||||
remote_proc.phys.pid = PTL_PID_ANY;
|
||||
if (ompi_mtl_portals4.use_logical) {
|
||||
remote_proc.rank = PTL_RANK_ANY;
|
||||
} else {
|
||||
remote_proc.phys.nid = PTL_NID_ANY;
|
||||
remote_proc.phys.pid = PTL_PID_ANY;
|
||||
}
|
||||
} else if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
|
||||
remote_proc.rank = src;
|
||||
} else {
|
||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
|
||||
remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
|
@ -357,8 +357,14 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
|
||||
ptl_me_t me;
|
||||
|
||||
if (MPI_ANY_SOURCE == src) {
|
||||
remote_proc.phys.nid = PTL_NID_ANY;
|
||||
remote_proc.phys.pid = PTL_PID_ANY;
|
||||
if (ompi_mtl_portals4.use_logical) {
|
||||
remote_proc.rank = PTL_RANK_ANY;
|
||||
} else {
|
||||
remote_proc.phys.nid = PTL_NID_ANY;
|
||||
remote_proc.phys.pid = PTL_PID_ANY;
|
||||
}
|
||||
} else if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
|
||||
remote_proc.rank = src;
|
||||
} else {
|
||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
|
||||
remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
|
@ -123,8 +123,12 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
|
||||
PTL_ME_EVENT_LINK_DISABLE |
|
||||
PTL_ME_MANAGE_LOCAL |
|
||||
PTL_ME_MAY_ALIGN;
|
||||
me.match_id.phys.nid = PTL_NID_ANY;
|
||||
me.match_id.phys.pid = PTL_PID_ANY;
|
||||
if (ompi_mtl_portals4.use_logical) {
|
||||
me.match_id.rank = PTL_RANK_ANY;
|
||||
} else {
|
||||
me.match_id.phys.nid = PTL_NID_ANY;
|
||||
me.match_id.phys.pid = PTL_PID_ANY;
|
||||
}
|
||||
me.match_bits = match_bits;
|
||||
me.ignore_bits = ignore_bits;
|
||||
|
||||
|
@ -177,7 +177,7 @@ static inline int
|
||||
ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
||||
void *start, int length, int contextid, int tag,
|
||||
int localrank,
|
||||
ptl_process_t *proc,
|
||||
ptl_process_t ptl_proc,
|
||||
ompi_mtl_portals4_isend_request_t *ptl_request)
|
||||
{
|
||||
int ret;
|
||||
@ -204,7 +204,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
||||
PTL_ME_USE_ONCE |
|
||||
PTL_ME_EVENT_LINK_DISABLE |
|
||||
PTL_ME_EVENT_UNLINK_DISABLE;
|
||||
me.match_id = *proc;
|
||||
me.match_id = ptl_proc;
|
||||
me.match_bits = hdr_data;
|
||||
me.ignore_bits = 0;
|
||||
|
||||
@ -244,7 +244,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
||||
(ptl_size_t) ((char*) start - (char*) base),
|
||||
length,
|
||||
PTL_ACK_REQ,
|
||||
*proc,
|
||||
ptl_proc,
|
||||
ompi_mtl_portals4.recv_idx,
|
||||
match_bits,
|
||||
0,
|
||||
@ -266,7 +266,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
||||
static inline int
|
||||
ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
|
||||
int localrank,
|
||||
ptl_process_t *proc,
|
||||
ptl_process_t ptl_proc,
|
||||
ompi_mtl_portals4_isend_request_t *ptl_request)
|
||||
{
|
||||
int ret;
|
||||
@ -292,7 +292,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
|
||||
PTL_ME_USE_ONCE |
|
||||
PTL_ME_EVENT_LINK_DISABLE |
|
||||
PTL_ME_EVENT_UNLINK_DISABLE;
|
||||
me.match_id = *proc;
|
||||
me.match_id = ptl_proc;
|
||||
me.match_bits = hdr_data;
|
||||
me.ignore_bits = 0;
|
||||
|
||||
@ -322,7 +322,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
|
||||
(ptl_size_t) ((char*) start - (char*) base),
|
||||
put_length,
|
||||
PTL_ACK_REQ,
|
||||
*proc,
|
||||
ptl_proc,
|
||||
ompi_mtl_portals4.recv_idx,
|
||||
match_bits,
|
||||
0,
|
||||
@ -370,7 +370,7 @@ ompi_mtl_portals4_pending_list_progress()
|
||||
pending->contextid,
|
||||
pending->tag,
|
||||
pending->my_rank,
|
||||
pending->proc,
|
||||
pending->ptl_proc,
|
||||
pending->ptl_request);
|
||||
} else {
|
||||
ret = ompi_mtl_portals4_long_isend(pending->start,
|
||||
@ -378,7 +378,7 @@ ompi_mtl_portals4_pending_list_progress()
|
||||
pending->contextid,
|
||||
pending->tag,
|
||||
pending->my_rank,
|
||||
pending->proc,
|
||||
pending->ptl_proc,
|
||||
pending->ptl_request);
|
||||
}
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
@ -404,13 +404,19 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
|
||||
void *start;
|
||||
size_t length;
|
||||
bool free_after;
|
||||
ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest);
|
||||
ptl_process_t *proc = (ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
|
||||
ptl_process_t ptl_proc;
|
||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||
opal_free_list_item_t *item;
|
||||
ompi_mtl_portals4_pending_request_t *pending;
|
||||
#endif
|
||||
|
||||
if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
|
||||
ptl_proc.rank = dest;
|
||||
} else {
|
||||
ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest);
|
||||
ptl_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
}
|
||||
|
||||
ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
|
||||
if (OMPI_SUCCESS != ret) return ret;
|
||||
|
||||
@ -421,8 +427,8 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
||||
"Send %lu to %x,%x of length %d\n",
|
||||
ptl_request->opcount,
|
||||
proc->phys.nid,
|
||||
proc->phys.pid,
|
||||
ptl_proc.phys.nid,
|
||||
ptl_proc.phys.pid,
|
||||
(int)length));
|
||||
|
||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||
@ -438,7 +444,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
|
||||
pending->tag = tag;
|
||||
pending->my_rank = comm->c_my_rank;
|
||||
pending->fc_notified = 0;
|
||||
pending->proc = proc;
|
||||
pending->ptl_proc = ptl_proc;
|
||||
pending->ptl_request = ptl_request;
|
||||
|
||||
if (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
|
||||
@ -470,7 +476,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
|
||||
comm->c_contextid,
|
||||
tag,
|
||||
comm->c_my_rank,
|
||||
proc,
|
||||
ptl_proc,
|
||||
ptl_request);
|
||||
} else {
|
||||
ret = ompi_mtl_portals4_long_isend(start,
|
||||
@ -478,7 +484,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
|
||||
comm->c_contextid,
|
||||
tag,
|
||||
comm->c_my_rank,
|
||||
proc,
|
||||
ptl_proc,
|
||||
ptl_request);
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user