diff --git a/ompi/mca/mtl/portals4/mtl_portals4.c b/ompi/mca/mtl/portals4/mtl_portals4.c
index 6628e5f694..944ed0b52e 100644
--- a/ompi/mca/mtl/portals4/mtl_portals4.c
+++ b/ompi/mca/mtl/portals4/mtl_portals4.c
@@ -56,6 +56,261 @@ mca_mtl_portals4_module_t ompi_mtl_portals4 = {
     }
 };
 
+/**
+ * Create the Portals4 resources that hang off ompi_mtl_portals4.ni_h.
+ *
+ * In order: the send and receive event queues, the receive and read
+ * portal table entries, the zero-length MD used for acks, the send
+ * MD(s) spanning the address space, the long-message overflow ME, the
+ * short unexpected-receive blocks and, when compiled in, flow control.
+ *
+ * @return OMPI_SUCCESS if every step succeeds.  On the first failure the
+ *         handles created so far are released and OMPI_ERROR is returned;
+ *         the specific failure code is only logged.
+ *
+ * NOTE(review): the cleanup path compares each handle with
+ * PTL_INVALID_HANDLE (and each PT index with ~0), so it presumably relies
+ * on those fields having been preset before this runs -- confirm.
+ */
+static int
+portals4_init_interface(void)
+{
+    /* Signed int: the value is logged with %d and holds both PTL_* and
+       OMPI_* status codes. */
+    int ret;
+    ptl_md_t md;
+    ptl_me_t me;
+
+    /* create event queues */
+    ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
+                     ompi_mtl_portals4.send_queue_size,
+                     &ompi_mtl_portals4.send_eq_h);
+    if (PTL_OK != ret) {
+        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
+                            "%s:%d: PtlEQAlloc failed: %d\n",
+                            __FILE__, __LINE__, ret);
+        goto error;
+    }
+    ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
+                     ompi_mtl_portals4.recv_queue_size,
+                     &ompi_mtl_portals4.recv_eq_h);
+    if (PTL_OK != ret) {
+        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
+                            "%s:%d: PtlEQAlloc failed: %d\n",
+                            __FILE__, __LINE__, ret);
+        goto error;
+    }
+
+    /* Create send and long message (read) portal table entries */
+    ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
+                     PTL_PT_ONLY_USE_ONCE |
+                     PTL_PT_ONLY_TRUNCATE |
+                     PTL_PT_FLOWCTRL,
+                     ompi_mtl_portals4.recv_eq_h,
+                     REQ_RECV_TABLE_ID,
+                     &ompi_mtl_portals4.recv_idx);
+    if (PTL_OK != ret) {
+        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
+                            "%s:%d: PtlPTAlloc failed: %d\n",
+                            __FILE__, __LINE__, ret);
+        goto error;
+    }
+    ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
+                     PTL_PT_ONLY_USE_ONCE |
+                     PTL_PT_ONLY_TRUNCATE,
+                     ompi_mtl_portals4.send_eq_h,
+                     REQ_READ_TABLE_ID,
+                     &ompi_mtl_portals4.read_idx);
+    if (PTL_OK != ret) {
+        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
+                            "%s:%d: PtlPTAlloc failed: %d\n",
+                            __FILE__, __LINE__, ret);
+        goto error;
+    }
+
+    /* bind zero-length md for sending acks */
+    md.start = NULL;
+    md.length = 0;
+    md.options = 0;
+    md.eq_handle = PTL_EQ_NONE;
+    md.ct_handle = PTL_CT_NONE;
+
+    ret = PtlMDBind(ompi_mtl_portals4.ni_h,
+                    &md,
+                    &ompi_mtl_portals4.zero_md_h);
+    if (PTL_OK != ret) {
+        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
+                            "%s:%d: PtlMDBind failed: %d\n",
+                            __FILE__, __LINE__, ret);
+        goto error;
+    }
+
+    /* Bind MD/MDs across all memory.  We prefer (for obvious reasons)
+       to have a single MD across all of memory */
+#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
+    {
+        int i;
+        int num_mds = ompi_mtl_portals4_get_num_mds();
+        ptl_size_t size = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) - 1;
+        ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2;
+
+        ompi_mtl_portals4.send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds);
+        if (NULL == ompi_mtl_portals4.send_md_hs) {
+            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
+                                "%s:%d: Error allocating MD array",
+                                __FILE__, __LINE__);
+            ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
+            goto error;
+        }
+
+        for (i = 0 ; i < num_mds ; ++i) {
+            ompi_mtl_portals4.send_md_hs[i] = PTL_INVALID_HANDLE;
+        }
+
+        for (i = 0 ; i < num_mds ; ++i) {
+            md.start = (char*) (offset_unit * i);
+            /* MDs start offset_unit (half an MD) apart, so only the last
+               one is shortened to half size.  The previous test,
+               (i - 1 == num_mds), could never be true inside this loop. */
+            md.length = (i == num_mds - 1) ? size / 2 : size;
+            md.options = 0;
+            md.eq_handle = ompi_mtl_portals4.send_eq_h;
+            md.ct_handle = PTL_CT_NONE;
+
+            opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
+                                "Binding md from %p of length %lx",
+                                md.start, md.length);
+
+            ret = PtlMDBind(ompi_mtl_portals4.ni_h,
+                            &md,
+                            &ompi_mtl_portals4.send_md_hs[i]);
+            if (PTL_OK != ret) {
+                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
+                                    "%s:%d: PtlMDBind failed: %d\n",
+                                    __FILE__, __LINE__, ret);
+                goto error;
+            }
+        }
+    }
+#else
+    md.start = 0;
+    md.length = PTL_SIZE_MAX;
+    md.options = 0;
+    md.eq_handle = ompi_mtl_portals4.send_eq_h;
+    md.ct_handle = PTL_CT_NONE;
+
+    ret = PtlMDBind(ompi_mtl_portals4.ni_h,
+                    &md,
+                    &ompi_mtl_portals4.send_md_h);
+    if (PTL_OK != ret) {
+        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
+                            "%s:%d: PtlMDBind failed: %d\n",
+                            __FILE__, __LINE__, ret);
+        goto error;
+    }
+#endif
+
+    /* Handle long overflows */
+    me.start = NULL;
+    me.length = 0;
+    me.ct_handle = PTL_CT_NONE;
+    me.min_free = 0;
+    me.uid = ompi_mtl_portals4.uid;
+    me.options = PTL_ME_OP_PUT |
+        PTL_ME_EVENT_LINK_DISABLE |
+        PTL_ME_EVENT_COMM_DISABLE |
+        PTL_ME_EVENT_UNLINK_DISABLE;
+    /* match any sender: by rank when use_logical is set, by nid/pid otherwise */
+    if (ompi_mtl_portals4.use_logical) {
+        me.match_id.rank = PTL_RANK_ANY;
+    } else {
+        me.match_id.phys.nid = PTL_NID_ANY;
+        me.match_id.phys.pid = PTL_PID_ANY;
+    }
+    me.match_bits = MTL_PORTALS4_LONG_MSG;
+    me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK |
+        MTL_PORTALS4_SOURCE_MASK |
+        MTL_PORTALS4_TAG_MASK;
+    ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
+                      ompi_mtl_portals4.recv_idx,
+                      &me,
+                      PTL_OVERFLOW_LIST,
+                      NULL,
+                      &ompi_mtl_portals4.long_overflow_me_h);
+    if (PTL_OK != ret) {
+        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
+                            "%s:%d: PtlMEAppend failed: %d\n",
+                            __FILE__, __LINE__, ret);
+        goto error;
+    }
+
+    /* attach short unex recv blocks */
+    ret = ompi_mtl_portals4_recv_short_init();
+    if (OMPI_SUCCESS != ret) {
+        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
+                            "%s:%d: short receive block initialization failed: %d\n",
+                            __FILE__, __LINE__, ret);
+        goto error;
+    }
+
+    ompi_mtl_portals4.opcount = 0;
+#if OPAL_ENABLE_DEBUG
+    ompi_mtl_portals4.recv_opcount = 0;
+#endif
+
+#if OMPI_MTL_PORTALS4_FLOW_CONTROL
+    ret = ompi_mtl_portals4_flowctl_init();
+    if (OMPI_SUCCESS != ret) {
+        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
+                            "%s:%d: ompi_mtl_portals4_flowctl_init failed: %d\n",
+                            __FILE__, __LINE__, ret);
+        goto error;
+    }
+#endif
+
+    return OMPI_SUCCESS;
+
+ error:
+    if (!PtlHandleIsEqual(ompi_mtl_portals4.long_overflow_me_h, PTL_INVALID_HANDLE)) {
+        PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
+    }
+    if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
+        PtlMDRelease(ompi_mtl_portals4.zero_md_h);
+    }
+#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
+    if (NULL != ompi_mtl_portals4.send_md_hs) {
+        int i;
+        int num_mds = ompi_mtl_portals4_get_num_mds();
+
+        for (i = 0 ; i < num_mds ; ++i) {
+            if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_hs[i], PTL_INVALID_HANDLE)) {
+                PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
+            }
+        }
+
+        free(ompi_mtl_portals4.send_md_hs);
+        /* do not leave a dangling pointer in the module global */
+        ompi_mtl_portals4.send_md_hs = NULL;
+    }
+#else
+    if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) {
+        PtlMDRelease(ompi_mtl_portals4.send_md_h);
+    }
+#endif
+    if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
+        PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
+    }
+    if (ompi_mtl_portals4.recv_idx != (ptl_pt_index_t) ~0UL) {
+        PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
+    }
+    if (!PtlHandleIsEqual(ompi_mtl_portals4.send_eq_h, PTL_INVALID_HANDLE)) {
+        PtlEQFree(ompi_mtl_portals4.send_eq_h);
+    }
+    if (!PtlHandleIsEqual(ompi_mtl_portals4.recv_eq_h, PTL_INVALID_HANDLE)) {
+        PtlEQFree(ompi_mtl_portals4.recv_eq_h);
+    }
+    return OMPI_ERROR;
+}
 
 int
 ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
@@ -65,6 +320,17 @@
ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, int ret, me; size_t i; bool new_found = false; + ptl_process_t *maptable; + + if (ompi_mtl_portals4.use_logical) { + maptable = malloc(sizeof(ptl_process_t) * nprocs); + if (NULL == maptable) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: malloc failed\n", + __FILE__, __LINE__); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } /* Get the list of ptl_process_id_t from the runtime and copy into structure */ for (i = 0 ; i < nprocs ; ++i) { @@ -108,14 +350,34 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, __FILE__, __LINE__, ret); return OMPI_ERR_OUT_OF_RESOURCE; } - *peer_id = *modex_id; + if (ompi_mtl_portals4.use_logical) { + peer_id->rank = i; + maptable[i].phys.pid = modex_id->phys.pid; + maptable[i].phys.nid = modex_id->phys.nid; + opal_output_verbose(50, ompi_mtl_base_framework.framework_output, + "logical: global rank=%d pid=%d nid=%d\n", + (int)i, maptable[i].phys.pid, maptable[i].phys.nid); + } else { + *peer_id = *modex_id; + } + procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = peer_id; new_found = true; } else { ptl_process_t *proc = (ptl_process_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]; - if (proc->phys.nid != modex_id->phys.nid || - proc->phys.pid != modex_id->phys.pid) { + if (ompi_mtl_portals4.use_logical) { + if ((size_t)proc->rank != i) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: existing peer and rank don't match\n", + __FILE__, __LINE__); + return OMPI_ERROR; + } + maptable[i].phys.pid = modex_id->phys.pid; + maptable[i].phys.nid = modex_id->phys.nid; + } + else if (proc->phys.nid != modex_id->phys.nid || + proc->phys.pid != modex_id->phys.pid) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: existing peer and modex peer don't match\n", __FILE__, __LINE__); @@ -124,6 +386,30 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, } } + if 
(ompi_mtl_portals4.use_logical) { + ret = PtlSetMap(ompi_mtl_portals4.ni_h, nprocs, maptable); + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: logical mapping failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "logical mapping OK\n"); + free(maptable); + } + + portals4_init_interface(); + + /* activate progress callback */ + ret = opal_progress_register(ompi_mtl_portals4_progress); + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: opal_progress_register failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + #if OMPI_MTL_PORTALS4_FLOW_CONTROL if (new_found) { ret = ompi_mtl_portals4_flowctl_add_procs(me, nprocs, procs); diff --git a/ompi/mca/mtl/portals4/mtl_portals4.h b/ompi/mca/mtl/portals4/mtl_portals4.h index fd618dff7c..e57f6da097 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.h +++ b/ompi/mca/mtl/portals4/mtl_portals4.h @@ -38,6 +38,9 @@ struct mca_mtl_portals4_send_request_t; struct mca_mtl_portals4_module_t { mca_mtl_base_module_t base; + /* Use the logical to physical table to accelerate portals4 adressing: 1 (true) : 0 (false) */ + int use_logical; + /** Eager limit; messages greater than this use a rendezvous protocol */ unsigned long long eager_limit; /** Size of short message blocks */ diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index 3544f135ab..a9d2e57315 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -31,6 +31,7 @@ #include "mtl_portals4_request.h" #include "mtl_portals4_recv_short.h" #include "mtl_portals4_message.h" +#include "ompi/runtime/mpiruntime.h" static int param_priority; @@ -80,6 +81,18 @@ ompi_mtl_portals4_component_register(void) mca_base_var_enum_t *new_enum; int ret; + ompi_mtl_portals4.use_logical = 0; + 
(void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version, + "use_logical", + "Use the logical to physical table to accelerate portals4 adressing: 1 (true) : 0 (false)", + MCA_BASE_VAR_TYPE_INT, + NULL, + 0, + 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_mtl_portals4.use_logical); + param_priority = 10; (void) mca_base_component_var_register (&mca_mtl_portals4_component.mtl_version, "priority", "Priority of the Portals4 MTL component", @@ -253,8 +266,12 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, { int ret; ptl_process_t id; - ptl_md_t md; - ptl_me_t me; + + if (enable_mpi_threads && ompi_mpi_thread_multiple) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "mtl portals4 is initialized for threads"); + } + /* Initialize Portals and create a physical, matching interface */ ret = PtlInit(); @@ -265,7 +282,14 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, return NULL; } - ret = PtlNIInit(PTL_IFACE_DEFAULT, + if (ompi_mtl_portals4.use_logical) + ret = PtlNIInit(PTL_IFACE_DEFAULT, + PTL_NI_LOGICAL | PTL_NI_MATCHING, + PTL_PID_ANY, + NULL, + NULL, + &ompi_mtl_portals4.ni_h); + else ret = PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_PHYSICAL | PTL_NI_MATCHING, PTL_PID_ANY, NULL, @@ -287,10 +311,10 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, } /* Publish our NID/PID in the modex */ - ret = PtlGetId(ompi_mtl_portals4.ni_h, &id); + ret = PtlGetPhysId(ompi_mtl_portals4.ni_h, &id); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlGetId failed: %d\n", + "%s:%d: PtlGetPhysId failed: %d\n", __FILE__, __LINE__, ret); goto error; } @@ -309,233 +333,9 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, "My nid,pid = %x,%x", id.phys.nid, id.phys.pid)); - /* create event queues */ - ret = PtlEQAlloc(ompi_mtl_portals4.ni_h, - ompi_mtl_portals4.send_queue_size, - &ompi_mtl_portals4.send_eq_h); - if (PTL_OK != 
ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlEQAlloc failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - ret = PtlEQAlloc(ompi_mtl_portals4.ni_h, - ompi_mtl_portals4.recv_queue_size, - &ompi_mtl_portals4.recv_eq_h); - if (PTL_OK != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlEQAlloc failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - - /* Create send and long message (read) portal table entries */ - ret = PtlPTAlloc(ompi_mtl_portals4.ni_h, - PTL_PT_ONLY_USE_ONCE | - PTL_PT_ONLY_TRUNCATE | - PTL_PT_FLOWCTRL, - ompi_mtl_portals4.recv_eq_h, - REQ_RECV_TABLE_ID, - &ompi_mtl_portals4.recv_idx); - if (PTL_OK != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlPTAlloc failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - ret = PtlPTAlloc(ompi_mtl_portals4.ni_h, - PTL_PT_ONLY_USE_ONCE | - PTL_PT_ONLY_TRUNCATE, - ompi_mtl_portals4.send_eq_h, - REQ_READ_TABLE_ID, - &ompi_mtl_portals4.read_idx); - if (PTL_OK != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlPTAlloc failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - - /* bind zero-length md for sending acks */ - md.start = NULL; - md.length = 0; - md.options = 0; - md.eq_handle = PTL_EQ_NONE; - md.ct_handle = PTL_CT_NONE; - - ret = PtlMDBind(ompi_mtl_portals4.ni_h, - &md, - &ompi_mtl_portals4.zero_md_h); - if (PTL_OK != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlMDBind failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - - /* Bind MD/MDs across all memory. 
We prefer (for obvious reasons) - to have a single MD across all of memory */ -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - { - int i; - int num_mds = ompi_mtl_portals4_get_num_mds(); - ptl_size_t size = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) - 1; - ptl_size_t offset_unit = (1ULL << OPAL_PORTALS4_MAX_MD_SIZE) / 2; - - ompi_mtl_portals4.send_md_hs = malloc(sizeof(ptl_handle_md_t) * num_mds); - if (NULL == ompi_mtl_portals4.send_md_hs) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: Error allocating MD array", - __FILE__, __LINE__); - ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; - goto error; - } - - for (i = 0 ; i < num_mds ; ++i) { - ompi_mtl_portals4.send_md_hs[i] = PTL_INVALID_HANDLE; - } - - for (i = 0 ; i < num_mds ; ++i) { - md.start = (char*) (offset_unit * i); - md.length = (i - 1 == num_mds) ? size / 2 : size; - md.options = 0; - md.eq_handle = ompi_mtl_portals4.send_eq_h; - md.ct_handle = PTL_CT_NONE; - - opal_output_verbose(50, ompi_mtl_base_framework.framework_output, - "Binding md from %p of length %lx", - md.start, md.length); - - ret = PtlMDBind(ompi_mtl_portals4.ni_h, - &md, - &ompi_mtl_portals4.send_md_hs[i]); - if (PTL_OK != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlMDBind failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - } - } -#else - md.start = 0; - md.length = PTL_SIZE_MAX; - md.options = 0; - md.eq_handle = ompi_mtl_portals4.send_eq_h; - md.ct_handle = PTL_CT_NONE; - - ret = PtlMDBind(ompi_mtl_portals4.ni_h, - &md, - &ompi_mtl_portals4.send_md_h); - if (PTL_OK != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlMDBind failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } -#endif - - /* Handle long overflows */ - me.start = NULL; - me.length = 0; - me.ct_handle = PTL_CT_NONE; - me.min_free = 0; - me.uid = ompi_mtl_portals4.uid; - me.options = PTL_ME_OP_PUT | - PTL_ME_EVENT_LINK_DISABLE | - 
PTL_ME_EVENT_COMM_DISABLE | - PTL_ME_EVENT_UNLINK_DISABLE; - me.match_id.phys.nid = PTL_NID_ANY; - me.match_id.phys.pid = PTL_PID_ANY; - me.match_bits = MTL_PORTALS4_LONG_MSG; - me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK | - MTL_PORTALS4_SOURCE_MASK | - MTL_PORTALS4_TAG_MASK; - ret = PtlMEAppend(ompi_mtl_portals4.ni_h, - ompi_mtl_portals4.recv_idx, - &me, - PTL_OVERFLOW_LIST, - NULL, - &ompi_mtl_portals4.long_overflow_me_h); - if (PTL_OK != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlMEAppend failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - - /* attach short unex recv blocks */ - ret = ompi_mtl_portals4_recv_short_init(); - if (OMPI_SUCCESS != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: short receive block initialization failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - - ompi_mtl_portals4.opcount = 0; -#if OPAL_ENABLE_DEBUG - ompi_mtl_portals4.recv_opcount = 0; -#endif - -#if OMPI_MTL_PORTALS4_FLOW_CONTROL - ret = ompi_mtl_portals4_flowctl_init(); - if (OMPI_SUCCESS != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: ompi_mtl_portals4_flowctl_init failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } -#endif - - /* activate progress callback */ - ret = opal_progress_register(ompi_mtl_portals4_progress); - if (OMPI_SUCCESS != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: opal_progress_register failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - return &ompi_mtl_portals4.base; error: - if (!PtlHandleIsEqual(ompi_mtl_portals4.long_overflow_me_h, PTL_INVALID_HANDLE)) { - PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h); - } - if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) { - PtlMDRelease(ompi_mtl_portals4.zero_md_h); - } -#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE - if (NULL != ompi_mtl_portals4.send_md_hs) { - int i; - int 
num_mds = ompi_mtl_portals4_get_num_mds(); - - for (i = 0 ; i < num_mds ; ++i) { - if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_hs[i], PTL_INVALID_HANDLE)) { - PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]); - } - } - - free(ompi_mtl_portals4.send_md_hs); - } -#else - if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) { - PtlMDRelease(ompi_mtl_portals4.send_md_h); - } -#endif - if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) { - PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx); - } - if (ompi_mtl_portals4.recv_idx != (ptl_pt_index_t) ~0UL) { - PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx); - } - if (!PtlHandleIsEqual(ompi_mtl_portals4.send_eq_h, PTL_INVALID_HANDLE)) { - PtlEQFree(ompi_mtl_portals4.send_eq_h); - } - if (!PtlHandleIsEqual(ompi_mtl_portals4.recv_eq_h, PTL_INVALID_HANDLE)) { - PtlEQFree(ompi_mtl_portals4.recv_eq_h); - } return NULL; } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c index 9d6663ff19..b66524e81a 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c @@ -85,8 +85,12 @@ ompi_mtl_portals4_flowctl_init(void) me.length = 0; me.min_free = 0; me.uid = ompi_mtl_portals4.uid; - me.match_id.phys.nid = PTL_NID_ANY; - me.match_id.phys.pid = PTL_PID_ANY; + if (ompi_mtl_portals4.use_logical) { + me.match_id.rank = PTL_RANK_ANY; + } else { + me.match_id.phys.nid = PTL_NID_ANY; + me.match_id.phys.pid = PTL_PID_ANY; + } me.ignore_bits = 0; me.options = PTL_ME_OP_PUT | @@ -245,24 +249,35 @@ ompi_mtl_portals4_flowctl_add_procs(size_t me, ompi_mtl_portals4.flowctl.epoch_counter = 0; ompi_mtl_portals4.flowctl.num_procs = npeers; - ompi_mtl_portals4.flowctl.root = - *((ptl_process_t*) procs[0]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); - if (0 == me) { - ompi_mtl_portals4.flowctl.i_am_root = true; - } else { - ompi_mtl_portals4.flowctl.i_am_root = false; - 
ompi_mtl_portals4.flowctl.parent = - *((ptl_process_t*) procs[(me - 1) / 2]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + if (0 == me) ompi_mtl_portals4.flowctl.i_am_root = true; + else ompi_mtl_portals4.flowctl.i_am_root = false; + + if (ompi_mtl_portals4.use_logical) { + ompi_mtl_portals4.flowctl.root.rank = 0; + if (false == ompi_mtl_portals4.flowctl.i_am_root) { + ompi_mtl_portals4.flowctl.parent.rank = (me - 1) / 2; + } + ompi_mtl_portals4.flowctl.me.rank = me; + } + else { + ompi_mtl_portals4.flowctl.root = + *((ptl_process_t*) procs[0]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + if (false == ompi_mtl_portals4.flowctl.i_am_root) { + ompi_mtl_portals4.flowctl.parent = + *((ptl_process_t*) procs[(me - 1) / 2]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + } + ompi_mtl_portals4.flowctl.me = + *((ptl_process_t*) procs[me]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); } - ompi_mtl_portals4.flowctl.me = - *((ptl_process_t*) procs[me]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); for (i = 0 ; i < 2 ; ++i) { size_t tmp = (2 * me) + i + 1; if (tmp < npeers) { ompi_mtl_portals4.flowctl.num_children++; - ompi_mtl_portals4.flowctl.children[i] = - *((ptl_process_t*) procs[tmp]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + if (ompi_mtl_portals4.use_logical) + ompi_mtl_portals4.flowctl.children[i].rank = tmp; + else ompi_mtl_portals4.flowctl.children[i] = + *((ptl_process_t*) procs[tmp]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); } } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.h b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.h index 69ffd6fc61..102659a8a2 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.h @@ -26,7 +26,7 @@ struct ompi_mtl_portals4_pending_request_t { int tag; int my_rank; int fc_notified; - ptl_process_t *proc; + ptl_process_t ptl_proc; struct ompi_mtl_portals4_isend_request_t *ptl_request; }; typedef struct ompi_mtl_portals4_pending_request_t 
ompi_mtl_portals4_pending_request_t; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_probe.c b/ompi/mca/mtl/portals4/mtl_portals4_probe.c index 5760c80e6e..ee761237c2 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_probe.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_probe.c @@ -68,8 +68,14 @@ ompi_mtl_portals4_iprobe(struct mca_mtl_base_module_t* mtl, int ret; if (MPI_ANY_SOURCE == src) { - remote_proc.phys.nid = PTL_NID_ANY; - remote_proc.phys.pid = PTL_PID_ANY; + if (ompi_mtl_portals4.use_logical) { + remote_proc.rank = PTL_RANK_ANY; + } else { + remote_proc.phys.nid = PTL_NID_ANY; + remote_proc.phys.pid = PTL_PID_ANY; + } + } else if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) { + remote_proc.rank = src; } else { ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src ); remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); @@ -140,8 +146,14 @@ ompi_mtl_portals4_improbe(struct mca_mtl_base_module_t *mtl, __FILE__, __LINE__, comm->c_contextid, src, tag); if (MPI_ANY_SOURCE == src) { - remote_proc.phys.nid = PTL_NID_ANY; - remote_proc.phys.pid = PTL_PID_ANY; + if (ompi_mtl_portals4.use_logical) { + remote_proc.rank = PTL_RANK_ANY; + } else { + remote_proc.phys.nid = PTL_NID_ANY; + remote_proc.phys.pid = PTL_PID_ANY; + } + } else if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) { + remote_proc.rank = src; } else { ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src ); remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index fcb0eb3cdb..a95175348e 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -77,6 +77,7 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlGet failed: %d", __FILE__, __LINE__, ret); + 
PtlMDRelease(request->md_h); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -311,7 +312,6 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, ptl_request); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); - PtlMDRelease(ptl_request->md_h); goto callback_error; } } @@ -357,8 +357,14 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, ptl_me_t me; if (MPI_ANY_SOURCE == src) { - remote_proc.phys.nid = PTL_NID_ANY; - remote_proc.phys.pid = PTL_PID_ANY; + if (ompi_mtl_portals4.use_logical) { + remote_proc.rank = PTL_RANK_ANY; + } else { + remote_proc.phys.nid = PTL_NID_ANY; + remote_proc.phys.pid = PTL_PID_ANY; + } + } else if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) { + remote_proc.rank = src; } else { ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src ); remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c b/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c index 12a0839d16..49d1186ade 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c @@ -123,8 +123,12 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block) PTL_ME_EVENT_LINK_DISABLE | PTL_ME_MANAGE_LOCAL | PTL_ME_MAY_ALIGN; - me.match_id.phys.nid = PTL_NID_ANY; - me.match_id.phys.pid = PTL_PID_ANY; + if (ompi_mtl_portals4.use_logical) { + me.match_id.rank = PTL_RANK_ANY; + } else { + me.match_id.phys.nid = PTL_NID_ANY; + me.match_id.phys.pid = PTL_PID_ANY; + } me.match_bits = match_bits; me.ignore_bits = ignore_bits; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_send.c b/ompi/mca/mtl/portals4/mtl_portals4_send.c index 42484a0495..345eaaa8b8 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_send.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_send.c @@ -177,7 +177,7 @@ static inline int ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, 
void *start, int length, int contextid, int tag, int localrank, - ptl_process_t *proc, + ptl_process_t ptl_proc, ompi_mtl_portals4_isend_request_t *ptl_request) { int ret; @@ -204,7 +204,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, PTL_ME_USE_ONCE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; - me.match_id = *proc; + me.match_id = ptl_proc; me.match_bits = hdr_data; me.ignore_bits = 0; @@ -244,7 +244,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, (ptl_size_t) ((char*) start - (char*) base), length, PTL_ACK_REQ, - *proc, + ptl_proc, ompi_mtl_portals4.recv_idx, match_bits, 0, @@ -266,7 +266,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, static inline int ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag, int localrank, - ptl_process_t *proc, + ptl_process_t ptl_proc, ompi_mtl_portals4_isend_request_t *ptl_request) { int ret; @@ -292,7 +292,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag, PTL_ME_USE_ONCE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; - me.match_id = *proc; + me.match_id = ptl_proc; me.match_bits = hdr_data; me.ignore_bits = 0; @@ -322,7 +322,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag, (ptl_size_t) ((char*) start - (char*) base), put_length, PTL_ACK_REQ, - *proc, + ptl_proc, ompi_mtl_portals4.recv_idx, match_bits, 0, @@ -370,7 +370,7 @@ ompi_mtl_portals4_pending_list_progress() pending->contextid, pending->tag, pending->my_rank, - pending->proc, + pending->ptl_proc, pending->ptl_request); } else { ret = ompi_mtl_portals4_long_isend(pending->start, @@ -378,7 +378,7 @@ ompi_mtl_portals4_pending_list_progress() pending->contextid, pending->tag, pending->my_rank, - pending->proc, + pending->ptl_proc, pending->ptl_request); } if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { @@ -404,13 +404,19 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, void *start; size_t 
length; bool free_after; - ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest); - ptl_process_t *proc = (ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]; + ptl_process_t ptl_proc; #if OMPI_MTL_PORTALS4_FLOW_CONTROL opal_free_list_item_t *item; ompi_mtl_portals4_pending_request_t *pending; #endif + if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) { + ptl_proc.rank = dest; + } else { + ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest); + ptl_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + } + ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); if (OMPI_SUCCESS != ret) return ret; @@ -421,8 +427,8 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Send %lu to %x,%x of length %d\n", ptl_request->opcount, - proc->phys.nid, - proc->phys.pid, + ptl_proc.phys.nid, + ptl_proc.phys.pid, (int)length)); #if OMPI_MTL_PORTALS4_FLOW_CONTROL @@ -438,7 +444,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, pending->tag = tag; pending->my_rank = comm->c_my_rank; pending->fc_notified = 0; - pending->proc = proc; + pending->ptl_proc = ptl_proc; pending->ptl_request = ptl_request; if (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { @@ -470,7 +476,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, comm->c_contextid, tag, comm->c_my_rank, - proc, + ptl_proc, ptl_request); } else { ret = ompi_mtl_portals4_long_isend(start, @@ -478,7 +484,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, comm->c_contextid, tag, comm->c_my_rank, - proc, + ptl_proc, ptl_request); }