1
1

portals4: add support for dynamic add_procs() to all Portals4 components

In the default mode of operation, the Portals4 components support
dynamic add_procs().

The Portals4 components have two alternate modes (flow control and
logical-to-physical) that require knowledge of all procs at startup.
In these modes, mtl-portals4 sets the MCA_MTL_BASE_FLAG_REQUIRE_WORLD
flag and btl-portals4 sets the MCA_BTL_FLAGS_SINGLE_ADD_PROCS flag
to tell the PML that we need all the procs in one add_procs() call.
Этот коммит содержится в:
Todd Kordenbrock 2015-09-24 19:19:06 -05:00
родитель 3afac9e37d
Коммит 3e63a3458c
15 изменённых файлов: 488 добавлений и 171 удалений

Просмотреть файл

@ -29,6 +29,8 @@
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/mtl/portals4/mtl_portals4.h"
BEGIN_C_DECLS
#define COLL_PORTALS4_NO_OP ((ptl_op_t)-1)
@ -178,11 +180,7 @@ ompi_coll_portals4_iallreduce_intra_fini(struct ompi_coll_portals4_request_t *re
static inline ptl_process_t
ompi_coll_portals4_get_peer(struct ompi_communicator_t *comm, int rank)
{
ompi_proc_t *proc = ompi_comm_peer_lookup(comm, rank);
if (proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] == NULL) {
printf("ompi_coll_portals4_get_peer failure\n");
}
return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
return ompi_mtl_portals4_get_peer(comm, rank);
}

Просмотреть файл

@ -22,6 +22,7 @@
#include <portals4.h>
#include "ompi/communicator/communicator.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/mtl/mtl.h"
#include "opal/class/opal_list.h"
@ -241,45 +242,26 @@ portals4_init_interface(void)
return OMPI_ERROR;
}
int
ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
size_t nprocs,
struct ompi_proc_t** procs)
static int
create_maptable(size_t nprocs,
ompi_proc_t **procs)
{
int ret, me;
int ret;
size_t i;
bool new_found = false;
ptl_process_t *maptable;
if (ompi_mtl_portals4.use_logical) {
maptable = malloc(sizeof(ptl_process_t) * nprocs);
if (NULL == maptable) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: malloc failed\n",
__FILE__, __LINE__);
return OMPI_ERR_OUT_OF_RESOURCE;
}
maptable = malloc(sizeof(ptl_process_t) * nprocs);
if (NULL == maptable) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: malloc failed\n",
__FILE__, __LINE__);
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* Get the list of ptl_process_id_t from the runtime and copy into structure */
for (i = 0 ; i < nprocs ; ++i) {
for (i=0;i<nprocs;i++) {
ptl_process_t *modex_id;
size_t size;
if( procs[i] == ompi_proc_local_proc ) {
me = i;
}
if (procs[i]->super.proc_arch != ompi_proc_local()->super.proc_arch) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"Portals 4 MTL does not support heterogeneous operations.");
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"Proc %s architecture %x, mine %x.",
OMPI_NAME_PRINT(&procs[i]->super.proc_name),
procs[i]->super.proc_arch, ompi_proc_local()->super.proc_arch);
return OMPI_ERR_NOT_SUPPORTED;
}
OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version,
&procs[i]->super.proc_name, (uint8_t**)&modex_id, &size);
if (OMPI_SUCCESS != ret) {
@ -294,40 +276,159 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
return OMPI_ERR_BAD_PARAM;
}
if (NULL == procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) {
ptl_process_t *peer_id;
peer_id = malloc(sizeof(ptl_process_t));
if (NULL == peer_id) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: malloc failed: %d\n",
__FILE__, __LINE__, ret);
return OMPI_ERR_OUT_OF_RESOURCE;
}
if (ompi_mtl_portals4.use_logical) {
peer_id->rank = i;
maptable[i].phys.pid = modex_id->phys.pid;
maptable[i].phys.nid = modex_id->phys.nid;
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
"logical: global rank=%d pid=%d nid=%d\n",
(int)i, maptable[i].phys.pid, maptable[i].phys.nid);
} else {
*peer_id = *modex_id;
}
maptable[i].phys.pid = modex_id->phys.pid;
maptable[i].phys.nid = modex_id->phys.nid;
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
"logical: global rank=%d pid=%d nid=%d\n",
(int)i, maptable[i].phys.pid, maptable[i].phys.nid);
}
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = peer_id;
ret = PtlSetMap(ompi_mtl_portals4.ni_h, nprocs, maptable);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: logical mapping failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"logical mapping OK\n");
new_found = true;
free(maptable);
return OMPI_SUCCESS;
}
/**
 * Allocate the Portals4 endpoint for one proc and attach it to that proc.
 *
 * In logical addressing mode the endpoint stores the peer's rank, taken
 * from the vpid of its process name.  Otherwise the ptl_process_t the
 * peer published is fetched with OPAL_MODEX_RECV and copied into the
 * endpoint.
 *
 * @param proc  proc whose OMPI_PROC_ENDPOINT_TAG_PORTALS4 slot is filled in
 *
 * @return OMPI_SUCCESS on success; OMPI_ERR_OUT_OF_RESOURCE if the
 *         allocation fails; the modex error code if the lookup fails;
 *         OMPI_ERR_BAD_PARAM if the modex payload is not exactly one
 *         ptl_process_t.  On any failure the proc's endpoint slot is
 *         left untouched and nothing stays allocated.
 */
static int
create_endpoint(ompi_proc_t *proc)
{
    ptl_process_t *endpoint;

    endpoint = malloc(sizeof(ptl_process_t));
    if (NULL == endpoint) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: malloc failed: %s\n",
                            __FILE__, __LINE__, strerror(errno));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    if (ompi_mtl_portals4.use_logical) {
        endpoint->rank = proc->super.proc_name.vpid;
    } else {
        int ret;
        ptl_process_t *modex_id;
        size_t size;

        OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version,
                        &proc->super.proc_name, (uint8_t**)&modex_id, &size);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: ompi_modex_recv failed: %d\n",
                                __FILE__, __LINE__, ret);
            /* the endpoint was never published, so release it here */
            free(endpoint);
            return ret;
        } else if (sizeof(ptl_process_t) != size) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: ompi_modex_recv failed (size mismatch): %d\n",
                                __FILE__, __LINE__, ret);
            free(endpoint);
            return OMPI_ERR_BAD_PARAM;
        }

        *endpoint = *modex_id;
    }

    /* publish only a fully initialized endpoint */
    proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = endpoint;

    return OMPI_SUCCESS;
}
/**
 * Look up the proc at a given rank of a group and make sure it carries a
 * Portals4 endpoint.
 *
 * When the proc's OMPI_PROC_ENDPOINT_TAG_PORTALS4 slot is still empty the
 * endpoint is created on demand through create_endpoint().  An endpoint
 * that already exists is returned as is, without being re-validated.
 *
 * @param group  group in which rank is resolved
 * @param rank   rank of the peer inside group
 *
 * @return the proc, or NULL when its endpoint could not be created
 */
ompi_proc_t *
ompi_mtl_portals4_get_proc_group(struct ompi_group_t *group, int rank)
{
    ompi_proc_t *proc = ompi_group_peer_lookup (group, rank);

    if (NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) {
        return proc;
    }

    if (OMPI_SUCCESS != create_endpoint(proc)) {
        return NULL;
    }

    return proc;
}
static int
add_endpoints(size_t nprocs,
ompi_proc_t **procs)
{
int ret;
size_t i;
/* Get the list of ptl_process_id_t from the runtime and copy into structure */
for (i = 0 ; i < nprocs ; ++i) {
if (procs[i]->super.proc_arch != ompi_proc_local()->super.proc_arch) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"Portals 4 MTL does not support heterogeneous operations.");
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"Proc %s architecture %x, mine %x.",
OMPI_NAME_PRINT(&procs[i]->super.proc_name),
procs[i]->super.proc_arch, ompi_proc_local()->super.proc_arch);
return OMPI_ERR_NOT_SUPPORTED;
}
if (NULL == procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) {
ret = create_endpoint(procs[i]);
if (OMPI_SUCCESS != ret) {
return ret;
}
#if 0
} else {
/*
* sanity check
*/
int ret;
ptl_process_t *modex_id;
size_t size;
OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version,
&procs[i]->super.proc_name, (uint8_t**)&modex_id, &size);
ptl_process_t *proc = (ptl_process_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
if (ompi_mtl_portals4.use_logical) {
if ((size_t)proc->rank != i) {
if ((size_t)proc->rank != procs[i]->super.proc_name.vpid) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: existing peer and rank don't match\n",
__FILE__, __LINE__);
return OMPI_ERROR;
}
maptable[i].phys.pid = modex_id->phys.pid;
maptable[i].phys.nid = modex_id->phys.nid;
}
else if (proc->phys.nid != modex_id->phys.nid ||
proc->phys.pid != modex_id->phys.pid) {
@ -336,45 +437,82 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
__FILE__, __LINE__);
return OMPI_ERROR;
}
#endif
}
}
if (ompi_mtl_portals4.use_logical) {
ret = PtlSetMap(ompi_mtl_portals4.ni_h, nprocs, maptable);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: logical mapping failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"logical mapping OK\n");
free(maptable);
}
return OMPI_SUCCESS;
}
portals4_init_interface();
#define NEED_ALL_PROCS (ompi_mtl_portals4.use_logical || ompi_mtl_portals4.use_flowctl)
/* activate progress callback */
ret = opal_progress_register(ompi_mtl_portals4_progress);
int
ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
size_t nprocs,
struct ompi_proc_t** procs)
{
int ret;
/*
* The PML handed us a list of procs that need Portals4
* peer info. Complete those procs here.
*/
ret = add_endpoints(nprocs,
procs);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: opal_progress_register failed: %d\n",
"%s:%d: add_endpoints failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
if (1 == ompi_mtl_portals4.need_init) {
if (1 == ompi_mtl_portals4.use_logical) {
ret = create_maptable(nprocs, procs);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: ompi_mtl_portals4_add_procs::create_maptable() failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
}
/*
* This is the first time through here. Initialize
* Portals4 and register the progress thread.
*/
portals4_init_interface();
/* activate progress callback */
ret = opal_progress_register(ompi_mtl_portals4_progress);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: opal_progress_register failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
if (new_found) {
ret = ompi_mtl_portals4_flowctl_add_procs(me, nprocs, procs);
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
"add_procs() - me=%d\n", ompi_proc_local_proc->super.proc_name.vpid);
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
"add_procs() - adding flowctl procs\n");
ret = ompi_mtl_portals4_flowctl_add_procs(ompi_proc_local_proc->super.proc_name.vpid,
nprocs,
procs);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: flowctl_add_procs failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
}
#endif
ompi_mtl_portals4.need_init = 0;
}
return OMPI_SUCCESS;
}
@ -386,6 +524,9 @@ ompi_mtl_portals4_del_procs(struct mca_mtl_base_module_t *mtl,
{
size_t i;
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
"del_procs() - enter\n");
for (i = 0 ; i < nprocs ; ++i) {
if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) {
free(procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
@ -393,6 +534,9 @@ ompi_mtl_portals4_del_procs(struct mca_mtl_base_module_t *mtl,
}
}
opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
"del_procs() - exit\n");
return OMPI_SUCCESS;
}

Просмотреть файл

@ -26,9 +26,12 @@
#include "opal/class/opal_free_list.h"
#include "opal/class/opal_list.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/base.h"
#include "ompi/communicator/communicator.h"
#include "mtl_portals4_flowctl.h"
BEGIN_C_DECLS
@ -38,8 +41,13 @@ struct mca_mtl_portals4_send_request_t;
struct mca_mtl_portals4_module_t {
mca_mtl_base_module_t base;
/* add_procs() can get called multiple times. this prevents multiple calls to portals4_init_interface(). */
int need_init;
/* Use the logical to physical table to accelerate portals4 addressing: 1 (true) : 0 (false) */
int use_logical;
/* Use flow control: 1 (true) : 0 (false) */
int use_flowctl;
/** Eager limit; messages greater than this use a rendezvous protocol */
unsigned long long eager_limit;
@ -209,6 +217,29 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4;
#define MTL_PORTALS4_IS_SYNC_MSG(hdr_data) \
(0 != (MTL_PORTALS4_SYNC_MSG & hdr_data))
/* mtl-portals4 helpers */
OMPI_DECLSPEC ompi_proc_t *
ompi_mtl_portals4_get_proc_group(struct ompi_group_t *group, int rank);
static inline ptl_process_t
ompi_mtl_portals4_get_peer_group(struct ompi_group_t *group, int rank)
{
return *((ptl_process_t*)ompi_mtl_portals4_get_proc_group(group, rank));
}
/**
 * Communicator flavour of ompi_mtl_portals4_get_proc_group(): resolves
 * rank in the communicator's c_remote_group.
 *
 * @param comm  communicator whose remote group is searched
 * @param rank  rank of the peer inside that group
 *
 * @return whatever ompi_mtl_portals4_get_proc_group() returns for that
 *         group and rank
 */
static inline ompi_proc_t *
ompi_mtl_portals4_get_proc(struct ompi_communicator_t *comm, int rank)
{
    struct ompi_group_t *remote_group = comm->c_remote_group;

    return ompi_mtl_portals4_get_proc_group(remote_group, rank);
}
static inline ptl_process_t
ompi_mtl_portals4_get_peer(struct ompi_communicator_t *comm, int rank)
{
return *((ptl_process_t*)ompi_mtl_portals4_get_proc(comm, rank));
}
/* MTL interface functions */
extern int ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl);

Просмотреть файл

@ -229,6 +229,14 @@ ompi_mtl_portals4_component_open(void)
ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL;
ompi_mtl_portals4.read_idx = (ptl_pt_index_t) ~0UL;
ompi_mtl_portals4.need_init=1;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
ompi_mtl_portals4.use_flowctl=1;
#else
ompi_mtl_portals4.use_flowctl=0;
#endif
return OMPI_SUCCESS;
}

Просмотреть файл

@ -25,4 +25,14 @@ struct mca_mtl_base_endpoint_t {
};
typedef struct mca_mtl_base_endpoint_t mca_mtl_base_endpoint_t;
/**
 * Fetch the Portals4 endpoint of a proc, creating it lazily.
 *
 * If the proc's OMPI_PROC_ENDPOINT_TAG_PORTALS4 slot is empty,
 * ompi_mtl_portals4_add_procs() is invoked for just this proc and the
 * slot is read again.  The result of that call is not inspected, so the
 * returned pointer is whatever the slot holds afterwards.
 *
 * @param mtl        MTL module handed through to add_procs()
 * @param ompi_proc  proc whose endpoint is wanted
 *
 * @return content of the proc's Portals4 endpoint slot
 */
static inline mca_mtl_base_endpoint_t *
ompi_mtl_portals4_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
{
    mca_mtl_base_endpoint_t *ep = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];

    if (OPAL_UNLIKELY(NULL == ep)) {
        /* first use of this peer: have the MTL build its endpoint now */
        ompi_mtl_portals4_add_procs (mtl, 1, &ompi_proc);
        ep = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
    }

    return ep;
}
#endif

Просмотреть файл

@ -22,6 +22,7 @@
#include "ompi/message/message.h"
#include "mtl_portals4.h"
#include "mtl_portals4_endpoint.h"
#include "mtl_portals4_request.h"
#include "mtl_portals4_message.h"
@ -78,7 +79,7 @@ ompi_mtl_portals4_iprobe(struct mca_mtl_base_module_t* mtl,
remote_proc.rank = src;
} else {
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc));
}
MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid,
@ -156,7 +157,7 @@ ompi_mtl_portals4_improbe(struct mca_mtl_base_module_t *mtl,
remote_proc.rank = src;
} else {
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc));
}
MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid,

Просмотреть файл

@ -29,6 +29,7 @@
#include "ompi/message/message.h"
#include "mtl_portals4.h"
#include "mtl_portals4_endpoint.h"
#include "mtl_portals4_request.h"
#include "mtl_portals4_recv_short.h"
#include "mtl_portals4_message.h"
@ -367,7 +368,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
remote_proc.rank = src;
} else {
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc));
}
MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid,

Просмотреть файл

@ -28,6 +28,7 @@
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
#include "mtl_portals4.h"
#include "mtl_portals4_endpoint.h"
#include "mtl_portals4_request.h"
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
#include "mtl_portals4_flowctl.h"
@ -405,7 +406,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
ptl_proc.rank = dest;
} else {
ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest);
ptl_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
ptl_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc));
}
ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);

Просмотреть файл

@ -19,6 +19,8 @@
#include "ompi/group/group.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/mtl/portals4/mtl_portals4.h"
#define OSC_PORTALS4_MB_DATA 0x0000000000000000ULL
#define OSC_PORTALS4_MB_CONTROL 0x1000000000000000ULL
@ -290,17 +292,15 @@ ompi_osc_portals4_complete_all(ompi_osc_portals4_module_t *module)
}
static inline ptl_process_t
ompi_osc_portals4_get_peer(ompi_osc_portals4_module_t *module, int rank)
ompi_osc_portals4_get_peer_group(struct ompi_group_t *group, int rank)
{
ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, rank);
return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
return ompi_mtl_portals4_get_peer_group(group, rank);
}
static inline ptl_process_t
ompi_osc_portals4_get_peer_group(struct ompi_group_t *group, int rank)
ompi_osc_portals4_get_peer(ompi_osc_portals4_module_t *module, int rank)
{
ompi_proc_t *proc = ompi_group_get_proc_ptr(group, rank, true);
return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
return ompi_osc_portals4_get_peer_group(module->comm->c_remote_group, rank);
}
#endif

Просмотреть файл

@ -15,8 +15,6 @@
#include "osc_portals4.h"
#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h"
int
ompi_osc_portals4_fence(int assert, struct ompi_win_t *win)

Просмотреть файл

@ -21,8 +21,6 @@
#include "osc_portals4.h"
#include "osc_portals4_request.h"
#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h"
static int
ompi_osc_portals4_get_op(struct ompi_op_t *op, ptl_op_t *ptl_op)

Просмотреть файл

@ -18,8 +18,6 @@
#include "osc_portals4.h"
#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h"
enum locktype_t {
lock_nocheck,
lock_exclusive,

Просмотреть файл

@ -217,6 +217,161 @@ btl_portals4_init_interface(void)
return OPAL_ERROR;
}
/**
 * Build the BTL endpoint of a peer for one Portals4 interface, using the
 * physical id the peer published in the modex.
 *
 * The modex payload is an array of ptl_process_t, one entry per NI the
 * peer declared; entry number `interface` is copied into the endpoint.
 *
 * @param interface  index of the local Portals4 interface (NI)
 * @param proc       peer to build the endpoint for
 * @param endpoint   [out] receives the newly allocated endpoint
 *
 * @return OPAL_SUCCESS on success; the modex error code (including
 *         OPAL_ERR_NOT_FOUND when the peer published nothing) if the
 *         lookup fails; OPAL_ERROR if the payload is too short, is not a
 *         whole number of ptl_process_t entries, or has no entry for
 *         `interface`; OPAL_ERR_OUT_OF_RESOURCE if the allocation fails.
 *         *endpoint is only written on success or allocation failure.
 */
static int
create_endpoint(int interface,
                opal_proc_t *proc,
                mca_btl_base_endpoint_t **endpoint)
{
    int ret;
    size_t size;
    ptl_process_t *id;

    OPAL_MODEX_RECV(ret, &mca_btl_portals4_component.super.btl_version,
                    &proc->proc_name, (void**) &id, &size);

    if (OPAL_ERR_NOT_FOUND == ret) {
        OPAL_OUTPUT_VERBOSE((30, opal_btl_base_framework.framework_output,
                             "btl/portals4: Portals 4 BTL not available on peer: %s", opal_strerror(ret)));
        return ret;
    } else if (OPAL_SUCCESS != ret) {
        opal_output_verbose(0, opal_btl_base_framework.framework_output,
                            "btl/portals4: opal_modex_recv failed: %s", opal_strerror(ret));
        return ret;
    }
    if (size < sizeof(ptl_process_t)) {  /* no available connection */
        return OPAL_ERROR;
    }
    if ((size % sizeof(ptl_process_t)) != 0) {
        opal_output_verbose(0, opal_btl_base_framework.framework_output,
                            "btl/portals4: invalid format in modex");
        return OPAL_ERROR;
    }

    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                         "btl/portals4: %d NI(s) declared in the modex", (int) (size/sizeof(ptl_process_t))));

    /* the peer may have declared fewer NIs than we have: never index
       past the end of the array it published */
    if (interface < 0 || (size_t) interface >= size / sizeof(ptl_process_t)) {
        opal_output_verbose(0, opal_btl_base_framework.framework_output,
                            "btl/portals4: NI %d not declared in the modex", interface);
        return OPAL_ERROR;
    }

    *endpoint = malloc(sizeof(mca_btl_base_endpoint_t));
    if (NULL == *endpoint) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    (*endpoint)->ptl_proc = id[interface];

    return OPAL_SUCCESS;
}
/**
 * Logical-addressing variant of endpoint creation: fill in the physical
 * id of a peer for the map table and, if the peer has no endpoint yet,
 * allocate one that addresses the peer by rank.
 *
 * The modex payload is an array of ptl_process_t, one entry per NI the
 * peer declared; the nid/pid of entry number `interface` are copied into
 * *phys_peer.  A newly allocated endpoint gets its rank from the vpid of
 * the peer's process name; an endpoint that already exists is kept.
 *
 * @param interface  index of the local Portals4 interface (NI)
 * @param proc       peer being added
 * @param phys_peer  [out] map-table slot receiving the peer's physical id
 * @param endpoint   [in,out] endpoint slot; allocated here when *endpoint
 *                   is NULL on entry
 *
 * @return OPAL_SUCCESS on success; the modex error code (including
 *         OPAL_ERR_NOT_FOUND) if the lookup fails; OPAL_ERROR if the
 *         payload is too short, is not a whole number of ptl_process_t
 *         entries, or has no entry for `interface`;
 *         OPAL_ERR_OUT_OF_RESOURCE if the allocation fails.
 */
static int
create_peer_and_endpoint(int interface,
                         opal_proc_t *proc,
                         ptl_process_t *phys_peer,
                         mca_btl_base_endpoint_t **endpoint)
{
    int ret;
    size_t size;
    ptl_process_t *id;

    OPAL_MODEX_RECV(ret, &mca_btl_portals4_component.super.btl_version,
                    &proc->proc_name, (void**) &id, &size);

    if (OPAL_ERR_NOT_FOUND == ret) {
        OPAL_OUTPUT_VERBOSE((30, opal_btl_base_framework.framework_output,
                             "btl/portals4: Portals 4 BTL not available on peer: %s", opal_strerror(ret)));
        return ret;
    } else if (OPAL_SUCCESS != ret) {
        opal_output_verbose(0, opal_btl_base_framework.framework_output,
                            "btl/portals4: opal_modex_recv failed: %s", opal_strerror(ret));
        return ret;
    }
    if (size < sizeof(ptl_process_t)) {  /* no available connection */
        return OPAL_ERROR;
    }
    if ((size % sizeof(ptl_process_t)) != 0) {
        opal_output_verbose(0, opal_btl_base_framework.framework_output,
                            "btl/portals4: invalid format in modex");
        return OPAL_ERROR;
    }

    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                         "btl/portals4: %d NI(s) declared in the modex", (int) (size/sizeof(ptl_process_t))));

    /* the peer may have declared fewer NIs than we have: never index
       past the end of the array it published.  Checked before the
       allocation so that a failure leaves *endpoint untouched. */
    if (interface < 0 || (size_t) interface >= size / sizeof(ptl_process_t)) {
        opal_output_verbose(0, opal_btl_base_framework.framework_output,
                            "btl/portals4: NI %d not declared in the modex", interface);
        return OPAL_ERROR;
    }

    if (NULL == *endpoint) {
        *endpoint = malloc(sizeof(mca_btl_base_endpoint_t));
        if (NULL == *endpoint) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
        (*endpoint)->ptl_proc.rank = proc->proc_name.vpid;
    }

    phys_peer->phys.pid = id[interface].phys.pid;
    phys_peer->phys.nid = id[interface].phys.nid;
    opal_output_verbose(50, opal_btl_base_framework.framework_output,
                        "logical: global rank=%d pid=%d nid=%d\n",
                        proc->proc_name.vpid, phys_peer->phys.pid, phys_peer->phys.nid);

    return OPAL_SUCCESS;
}
/**
 * Build the logical-to-physical map table for one Portals4 interface and
 * install it with PtlSetMap().
 *
 * For every proc the physical id is collected (and the endpoint created
 * if missing) through create_peer_and_endpoint().  The temporary table is
 * released on every exit path, successful or not.
 *
 * @param portals4_btl  BTL module; supplies interface_num and portals_ni_h
 * @param nprocs        number of entries in procs and endpoint
 * @param procs         procs to put into the map, in table order
 * @param endpoint      per-proc endpoint slots, passed through to
 *                      create_peer_and_endpoint()
 *
 * @return OPAL_SUCCESS on success; OPAL_ERR_OUT_OF_RESOURCE if the table
 *         cannot be allocated; OPAL_ERR_NOT_SUPPORTED if a proc has a
 *         different architecture than the local one; otherwise the error
 *         returned by create_peer_and_endpoint() or PtlSetMap().
 */
static int
create_maptable(struct mca_btl_portals4_module_t *portals4_btl,
                size_t nprocs,
                opal_proc_t **procs,
                mca_btl_base_endpoint_t **endpoint)
{
    int ret;
    ptl_process_t *maptable;

    maptable = malloc(sizeof(ptl_process_t) * nprocs);
    if (NULL == maptable) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: malloc failed\n",
                            __FILE__, __LINE__);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    for (uint32_t i = 0 ; i < nprocs ; i++) {
        struct opal_proc_t *curr_proc;

        curr_proc = procs[i];

        /* portals doesn't support heterogeneous yet... */
        if (opal_proc_local_get()->proc_arch != curr_proc->proc_arch) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "Portals 4 BTL does not support heterogeneous operations.");
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "Proc %s architecture %x, mine %x.",
                                OPAL_NAME_PRINT(curr_proc->proc_name),
                                curr_proc->proc_arch, opal_proc_local_get()->proc_arch);
            ret = OPAL_ERR_NOT_SUPPORTED;
            goto cleanup;
        }

        ret = create_peer_and_endpoint(portals4_btl->interface_num,
                                       curr_proc,
                                       &maptable[i],
                                       &endpoint[i]);
        if (OPAL_SUCCESS != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "%s:%d: create_maptable::create_peer_and_endpoint failed: %d\n",
                                __FILE__, __LINE__, ret);
            goto cleanup;
        }
    }

    ret = PtlSetMap(portals4_btl->portals_ni_h,
                    nprocs,
                    maptable);
    if (OPAL_SUCCESS != ret) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: logical mapping failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto cleanup;
    }
    opal_output_verbose(90, opal_btl_base_framework.framework_output,
                        "logical mapping OK\n");
    ret = OPAL_SUCCESS;

cleanup:
    /* the table is only scratch space for PtlSetMap(); drop it on
       success and on failure alike */
    free(maptable);

    return ret;
}
#define NEED_ALL_PROCS (mca_btl_portals4_component.use_logical)
int
mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
size_t nprocs,
@ -226,105 +381,64 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
{
struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
int ret;
struct opal_proc_t *curr_proc = NULL;
ptl_process_t *id;
size_t i, size;
size_t i;
bool need_activate = false;
ptl_process_t *maptable;
opal_output_verbose(50, opal_btl_base_framework.framework_output,
"mca_btl_portals4_add_procs: Adding %d procs (%d) for NI %d", (int) nprocs,
(int) portals4_btl->portals_num_procs, portals4_btl->interface_num);
if (mca_btl_portals4_component.use_logical) {
maptable = malloc(sizeof(ptl_process_t) * nprocs);
if (NULL == maptable) {
opal_output_verbose(1, opal_btl_base_framework.framework_output,
"%s:%d: malloc failed\n",
__FILE__, __LINE__);
return OPAL_ERR_OUT_OF_RESOURCE;
}
}
"mca_btl_portals4_add_procs: Adding %d procs (%d) for NI %d",
(int) nprocs,
(int) portals4_btl->portals_num_procs,
portals4_btl->interface_num);
if (0 == portals4_btl->portals_num_procs) {
need_activate = true;
}
/*
* The PML handed us a list of procs that need Portals4
* peer info. Complete those procs here.
*/
for (i = 0 ; i < nprocs ; ++i) {
curr_proc = procs[i];
struct opal_proc_t *curr_proc = procs[i];
/* portals doesn't support heterogeneous yet... */
if (opal_proc_local_get()->proc_arch != curr_proc->proc_arch) {
continue;
opal_output_verbose(1, opal_btl_base_framework.framework_output,
"Portals 4 BTL does not support heterogeneous operations.");
opal_output_verbose(1, opal_btl_base_framework.framework_output,
"Proc %s architecture %x, mine %x.",
OPAL_NAME_PRINT(curr_proc->proc_name),
curr_proc->proc_arch, opal_proc_local_get()->proc_arch);
return OPAL_ERR_NOT_SUPPORTED;
}
OPAL_MODEX_RECV(ret, &mca_btl_portals4_component.super.btl_version,
&curr_proc->proc_name, (void**) &id, &size);
if (OPAL_ERR_NOT_FOUND == ret) {
OPAL_OUTPUT_VERBOSE((30, opal_btl_base_framework.framework_output,
"btl/portals4: Portals 4 BTL not available on peer: %s", opal_strerror(ret)));
continue;
} else if (OPAL_SUCCESS != ret) {
opal_output_verbose(0, opal_btl_base_framework.framework_output,
"btl/portals4: opal_modex_recv failed: %s", opal_strerror(ret));
return ret;
}
if (size < sizeof(ptl_process_t)) { /* no available connection */
return OPAL_ERROR;
}
if ((size % sizeof(ptl_process_t)) != 0) {
opal_output_verbose(0, opal_btl_base_framework.framework_output,
"btl/portals4: invalid format in modex");
return OPAL_ERROR;
}
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"btl/portals4: %d NI(s) declared in the modex", (int) (size/sizeof(ptl_process_t))));
btl_peer_data[i] = malloc(sizeof(mca_btl_base_endpoint_t));
if (NULL == btl_peer_data[i]) return OPAL_ERROR;
/* The modex may receive more than one id (this is the
normal case if there is more than one interface). Store the id of the corresponding
interface */
if (mca_btl_portals4_component.use_logical) {
btl_peer_data[i]->ptl_proc.rank = i;
maptable[i].phys.pid = id[portals4_btl->interface_num].phys.pid;
maptable[i].phys.nid = id[portals4_btl->interface_num].phys.nid;
opal_output_verbose(50, opal_btl_base_framework.framework_output,
"logical: global rank=%d pid=%d nid=%d\n",
(int)i, maptable[i].phys.pid, maptable[i].phys.nid);
} else {
btl_peer_data[i]->ptl_proc = id[portals4_btl->interface_num];
}
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"add_procs: rank=%x nid=%x pid=%x for NI %d\n",
btl_peer_data[i]->ptl_proc.rank,
btl_peer_data[i]->ptl_proc.phys.nid,
btl_peer_data[i]->ptl_proc.phys.pid,
portals4_btl->interface_num));
ret = create_endpoint(portals4_btl->interface_num,
curr_proc,
&btl_peer_data[i]);
OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, 1);
/* and here we can reach */
opal_bitmap_set_bit(reachable, i);
}
if (mca_btl_portals4_component.use_logical) {
ret = PtlSetMap(portals4_btl->portals_ni_h, nprocs, maptable);
if (OPAL_SUCCESS != ret) {
opal_output_verbose(1, opal_btl_base_framework.framework_output,
"%s:%d: logical mapping failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
opal_output_verbose(90, opal_btl_base_framework.framework_output,
"logical mapping OK\n");
free(maptable);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"add_procs: rank=%x nid=%x pid=%x for NI %d\n",
i,
btl_peer_data[i]->ptl_proc.phys.nid,
btl_peer_data[i]->ptl_proc.phys.pid,
portals4_btl->interface_num));
}
if (need_activate && portals4_btl->portals_num_procs > 0) {
if (mca_btl_portals4_component.use_logical) {
ret = create_maptable(portals4_btl, nprocs, procs, btl_peer_data);
if (OPAL_SUCCESS != ret) {
opal_output_verbose(1, opal_btl_base_framework.framework_output,
"%s:%d: mca_btl_portals4_add_procs::create_maptable() failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
}
ret = btl_portals4_init_interface();
if (OPAL_SUCCESS != ret) {
opal_output_verbose(1, opal_btl_base_framework.framework_output,
@ -333,6 +447,7 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
return ret;
}
}
return OPAL_SUCCESS;
}

Просмотреть файл

@ -48,6 +48,9 @@ struct mca_btl_portals4_component_t {
struct mca_btl_portals4_module_t** btls; /* array of available BTL modules */
/* add_procs() can get called multiple times. this prevents multiple calls to portals4_init_interface(). */
int need_init;
/* Use the logical to physical table to accelerate portals4 addressing: 1 (true) : 0 (false) */
int use_logical;

Просмотреть файл

@ -252,6 +252,15 @@ mca_btl_portals4_component_open(void)
mca_btl_portals4_module.portals_outstanding_ops = 0;
mca_btl_portals4_module.recv_idx = (ptl_pt_index_t) ~0UL;
if (1 == mca_btl_portals4_component.use_logical) {
/*
* set the MCA_BTL_FLAGS_SINGLE_ADD_PROCS flag here in the default
* module, so it gets copied into the module for each Portals4
* interface during init().
*/
mca_btl_portals4_module.super.btl_flags |= MCA_BTL_FLAGS_SINGLE_ADD_PROCS;
}
return OPAL_SUCCESS;
}
@ -442,6 +451,8 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
opal_output_verbose(1, opal_btl_base_framework.framework_output, "The btl portals4 component has been initialized and uses %d NI(s)",
mca_btl_portals4_component.num_btls);
mca_btl_portals4_component.need_init = 1;
return btls;
error: