add support for mxm 2.0
This commit was SVN r27661.
Этот коммит содержится в:
родитель
4db2c698c1
Коммит
a454341e2b
@ -84,6 +84,7 @@ static uint32_t ompi_mtl_mxm_get_job_id(void)
|
|||||||
|
|
||||||
int ompi_mtl_mxm_progress(void);
|
int ompi_mtl_mxm_progress(void);
|
||||||
|
|
||||||
|
#if MXM_API < MXM_VERSION(2, 0)
|
||||||
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm_ptl_id_t ptlid)
|
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm_ptl_id_t ptlid)
|
||||||
{
|
{
|
||||||
size_t addrlen;
|
size_t addrlen;
|
||||||
@ -91,15 +92,37 @@ static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm
|
|||||||
|
|
||||||
addrlen = sizeof(ep_info->ptl_addr[ptlid]);
|
addrlen = sizeof(ep_info->ptl_addr[ptlid]);
|
||||||
err = mxm_ep_address(ompi_mtl_mxm.ep, ptlid,
|
err = mxm_ep_address(ompi_mtl_mxm.ep, ptlid,
|
||||||
(struct sockaddr *) &ep_info->ptl_addr[ptlid], &addrlen);
|
(struct sockaddr *) &ep_info->ptl_addr[ptlid], &addrlen);
|
||||||
if (MXM_OK != err) {
|
if (MXM_OK != err) {
|
||||||
orte_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
|
orte_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
|
||||||
true, (int)ptlid, mxm_error_string(err));
|
true, (int)ptlid, mxm_error_string(err));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info,
|
||||||
|
mxm_domain_id_t domain)
|
||||||
|
{
|
||||||
|
size_t addrlen;
|
||||||
|
mxm_error_t err;
|
||||||
|
|
||||||
|
addrlen = sizeof(ep_info->dest_addr[domain]);
|
||||||
|
err = mxm_ep_address(ompi_mtl_mxm.ep, domain,
|
||||||
|
(struct sockaddr *) &ep_info->dest_addr[domain], &addrlen);
|
||||||
|
if (MXM_OK == err) {
|
||||||
|
ep_info->domain_bitmap |= MXM_BIT(domain);
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
} else if (MXM_ERR_UNREACHABLE == err) {
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
} else {
|
||||||
|
orte_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
|
||||||
|
true, (int)domain, mxm_error_string(err));
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#define max(a,b) ((a)>(b)?(a):(b))
|
#define max(a,b) ((a)>(b)?(a):(b))
|
||||||
|
|
||||||
@ -159,6 +182,17 @@ static mxm_error_t ompi_mtl_mxm_create_ep(mxm_h ctx, mxm_ep_h *ep, unsigned ptl_
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if MXM_API >= MXM_VERSION(2,0)
|
||||||
|
static void ompi_mtl_mxm_set_conn_req(mxm_conn_req_t *conn_req, ompi_mtl_mxm_ep_conn_info_t *ep_info,
|
||||||
|
mxm_domain_id_t domain)
|
||||||
|
{
|
||||||
|
if (ep_info->domain_bitmap & MXM_BIT(domain)) {
|
||||||
|
conn_req->addr[domain] = (struct sockaddr *)&(ep_info->dest_addr[domain]);
|
||||||
|
} else {
|
||||||
|
conn_req->addr[domain] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
int ompi_mtl_mxm_module_init(void)
|
int ompi_mtl_mxm_module_init(void)
|
||||||
{
|
{
|
||||||
@ -168,7 +202,7 @@ int ompi_mtl_mxm_module_init(void)
|
|||||||
uint32_t jobid;
|
uint32_t jobid;
|
||||||
uint64_t mxlr;
|
uint64_t mxlr;
|
||||||
ompi_proc_t *mp, **procs;
|
ompi_proc_t *mp, **procs;
|
||||||
unsigned ptl_bitmap;
|
unsigned ptl_bitmap;
|
||||||
size_t totps, proc;
|
size_t totps, proc;
|
||||||
int lr, nlps;
|
int lr, nlps;
|
||||||
|
|
||||||
@ -210,8 +244,10 @@ int ompi_mtl_mxm_module_init(void)
|
|||||||
/* Setup the endpoint options and local addresses to bind to. */
|
/* Setup the endpoint options and local addresses to bind to. */
|
||||||
#if MXM_API < MXM_VERSION(1,5)
|
#if MXM_API < MXM_VERSION(1,5)
|
||||||
ptl_bitmap = ompi_mtl_mxm.mxm_opts.ptl_bitmap;
|
ptl_bitmap = ompi_mtl_mxm.mxm_opts.ptl_bitmap;
|
||||||
#else
|
#elif MXM_API < MXM_VERSION(2,0)
|
||||||
ptl_bitmap = ompi_mtl_mxm.mxm_opts->ptl_bitmap;
|
ptl_bitmap = ompi_mtl_mxm.mxm_opts->ptl_bitmap;
|
||||||
|
#else
|
||||||
|
ptl_bitmap = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Open MXM endpoint */
|
/* Open MXM endpoint */
|
||||||
@ -227,6 +263,7 @@ int ompi_mtl_mxm_module_init(void)
|
|||||||
/*
|
/*
|
||||||
* Get address for each PTL on this endpoint, and share it with other ranks.
|
* Get address for each PTL on this endpoint, and share it with other ranks.
|
||||||
*/
|
*/
|
||||||
|
#if MXM_API < MXM_VERSION(2,0)
|
||||||
if ((ptl_bitmap & MXM_BIT(MXM_PTL_SELF)) &&
|
if ((ptl_bitmap & MXM_BIT(MXM_PTL_SELF)) &&
|
||||||
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SELF)) {
|
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SELF)) {
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
@ -239,6 +276,18 @@ int ompi_mtl_mxm_module_init(void)
|
|||||||
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SHM)) {
|
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SHM)) {
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
ep_info.domain_bitmap = 0;
|
||||||
|
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_SELF)) {
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_SHM)) {
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_IB)) {
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* send information using modex (in some case there is limitation on data size for example ess/pmi)
|
* send information using modex (in some case there is limitation on data size for example ess/pmi)
|
||||||
@ -350,9 +399,15 @@ int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
|
|||||||
free(modex_name);
|
free(modex_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if MXM_API < MXM_VERSION(2,0)
|
||||||
conn_reqs[i].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]);
|
conn_reqs[i].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]);
|
||||||
conn_reqs[i].ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]);
|
conn_reqs[i].ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]);
|
||||||
conn_reqs[i].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]);
|
conn_reqs[i].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]);
|
||||||
|
#else
|
||||||
|
ompi_mtl_mxm_set_conn_req(&conn_reqs[i], &ep_info[i], MXM_DOMAIN_SELF);
|
||||||
|
ompi_mtl_mxm_set_conn_req(&conn_reqs[i], &ep_info[i], MXM_DOMAIN_SHM);
|
||||||
|
ompi_mtl_mxm_set_conn_req(&conn_reqs[i], &ep_info[i], MXM_DOMAIN_IB);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Connect to remote peers */
|
/* Connect to remote peers */
|
||||||
|
@ -39,7 +39,12 @@ typedef struct mca_mtl_mxm_module_t {
|
|||||||
|
|
||||||
|
|
||||||
typedef struct ompi_mtl_mxm_ep_conn_info_t {
|
typedef struct ompi_mtl_mxm_ep_conn_info_t {
|
||||||
|
#if MXM_API < MXM_VERSION(2,0)
|
||||||
struct sockaddr_storage ptl_addr[MXM_PTL_LAST];
|
struct sockaddr_storage ptl_addr[MXM_PTL_LAST];
|
||||||
|
#else
|
||||||
|
unsigned domain_bitmap;
|
||||||
|
struct sockaddr_storage dest_addr[MXM_DOMAIN_LAST];
|
||||||
|
#endif
|
||||||
} ompi_mtl_mxm_ep_conn_info_t;
|
} ompi_mtl_mxm_ep_conn_info_t;
|
||||||
|
|
||||||
extern mca_mtl_mxm_module_t ompi_mtl_mxm;
|
extern mca_mtl_mxm_module_t ompi_mtl_mxm;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user