* Add a mode for the utcp compat code in which the modex is not used; instead, use the "run-time" API of the reference implementation.
* Make the non-modex utcp and Red Storm compat code do the same things in the same order.
This commit was SVN r6556.
Этот коммит содержится в:
родитель
f7efce87d8
Коммит
c95eacdff7
@ -30,16 +30,6 @@ mca_btl_portals_init_compat(mca_btl_portals_component_t *comp)
|
|||||||
uint32_t i;
|
uint32_t i;
|
||||||
struct mca_btl_portals_module_t *btl;
|
struct mca_btl_portals_module_t *btl;
|
||||||
|
|
||||||
/*
|
|
||||||
* Initialize Portals interface
|
|
||||||
*/
|
|
||||||
ret = PtlInit(&max_interfaces);
|
|
||||||
if (PTL_OK != ret) {
|
|
||||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
|
||||||
"PtlInit failed, returning %d\n", ret);
|
|
||||||
return OMPI_ERR_FATAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* create module - only ever one "NIC" on red storm
|
* create module - only ever one "NIC" on red storm
|
||||||
*/
|
*/
|
||||||
@ -64,6 +54,16 @@ mca_btl_portals_init_compat(mca_btl_portals_component_t *comp)
|
|||||||
/* the defaults are good enough for the rest */
|
/* the defaults are good enough for the rest */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize Portals interface
|
||||||
|
*/
|
||||||
|
ret = PtlInit(&max_interfaces);
|
||||||
|
if (PTL_OK != ret) {
|
||||||
|
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||||
|
"PtlInit failed, returning %d\n", ret);
|
||||||
|
return OMPI_ERR_FATAL;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize a network device
|
* Initialize a network device
|
||||||
*/
|
*/
|
||||||
|
@ -39,13 +39,16 @@ extern unsigned int utcp_my_nid(const char *if_str);
|
|||||||
FILE* utcp_api_out;
|
FILE* utcp_api_out;
|
||||||
FILE* utcp_lib_out;
|
FILE* utcp_lib_out;
|
||||||
|
|
||||||
|
static bool use_modex = true;
|
||||||
|
|
||||||
int
|
int
|
||||||
mca_btl_portals_init_compat(mca_btl_portals_component_t *comp)
|
mca_btl_portals_init_compat(mca_btl_portals_component_t *comp)
|
||||||
{
|
{
|
||||||
ptl_process_id_t info;
|
ptl_process_id_t info;
|
||||||
int ret;
|
int ret, max_interfaces;
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
#if 0
|
struct mca_btl_portals_module_t *btl;
|
||||||
|
#if 0 /* send all the portals internal debug to a file or stderr */
|
||||||
FILE *output;
|
FILE *output;
|
||||||
char *tmp;
|
char *tmp;
|
||||||
|
|
||||||
@ -60,18 +63,15 @@ mca_btl_portals_init_compat(mca_btl_portals_component_t *comp)
|
|||||||
utcp_api_out = stderr;
|
utcp_api_out = stderr;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
info.nid = htonl(utcp_my_nid(mca_btl_portals_component.portals_ifname));
|
/* if the environment variables for the utcp implementation are
|
||||||
info.pid = htonl((ptl_pid_t) getpid());
|
already set, assume the user is running without the full Open
|
||||||
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
RTE and is doing RTE testing for a more tightly-coupled
|
||||||
"contact info: %u, %u", ntohl(info.nid),
|
platform (like, say, Red Storm). Otherwise, be nice and use
|
||||||
ntohl(info.pid));
|
the modex to setup everything for the user */
|
||||||
|
if (NULL == getenv("PTL_MY_RID")) {
|
||||||
ret = mca_base_modex_send(&mca_btl_portals_component.super.btl_version,
|
use_modex = true;
|
||||||
&info, sizeof(ptl_process_id_t));
|
} else {
|
||||||
if (OMPI_SUCCESS != ret) {
|
use_modex = false;
|
||||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
|
||||||
"mca_base_modex_send failed: %d", ret);
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* with the utcp interface, only ever one "NIC" */
|
/* with the utcp interface, only ever one "NIC" */
|
||||||
@ -83,6 +83,7 @@ mca_btl_portals_init_compat(mca_btl_portals_component_t *comp)
|
|||||||
"malloc failed in mca_btl_portals_init");
|
"malloc failed in mca_btl_portals_init");
|
||||||
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
}
|
}
|
||||||
|
btl = &(comp->portals_modules[0]);
|
||||||
|
|
||||||
/* compat code is responsible for copying over the "template" onto
|
/* compat code is responsible for copying over the "template" onto
|
||||||
each module instance. The calling code will create the free
|
each module instance. The calling code will create the free
|
||||||
@ -95,6 +96,48 @@ mca_btl_portals_init_compat(mca_btl_portals_component_t *comp)
|
|||||||
/* the defaults are good enough for the rest */
|
/* the defaults are good enough for the rest */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (use_modex) {
|
||||||
|
/* post our contact info in the registry */
|
||||||
|
info.nid = htonl(utcp_my_nid(mca_btl_portals_component.portals_ifname));
|
||||||
|
info.pid = htonl((ptl_pid_t) getpid());
|
||||||
|
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
||||||
|
"contact info: %u, %u", ntohl(info.nid),
|
||||||
|
ntohl(info.pid));
|
||||||
|
|
||||||
|
ret = mca_base_modex_send(&mca_btl_portals_component.super.btl_version,
|
||||||
|
&info, sizeof(ptl_process_id_t));
|
||||||
|
if (OMPI_SUCCESS != ret) {
|
||||||
|
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||||
|
"mca_base_modex_send failed: %d", ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* Initialize Portals interface
|
||||||
|
*/
|
||||||
|
ret = PtlInit(&max_interfaces);
|
||||||
|
if (PTL_OK != ret) {
|
||||||
|
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||||
|
"PtlInit failed, returning %d\n", ret);
|
||||||
|
return OMPI_ERR_FATAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize a network device
|
||||||
|
*/
|
||||||
|
ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */
|
||||||
|
PTL_PID_ANY, /* let library assign our pid */
|
||||||
|
NULL, /* no desired limits */
|
||||||
|
&(btl->portals_ni_limits), /* save our limits somewhere */
|
||||||
|
&(btl->portals_ni_h) /* our interface handle */
|
||||||
|
);
|
||||||
|
if (PTL_OK != ret) {
|
||||||
|
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||||
|
"PtlNIInit failed, returning %d\n", ret);
|
||||||
|
return OMPI_ERR_FATAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -116,107 +159,147 @@ mca_btl_portals_add_procs_compat(struct mca_btl_portals_module_t* btl,
|
|||||||
ompi_proc_t* proc_self = ompi_proc_local();
|
ompi_proc_t* proc_self = ompi_proc_local();
|
||||||
int max_interfaces;
|
int max_interfaces;
|
||||||
|
|
||||||
/*
|
if (use_modex) {
|
||||||
* Do all the NID/PID map setup
|
/*
|
||||||
*/
|
* Do all the NID/PID map setup
|
||||||
/* each nid is a int, so need 10 there, plus the : */
|
*/
|
||||||
nidmap = malloc(map_size);
|
/* each nid is a int, so need 10 there, plus the : */
|
||||||
pidmap = malloc(map_size);
|
nidmap = malloc(map_size);
|
||||||
nid_str = malloc(12 + 1);
|
pidmap = malloc(map_size);
|
||||||
pid_str = malloc(12 + 1);
|
nid_str = malloc(12 + 1);
|
||||||
if (NULL == nidmap || NULL == pidmap || NULL == nid_str || NULL == pid_str)
|
pid_str = malloc(12 + 1);
|
||||||
return OMPI_ERROR;
|
if (NULL == nidmap || NULL == pidmap ||
|
||||||
|
NULL == nid_str || NULL == pid_str)
|
||||||
/* get space for the portals procs list */
|
|
||||||
*portals_procs = calloc(nprocs, sizeof(ptl_process_id_t));
|
|
||||||
if (NULL == *portals_procs) {
|
|
||||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
|
||||||
"calloc(nprocs, sizeof(ptl_process_id_t)) failed");
|
|
||||||
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0 ; i < nprocs ; ++i) {
|
|
||||||
if (proc_self == procs[i]) my_rid = i;
|
|
||||||
|
|
||||||
ret = mca_base_modex_recv(&mca_btl_portals_component.super.btl_version,
|
|
||||||
procs[i], (void**) &info, &size);
|
|
||||||
if (OMPI_SUCCESS != ret) {
|
|
||||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
|
||||||
"mca_base_modex_recv failed: %d", ret);
|
|
||||||
return ret;
|
|
||||||
} else if (sizeof(ptl_process_id_t) != size) {
|
|
||||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
|
||||||
"mca_base_modex_recv returned size %d, expected %d",
|
|
||||||
size, sizeof(ptl_process_id_t));
|
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
|
|
||||||
|
/* get space for the portals procs list */
|
||||||
|
*portals_procs = calloc(nprocs, sizeof(ptl_process_id_t));
|
||||||
|
if (NULL == *portals_procs) {
|
||||||
|
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||||
|
"calloc(nprocs, sizeof(ptl_process_id_t)) failed");
|
||||||
|
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0 ; i < nprocs ; ++i) {
|
||||||
|
if (proc_self == procs[i]) my_rid = i;
|
||||||
|
|
||||||
|
ret = mca_base_modex_recv(&mca_btl_portals_component.super.btl_version,
|
||||||
|
procs[i], (void**) &info, &size);
|
||||||
|
if (OMPI_SUCCESS != ret) {
|
||||||
|
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||||
|
"mca_base_modex_recv failed: %d", ret);
|
||||||
|
return ret;
|
||||||
|
} else if (sizeof(ptl_process_id_t) != size) {
|
||||||
|
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||||
|
"mca_base_modex_recv returned size %d, expected %d",
|
||||||
|
size, sizeof(ptl_process_id_t));
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == 0) {
|
||||||
|
snprintf(nidmap, map_size, "%u", ntohl(info->nid));
|
||||||
|
snprintf(pidmap, map_size, "%u", ntohl(info->pid));
|
||||||
|
} else {
|
||||||
|
snprintf(nid_str, 12 + 1, ":%u", ntohl(info->nid));
|
||||||
|
snprintf(pid_str, 12 + 1, ":%u", ntohl(info->pid));
|
||||||
|
strncat(nidmap, nid_str, 12);
|
||||||
|
strncat(pidmap, pid_str, 12);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* update my local array of proc structs */
|
||||||
|
(*portals_procs)[i].nid = ntohl(info->nid);
|
||||||
|
(*portals_procs)[i].pid = ntohl(info->pid);
|
||||||
|
|
||||||
|
free(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i == 0) {
|
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
||||||
snprintf(nidmap, map_size, "%u", ntohl(info->nid));
|
"my rid: %u", my_rid);
|
||||||
snprintf(pidmap, map_size, "%u", ntohl(info->pid));
|
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
||||||
} else {
|
"nid map: %s", nidmap);
|
||||||
snprintf(nid_str, 12 + 1, ":%u", ntohl(info->nid));
|
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
||||||
snprintf(pid_str, 12 + 1, ":%u", ntohl(info->pid));
|
"pid map: %s", pidmap);
|
||||||
strncat(nidmap, nid_str, 12);
|
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
||||||
strncat(pidmap, pid_str, 12);
|
"iface: %s",
|
||||||
|
mca_btl_portals_component.portals_ifname);
|
||||||
|
|
||||||
|
asprintf(&tmp, "PTL_MY_RID=%u", my_rid);
|
||||||
|
putenv(tmp);
|
||||||
|
asprintf(&tmp, "PTL_NIDMAP=%s", nidmap);
|
||||||
|
putenv(tmp);
|
||||||
|
asprintf(&tmp, "PTL_PIDMAP=%s", pidmap);
|
||||||
|
putenv(tmp);
|
||||||
|
asprintf(&tmp, "PTL_IFACE=%s", mca_btl_portals_component.portals_ifname);
|
||||||
|
putenv(tmp);
|
||||||
|
|
||||||
|
free(pidmap);
|
||||||
|
free(nidmap);
|
||||||
|
free(pid_str);
|
||||||
|
free(nid_str);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize Portals
|
||||||
|
*/
|
||||||
|
ret = PtlInit(&max_interfaces);
|
||||||
|
if (PTL_OK != ret) {
|
||||||
|
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||||
|
"PtlInit failed, returning %d\n", ret);
|
||||||
|
return OMPI_ERR_FATAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* update my local array of proc structs */
|
ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */
|
||||||
(*portals_procs)[i].nid = ntohl(info->nid);
|
PTL_PID_ANY, /* let library assign our pid */
|
||||||
(*portals_procs)[i].pid = ntohl(info->pid);
|
NULL, /* no desired limits */
|
||||||
|
&(btl->portals_ni_limits), /* save our limits somewhere */
|
||||||
free(info);
|
&(btl->portals_ni_h) /* our interface handle */
|
||||||
}
|
);
|
||||||
|
if (PTL_OK != ret) {
|
||||||
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||||
"my rid: %u", my_rid);
|
"PtlNIInit failed, returning %d\n", ret);
|
||||||
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
return OMPI_ERR_FATAL;
|
||||||
"nid map: %s", nidmap);
|
}
|
||||||
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
|
||||||
"pid map: %s", pidmap);
|
|
||||||
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
|
||||||
"iface: %s",
|
|
||||||
mca_btl_portals_component.portals_ifname);
|
|
||||||
|
|
||||||
asprintf(&tmp, "PTL_MY_RID=%u", my_rid);
|
|
||||||
putenv(tmp);
|
|
||||||
asprintf(&tmp, "PTL_NIDMAP=%s", nidmap);
|
|
||||||
putenv(tmp);
|
|
||||||
asprintf(&tmp, "PTL_PIDMAP=%s", pidmap);
|
|
||||||
putenv(tmp);
|
|
||||||
asprintf(&tmp, "PTL_IFACE=%s", mca_btl_portals_component.portals_ifname);
|
|
||||||
putenv(tmp);
|
|
||||||
|
|
||||||
free(pidmap);
|
|
||||||
free(nidmap);
|
|
||||||
free(pid_str);
|
|
||||||
free(nid_str);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Initialize Portals
|
|
||||||
*/
|
|
||||||
ret = PtlInit(&max_interfaces);
|
|
||||||
if (PTL_OK != ret) {
|
|
||||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
|
||||||
"PtlInit failed, returning %d\n", ret);
|
|
||||||
return OMPI_ERR_FATAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */
|
|
||||||
PTL_PID_ANY, /* let library assign our pid */
|
|
||||||
NULL, /* no desired limits */
|
|
||||||
&(btl->portals_ni_limits), /* save our limits somewhere */
|
|
||||||
&(btl->portals_ni_h) /* our interface handle */
|
|
||||||
);
|
|
||||||
if (PTL_OK != ret) {
|
|
||||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
|
||||||
"PtlNIInit failed, returning %d\n", ret);
|
|
||||||
return OMPI_ERR_FATAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
PtlNIDebug(btl->portals_ni_h, PTL_DBG_ALL | PTL_DBG_NI_ALL);
|
PtlNIDebug(btl->portals_ni_h, PTL_DBG_ALL | PTL_DBG_NI_ALL);
|
||||||
#endif
|
#endif
|
||||||
|
} else { /* use_modex */
|
||||||
|
int nptl_procs = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FIXME - XXX - FIXME
|
||||||
|
* BWB - implicit assumption that cnos procs list will match our
|
||||||
|
* procs list. Don't know what to do about that...
|
||||||
|
*/
|
||||||
|
ret = PtlGetRank(&my_rid, &nptl_procs);
|
||||||
|
if (ret != PTL_OK) {
|
||||||
|
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||||
|
"PtlGetRank() returned %d", ret);
|
||||||
|
return OMPI_ERR_FATAL;
|
||||||
|
} else if (nptl_procs != nprocs) {
|
||||||
|
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||||
|
"nptl_procs != nprocs (%d, %d)", nptl_procs,
|
||||||
|
nprocs);
|
||||||
|
return OMPI_ERR_FATAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* create enough space for all the proc info structs */
|
||||||
|
*portals_procs = calloc(nprocs, sizeof(ptl_process_id_t));
|
||||||
|
if (NULL == *portals_procs) {
|
||||||
|
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||||
|
"calloc(nprocs, sizeof(ptl_process_id_t)) failed");
|
||||||
|
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
|
/* fill in all the proc info structs */
|
||||||
|
for (i = 0 ; i < nprocs ; ++i) {
|
||||||
|
ret = PtlGetRankId(i, &((*portals_procs)[i]));
|
||||||
|
if (PTL_OK != ret) {
|
||||||
|
opal_output_verbose(10,
|
||||||
|
mca_btl_portals_component.portals_output,
|
||||||
|
"PtlGetRankId(%d) failed: %d\n", i, ret);
|
||||||
|
return OMPI_ERR_FATAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user