* Revert some of the attempts to better use the Portals 3.3.2-2 user-space
run-time support, as it appears to be doing bad things to memory. Update the hack to get the local NID to match the recent TCP NAL changes, and update the P3RT API usage. This commit was SVN r9005.
Этот коммит содержится в:
родитель
bc6a82839d
Коммит
20d06e889e
@ -37,29 +37,15 @@
|
||||
|
||||
/* how's this for source code diving? - find private method for
|
||||
getting interface */
|
||||
extern unsigned int utcp_my_nid(const char *if_str);
|
||||
|
||||
/* these need to be defined, or things get "unhappy" */
|
||||
FILE* utcp_api_out;
|
||||
FILE* utcp_lib_out;
|
||||
extern int p3tcp_my_nid(const char *if_str, unsigned int *nid);
|
||||
|
||||
static bool use_modex = true;
|
||||
|
||||
int
|
||||
mca_btl_portals_init_compat(mca_btl_portals_component_t *comp)
|
||||
{
|
||||
ptl_process_id_t info;
|
||||
int ret, max_interfaces;
|
||||
#if 0 /* send all the portals internal debug to a file or stderr */
|
||||
FILE *output;
|
||||
char *tmp;
|
||||
|
||||
asprintf(&tmp, "portals.%d", getpid());
|
||||
output = fopen(tmp, "w");
|
||||
free(tmp);
|
||||
|
||||
utcp_lib_out = output;
|
||||
utcp_api_out = output;
|
||||
#endif
|
||||
|
||||
/* if the environment variables for the utcp implementation are
|
||||
already set, assume the user is running without the full Open
|
||||
@ -67,68 +53,56 @@ mca_btl_portals_init_compat(mca_btl_portals_component_t *comp)
|
||||
platform (like, say, Red Storm). Otherwise, be nice and use
|
||||
the modex to setup everything for the user */
|
||||
if (NULL == getenv("PTL_MY_RID")) {
|
||||
char *iface_name;
|
||||
use_modex = true;
|
||||
asprintf(&iface_name, "PTL_IFACE=%s",
|
||||
mca_btl_portals_component.portals_ifname);
|
||||
putenv(iface_name);
|
||||
} else {
|
||||
use_modex = false;
|
||||
}
|
||||
|
||||
/* Initialize Portals interface */
|
||||
ret = PtlInit(&max_interfaces);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"PtlInit failed, returning %d", ret);
|
||||
return OMPI_ERR_FATAL;
|
||||
}
|
||||
|
||||
/* Initialize a network device */
|
||||
ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */
|
||||
PTL_PID_ANY, /* let library assign our pid */
|
||||
NULL, /* no desired limits */
|
||||
NULL, /* no need to have limits around */
|
||||
&mca_btl_portals_module.portals_ni_h /* our interface handle */
|
||||
);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"PtlNIInit failed, returning %d", ret);
|
||||
return OMPI_ERR_FATAL;
|
||||
}
|
||||
|
||||
if (use_modex) {
|
||||
ptl_process_id_t proc_id;
|
||||
unsigned int nid;
|
||||
|
||||
ret = PtlGetId(mca_btl_portals_module.portals_ni_h, &proc_id);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"PtlGetId failed, returning %d", ret);
|
||||
return OMPI_ERR_FATAL;
|
||||
}
|
||||
p3tcp_my_nid(mca_btl_portals_component.portals_ifname, &nid);
|
||||
|
||||
/* post our contact info in the registry */
|
||||
proc_id.nid = htonl(proc_id.nid);
|
||||
proc_id.pid = htonl(proc_id.pid);
|
||||
|
||||
info.nid = htonl(nid);
|
||||
info.pid = htonl((ptl_pid_t) getpid());
|
||||
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
||||
"contact info: %u, %u", ntohl(proc_id.nid),
|
||||
ntohl(proc_id.pid));
|
||||
"contact info: %u, %u", ntohl(info.nid),
|
||||
ntohl(info.pid));
|
||||
|
||||
ret = mca_pml_base_modex_send(&mca_btl_portals_component.super.btl_version,
|
||||
&proc_id, sizeof(ptl_process_id_t));
|
||||
&info, sizeof(ptl_process_id_t));
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"mca_pml_base_modex_send failed: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
/* Initialize the NID/PID data, per the 3.3.2 p3rt api */
|
||||
ret = PtlSetRank(mca_btl_portals_module.portals_ni_h, -1, -1);
|
||||
/*
|
||||
* Initialize Portals interface
|
||||
*/
|
||||
ret = PtlInit(&max_interfaces);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"PtlSetRank(handle, -1, -1) failed, returning %d",
|
||||
ret);
|
||||
"PtlInit failed, returning %d\n", ret);
|
||||
return OMPI_ERR_FATAL;
|
||||
}
|
||||
|
||||
/* tell the UTCP runtime code to read the env variables */
|
||||
PtlSetRank(PTL_INVALID_HANDLE, -1, -1);
|
||||
|
||||
/*
|
||||
* Initialize a network device
|
||||
*/
|
||||
ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */
|
||||
PTL_PID_ANY, /* let library assign our pid */
|
||||
NULL, /* no desired limits */
|
||||
NULL, /* no need to have limits around */
|
||||
&mca_btl_portals_module.portals_ni_h /* our interface handle */
|
||||
);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"PtlNIInit failed, returning %d\n", ret);
|
||||
return OMPI_ERR_FATAL;
|
||||
}
|
||||
}
|
||||
@ -143,27 +117,45 @@ mca_btl_portals_add_procs_compat(struct mca_btl_portals_module_t* btl,
|
||||
ptl_process_id_t **portals_procs)
|
||||
{
|
||||
int ret;
|
||||
unsigned int nptl_procs, rank, i;
|
||||
|
||||
if (use_modex) {
|
||||
int my_rid;
|
||||
ptl_process_id_t *proc_id;
|
||||
ptl_nid_t *nidmap = NULL;
|
||||
ptl_pid_t *pidmap = NULL;
|
||||
size_t j, size;
|
||||
ptl_process_id_t *info;
|
||||
char *nidmap = NULL;
|
||||
char *pidmap = NULL;
|
||||
char *nid_str;
|
||||
char *pid_str;
|
||||
const size_t map_size = nprocs * 12 + 1; /* 12 is max length of long in decimal */
|
||||
size_t size, i;
|
||||
char *tmp;
|
||||
ompi_proc_t* proc_self = ompi_proc_local();
|
||||
int max_interfaces;
|
||||
|
||||
/* Create and set the NID / PID maps as needed */
|
||||
/*
|
||||
* Do all the NID/PID map setup
|
||||
*/
|
||||
/* each nid is a int, so need 10 there, plus the : */
|
||||
nidmap = malloc(map_size);
|
||||
pidmap = malloc(map_size);
|
||||
nid_str = malloc(12 + 1);
|
||||
pid_str = malloc(12 + 1);
|
||||
if (NULL == nidmap || NULL == pidmap ||
|
||||
NULL == nid_str || NULL == pid_str)
|
||||
return OMPI_ERROR;
|
||||
|
||||
nidmap = malloc(sizeof(ptl_nid_t) * nprocs);
|
||||
pidmap = malloc(sizeof(ptl_pid_t) * nprocs);
|
||||
if (NULL == nidmap || NULL == pidmap) return OMPI_ERROR;
|
||||
|
||||
for (j = 0 ; j < nprocs ; ++j) {
|
||||
if (proc_self == procs[j]) my_rid = j;
|
||||
/* get space for the portals procs list */
|
||||
*portals_procs = calloc(nprocs, sizeof(ptl_process_id_t));
|
||||
if (NULL == *portals_procs) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"calloc(nprocs, sizeof(ptl_process_id_t)) failed");
|
||||
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
if (proc_self == procs[i]) my_rid = i;
|
||||
|
||||
ret = mca_pml_base_modex_recv(&mca_btl_portals_component.super.btl_version,
|
||||
procs[j], (void**) &proc_id, &size);
|
||||
procs[i], (void**) &info, &size);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"mca_pml_base_modex_recv failed: %d", ret);
|
||||
@ -175,55 +167,109 @@ mca_btl_portals_add_procs_compat(struct mca_btl_portals_module_t* btl,
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
nidmap[j] = ntohl(proc_id->nid);
|
||||
pidmap[j] = ntohl(proc_id->pid);
|
||||
if (i == 0) {
|
||||
snprintf(nidmap, map_size, "%u", ntohl(info->nid));
|
||||
snprintf(pidmap, map_size, "%u", ntohl(info->pid));
|
||||
} else {
|
||||
snprintf(nid_str, 12 + 1, ":%u", ntohl(info->nid));
|
||||
snprintf(pid_str, 12 + 1, ":%u", ntohl(info->pid));
|
||||
strncat(nidmap, nid_str, 12);
|
||||
strncat(pidmap, pid_str, 12);
|
||||
}
|
||||
|
||||
free(proc_id);
|
||||
/* update my local array of proc structs */
|
||||
(*portals_procs)[i].nid = ntohl(info->nid);
|
||||
(*portals_procs)[i].pid = ntohl(info->pid);
|
||||
|
||||
free(info);
|
||||
}
|
||||
|
||||
PtlSetRank(mca_btl_portals_module.portals_ni_h,
|
||||
my_rid, (unsigned) nprocs);
|
||||
PtlSetNIDMap(mca_btl_portals_module.portals_ni_h,
|
||||
nidmap, (unsigned) nprocs);
|
||||
PtlSetPIDMap(mca_btl_portals_module.portals_ni_h,
|
||||
pidmap, (unsigned) nprocs);
|
||||
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
||||
"my rid: %u", my_rid);
|
||||
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
||||
"nid map: %s", nidmap);
|
||||
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
||||
"pid map: %s", pidmap);
|
||||
opal_output_verbose(100, mca_btl_portals_component.portals_output,
|
||||
"iface: %s",
|
||||
mca_btl_portals_component.portals_ifname);
|
||||
|
||||
asprintf(&tmp, "PTL_MY_RID=%u", my_rid);
|
||||
putenv(tmp);
|
||||
asprintf(&tmp, "PTL_NIDMAP=%s", nidmap);
|
||||
putenv(tmp);
|
||||
asprintf(&tmp, "PTL_PIDMAP=%s", pidmap);
|
||||
putenv(tmp);
|
||||
asprintf(&tmp, "PTL_IFACE=%s", mca_btl_portals_component.portals_ifname);
|
||||
putenv(tmp);
|
||||
|
||||
free(pidmap);
|
||||
free(nidmap);
|
||||
}
|
||||
free(pid_str);
|
||||
free(nid_str);
|
||||
|
||||
ret = PtlGetRank(mca_btl_portals_module.portals_ni_h, &rank, &nptl_procs);
|
||||
if (ret != PTL_OK) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"PtlGetRank() returned %d", ret);
|
||||
return OMPI_ERR_FATAL;
|
||||
} else if (nptl_procs != nprocs) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"nptl_procs != nprocs (%d, %d)", nptl_procs,
|
||||
nprocs);
|
||||
return OMPI_ERR_FATAL;
|
||||
}
|
||||
|
||||
/* create enough space for all the proc info structs */
|
||||
*portals_procs = calloc(nprocs, sizeof(ptl_process_id_t));
|
||||
if (NULL == *portals_procs) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"calloc(nprocs, sizeof(ptl_process_id_t)) failed");
|
||||
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
}
|
||||
/* fill in all the proc info structs */
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
ret = PtlGetRankId(mca_btl_portals_module.portals_ni_h,
|
||||
i, &((*portals_procs)[i]));
|
||||
/*
|
||||
* Initialize Portals
|
||||
*/
|
||||
ret = PtlInit(&max_interfaces);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(10,
|
||||
mca_btl_portals_component.portals_output,
|
||||
"PtlGetRankId(%d) failed: %d\n", i, ret);
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"PtlInit failed, returning %d\n", ret);
|
||||
return OMPI_ERR_FATAL;
|
||||
}
|
||||
|
||||
/* tell the UTCP runtime code to read the env variables */
|
||||
PtlSetRank(PTL_INVALID_HANDLE, -1, -1);
|
||||
|
||||
ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */
|
||||
PTL_PID_ANY, /* let library assign our pid */
|
||||
NULL, /* no desired limits */
|
||||
NULL, /* save our limits somewhere */
|
||||
&(mca_btl_portals_module.portals_ni_h) /* our interface handle */
|
||||
);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"PtlNIInit failed, returning %d\n", ret);
|
||||
return OMPI_ERR_FATAL;
|
||||
}
|
||||
} else { /* use_modex */
|
||||
unsigned int nptl_procs, rank, i;
|
||||
|
||||
/*
|
||||
*/
|
||||
ret = PtlGetRank(mca_btl_portals_module.portals_ni_h, &rank, &nptl_procs);
|
||||
if (ret != PTL_OK) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"PtlGetRank() returned %d", ret);
|
||||
return OMPI_ERR_FATAL;
|
||||
} else if (nptl_procs != nprocs) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"nptl_procs != nprocs (%d, %d)", nptl_procs,
|
||||
nprocs);
|
||||
return OMPI_ERR_FATAL;
|
||||
}
|
||||
|
||||
/* create enough space for all the proc info structs */
|
||||
*portals_procs = calloc(nprocs, sizeof(ptl_process_id_t));
|
||||
if (NULL == *portals_procs) {
|
||||
opal_output_verbose(10, mca_btl_portals_component.portals_output,
|
||||
"calloc(nprocs, sizeof(ptl_process_id_t)) failed");
|
||||
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
}
|
||||
/* fill in all the proc info structs */
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
ret = PtlGetRankId(mca_btl_portals_module.portals_ni_h,
|
||||
i, &((*portals_procs)[i]));
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(10,
|
||||
mca_btl_portals_component.portals_output,
|
||||
"PtlGetRankId(%d) failed: %d\n", i, ret);
|
||||
return OMPI_ERR_FATAL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 1
|
||||
#if 0
|
||||
PtlNIDebug(mca_btl_portals_module.portals_ni_h, PTL_DBG_ALL);
|
||||
#endif
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user