1
1

* checkpoint - initialization is now factored in a reasonable way for both the

utcp reference implementation and Red Storm (I think)

This commit was SVN r5515.
Этот коммит содержится в:
Brian Barrett 2005-04-27 16:42:56 +00:00
родитель ec4264a925
Коммит 206b1bace1
6 изменённых файлов: 119 добавлений и 28 удалений

Просмотреть файл

@ -73,10 +73,15 @@ mca_ptl_portals_add_procs(struct mca_ptl_base_module_t* ptl,
int ret;
struct ompi_proc_t *local_proc = ompi_proc_local();
struct ompi_proc_t *curr_proc;
ptl_process_id_t *portals_procs;
size_t i;
ret = mca_ptl_portals_add_procs_compat(ptl, nprocs, procs,
peers, reachable);
/* make sure our environment is fully initialized. At end of this
call, we have a working network handle on our module and
portals_procs will have the portals process identifier for each
proc (ordered, in theory) */
ret = mca_ptl_portals_add_procs_compat((struct mca_ptl_portals_module_t*) ptl,
nprocs, procs, &portals_procs);
if (OMPI_SUCCESS != ret) return ret;
/* loop through all procs, setting our reachable flag */

Просмотреть файл

@ -19,6 +19,8 @@
#ifndef MCA_PTL_PORTALS_H
#define MCA_PTL_PORTALS_H
#include <portals3.h>
#include "mca/pml/pml.h"
#include "mca/ptl/ptl.h"
#include "class/ompi_bitmap.h"
@ -29,8 +31,6 @@
struct mca_ptl_portals_component_t;
typedef struct mca_ptl_portals_component_t mca_ptl_portals_component_t;
#include "ptl_portals_compat.h"
/**
* Portals PTL component.

Просмотреть файл

@ -20,7 +20,6 @@
#if PTL_PORTALS_UTCP
#include <portals3.h>
#include <p3nal_utcp.h>
#include <p3rt/p3rt.h>
#include <p3api/debug.h>
@ -37,9 +36,8 @@
int mca_ptl_portals_init(mca_ptl_portals_component_t *comp);
int mca_ptl_portals_add_procs_compat(struct mca_ptl_base_module_t* ptl,
int mca_ptl_portals_add_procs_compat(mca_ptl_portals_module_t* ptl,
size_t nprocs, struct ompi_proc_t **procs,
struct mca_ptl_base_peer_t** peers,
ompi_bitmap_t* reachable);
ptl_process_id_t **portals_procs);
#endif /* PTL_PORTALS_NAL_H */

Просмотреть файл

@ -27,14 +27,84 @@
/**
 * Initialize the Portals library and create the single PTL module
 * used by this (Red Storm) flavor of the component.
 *
 * On success comp->portals_num_modules is 1 and comp->portals_modules[0]
 * points to a heap-allocated copy of mca_ptl_portals_module whose
 * limits and ni_handle fields have been filled in by PtlNIInit().
 * On failure nothing allocated here is left behind:
 * comp->portals_modules is NULL and comp->portals_num_modules is 0.
 *
 * @param comp  component structure to populate
 *
 * @return OMPI_SUCCESS on success,
 *         OMPI_ERR_FATAL if PtlInit() or PtlNIInit() fails,
 *         OMPI_ERR_TEMP_OUT_OF_RESOURCE if an allocation fails.
 */
int
mca_ptl_portals_init(mca_ptl_portals_component_t *comp)
{
    int ret, max_interfaces;
    struct mca_ptl_portals_module_t *ptl;

    /*
     * Initialize Portals interface
     */
    ret = PtlInit(&max_interfaces);
    if (PTL_OK != ret) {
        ompi_output_verbose(10, mca_ptl_portals_component.portals_output,
                            "PtlInit failed, returning %d\n", ret);
        return OMPI_ERR_FATAL;
    }

    /*
     * create module - only ever one "NIC" on red storm
     */
    comp->portals_num_modules = 1;
    comp->portals_modules = calloc(comp->portals_num_modules,
                                   sizeof(mca_ptl_portals_module_t *));
    if (NULL == comp->portals_modules) {
        ompi_output_verbose(10, mca_ptl_portals_component.portals_output,
                            "malloc failed in mca_ptl_portals_init");
        comp->portals_num_modules = 0;
        return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    }

    comp->portals_modules[0] = malloc(sizeof(mca_ptl_portals_module_t));
    /* check the element just allocated, not the (already verified) array */
    if (NULL == comp->portals_modules[0]) {
        ompi_output_verbose(10, mca_ptl_portals_component.portals_output,
                            "malloc failed in mca_ptl_portals_init");
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto cleanup;
    }

    /* start from the statically initialized module template */
    ptl = comp->portals_modules[0];
    *ptl = mca_ptl_portals_module;

    /*
     * Initialize a network device
     */
    ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */
                    PTL_PID_ANY,       /* let library assign our pid */
                    NULL,              /* no desired limits */
                    &(ptl->limits),    /* save our limits somewhere */
                    &(ptl->ni_handle)  /* our interface handle */
                    );
    if (PTL_OK != ret) {
        ompi_output_verbose(10, mca_ptl_portals_component.portals_output,
                            "PtlNIInit failed, returning %d\n", ret);
        ret = OMPI_ERR_FATAL;
        goto cleanup;
    }

    return OMPI_SUCCESS;

cleanup:
    /* release everything allocated above; free(NULL) is a no-op */
    free(comp->portals_modules[0]);
    free(comp->portals_modules);
    comp->portals_modules = NULL;
    comp->portals_num_modules = 0;
    return ret;
}
int
mca_ptl_portals_add_procs_compat(struct mca_ptl_base_module_t* ptl_base,
mca_ptl_portals_add_procs_compat(struct mca_ptl_portals_module_t* ptl,
size_t nprocs, struct ompi_proc_t **procs,
struct mca_ptl_base_peer_t** peers,
ompi_bitmap_t* reachable)
ptl_process_id_t **portals_procs)
{
int nptl_procs = 0;
/*
* FIXME - XXX - FIXME
* BWB - implicit assumption that cnos procs list will match our
* procs list. Don't know what to do about that...
*/
nptl_procs = cnos_get_nidpid_map(portals_procs);
if (nptl_procs <= 0) {
ompi_output_verbose(10, mca_ptl_portals_component.portals_output,
"cnos_get_nidpid_map() returned %d", nptl_procs);
return OMPI_ERR_FATAL;
} else if (nptl_procs != nprocs) {
ompi_output_verbose(10, mca_ptl_portals_component.portals_output,
"nptl_procs != nprocs (%d, %d)", nptl_procs,
nprocs);
return OMPI_ERR_FATAL;
}
return OMPI_ERR_NOT_IMPLEMENTED;
}

Просмотреть файл

@ -22,6 +22,7 @@
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <netinet/in.h>
#include "include/constants.h"
#include "util/output.h"
@ -46,8 +47,8 @@ mca_ptl_portals_init(mca_ptl_portals_component_t *comp)
utcp_lib_out = stderr;
utcp_api_out = stderr;
info.nid = utcp_my_nid(mca_ptl_portals_component.portals_ifname);
info.pid = (ptl_pid_t) getpid();
info.nid = htonl(utcp_my_nid(mca_ptl_portals_component.portals_ifname));
info.pid = htonl((ptl_pid_t) getpid());
ompi_output_verbose(100, mca_ptl_portals_component.portals_output,
"contact info: %u, %u", info.nid, info.pid);
@ -81,10 +82,9 @@ mca_ptl_portals_init(mca_ptl_portals_component_t *comp)
int
mca_ptl_portals_add_procs_compat(struct mca_ptl_base_module_t* ptl_base,
mca_ptl_portals_add_procs_compat(struct mca_ptl_portals_module_t* ptl,
size_t nprocs, struct ompi_proc_t **procs,
struct mca_ptl_base_peer_t** peers,
ompi_bitmap_t* reachable)
ptl_process_id_t **portals_procs)
{
int ret, my_rid;
ptl_process_id_t *info;
@ -97,7 +97,6 @@ mca_ptl_portals_add_procs_compat(struct mca_ptl_base_module_t* ptl_base,
char *tmp;
ompi_proc_t* proc_self = ompi_proc_local();
int max_interfaces;
struct mca_ptl_portals_module_t *ptl = (struct mca_ptl_portals_module_t*) ptl_base;
/*
* Do all the NID/PID map setup
@ -109,7 +108,15 @@ mca_ptl_portals_add_procs_compat(struct mca_ptl_base_module_t* ptl_base,
pid_str = malloc(12 + 1);
if (NULL == nidmap || NULL == pidmap || NULL == nid_str || NULL == pid_str)
return OMPI_ERROR;
/* get space for the portals procs list */
*portals_procs = calloc(nprocs, sizeof(ptl_process_id_t));
if (NULL == *portals_procs) {
ompi_output_verbose(10, mca_ptl_portals_component.portals_output,
"calloc(nprocs, sizeof(ptl_process_id_t)) failed");
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
for (i = 0 ; i < nprocs ; ++i) {
if (proc_self == procs[i]) my_rid = i;
@ -127,25 +134,31 @@ mca_ptl_portals_add_procs_compat(struct mca_ptl_base_module_t* ptl_base,
}
if (i == 0) {
snprintf(nidmap, map_size, "%u", info->nid);
snprintf(pidmap, map_size, "%u", info->pid);
snprintf(nidmap, map_size, "%u", ntohl(info->nid));
snprintf(pidmap, map_size, "%u", ntohl(info->pid));
} else {
snprintf(nid_str, 12 + 1, ":%u", info->nid);
snprintf(pid_str, 12 + 1, ":%u", info->pid);
snprintf(nid_str, 12 + 1, ":%u", ntohl(info->nid));
snprintf(pid_str, 12 + 1, ":%u", ntohl(info->pid));
strncat(nidmap, nid_str, 12);
strncat(pidmap, pid_str, 12);
}
/* update my local array of proc structs */
(*portals_procs)[i].nid = info->nid;
(*portals_procs)[i].pid = info->pid;
free(info);
}
ompi_output_verbose(100, mca_ptl_portals_component.portals_output,
"my rid: %u", my_rid);
"%d: my rid: %u", getpid(), my_rid);
ompi_output_verbose(100, mca_ptl_portals_component.portals_output,
"nid map: %s", nidmap);
"%d: nid map: %s", getpid(), nidmap);
ompi_output_verbose(100, mca_ptl_portals_component.portals_output,
"pid map: %s", pidmap);
"%d: pid map: %s", getpid(), pidmap);
ompi_output_verbose(100, mca_ptl_portals_component.portals_output,
"iface: %s", mca_ptl_portals_component.portals_ifname);
"%d: iface: %s", getpid(),
mca_ptl_portals_component.portals_ifname);
asprintf(&tmp, "PTL_MY_RID=%u", my_rid);
putenv(tmp);
@ -156,6 +169,11 @@ mca_ptl_portals_add_procs_compat(struct mca_ptl_base_module_t* ptl_base,
asprintf(&tmp, "PTL_IFACE=%s", mca_ptl_portals_component.portals_ifname);
putenv(tmp);
free(pidmap);
free(nidmap);
free(pid_str);
free(nid_str);
/*
* Initialize Portals
*/

Просмотреть файл

@ -23,7 +23,7 @@
#include "threads/thread.h"
#include "ptl_portals.h"
#include "ptl_portals_compat.h"
/*