1
1

a) Fill in the stub functions for mca or pml calling

This commit was SVN r1543.
Этот коммит содержится в:
Weikuan Yu 2004-07-02 19:34:21 +00:00
родитель df569c6458
Коммит dc943e56d2
9 изменённых файлов: 240 добавлений и 35 удалений

Просмотреть файл

@ -18,6 +18,7 @@
#include "ptl_elan_proc.h"
#include "ptl_elan_req.h"
#include "ptl_elan_frag.h"
#include "ptl_elan_priv.h"
/* XXX: There must be multiple PTL's. This could be the template */
@ -33,8 +34,8 @@ mca_ptl_elan_t mca_ptl_elan = {
MCA_PTL_PUT, /* ptl flags */
/* collection of interfaces */
mca_ptl_elan_add_proc,
mca_ptl_elan_del_proc,
mca_ptl_elan_add_procs,
mca_ptl_elan_del_procs,
mca_ptl_elan_finalize,
mca_ptl_elan_put,
mca_ptl_elan_get,
@ -44,19 +45,64 @@ mca_ptl_elan_t mca_ptl_elan = {
}
};
int mca_ptl_elan_add_proc (struct mca_ptl_t *ptl,
size_t nprocs,
struct ompi_proc_t **ompi_proc,
struct mca_ptl_base_peer_t **peer_ret,
ompi_bitmap_t* reachable)
/*
 * Bind this PTL to each of the nprocs processes in procs[].
 *
 * For every process a mca_ptl_elan_proc_t is obtained through
 * mca_ptl_elan_proc_create(), a new mca_ptl_elan_peer_t is created and
 * attached to it with mca_ptl_elan_proc_insert(), bit i of the reachable
 * bitmap is set and the peer is stored in peers[i].
 *
 * Returns OMPI_SUCCESS when every process was handled, OMPI_ERR_UNREACH
 * when a process has no free address left, OMPI_ERR_OUT_OF_RESOURCE when
 * a peer cannot be allocated, or the error reported by
 * mca_ptl_elan_proc_insert().
 *
 * NOTE(review): the error returns leave the loop immediately; peers that
 * were set up in earlier iterations are not undone here.
 */
int mca_ptl_elan_add_procs (struct mca_ptl_t *ptl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_ptl_base_peer_t **peers,
ompi_bitmap_t* reachable)
{
struct ompi_proc_t *ompi_proc;
mca_ptl_elan_proc_t *ptl_proc;
mca_ptl_elan_peer_t *ptl_peer;
int rc;
int i;
for(i=0; i<nprocs; i++) {
ompi_proc = procs[i];
ptl_proc = mca_ptl_elan_proc_create(ompi_proc);
/* presumably bails out with OMPI_ERR_OUT_OF_RESOURCE when ptl_proc is
 * NULL -- TODO confirm against the macro definition */
OMPI_PTL_ELAN_CHECK_UNEX(ptl_proc, NULL, OMPI_ERR_OUT_OF_RESOURCE, 0);
/* When the number of peers already bound to this proc has reached the
 * number of addresses it exported, there is nothing left for this PTL
 * instance to bind to: the whole call fails with OMPI_ERR_UNREACH.
 */
OMPI_THREAD_LOCK(&ptl_proc->proc_lock);
if(ptl_proc->proc_addr_count == ptl_proc->proc_peer_count) {
OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock);
ompi_output(0, "all peers are taken already\n");
return OMPI_ERR_UNREACH;
}
/* The ptl_proc data structure is shared by all PTL instances
 * that are trying to reach this destination.
 * Cache the peer instance on the ptl_proc.
 */
ptl_peer = OBJ_NEW(mca_ptl_elan_peer_t);
if(NULL == ptl_peer) {
OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock);
return OMPI_ERR_OUT_OF_RESOURCE;
}
ptl_peer->peer_ptl = (mca_ptl_elan_t*)ptl;
rc = mca_ptl_elan_proc_insert(ptl_proc, ptl_peer);
if(rc != OMPI_SUCCESS) {
/* drop the peer before releasing the lock and reporting the failure */
OBJ_RELEASE(ptl_peer);
OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock);
return rc;
}
/* mark process i as reachable through this PTL and hand the peer back */
ompi_bitmap_set_bit(reachable, i);
OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock);
peers[i] = ptl_peer;
}
return OMPI_SUCCESS;
}
/*
 * Stub: performs no cleanup for the given processes/peers and always
 * returns OMPI_SUCCESS.
 *
 * NOTE(review): this listing carries both the mca_ptl_elan_del_proc /
 * ptl_peer and the mca_ptl_elan_del_procs / peers signature lines; only
 * one of each pair can be kept for the definition to compile.
 */
int mca_ptl_elan_del_proc (struct mca_ptl_t *ptl,
int mca_ptl_elan_del_procs (struct mca_ptl_t *ptl,
size_t nprocs,
struct ompi_proc_t ** procs,
struct mca_ptl_base_peer_t **ptl_peer)
struct mca_ptl_base_peer_t **peers)
{
return OMPI_SUCCESS;
}

Просмотреть файл

@ -169,10 +169,10 @@ extern int mca_ptl_elan_finalize (struct mca_ptl_t *ptl);
* @return OMPI_SUCCESS or error status on failure.
*/
extern int
mca_ptl_elan_add_proc (struct mca_ptl_t *ptl,
mca_ptl_elan_add_procs (struct mca_ptl_t *ptl,
size_t nprocs,
struct ompi_proc_t **ompi_proc,
struct mca_ptl_base_peer_t **peer_ret,
struct ompi_proc_t **procs,
struct mca_ptl_base_peer_t **peers,
ompi_bitmap_t* reachable);
/**
@ -184,10 +184,10 @@ mca_ptl_elan_add_proc (struct mca_ptl_t *ptl,
* @return Status indicating if cleanup was successful
*/
extern int
mca_ptl_elan_del_proc (struct mca_ptl_t *ptl,
mca_ptl_elan_del_procs (struct mca_ptl_t *ptl,
size_t nprocs,
struct ompi_proc_t ** procs,
struct mca_ptl_base_peer_t **ptl_peer);
struct mca_ptl_base_peer_t **peers);
/**
* PML->PTL Allocate a send request from the PTL modules free list.

Просмотреть файл

@ -91,7 +91,7 @@ mca_ptl_elan_send_frag_progress (mca_ptl_elan_send_frag_t * frag)
static inline void
mca_ptl_elan_send_frag_init_ack (mca_ptl_elan_send_frag_t * ack,
struct mca_ptl_t *ptl,
struct mca_ptl_elan_peer_T *ptl_peer,
struct mca_ptl_elan_peer_t *ptl_peer,
mca_ptl_elan_recv_frag_t * frag)
{
return;

Просмотреть файл

@ -82,6 +82,9 @@ static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
return OMPI_ERROR;
}
/*
* XXX: Leave the following later after testing of QDMA is done
*/
if (OMPI_SUCCESS != ompi_init_elan_rdma (emp, rail_count)) {
return OMPI_ERROR;
}

Просмотреть файл

@ -57,6 +57,31 @@ mca_ptl_elan_module_1_0_0_t mca_ptl_elan_module = {
static mca_ptl_elan_module_1_0_0_t *elan_mp = &mca_ptl_elan_module;
static int mca_ptl_elan_module_initialized = 0;
/*
 * Publish the Elan addressing information of this module through the
 * modex.
 *
 * One mca_ptl_elan_addr_t is built per PTL in emp->elan_ptls, carrying
 * that PTL's elan_vp and an addr_inuse counter of zero, and the array is
 * handed to mca_base_modex_send() under the module's version key.
 *
 * Returns the status of mca_base_modex_send(), or
 * OMPI_ERR_OUT_OF_RESOURCE when the temporary array cannot be allocated.
 */
static int mca_ptl_elan_module_register (mca_ptl_elan_module_1_0_0_t *emp)
{
    int rc;
    size_t i;
    size_t size;
    mca_ptl_elan_addr_t *addrs;

    /* The size must be known before the buffer is allocated. */
    size = emp->elan_num_ptls * sizeof(mca_ptl_elan_addr_t);
    addrs = (mca_ptl_elan_addr_t*)malloc(size);
    if (NULL == addrs && 0 != size) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    for (i = 0; i < emp->elan_num_ptls; i++) {
        mca_ptl_elan_t *ptl = emp->elan_ptls[i];

        addrs[i].elan_vp = ptl->elan_vp;
        addrs[i].addr_inuse = 0;
    }

    rc = mca_base_modex_send(&emp->super.ptlm_version, addrs, size);
    /* The temporary array is released right after the send returns. */
    free(addrs);
    return rc;
}
/*
* Called by MCA framework to open the module, registers
* module parameters.
@ -207,19 +232,17 @@ mca_ptl_elan_module_init (int *num_ptls,
}
/*
* (mca_ptl_elan_module_exchange () != OMPI_SUCCESS)
*
* No need to publish parameters with the MCA framework
*
* This is called only by those processes who have elan.
* So it does not qualify to be a global call.
* Since the processes has elan support can already communicate
* over elan, there is no need for a oob_based exchange.
* we need to publish some information for elan.
*/
if (OMPI_SUCCESS != mca_ptl_elan_module_register(&mca_ptl_elan_module)) {
ompi_output(0,
"[%s:%d] error in malloc for elan PTL references\n",
__FILE__, __LINE__);
return OMPI_ERROR;
}
ptls = (mca_ptl_t **) malloc (elan_mp->elan_num_ptls *
sizeof (mca_ptl_elan_t *));
sizeof (mca_ptl_elan_t *));
if (NULL == ptls) {
ompi_output(0,
"[%s:%d] error in malloc for elan PTL references\n",
@ -227,10 +250,7 @@ mca_ptl_elan_module_init (int *num_ptls,
return NULL;
}
/* FIXME:
* Why use memcopy to create two instances of the same
* structures, do they need to be defined them as constants,
* will coherency on two replicas be a potential problem? */
/* Will coherency on two replicas be a potential problem? */
memcpy (ptls, elan_mp->elan_ptls,
elan_mp->elan_num_ptls * sizeof (mca_ptl_elan_t *));
*num_ptls = elan_mp->elan_num_ptls;
@ -249,13 +269,18 @@ mca_ptl_elan_module_control (int param,
size_t size)
{
switch (param) {
#if 0
case MCA_PTL_ENABLE:
if (*(int *) value) {
/* Trying to trigger the thread progress engine,
* Here the elan PTL does not have this capability
* for now. So we skip this function. */
ompi_event_add (&elan_mp->elan_recv_event, 0);
} else {
ompi_event_del (&elan_mp->elan_recv_event);
}
break;
#endif
default:
break;
}

Просмотреть файл

@ -32,6 +32,7 @@ struct mca_ptl_elan_peer_t {
struct mca_ptl_elan_t* peer_ptl;
struct mca_ptl_elan_proc_t* peer_proc;
struct mca_ptl_elan_addr_t* peer_addr; /**< address of peer */
int resending; /* A resending stage, no more new dma's */
int num_resend; /* How many times I have retried */

Просмотреть файл

@ -50,6 +50,15 @@
} \
} while (0)
/**
 * Address record an Elan PTL publishes to its peers.
 */
typedef struct mca_ptl_elan_addr_t {
    int elan_vp;    /* elan_vp of the publishing PTL; the only datum needed so far */
    int addr_inuse; /* non-zero once the address has been taken */
} mca_ptl_elan_addr_t;
struct ompi_ptl_elan_recv_queue_t
{
/* Events needs to be aligned */

Просмотреть файл

@ -8,7 +8,7 @@
#include "ptl_elan.h"
#include "ptl_elan_peer.h"
#include "ptl_elan_proc.h"
#include "ptl_elan_priv.h"
static void mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc);
static void mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc);
@ -29,6 +29,22 @@ ompi_class_t mca_ptl_elan_proc_t_class = {
/*
 * Constructor for mca_ptl_elan_proc_t: clears the fields, sets up the
 * per-proc lock and appends the new instance to the module-wide list of
 * procs (under the module lock).
 */
void mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc)
{
    /* not yet associated with a process, no addresses, no peers */
    proc->proc_ompi = NULL;
    proc->proc_addrs = NULL;
    proc->proc_peers = NULL;
    proc->proc_addr_count = 0;
    proc->proc_peer_count = 0;

    proc->proc_guid.cellid = 0;
    proc->proc_guid.jobid = 0;
    proc->proc_guid.procid = 0;

    OBJ_CONSTRUCT(&proc->proc_lock, ompi_mutex_t);

    /* make the instance visible on the list of all procs */
    OMPI_THREAD_LOCK(&mca_ptl_elan_module.elan_lock);
    ompi_list_append(&mca_ptl_elan_module.elan_procs, &proc->super);
    OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock);
}
@ -39,10 +55,18 @@ void mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc)
/*
 * Destructor for mca_ptl_elan_proc_t: unlinks the instance from the
 * module-wide list of procs (under the module lock) and frees the peer
 * array.
 */
void mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc)
{
    /* take the instance off the list of all procs */
    OMPI_THREAD_LOCK(&mca_ptl_elan_module.elan_lock);
    ompi_list_remove_item(&mca_ptl_elan_module.elan_procs, &proc->super);
    OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock);

    /* free() accepts NULL, so an unset peer array needs no guard */
    free(proc->proc_peers);
}
/*
* Create a ELAN process structure. There is a one-to-one correspondence
* between a ompi_proc_t and a mca_ptl_elan_proc_t instance.
@ -53,7 +77,48 @@ void mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc)
/*
 * Return the mca_ptl_elan_proc_t associated with ompi_proc, creating it
 * on first use.
 *
 * A new instance records the process and its name, receives the address
 * array the peer published through the modex, and allocates one peer slot
 * per received address. The first instance created for the local process
 * is remembered in mca_ptl_elan_module.elan_local.
 *
 * Returns the instance, or NULL on allocation failure, modex failure, or
 * when the received data is not a whole number of mca_ptl_elan_addr_t.
 */
mca_ptl_elan_proc_t *mca_ptl_elan_proc_create (ompi_proc_t * ompi_proc)
{
    int rc;
    size_t size;
    mca_ptl_elan_proc_t *ptl_proc;

    /* reuse the instance if this process has been seen before */
    ptl_proc = mca_ptl_elan_proc_lookup_ompi(ompi_proc);
    if (NULL != ptl_proc) {
        return ptl_proc;
    }

    ptl_proc = OBJ_NEW(mca_ptl_elan_proc_t);
    if (NULL == ptl_proc) {
        return NULL;
    }
    ptl_proc->proc_ompi = ompi_proc;
    ptl_proc->proc_guid = ompi_proc->proc_name;

    rc = mca_base_modex_recv(&mca_ptl_elan_module.super.ptlm_version,
                             ompi_proc,
                             (void**)&ptl_proc->proc_addrs, &size);
    if (OMPI_SUCCESS != rc) {
        ompi_output(0, "[%s:%d] mca_base_modex_recv failed to recv data \n",
                    __FILE__, __LINE__);
        OBJ_RELEASE(ptl_proc);
        return NULL;
    }

    if (0 != (size % sizeof(mca_ptl_elan_addr_t))) {
        /* pass every argument the format names, and do not leak ptl_proc */
        ompi_output(0, "[%s:%d] invalid received data size %lu\n",
                    __FILE__, __LINE__, (unsigned long) size);
        OBJ_RELEASE(ptl_proc);
        return NULL;
    }
    ptl_proc->proc_addr_count = size / sizeof(mca_ptl_elan_addr_t);

    /* allocate space for peer array - one for each exported address */
    ptl_proc->proc_peers = (mca_ptl_elan_peer_t**)
        malloc(ptl_proc->proc_addr_count * sizeof(mca_ptl_elan_peer_t*));
    if (NULL == ptl_proc->proc_peers) {
        OBJ_RELEASE(ptl_proc);
        return NULL;
    }

    if (NULL == mca_ptl_elan_module.elan_local
        && ompi_proc == ompi_proc_local()) {
        mca_ptl_elan_module.elan_local = ptl_proc;
    }
    return ptl_proc;
}
/*
@ -63,6 +128,23 @@ mca_ptl_elan_proc_t *mca_ptl_elan_proc_create (ompi_proc_t * ompi_proc)
/*
 * Search the module-wide list of procs, under the module lock, for the
 * instance whose proc_ompi is ompi_proc. Returns it, or NULL when no
 * entry matches.
 */
static mca_ptl_elan_proc_t *
mca_ptl_elan_proc_lookup_ompi (ompi_proc_t *ompi_proc)
{
    mca_ptl_elan_proc_t *match = NULL;
    mca_ptl_elan_proc_t *cursor;
    mca_ptl_elan_proc_t *sentinel;

    OMPI_THREAD_LOCK(&mca_ptl_elan_module.elan_lock);

    sentinel = (mca_ptl_elan_proc_t*)
        ompi_list_get_end(&mca_ptl_elan_module.elan_procs);
    cursor = (mca_ptl_elan_proc_t*)
        ompi_list_get_first(&mca_ptl_elan_module.elan_procs);

    while (cursor != sentinel) {
        if (cursor->proc_ompi == ompi_proc) {
            match = cursor;
            break;
        }
        cursor = (mca_ptl_elan_proc_t*)ompi_list_get_next(cursor);
    }

    OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock);
    return match;
}
@ -84,6 +166,41 @@ mca_ptl_elan_proc_t *mca_ptl_elan_proc_lookup (void *guid, size_t size)
/*
 * Attach ptl_peer to ptl_proc and pick the address the peer will use.
 *
 * Among the addresses of ptl_proc that are not yet in use, an address
 * whose elan_vp equals the elan_vp of the peer's PTL is preferred;
 * otherwise the first unused address is taken. On success the peer is
 * appended to ptl_proc->proc_peers, its peer_proc/peer_addr are set and
 * the chosen address is marked in use.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERR_UNREACH when every address is already
 * in use; in that case neither ptl_proc nor ptl_peer is modified.
 *
 * The caller must guarantee a free slot in ptl_proc->proc_peers.
 */
int mca_ptl_elan_proc_insert (mca_ptl_elan_proc_t * ptl_proc,
                              mca_ptl_elan_peer_t * ptl_peer)
{
    size_t i;
    struct mca_ptl_elan_t *ptl_elan = ptl_peer->peer_ptl;
    mca_ptl_elan_addr_t *chosen = NULL;

    for (i = 0; i < ptl_proc->proc_addr_count; i++) {
        unsigned vp_local;
        unsigned vp_remote;
        mca_ptl_elan_addr_t *peer_addr = ptl_proc->proc_addrs + i;

        if (0 != peer_addr->addr_inuse) {
            continue;
        }

        vp_local = ptl_elan->elan_vp;
        vp_remote = peer_addr->elan_vp;
        if (vp_local == vp_remote) {
            /* exact match: stop searching */
            chosen = peer_addr;
            break;
        }
        if (NULL == chosen) {
            /* remember the first unused address as the fallback */
            chosen = peer_addr;
        }
    }

    if (NULL == chosen) {
        return OMPI_ERR_UNREACH;
    }

    /* Commit only once an address is known, so that a failure leaves no
     * reference to the peer behind in proc_peers. */
    ptl_peer->peer_proc = ptl_proc;
    ptl_peer->peer_addr = chosen;
    ptl_proc->proc_peers[ptl_proc->proc_peer_count++] = ptl_peer;
    chosen->addr_inuse++;
    return OMPI_SUCCESS;
}

Просмотреть файл

@ -25,10 +25,14 @@ extern ompi_class_t mca_ptl_elan_proc_t_class;
/**
 * Per-process state kept by the Elan PTL: the process it stands for, the
 * addresses that process published and the peers bound to them.
 *
 * NOTE(review): proc_peers is declared twice below (once as
 * mca_ptl_base_peer_t **, once as mca_ptl_elan_peer_t **); only one of
 * the two declarations can remain for the struct to compile.
 */
struct mca_ptl_elan_proc_t {
ompi_list_item_t super; /**< allows the proc to be placed on a list */
ompi_proc_t *proc_ompi; /**< pointer to the corresponding ompi_proc_t */
struct mca_ptl_base_peer_t **proc_peers; /**< array of peers */
ompi_process_name_t proc_guid; /**< globally unique identifier
of the process */
struct mca_ptl_elan_addr_t *proc_addrs; /**< array of addresses published
by the peer */
size_t proc_addr_count; /**< number of entries in proc_addrs */
struct mca_ptl_elan_peer_t **proc_peers; /**< array of peers */
size_t proc_peer_count; /**< number of peers */
void* proc_uuid; /**< universally unique identifier of the process */
size_t univ_size; /**< size of the universe */
ompi_mutex_t proc_lock; /**< lock protecting the proc state */
};
typedef struct mca_ptl_elan_proc_t mca_ptl_elan_proc_t;