1
1

a) Fill in the stub functions for mca or pml calling

This commit was SVN r1543.
Этот коммит содержится в:
Weikuan Yu 2004-07-02 19:34:21 +00:00
родитель df569c6458
Коммит dc943e56d2
9 изменённых файлов: 240 добавлений и 35 удалений

Просмотреть файл

@ -18,6 +18,7 @@
#include "ptl_elan_proc.h" #include "ptl_elan_proc.h"
#include "ptl_elan_req.h" #include "ptl_elan_req.h"
#include "ptl_elan_frag.h" #include "ptl_elan_frag.h"
#include "ptl_elan_priv.h"
/* XXX: There must be multiple PTL's. This could be the template */ /* XXX: There must be multiple PTL's. This could be the template */
@ -33,8 +34,8 @@ mca_ptl_elan_t mca_ptl_elan = {
MCA_PTL_PUT, /* ptl flags */ MCA_PTL_PUT, /* ptl flags */
/* collection of interfaces */ /* collection of interfaces */
mca_ptl_elan_add_proc, mca_ptl_elan_add_procs,
mca_ptl_elan_del_proc, mca_ptl_elan_del_procs,
mca_ptl_elan_finalize, mca_ptl_elan_finalize,
mca_ptl_elan_put, mca_ptl_elan_put,
mca_ptl_elan_get, mca_ptl_elan_get,
@ -44,19 +45,64 @@ mca_ptl_elan_t mca_ptl_elan = {
} }
}; };
/*
 * Bind this elan PTL to each of the given processes.
 *
 * For every entry of procs[] the matching elan proc structure is obtained,
 * a new peer object is created and inserted into it, the corresponding bit
 * in the reachable bitmap is set and the peer is stored in peers[].
 *
 * @param ptl        PTL instance being bound (stored in each new peer)
 * @param nprocs     number of entries in procs[] and peers[]
 * @param procs      processes to bind to
 * @param peers      (OUT) peer created for each process, same index as procs[]
 * @param reachable  (OUT) bit i is set once procs[i] has been bound
 * @return OMPI_SUCCESS when every process was bound; OMPI_ERR_UNREACH when a
 *         process has no free address left; OMPI_ERR_OUT_OF_RESOURCE when a
 *         peer cannot be allocated; otherwise the error returned by
 *         mca_ptl_elan_proc_insert().  Processing stops at the first failure.
 */
int mca_ptl_elan_add_procs (struct mca_ptl_t *ptl,
                            size_t nprocs,
                            struct ompi_proc_t **procs,
                            struct mca_ptl_base_peer_t **peers,
                            ompi_bitmap_t* reachable)
{
    struct ompi_proc_t *ompi_proc;
    mca_ptl_elan_proc_t *ptl_proc;
    mca_ptl_elan_peer_t *ptl_peer;
    int rc;
    size_t i;   /* same type as nprocs: avoids a signed/unsigned comparison */

    for (i = 0; i < nprocs; i++) {
        ompi_proc = procs[i];
        ptl_proc = mca_ptl_elan_proc_create(ompi_proc);

        /* NOTE(review): macro is defined elsewhere; presumably it bails out
         * with OMPI_ERR_OUT_OF_RESOURCE when ptl_proc is NULL - confirm. */
        OMPI_PTL_ELAN_CHECK_UNEX(ptl_proc, NULL, OMPI_ERR_OUT_OF_RESOURCE, 0);

        /* Check to make sure that the peer has at least as many
         * interface addresses exported as we are trying to use.
         * If not, then don't bind this PTL instance to the proc.
         */
        OMPI_THREAD_LOCK(&ptl_proc->proc_lock);
        if (ptl_proc->proc_addr_count == ptl_proc->proc_peer_count) {
            OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock);
            ompi_output(0, "all peers are taken already\n");
            return OMPI_ERR_UNREACH;
        }

        /* The ptl_proc data structure is shared by all elan PTL
         * instances that are trying to reach this destination.
         * Cache the peer instance on the ptl_proc.
         */
        ptl_peer = OBJ_NEW(mca_ptl_elan_peer_t);
        if (NULL == ptl_peer) {
            OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        ptl_peer->peer_ptl = (mca_ptl_elan_t*)ptl;
        rc = mca_ptl_elan_proc_insert(ptl_proc, ptl_peer);
        if (rc != OMPI_SUCCESS) {
            OBJ_RELEASE(ptl_peer);
            OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock);
            return rc;
        }

        ompi_bitmap_set_bit(reachable, (int) i);
        OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock);
        peers[i] = ptl_peer;
    }
    return OMPI_SUCCESS;
}
int mca_ptl_elan_del_proc (struct mca_ptl_t *ptl, int mca_ptl_elan_del_procs (struct mca_ptl_t *ptl,
size_t nprocs, size_t nprocs,
struct ompi_proc_t ** procs, struct ompi_proc_t ** procs,
struct mca_ptl_base_peer_t **ptl_peer) struct mca_ptl_base_peer_t **peers)
{ {
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -169,10 +169,10 @@ extern int mca_ptl_elan_finalize (struct mca_ptl_t *ptl);
* @return OMPI_SUCCESS or error status on failure. * @return OMPI_SUCCESS or error status on failure.
*/ */
extern int extern int
mca_ptl_elan_add_proc (struct mca_ptl_t *ptl, mca_ptl_elan_add_procs (struct mca_ptl_t *ptl,
size_t nprocs, size_t nprocs,
struct ompi_proc_t **ompi_proc, struct ompi_proc_t **procs,
struct mca_ptl_base_peer_t **peer_ret, struct mca_ptl_base_peer_t **peers,
ompi_bitmap_t* reachable); ompi_bitmap_t* reachable);
/** /**
@ -184,10 +184,10 @@ mca_ptl_elan_add_proc (struct mca_ptl_t *ptl,
* @return Status indicating if cleanup was successful * @return Status indicating if cleanup was successful
*/ */
extern int extern int
mca_ptl_elan_del_proc (struct mca_ptl_t *ptl, mca_ptl_elan_del_procs (struct mca_ptl_t *ptl,
size_t nprocs, size_t nprocs,
struct ompi_proc_t ** procs, struct ompi_proc_t ** procs,
struct mca_ptl_base_peer_t **ptl_peer); struct mca_ptl_base_peer_t **peers);
/** /**
* PML->PTL Allocate a send request from the PTL modules free list. * PML->PTL Allocate a send request from the PTL modules free list.

Просмотреть файл

@ -91,7 +91,7 @@ mca_ptl_elan_send_frag_progress (mca_ptl_elan_send_frag_t * frag)
static inline void static inline void
mca_ptl_elan_send_frag_init_ack (mca_ptl_elan_send_frag_t * ack, mca_ptl_elan_send_frag_init_ack (mca_ptl_elan_send_frag_t * ack,
struct mca_ptl_t *ptl, struct mca_ptl_t *ptl,
struct mca_ptl_elan_peer_T *ptl_peer, struct mca_ptl_elan_peer_t *ptl_peer,
mca_ptl_elan_recv_frag_t * frag) mca_ptl_elan_recv_frag_t * frag)
{ {
return; return;

Просмотреть файл

@ -82,6 +82,9 @@ static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
return OMPI_ERROR; return OMPI_ERROR;
} }
/*
* XXX: Leave the following later after testing of QDMA is done
*/
if (OMPI_SUCCESS != ompi_init_elan_rdma (emp, rail_count)) { if (OMPI_SUCCESS != ompi_init_elan_rdma (emp, rail_count)) {
return OMPI_ERROR; return OMPI_ERROR;
} }

Просмотреть файл

@ -57,6 +57,31 @@ mca_ptl_elan_module_1_0_0_t mca_ptl_elan_module = {
static mca_ptl_elan_module_1_0_0_t *elan_mp = &mca_ptl_elan_module; static mca_ptl_elan_module_1_0_0_t *elan_mp = &mca_ptl_elan_module;
static int mca_ptl_elan_module_initialized = 0; static int mca_ptl_elan_module_initialized = 0;
/*
 * Publish the elan vp of every local elan PTL to the global registry
 * through mca_base_modex_send().
 *
 * @param emp  elan module; elan_ptls[0 .. elan_num_ptls-1] are published
 * @return the status of mca_base_modex_send(), or OMPI_ERR_OUT_OF_RESOURCE
 *         when the temporary address array cannot be allocated
 */
static int mca_ptl_elan_module_register (mca_ptl_elan_module_1_0_0_t *emp)
{
    int rc;
    size_t i;
    size_t size;
    mca_ptl_elan_addr_t *addrs;

    /* The byte count has to be known before the buffer is allocated. */
    size = emp->elan_num_ptls * sizeof(mca_ptl_elan_addr_t);
    addrs = (mca_ptl_elan_addr_t*)malloc(size);
    if (NULL == addrs) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* One published entry per PTL, initially marked as not in use. */
    for (i = 0; i < emp->elan_num_ptls; i++) {
        mca_ptl_elan_t * ptl = emp->elan_ptls[i];
        addrs[i].elan_vp = ptl->elan_vp;
        addrs[i].addr_inuse = 0;
    }

    rc = mca_base_modex_send(&emp->super.ptlm_version, addrs, size);

    /* The local array is released right after the send call returns. */
    free(addrs);
    return rc;
}
/* /*
* Called by MCA framework to open the module, registers * Called by MCA framework to open the module, registers
* module parameters. * module parameters.
@ -207,19 +232,17 @@ mca_ptl_elan_module_init (int *num_ptls,
} }
/* /*
* (mca_ptl_elan_module_exchange () != OMPI_SUCCESS) * we need to publish some information for elan.
*
* No need to publish parameters with the MCA framework
*
* This is called only by those processes who have elan.
* So it does not qualify to be a global call.
* Since the processes has elan support can already communicate
* over elan, there is no need for a oob_based exchange.
*/ */
if (OMPI_SUCCESS != mca_ptl_elan_module_register(&mca_ptl_elan_module)) {
ompi_output(0,
"[%s:%d] error in malloc for elan PTL references\n",
__FILE__, __LINE__);
return OMPI_ERROR;
}
ptls = (mca_ptl_t **) malloc (elan_mp->elan_num_ptls * ptls = (mca_ptl_t **) malloc (elan_mp->elan_num_ptls *
sizeof (mca_ptl_elan_t *)); sizeof (mca_ptl_elan_t *));
if (NULL == ptls) { if (NULL == ptls) {
ompi_output(0, ompi_output(0,
"[%s:%d] error in malloc for elan PTL references\n", "[%s:%d] error in malloc for elan PTL references\n",
@ -227,10 +250,7 @@ mca_ptl_elan_module_init (int *num_ptls,
return NULL; return NULL;
} }
/* FIXME: /* Will coherency on two replicas be a potential problem? */
* Why use memcopy to create two instances of the same
* structures, do they need to be defined them as constants,
* will coherency on two replicas be a potential problem? */
memcpy (ptls, elan_mp->elan_ptls, memcpy (ptls, elan_mp->elan_ptls,
elan_mp->elan_num_ptls * sizeof (mca_ptl_elan_t *)); elan_mp->elan_num_ptls * sizeof (mca_ptl_elan_t *));
*num_ptls = elan_mp->elan_num_ptls; *num_ptls = elan_mp->elan_num_ptls;
@ -249,13 +269,18 @@ mca_ptl_elan_module_control (int param,
size_t size) size_t size)
{ {
switch (param) { switch (param) {
#if 0
case MCA_PTL_ENABLE: case MCA_PTL_ENABLE:
if (*(int *) value) { if (*(int *) value) {
/* Trying to trigger the thread progress engine,
* Here the elan PTL does not have this capability
* for now. So we skip this function. */
ompi_event_add (&elan_mp->elan_recv_event, 0); ompi_event_add (&elan_mp->elan_recv_event, 0);
} else { } else {
ompi_event_del (&elan_mp->elan_recv_event); ompi_event_del (&elan_mp->elan_recv_event);
} }
break; break;
#endif
default: default:
break; break;
} }

Просмотреть файл

@ -32,6 +32,7 @@ struct mca_ptl_elan_peer_t {
struct mca_ptl_elan_t* peer_ptl; struct mca_ptl_elan_t* peer_ptl;
struct mca_ptl_elan_proc_t* peer_proc; struct mca_ptl_elan_proc_t* peer_proc;
struct mca_ptl_elan_addr_t* peer_addr; /**< address of peer */
int resending; /* A resending stage, no more new dma's */ int resending; /* A resending stage, no more new dma's */
int num_resend; /* How many times I have retried */ int num_resend; /* How many times I have retried */

Просмотреть файл

@ -50,6 +50,15 @@
} \ } \
} while (0) } while (0)
/**
 * Structure used to publish elan information to peers.
 * One entry is published per elan PTL.
 */
struct mca_ptl_elan_addr_t {
int elan_vp; /* Right now only elan_vp is needed */
int addr_inuse; /* published as 0; skipped by mca_ptl_elan_proc_insert() when non-zero and incremented there once a peer is bound to this address */
};
typedef struct mca_ptl_elan_addr_t mca_ptl_elan_addr_t;
struct ompi_ptl_elan_recv_queue_t struct ompi_ptl_elan_recv_queue_t
{ {
/* Events needs to be aligned */ /* Events needs to be aligned */

Просмотреть файл

@ -8,7 +8,7 @@
#include "ptl_elan.h" #include "ptl_elan.h"
#include "ptl_elan_peer.h" #include "ptl_elan_peer.h"
#include "ptl_elan_proc.h" #include "ptl_elan_proc.h"
#include "ptl_elan_priv.h"
static void mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc); static void mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc);
static void mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc); static void mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc);
@ -29,6 +29,22 @@ ompi_class_t mca_ptl_elan_proc_t_class = {
/*
 * Constructor for mca_ptl_elan_proc_t: resets every field to an empty
 * state, constructs the per-proc lock and registers the new instance on
 * the module-wide list of elan procs.
 */
void mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc)
{
    /* No ompi proc, no published addresses and no peers yet. */
    proc->proc_ompi = NULL;

    proc->proc_addrs = NULL;
    proc->proc_addr_count = 0;

    proc->proc_peers = NULL;
    proc->proc_peer_count = 0;

    /* Blank process name until the proc is associated with an ompi proc. */
    proc->proc_guid.cellid = 0;
    proc->proc_guid.jobid = 0;
    proc->proc_guid.procid = 0;

    OBJ_CONSTRUCT(&proc->proc_lock, ompi_mutex_t);

    /* Make the instance visible on the module's list; the list is
     * modified only while the module lock is held. */
    OMPI_THREAD_LOCK(&mca_ptl_elan_module.elan_lock);
    ompi_list_append(&mca_ptl_elan_module.elan_procs, &proc->super);
    OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock);
}
@ -39,10 +55,18 @@ void mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc)
/*
 * Destructor for mca_ptl_elan_proc_t: takes the instance off the
 * module-wide list of elan procs and frees its peer array.
 */
void mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc)
{
    /* Unlink from the list of all proc instances under the module lock. */
    OMPI_THREAD_LOCK(&mca_ptl_elan_module.elan_lock);
    ompi_list_remove_item(&mca_ptl_elan_module.elan_procs, &proc->super);
    OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock);

    /* Release the peer array; free() accepts NULL, so an array that was
     * never allocated needs no special case. */
    free(proc->proc_peers);
}
/* /*
* Create a ELAN process structure. There is a one-to-one correspondence * Create a ELAN process structure. There is a one-to-one correspondence
* between a ompi_proc_t and a mca_ptl_elan_proc_t instance. * between a ompi_proc_t and a mca_ptl_elan_proc_t instance.
@ -53,7 +77,48 @@ void mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc)
/*
 * Return the elan proc structure for an ompi proc, creating it on first use.
 *
 * An existing instance found by mca_ptl_elan_proc_lookup_ompi() is returned
 * as is.  Otherwise a new instance is created, the addresses published by
 * the process are fetched with mca_base_modex_recv() and a peer array with
 * one slot per published address is allocated.
 *
 * @param ompi_proc  process to look up or create a structure for
 * @return the elan proc structure, or NULL when allocation fails, the
 *         modex receive fails or the received data has an invalid size
 */
mca_ptl_elan_proc_t *mca_ptl_elan_proc_create (ompi_proc_t * ompi_proc)
{
    int rc;
    size_t size = 0;
    mca_ptl_elan_proc_t* ptl_proc;

    ptl_proc = mca_ptl_elan_proc_lookup_ompi(ompi_proc);
    if (ptl_proc != NULL) {
        return ptl_proc;
    }

    ptl_proc = OBJ_NEW(mca_ptl_elan_proc_t);
    if (NULL == ptl_proc) {
        return NULL;
    }
    ptl_proc->proc_ompi = ompi_proc;
    ptl_proc->proc_guid = ompi_proc->proc_name;

    /* Fetch the address array the remote process published. */
    rc = mca_base_modex_recv( &mca_ptl_elan_module.super.ptlm_version,
                              ompi_proc, (void**)&ptl_proc->proc_addrs, &size);
    if (rc != OMPI_SUCCESS) {
        ompi_output(0, "[%s:%d] mca_base_modex_recv failed to recv data \n",
                    __FILE__, __LINE__);
        OBJ_RELEASE(ptl_proc);
        return NULL;
    }

    /* The payload must be a whole number of address entries. */
    if (0 != (size % sizeof(mca_ptl_elan_addr_t))) {
        ompi_output(0, "[%s:%d] invalid received data size %lu\n",
                    __FILE__, __LINE__, (unsigned long) size);
        OBJ_RELEASE(ptl_proc);   /* do not leak the half-built instance */
        return NULL;
    }
    ptl_proc->proc_addr_count = size / sizeof(mca_ptl_elan_addr_t);

    /* allocate space for peer array - one for each exported address */
    ptl_proc->proc_peers = (mca_ptl_elan_peer_t**)
        malloc(ptl_proc->proc_addr_count * sizeof(mca_ptl_elan_peer_t*));
    if (NULL == ptl_proc->proc_peers) {
        OBJ_RELEASE(ptl_proc);
        return NULL;
    }

    /* Remember the structure describing the local process, once. */
    if (NULL == mca_ptl_elan_module.elan_local
        && ompi_proc == ompi_proc_local()) {
        mca_ptl_elan_module.elan_local = ptl_proc;
    }
    return ptl_proc;
}
/* /*
@ -63,6 +128,23 @@ mca_ptl_elan_proc_t *mca_ptl_elan_proc_create (ompi_proc_t * ompi_proc)
static mca_ptl_elan_proc_t * static mca_ptl_elan_proc_t *
mca_ptl_elan_proc_lookup_ompi (ompi_proc_t *ompi_proc) mca_ptl_elan_proc_lookup_ompi (ompi_proc_t *ompi_proc)
{ {
mca_ptl_elan_proc_t* elan_proc;
OMPI_THREAD_LOCK(&mca_ptl_elan_module.elan_lock);
elan_proc = (mca_ptl_elan_proc_t*)
ompi_list_get_first(&mca_ptl_elan_module.elan_procs);
for( ; elan_proc != (mca_ptl_elan_proc_t*)
ompi_list_get_end(&mca_ptl_elan_module.elan_procs);
elan_proc = (mca_ptl_elan_proc_t*)ompi_list_get_next(elan_proc)) {
if(elan_proc->proc_ompi == ompi_proc) {
OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock);
return elan_proc;
}
}
OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock);
return NULL; return NULL;
} }
@ -84,6 +166,41 @@ mca_ptl_elan_proc_t *mca_ptl_elan_proc_lookup (void *guid, size_t size)
/*
 * Attach a peer to a proc and bind it to one of the addresses the proc
 * published.
 *
 * Among the addresses not yet in use, one whose elan_vp equals the elan_vp
 * of the peer's PTL is preferred; when there is none, the first unused
 * address is taken.  The peer is appended to the proc's peer array only
 * after an address has been found.
 *
 * The caller is expected to hold ptl_proc->proc_lock and to have verified
 * that the peer array still has a free slot, as mca_ptl_elan_add_procs()
 * does.
 *
 * @param ptl_proc  proc to attach the peer to
 * @param ptl_peer  peer to attach; peer_ptl must already be set
 * @return OMPI_SUCCESS, or OMPI_ERR_UNREACH when every published address
 *         is already in use (the proc is left unmodified in that case)
 */
int mca_ptl_elan_proc_insert (mca_ptl_elan_proc_t * ptl_proc,
                              mca_ptl_elan_peer_t * ptl_peer)
{
    size_t i;
    struct mca_ptl_elan_t *ptl_elan = ptl_peer->peer_ptl;
    mca_ptl_elan_addr_t *chosen = NULL;

    for (i = 0; i < ptl_proc->proc_addr_count; i++) {
        unsigned vp_local;
        unsigned vp_remote;
        mca_ptl_elan_addr_t *peer_addr = ptl_proc->proc_addrs + i;

        if (peer_addr->addr_inuse != 0) {
            continue;
        }

        vp_local = ptl_elan->elan_vp;
        vp_remote = peer_addr->elan_vp;

        if (vp_local == vp_remote) {
            /* Exact match: stop searching. */
            chosen = peer_addr;
            break;
        }
        if (NULL == chosen) {
            /* Fallback: remember the first unused address only. */
            chosen = peer_addr;
        }
    }

    if (NULL == chosen) {
        return OMPI_ERR_UNREACH;
    }

    /* Commit only after an address is known, so a failure leaves no
     * dangling peer pointer in the proc's peer array. */
    ptl_peer->peer_proc = ptl_proc;
    ptl_peer->peer_addr = chosen;
    chosen->addr_inuse++;
    ptl_proc->proc_peers[ptl_proc->proc_peer_count++] = ptl_peer;
    return OMPI_SUCCESS;
}

Просмотреть файл

@ -25,10 +25,14 @@ extern ompi_class_t mca_ptl_elan_proc_t_class;
struct mca_ptl_elan_proc_t { struct mca_ptl_elan_proc_t {
ompi_list_item_t super; /**< allow proc to be placed on a list */ ompi_list_item_t super; /**< allow proc to be placed on a list */
ompi_proc_t *proc_ompi; /**< pointer to corresponding ompi_proc_t */ ompi_proc_t *proc_ompi; /**< pointer to corresponding ompi_proc_t */
struct mca_ptl_base_peer_t **proc_peers; /**< array of peers */
ompi_process_name_t proc_guid; /**< globally unique identifier
for the process */
struct mca_ptl_elan_addr_t *proc_addrs; /**< array of addresses published
by peer */
size_t proc_addr_count;
struct mca_ptl_elan_peer_t **proc_peers; /**< array of peers */
size_t proc_peer_count; /**< number of peers */ size_t proc_peer_count; /**< number of peers */
void* proc_uuid; /**< univeral unique identifier the process */
size_t univ_size; /**< size of the universe */
ompi_mutex_t proc_lock; /**< lock to for proc state */ ompi_mutex_t proc_lock; /**< lock to for proc state */
}; };
typedef struct mca_ptl_elan_proc_t mca_ptl_elan_proc_t; typedef struct mca_ptl_elan_proc_t mca_ptl_elan_proc_t;