1
1

The IB proc/peer interfaces updated and modified.

This commit was SVN r2047.
Этот коммит содержится в:
Sayantan Sur 2004-08-11 16:45:40 +00:00
родитель 4a164fd164
Коммит 1dfc06736f
6 изменённых файлов: 131 добавлений и 105 удалений

Просмотреть файл

@ -10,4 +10,5 @@ libmca_ptl_ib_la_SOURCES = \
ptl_ib.h \ ptl_ib.h \
ptl_ib_component.c \ ptl_ib_component.c \
ptl_ib_priv.c \ ptl_ib_priv.c \
ptl_ib_proc.c ptl_ib_proc.c \
ptl_ib_peer.c

Просмотреть файл

@ -52,35 +52,26 @@ OBJ_CLASS_INSTANCE(mca_ptl_ib_send_request_t,
mca_pml_base_send_request_t, mca_pml_base_send_request_t,
NULL, NULL); NULL, NULL);
OBJ_CLASS_INSTANCE(mca_ptl_ib_peer_t,
ompi_list_item_t,
NULL, NULL);
OBJ_CLASS_INSTANCE(mca_ptl_ib_proc_t, int mca_ptl_ib_add_procs(struct mca_ptl_base_module_t* base_module,
ompi_list_item_t, size_t nprocs, struct ompi_proc_t **ompi_procs,
NULL, NULL); struct mca_ptl_base_peer_t** peers, ompi_bitmap_t* reachable)
int mca_ptl_ib_add_procs(
struct mca_ptl_base_module_t* ptl,
size_t nprocs,
struct ompi_proc_t **ompi_procs,
struct mca_ptl_base_peer_t** peers,
ompi_bitmap_t* reachable)
{ {
int i; int i, rc;
struct ompi_proc_t* ompi_proc; struct ompi_proc_t* ompi_proc;
mca_ptl_ib_proc_t* ptl_proc;
mca_ptl_base_peer_t* ptl_peer; mca_ptl_ib_proc_t* module_proc;
mca_ptl_base_peer_t* module_peer;
D_PRINT("Adding %d procs\n", nprocs); D_PRINT("Adding %d procs\n", nprocs);
for(i = 0; i < nprocs; i++) { for(i = 0; i < nprocs; i++) {
ompi_proc = ompi_procs[i]; ompi_proc = ompi_procs[i];
ptl_proc = mca_ptl_ib_proc_create(ompi_proc); module_proc = mca_ptl_ib_proc_create(ompi_proc);
if(NULL == ptl_proc) { if(NULL == module_proc) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
@ -90,9 +81,9 @@ int mca_ptl_ib_add_procs(
* don't bind this PTL instance to the proc. * don't bind this PTL instance to the proc.
*/ */
OMPI_THREAD_LOCK(&ptl_proc->proc_lock); OMPI_THREAD_LOCK(&module_proc->proc_lock);
if(ptl_proc->proc_addr_count == ptl_proc->proc_peer_count) { if(module_proc->proc_addr_count == module_proc->proc_peer_count) {
OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock); OMPI_THREAD_UNLOCK(&module_proc->proc_lock);
return OMPI_ERR_UNREACH; return OMPI_ERR_UNREACH;
} }
@ -100,26 +91,25 @@ int mca_ptl_ib_add_procs(
* instances that are trying to reach this destination. * instances that are trying to reach this destination.
* Cache the peer instance on the ptl_proc. * Cache the peer instance on the ptl_proc.
*/ */
ptl_peer = OBJ_NEW(mca_ptl_ib_peer_t); module_peer = OBJ_NEW(mca_ptl_ib_peer_t);
if(NULL == ptl_peer) { if(NULL == module_peer) {
OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock); OMPI_THREAD_UNLOCK(&module_proc->proc_lock);
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
ptl_peer->peer_ptl = (mca_ptl_ib_module_t*)ptl; module_peer->peer_module = (mca_ptl_ib_module_t*)base_module;
/* rc = mca_ptl_ib_proc_insert(module_proc, module_peer);
rc = mca_ptl_ib_proc_insert(ptl_proc, ptl_peer);
if(rc != OMPI_SUCCESS) { if(rc != OMPI_SUCCESS) {
OBJ_RELEASE(ptl_peer); OBJ_RELEASE(module_peer);
OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock); OMPI_THREAD_UNLOCK(&module_proc->proc_lock);
return rc; return rc;
} }
*/
ompi_bitmap_set_bit(reachable, i); ompi_bitmap_set_bit(reachable, i);
OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock); OMPI_THREAD_UNLOCK(&module_proc->proc_lock);
peers[i] = ptl_peer; peers[i] = module_peer;
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -130,14 +120,14 @@ int mca_ptl_ib_del_procs(struct mca_ptl_base_module_t* ptl,
struct mca_ptl_base_peer_t ** peers) struct mca_ptl_base_peer_t ** peers)
{ {
/* Stub */ /* Stub */
fprintf(stderr,"[%s][%d]\n", __FILE__, __LINE__); D_PRINT("Stub\n");
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
int mca_ptl_ib_finalize(struct mca_ptl_base_module_t* ptl) int mca_ptl_ib_finalize(struct mca_ptl_base_module_t* ptl)
{ {
/* Stub */ /* Stub */
fprintf(stderr,"[%s][%d]\n", __FILE__, __LINE__); D_PRINT("Stub\n");
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -145,7 +135,7 @@ int mca_ptl_ib_request_alloc(struct mca_ptl_base_module_t* ptl,
struct mca_pml_base_send_request_t** request) struct mca_pml_base_send_request_t** request)
{ {
/* Stub */ /* Stub */
fprintf(stderr,"[%s][%d]\n", __FILE__, __LINE__); D_PRINT("Stub\n");
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -154,7 +144,7 @@ void mca_ptl_ib_request_return(struct mca_ptl_base_module_t* ptl,
struct mca_pml_base_send_request_t* request) struct mca_pml_base_send_request_t* request)
{ {
/* Stub */ /* Stub */
fprintf(stderr,"[%s][%d]\n", __FILE__, __LINE__); D_PRINT("Stub\n");
} }
/* /*
@ -173,7 +163,7 @@ int mca_ptl_ib_send(
int flags) int flags)
{ {
/* Stub */ /* Stub */
fprintf(stderr,"[%s][%d]\n", __FILE__, __LINE__); D_PRINT("Stub\n");
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -187,6 +177,6 @@ void mca_ptl_ib_matched(
mca_ptl_base_module_t* ptl, mca_ptl_base_module_t* ptl,
mca_ptl_base_recv_frag_t* frag) mca_ptl_base_recv_frag_t* frag)
{ {
fprintf(stderr,"[%s][%d]\n", __FILE__, __LINE__);
/* Stub */ /* Stub */
D_PRINT("Stub\n");
} }

Просмотреть файл

@ -44,6 +44,7 @@ struct mca_ptl_ib_component_t {
int ib_free_list_inc; /**< number of elements to alloc when growing free lists */ int ib_free_list_inc; /**< number of elements to alloc when growing free lists */
ompi_free_list_t ib_send_requests; /**< free list of ib send requests -- sendreq + IB */ ompi_free_list_t ib_send_requests; /**< free list of ib send requests -- sendreq + IB */
ompi_free_list_t ib_recv_frags; /**< free list of ib recv fragments */ ompi_free_list_t ib_recv_frags; /**< free list of ib recv fragments */
ompi_list_t ib_procs; /**< list of ib proc structures */
ompi_event_t ib_send_event; /**< event structure for sends */ ompi_event_t ib_send_event; /**< event structure for sends */
ompi_event_t ib_recv_event; /**< event structure for recvs */ ompi_event_t ib_recv_event; /**< event structure for recvs */
ompi_mutex_t ib_lock; /**< lock for accessing module state */ ompi_mutex_t ib_lock; /**< lock for accessing module state */

Просмотреть файл

@ -91,6 +91,9 @@ static inline int mca_ptl_ib_param_register_int(
int mca_ptl_ib_component_open(void) int mca_ptl_ib_component_open(void)
{ {
D_PRINT("Opening InfiniBand component ...\n"); D_PRINT("Opening InfiniBand component ...\n");
OBJ_CONSTRUCT(&mca_ptl_ib_component.ib_procs, ompi_list_t);
/* register super component parameters */ /* register super component parameters */
mca_ptl_ib_module.super.ptl_exclusivity = mca_ptl_ib_module.super.ptl_exclusivity =
mca_ptl_ib_param_register_int ("exclusivity", 0); mca_ptl_ib_param_register_int ("exclusivity", 0);
@ -390,7 +393,7 @@ mca_ptl_base_module_t** mca_ptl_ib_component_init(int *num_ptl_modules,
int mca_ptl_ib_component_control(int param, void* value, size_t size) int mca_ptl_ib_component_control(int param, void* value, size_t size)
{ {
/* Stub */ /* Stub */
fprintf(stderr,"[%s][%d]\n", __FILE__, __LINE__); D_PRINT("Stub\n");
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -402,6 +405,6 @@ int mca_ptl_ib_component_control(int param, void* value, size_t size)
int mca_ptl_ib_component_progress(mca_ptl_tstamp_t tstamp) int mca_ptl_ib_component_progress(mca_ptl_tstamp_t tstamp)
{ {
/* Stub */ /* Stub */
fprintf(stderr,"[%s][%d]\n", __FILE__, __LINE__); D_PRINT("Stub\n");
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -13,10 +13,11 @@
*/ */
typedef enum { typedef enum {
MCA_PTL_IB_CLOSED, MCA_PTL_IB_NOT_OPENED,
MCA_PTL_IB_CONNECTING, MCA_PTL_IB_CONNECTING,
MCA_PTL_IB_CONNECT_ACK, MCA_PTL_IB_CONNECT_ACK,
MCA_PTL_IB_CONNECTED, MCA_PTL_IB_CONNECTED,
MCA_PTL_IB_CLOSED,
MCA_PTL_IB_FAILED MCA_PTL_IB_FAILED
} mca_ptl_ib_state_t; } mca_ptl_ib_state_t;
@ -28,20 +29,19 @@ typedef enum {
*/ */
struct mca_ptl_base_peer_t { struct mca_ptl_base_peer_t {
ompi_list_item_t super; ompi_list_item_t super;
struct mca_ptl_ib_module_t* peer_ptl; /**< PTL instance that created this connection */ struct mca_ptl_ib_module_t* peer_module; /**< PTL instance that created this connection */
struct mca_ptl_ib_proc_t* peer_proc; /**< proc structure corresponding to peer */ struct mca_ptl_ib_proc_t* peer_proc; /**< proc structure corresponding to peer */
struct mca_ptl_ib_addr_t* peer_addr; /**< address of peer */ struct mca_ptl_ib_ud_addr_t* peer_addr; /**< address of peer */
int peer_sd; /**< socket connection to peer */ mca_ptl_ib_send_frag_t* peer_send_frag; /**< current send frag being processed */
mca_ptl_ib_send_frag_t* peer_send_frag; /**< current send frag being processed */ mca_ptl_ib_recv_frag_t* peer_recv_frag; /**< current recv frag being processed */
mca_ptl_ib_recv_frag_t* peer_recv_frag; /**< current recv frag being processed */ mca_ptl_ib_state_t peer_state; /**< current state of the connection */
mca_ptl_ib_state_t peer_state; /**< current state of the connection */ size_t peer_retries; /**< number of connection retries attempted */
size_t peer_retries; /**< number of connection retries attempted */ ompi_list_t peer_frags; /**< list of pending frags to send */
ompi_list_t peer_frags; /**< list of pending frags to send */ ompi_mutex_t peer_send_lock; /**< lock for concurrent access to peer state */
ompi_mutex_t peer_send_lock; /**< lock for concurrent access to peer state */ ompi_mutex_t peer_recv_lock; /**< lock for concurrent access to peer state */
ompi_mutex_t peer_recv_lock; /**< lock for concurrent access to peer state */ ompi_event_t peer_send_event; /**< event for async processing of send frags */
ompi_event_t peer_send_event; /**< event for async processing of send frags */ ompi_event_t peer_recv_event; /**< event for async processing of recv frags */
ompi_event_t peer_recv_event; /**< event for async processing of recv frags */
}; };
typedef struct mca_ptl_base_peer_t mca_ptl_base_peer_t; typedef struct mca_ptl_base_peer_t mca_ptl_base_peer_t;
typedef struct mca_ptl_base_peer_t mca_ptl_ib_peer_t; typedef struct mca_ptl_base_peer_t mca_ptl_ib_peer_t;

Просмотреть файл

@ -6,29 +6,72 @@
#include "ptl_ib_vapi.h" #include "ptl_ib_vapi.h"
#include "ptl_ib_proc.h" #include "ptl_ib_proc.h"
static void mca_ptl_ib_proc_construct(mca_ptl_ib_proc_t* proc);
static void mca_ptl_ib_proc_destruct(mca_ptl_ib_proc_t* proc);
OBJ_CLASS_INSTANCE(mca_ptl_ib_proc_t,
ompi_list_item_t, mca_ptl_ib_proc_construct,
mca_ptl_ib_proc_destruct);
void mca_ptl_ib_proc_construct(mca_ptl_ib_proc_t* proc)
{
proc->proc_ompi = 0;
proc->proc_addrs = 0;
proc->proc_addr_count = 0;
proc->proc_peers = 0;
proc->proc_peer_count = 0;
OBJ_CONSTRUCT(&proc->proc_lock, ompi_mutex_t);
/* add to list of all proc instance */
OMPI_THREAD_LOCK(&mca_ptl_ib_component.ib_lock);
ompi_list_append(&mca_ptl_ib_component.ib_procs, &proc->super);
OMPI_THREAD_UNLOCK(&mca_ptl_ib_component.ib_lock);
}
/*
* Cleanup ib proc instance
*/
void mca_ptl_ib_proc_destruct(mca_ptl_ib_proc_t* proc)
{
/* remove from list of all proc instances */
OMPI_THREAD_LOCK(&mca_ptl_ib_component.ib_lock);
ompi_list_remove_item(&mca_ptl_ib_component.ib_procs, &proc->super);
OMPI_THREAD_UNLOCK(&mca_ptl_ib_component.ib_lock);
/* release resources */
if(NULL != proc->proc_peers) {
free(proc->proc_peers);
}
}
/* /*
* Look for an existing IB process instances based on the associated * Look for an existing IB process instances based on the associated
* ompi_proc_t instance. * ompi_proc_t instance.
*/ */
/* static mca_ptl_ib_proc_t* mca_ptl_ib_proc_lookup_ompi(ompi_proc_t* ompi_proc)
mca_ptl_ib_proc_t* mca_ptl_ib_proc_lookup_ompi(ompi_proc_t* ompi_proc)
{ {
mca_ptl_ib_proc_t* ib_proc; mca_ptl_ib_proc_t* ib_proc;
OMPI_THREAD_LOCK(&mca_ptl_ib_module.ib_lock);
OMPI_THREAD_LOCK(&mca_ptl_ib_component.ib_lock);
for(ib_proc = (mca_ptl_ib_proc_t*) for(ib_proc = (mca_ptl_ib_proc_t*)
ompi_list_get_first(&mca_ptl_ib_module.ib_procs); ompi_list_get_first(&mca_ptl_ib_component.ib_procs);
ib_proc != (mca_ptl_ib_proc_t*) ib_proc != (mca_ptl_ib_proc_t*)
ompi_list_get_end(&mca_ptl_ib_module.ib_procs); ompi_list_get_end(&mca_ptl_ib_component.ib_procs);
ib_proc = (mca_ptl_ib_proc_t*)ompi_list_get_next(ib_proc)) { ib_proc = (mca_ptl_ib_proc_t*)ompi_list_get_next(ib_proc)) {
if(ib_proc->proc_ompi == ompi_proc) { if(ib_proc->proc_ompi == ompi_proc) {
OMPI_THREAD_UNLOCK(&mca_ptl_ib_module.ib_lock); OMPI_THREAD_UNLOCK(&mca_ptl_ib_component.ib_lock);
return ib_proc; return ib_proc;
} }
} }
OMPI_THREAD_UNLOCK(&mca_ptl_ib_module.ib_lock);
OMPI_THREAD_UNLOCK(&mca_ptl_ib_component.ib_lock);
return NULL; return NULL;
} }
*/
/* /*
* Create a IB process structure. There is a one-to-one correspondence * Create a IB process structure. There is a one-to-one correspondence
@ -43,27 +86,26 @@ mca_ptl_ib_proc_t* mca_ptl_ib_proc_create(ompi_proc_t* ompi_proc)
int rc; int rc;
size_t size; size_t size;
mca_ptl_ib_proc_t* ptl_proc = NULL; mca_ptl_ib_proc_t* module_proc = NULL;
/* /*
mca_ptl_ib_proc_t* ptl_proc = module_proc = mca_ptl_ib_proc_lookup_ompi(ompi_proc);
mca_ptl_ib_proc_lookup_ompi(ompi_proc);
if(ptl_proc != NULL) { if(module_proc != NULL) {
return ptl_proc; return module_proc;
} }
*/ */
ptl_proc = OBJ_NEW(mca_ptl_ib_proc_t); module_proc = OBJ_NEW(mca_ptl_ib_proc_t);
/* Initialize number of peer */ /* Initialize number of peer */
ptl_proc->proc_peer_count = 0; module_proc->proc_peer_count = 0;
ptl_proc->proc_ompi = ompi_proc; module_proc->proc_ompi = ompi_proc;
/* build a unique identifier (of arbitrary /* build a unique identifier (of arbitrary
* size) to represent the proc */ * size) to represent the proc */
ptl_proc->proc_guid = ompi_proc->proc_name; module_proc->proc_guid = ompi_proc->proc_name;
D_PRINT("Creating proc for %d\n", ompi_proc->proc_name.vpid); D_PRINT("Creating proc for %d\n", ompi_proc->proc_name.vpid);
@ -72,37 +114,41 @@ mca_ptl_ib_proc_t* mca_ptl_ib_proc_create(ompi_proc_t* ompi_proc)
rc = mca_base_modex_recv( rc = mca_base_modex_recv(
&mca_ptl_ib_component.super.ptlm_version, &mca_ptl_ib_component.super.ptlm_version,
ompi_proc, ompi_proc,
(void**)&ptl_proc->proc_addrs, (void**)&module_proc->proc_addrs,
&size); &size);
if(rc != OMPI_SUCCESS) { if(rc != OMPI_SUCCESS) {
ompi_output(0, "mca_ptl_ib_proc_create: mca_base_modex_recv: " ompi_output(0, "mca_ptl_ib_proc_create: mca_base_modex_recv: "
"failed with return value=%d", rc); "failed with return value=%d", rc);
OBJ_RELEASE(ptl_proc); OBJ_RELEASE(module_proc);
return NULL; return NULL;
} }
D_PRINT("UD q.p. obtained is: %d, Lid : %d\n",
(int)module_proc->proc_addrs[0].ud_qp,
(int)module_proc->proc_addrs[0].lid);
if(0 != (size % sizeof(mca_ptl_ib_ud_addr_t))) { if(0 != (size % sizeof(mca_ptl_ib_ud_addr_t))) {
ompi_output(0, "mca_ptl_ib_proc_create: mca_base_modex_recv: " ompi_output(0, "mca_ptl_ib_proc_create: mca_base_modex_recv: "
"invalid size %d\n", size); "invalid size %d\n", size);
return NULL; return NULL;
} }
ptl_proc->proc_addr_count = size / sizeof(mca_ptl_ib_ud_addr_t); module_proc->proc_addr_count = size / sizeof(mca_ptl_ib_ud_addr_t);
/* allocate space for peer array - one for /* allocate space for peer array - one for
* each exported address * each exported address
*/ */
ptl_proc->proc_peers = (mca_ptl_base_peer_t**) module_proc->proc_peers = (mca_ptl_base_peer_t**)
malloc(ptl_proc->proc_addr_count * sizeof(mca_ptl_base_peer_t*)); malloc(module_proc->proc_addr_count * sizeof(mca_ptl_base_peer_t*));
if(NULL == ptl_proc->proc_peers) { if(NULL == module_proc->proc_peers) {
OBJ_RELEASE(ptl_proc); OBJ_RELEASE(module_proc);
return NULL; return NULL;
} }
return ptl_proc; return module_proc;
} }
@ -111,15 +157,14 @@ mca_ptl_ib_proc_t* mca_ptl_ib_proc_create(ompi_proc_t* ompi_proc)
* already held. Insert a ptl instance into the proc array and assign * already held. Insert a ptl instance into the proc array and assign
* it an address. * it an address.
*/ */
int mca_ptl_ib_proc_insert(mca_ptl_ib_proc_t* ptl_proc, int mca_ptl_ib_proc_insert(mca_ptl_ib_proc_t* module_proc,
mca_ptl_base_peer_t* ptl_peer) mca_ptl_base_peer_t* module_peer)
{ {
int i; int i;
struct mca_ptl_ib_module_t *ptl_ib = ptl_peer->peer_ptl;
/* insert into peer array */ /* insert into peer array */
ptl_peer->peer_proc = ptl_proc; module_peer->peer_proc = module_proc;
ptl_proc->proc_peers[ptl_proc->proc_peer_count++] = ptl_peer; module_proc->proc_peers[module_proc->proc_peer_count++] = module_peer;
/* /*
* Look through the proc instance for an address that is on the * Look through the proc instance for an address that is on the
@ -127,26 +172,12 @@ int mca_ptl_ib_proc_insert(mca_ptl_ib_proc_t* ptl_proc,
* unused address. * unused address.
*/ */
for(i=0; i<ptl_proc->proc_addr_count; i++) { for(i = 0; i < module_proc->proc_addr_count; i++) {
/*
mca_ptl_ib_ud_addr_t* peer_addr = ptl_proc->proc_addrs + i;
*/
#if 0 mca_ptl_ib_ud_addr_t* peer_addr = module_proc->proc_addrs + i;
unsigned long net1 = ptl_ib->ptl_ifaddr.sin_addr.s_addr & ptl_ib->ptl_ifmask.sin_addr.s_addr;
unsigned long net2 = peer_addr->addr_inet.s_addr & ptl_ib->ptl_ifmask.sin_addr.s_addr;
if(peer_addr->addr_inuse != 0) module_peer->peer_addr = peer_addr;
continue;
if(net1 == net2) {
ptl_peer->peer_addr = peer_addr;
break;
} else if(ptl_peer->peer_addr != 0)
ptl_peer->peer_addr = peer_addr;
#endif
} }
/*
ptl_peer->peer_addr->addr_inuse++;
*/
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }