MX is up and running again. Several modifications:
- we modex-send and receive a structure containing the nic id and the endpoint id. On the remote node we can recompose the endpoint_addr via mx_connect. - allow several retries of mx_connect (up to 5 seconds ... soon to be an MCA param). - correctly construct/destruct the internal objects. - some other minor changes. This commit was SVN r6535.
Этот коммит содержится в:
родитель
657d10187e
Коммит
a06f555707
@ -287,14 +287,13 @@ int mca_ptl_mx_send(
|
||||
sendfrag->frag_send.frag_base.frag_size);
|
||||
|
||||
/* start the fragment */
|
||||
mx_return = mx_isend(
|
||||
mx_ptl->mx_endpoint,
|
||||
segments,
|
||||
sendfrag->frag_segment_count,
|
||||
ptl_peer->peer_addr,
|
||||
match.lval,
|
||||
sendfrag,
|
||||
&sendfrag->frag_request);
|
||||
mx_return = mx_isend( mx_ptl->mx_endpoint,
|
||||
segments,
|
||||
sendfrag->frag_segment_count,
|
||||
ptl_peer->peer_addr,
|
||||
match.lval,
|
||||
sendfrag,
|
||||
&sendfrag->frag_request );
|
||||
if(mx_return != MX_SUCCESS) {
|
||||
opal_output(0, "mca_ptl_mx_send: mx_isend() failed with return value=%d\n", mx_return);
|
||||
return OMPI_ERROR;
|
||||
|
@ -42,7 +42,7 @@ struct mca_ptl_mx_component_t {
|
||||
uint32_t mx_filter; /**< filter assigned to application */
|
||||
uint32_t mx_num_ptls; /**< number of MX NICs available to app */
|
||||
uint32_t mx_max_ptls; /**< max number of MX NICs to use */
|
||||
struct mca_ptl_mx_module_t** mx_ptls; /**< array of available PTL moduless */
|
||||
struct mca_ptl_mx_module_t** mx_ptls; /**< array of available PTL modules */
|
||||
ompi_free_list_t mx_send_frags; /**< free list of mx send fragments */
|
||||
ompi_free_list_t mx_recv_frags; /**< free list of mx recv fragments */
|
||||
opal_hash_table_t mx_procs; /**< hash table of procs */
|
||||
@ -133,9 +133,6 @@ extern int mca_ptl_mx_component_progress(
|
||||
struct mca_ptl_mx_module_t {
|
||||
mca_ptl_base_module_t super; /**< base PTL module interface */
|
||||
opal_list_t mx_peers; /**< list of peers */
|
||||
uint64_t mx_nic_addr; /**< NIC MAC address */
|
||||
uint32_t mx_filter; /**< endpoint filter */
|
||||
uint32_t mx_endpoint_id; /**< endpoint ID */
|
||||
bool mx_enabled; /**< flag to indicate if endpoint enabled */
|
||||
mx_endpoint_t mx_endpoint; /**< endpoint */
|
||||
mx_endpoint_addr_t mx_endpoint_addr; /**< endpoint address */
|
||||
|
@ -150,11 +150,11 @@ int mca_ptl_mx_component_close(void)
|
||||
#endif
|
||||
|
||||
/* release resources */
|
||||
OBJ_DESTRUCT(&mca_ptl_mx_component.mx_lock);
|
||||
OBJ_DESTRUCT(&mca_ptl_mx_component.mx_send_frags);
|
||||
OBJ_DESTRUCT(&mca_ptl_mx_component.mx_recv_frags);
|
||||
OBJ_DESTRUCT(&mca_ptl_mx_component.mx_procs);
|
||||
OBJ_DESTRUCT(&mca_ptl_mx_component.mx_pending_acks);
|
||||
OBJ_DESTRUCT(&mca_ptl_mx_component.mx_lock);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -171,44 +171,42 @@ mca_ptl_base_module_t** mca_ptl_mx_component_init(
|
||||
*num_ptls = 0;
|
||||
|
||||
/* initialize objects */
|
||||
OBJ_CONSTRUCT(&mca_ptl_mx_component.mx_lock, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&mca_ptl_mx_component.mx_send_frags, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&mca_ptl_mx_component.mx_recv_frags, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&mca_ptl_mx_component.mx_procs, opal_hash_table_t);
|
||||
OBJ_CONSTRUCT(&mca_ptl_mx_component.mx_pending_acks, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_ptl_mx_component.mx_lock, opal_mutex_t);
|
||||
|
||||
ompi_free_list_init(&mca_ptl_mx_component.mx_send_frags,
|
||||
sizeof(mca_ptl_mx_send_frag_t),
|
||||
OBJ_CLASS(mca_ptl_mx_send_frag_t),
|
||||
mca_ptl_mx_component.mx_free_list_num,
|
||||
mca_ptl_mx_component.mx_free_list_max,
|
||||
mca_ptl_mx_component.mx_free_list_inc,
|
||||
NULL); /* use default allocator */
|
||||
ompi_free_list_init( &mca_ptl_mx_component.mx_send_frags,
|
||||
sizeof(mca_ptl_mx_send_frag_t),
|
||||
OBJ_CLASS(mca_ptl_mx_send_frag_t),
|
||||
mca_ptl_mx_component.mx_free_list_num,
|
||||
mca_ptl_mx_component.mx_free_list_max,
|
||||
mca_ptl_mx_component.mx_free_list_inc,
|
||||
NULL ); /* use default allocator */
|
||||
|
||||
ompi_free_list_init(&mca_ptl_mx_component.mx_recv_frags,
|
||||
sizeof(mca_ptl_mx_recv_frag_t),
|
||||
OBJ_CLASS(mca_ptl_mx_recv_frag_t),
|
||||
mca_ptl_mx_component.mx_free_list_num,
|
||||
mca_ptl_mx_component.mx_free_list_max,
|
||||
mca_ptl_mx_component.mx_free_list_inc,
|
||||
NULL); /* use default allocator */
|
||||
ompi_free_list_init( &mca_ptl_mx_component.mx_recv_frags,
|
||||
sizeof(mca_ptl_mx_recv_frag_t),
|
||||
OBJ_CLASS(mca_ptl_mx_recv_frag_t),
|
||||
mca_ptl_mx_component.mx_free_list_num,
|
||||
mca_ptl_mx_component.mx_free_list_max,
|
||||
mca_ptl_mx_component.mx_free_list_inc,
|
||||
NULL ); /* use default allocator */
|
||||
|
||||
/* intialize process hash table */
|
||||
opal_hash_table_init(&mca_ptl_mx_component.mx_procs, 256);
|
||||
opal_hash_table_init( &mca_ptl_mx_component.mx_procs, 256 );
|
||||
|
||||
/* initialize mx ptls */
|
||||
if(OMPI_SUCCESS != mca_ptl_mx_module_init())
|
||||
return NULL;
|
||||
|
||||
/* allocate and return a copy of the ptl array */
|
||||
ptls = malloc(mca_ptl_mx_component.mx_num_ptls *
|
||||
sizeof(mca_ptl_base_module_t*));
|
||||
ptls = malloc( mca_ptl_mx_component.mx_num_ptls * sizeof(mca_ptl_base_module_t*) );
|
||||
if(NULL == ptls)
|
||||
return NULL;
|
||||
|
||||
memcpy(ptls,
|
||||
mca_ptl_mx_component.mx_ptls,
|
||||
mca_ptl_mx_component.mx_num_ptls*sizeof(mca_ptl_mx_module_t*));
|
||||
memcpy( ptls, mca_ptl_mx_component.mx_ptls,
|
||||
mca_ptl_mx_component.mx_num_ptls*sizeof(mca_ptl_mx_module_t*) );
|
||||
*num_ptls = mca_ptl_mx_component.mx_num_ptls;
|
||||
return ptls;
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ int mca_ptl_mx_module_init(void)
|
||||
uint32_t i;
|
||||
int rc;
|
||||
uint64_t *nic_addrs;
|
||||
mx_endpoint_addr_t *endpoint_addrs;
|
||||
mca_ptl_mx_endpoint_t *endpoint_addrs;
|
||||
mx_return_t status;
|
||||
|
||||
/* intialize MX library */
|
||||
@ -92,20 +92,19 @@ int mca_ptl_mx_module_init(void)
|
||||
free(nic_addrs);
|
||||
|
||||
/* post local endpoint addresses */
|
||||
size = mca_ptl_mx_component.mx_num_ptls * sizeof(mx_endpoint_addr_t);
|
||||
endpoint_addrs = (mx_endpoint_addr_t*)malloc(size);
|
||||
size = mca_ptl_mx_component.mx_num_ptls * sizeof(mca_ptl_mx_endpoint_t);
|
||||
endpoint_addrs = (mca_ptl_mx_endpoint_t*)malloc(size);
|
||||
if(NULL == endpoint_addrs) {
|
||||
opal_output(0, "mca_ptl_mx_module_init: malloc() failed\n");
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
for(i=0; i<mca_ptl_mx_component.mx_num_ptls; i++) {
|
||||
for( i = 0; i < mca_ptl_mx_component.mx_num_ptls; i++ ) {
|
||||
mca_ptl_mx_module_t* ptl = mca_ptl_mx_component.mx_ptls[i];
|
||||
endpoint_addrs[i] = ptl->mx_endpoint_addr;
|
||||
mx_decompose_endpoint_addr( ptl->mx_endpoint_addr,
|
||||
&(endpoint_addrs[i].nic_id), &(endpoint_addrs[i].endpoint_id) );
|
||||
}
|
||||
if((rc = mca_base_modex_send( &mca_ptl_mx_component.super.ptlm_version,
|
||||
endpoint_addrs,
|
||||
mca_ptl_mx_component.mx_num_ptls * sizeof(mx_endpoint_addr_t)))
|
||||
!= OMPI_SUCCESS)
|
||||
endpoint_addrs, size )) != OMPI_SUCCESS )
|
||||
return rc;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -294,7 +293,7 @@ static mca_ptl_mx_module_t* mca_ptl_mx_create(uint64_t addr)
|
||||
mca_ptl_mx_finalize(&ptl->super);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/* query the endpoint address */
|
||||
if((status = mx_get_endpoint_addr( ptl->mx_endpoint,
|
||||
&ptl->mx_endpoint_addr)) != MX_SUCCESS) {
|
||||
@ -303,23 +302,6 @@ static mca_ptl_mx_module_t* mca_ptl_mx_create(uint64_t addr)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* breakup the endpoint address */
|
||||
if((status = mx_decompose_endpoint_addr( ptl->mx_endpoint_addr,
|
||||
&ptl->mx_nic_addr,
|
||||
&ptl->mx_endpoint_id)) != MX_SUCCESS) {
|
||||
opal_output(0, "mca_ptl_mx_init: mx_decompose_endpoint_addr() failed with status=%d\n", status);
|
||||
mca_ptl_mx_finalize(&ptl->super);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(mca_ptl_mx_component.mx_debug) {
|
||||
opal_output(0, "mca_ptl_mx_create: opened %08X:%08X:%08X:%08X\n",
|
||||
(uint32_t)(ptl->mx_nic_addr >> 32),
|
||||
(uint32_t)ptl->mx_nic_addr,
|
||||
ptl->mx_endpoint_id,
|
||||
ptl->mx_filter);
|
||||
}
|
||||
|
||||
/* prepost a receive buffer */
|
||||
ptl->mx_recvs_posted = 1;
|
||||
MCA_PTL_MX_POST(ptl, MCA_PTL_HDR_TYPE_MATCH, sizeof(mca_ptl_base_match_header_t));
|
||||
@ -376,6 +358,7 @@ int mca_ptl_mx_finalize(struct mca_ptl_base_module_t* ptl)
|
||||
opal_thread_join(&mx_ptl->mx_thread, NULL);
|
||||
#endif
|
||||
mx_close_endpoint(mx_ptl->mx_endpoint);
|
||||
mx_ptl->mx_endpoint = NULL;
|
||||
free(mx_ptl);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -114,11 +114,11 @@ mca_ptl_mx_proc_t* mca_ptl_mx_proc_create(ompi_proc_t* ompi_proc)
|
||||
OBJ_RELEASE(ptl_proc);
|
||||
return NULL;
|
||||
}
|
||||
if(0 != (size % sizeof(mx_endpoint_addr_t))) {
|
||||
if(0 != (size % sizeof(mca_ptl_mx_endpoint_t))) {
|
||||
opal_output(0, "mca_ptl_mx_proc_create: mca_base_modex_recv: invalid size %d\n", size);
|
||||
return NULL;
|
||||
}
|
||||
ptl_proc->proc_addr_count = size / sizeof(mx_endpoint_addr_t);
|
||||
ptl_proc->proc_addr_count = size / sizeof(mca_ptl_mx_endpoint_t);
|
||||
|
||||
/* allocate space for peer array - one for each exported address */
|
||||
ptl_proc->proc_peers = (mca_ptl_mx_peer_t**)
|
||||
@ -152,12 +152,28 @@ mca_ptl_mx_proc_t* mca_ptl_mx_proc_lookup(const orte_process_name_t *name)
|
||||
*/
|
||||
int mca_ptl_mx_proc_insert(mca_ptl_mx_proc_t* ptl_proc, mca_ptl_base_peer_t* ptl_peer)
|
||||
{
|
||||
mx_return_t mx_status;
|
||||
mca_ptl_mx_endpoint_t* remote = &(ptl_proc->proc_addrs[ptl_proc->proc_peer_count]);
|
||||
int num_retry = 0;
|
||||
/* insert into peer array */
|
||||
ptl_peer->peer_proc = ptl_proc;
|
||||
ptl_peer->peer_addr = ptl_proc->proc_addrs[ptl_proc->proc_peer_count];
|
||||
ptl_proc->proc_peers[ptl_proc->proc_peer_count] = ptl_peer;
|
||||
ptl_proc->proc_peer_count++;
|
||||
|
||||
/* construct the remote endpoint addr */
|
||||
retry_connect:
|
||||
mx_status = mx_connect( ptl_peer->peer_ptl->mx_endpoint, remote->nic_id, remote->endpoint_id,
|
||||
mca_ptl_mx_component.mx_filter, 1, &(ptl_peer->peer_addr) );
|
||||
if( OMPI_SUCCESS != mx_status ) {
|
||||
if( MX_TIMEOUT == mx_status )
|
||||
if( num_retry++ < 5 )
|
||||
goto retry_connect;
|
||||
opal_output( 0, "mx_connect fail for peer %d remote %lx %d filter %x with error %s\n",
|
||||
ptl_proc->proc_peer_count,
|
||||
remote->nic_id, remote->endpoint_id, mca_ptl_mx_component.mx_filter,
|
||||
mx_strerror(mx_status) );
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
ptl_proc->proc_peer_count++;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -30,6 +30,11 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct mca_ptl_mx_endpoint_t {
|
||||
uint64_t nic_id;
|
||||
uint32_t endpoint_id;
|
||||
} mca_ptl_mx_endpoint_t;
|
||||
|
||||
/**
|
||||
* Represents the state of a remote process and the set of addresses
|
||||
* that it exports. Also cache an instance of mca_ptl_base_peer_t for each
|
||||
@ -39,7 +44,7 @@ struct mca_ptl_mx_proc_t {
|
||||
opal_list_item_t super; /**< allow proc to be placed on a list */
|
||||
ompi_proc_t *proc_ompi; /**< pointer to corresponding ompi_proc_t */
|
||||
orte_process_name_t proc_name; /**< globally unique identifier for the process */
|
||||
mx_endpoint_addr_t *proc_addrs; /**< peer endpoint address */
|
||||
mca_ptl_mx_endpoint_t *proc_addrs; /**< peer endpoint address */
|
||||
size_t proc_addr_count; /**< number of addresses published by peer */
|
||||
mca_ptl_mx_peer_t **proc_peers; /**< array of peers that have been created to access this proc */
|
||||
size_t proc_peer_count; /**< number of peers */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user