1
1

Add more output when calls to the MX library fail.

Move the connection status from the proc into the endpoint.

This commit was SVN r12924.
Этот коммит содержится в:
George Bosilca 2006-12-24 22:34:48 +00:00
родитель 14dc72f595
Коммит e8bd985870
6 изменённых файлов: 42 добавлений и 45 удалений

Просмотреть файл

@ -31,36 +31,6 @@
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/mpool.h"
/*
 * Static instance of the MX BTL module, filled with a positional initializer.
 * The nested aggregate starts with a pointer to the component's base
 * structure, followed by the size, exclusivity, latency and bandwidth slots
 * (all zero in this initializer), the flags word, and the table of BTL entry
 * points.
 *
 * NOTE(review): the flags advertise MCA_BTL_FLAGS_PUT while the put slot
 * below is NULL -- confirm that this combination is intended.
 */
mca_btl_mx_module_t mca_btl_mx_module = {
{
&mca_btl_mx_component.super,
0, /* max size of first fragment */
0, /* min send fragment size */
0, /* max send fragment size */
0, /* min rdma fragment size */
0, /* max rdma fragment size */
0, /* exclusivity */
0, /* latency */
0, /* bandwidth */
MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_PUT, /* flags */
mca_btl_mx_add_procs,
mca_btl_mx_del_procs,
mca_btl_mx_register,
mca_btl_mx_finalize,
mca_btl_mx_alloc,
mca_btl_mx_free,
mca_btl_mx_prepare_src,
mca_btl_mx_prepare_dst,
mca_btl_mx_send,
NULL, /* put */
NULL, /* get */
mca_btl_base_dump,
NULL, /* mpool */
NULL /* register error */
}
};
/**
*
*/
@ -378,8 +348,8 @@ int mca_btl_mx_send( struct mca_btl_base_module_t* btl,
mx_return_t mx_return;
uint64_t total_length;
if( MCA_BTL_MX_CONNECTED != ((mca_btl_mx_endpoint_t*)endpoint)->endpoint_proc->status ) {
if( MCA_BTL_MX_NOT_REACHEABLE == ((mca_btl_mx_endpoint_t*)endpoint)->endpoint_proc->status )
if( MCA_BTL_MX_CONNECTED != ((mca_btl_mx_endpoint_t*)endpoint)->status ) {
if( MCA_BTL_MX_NOT_REACHEABLE == ((mca_btl_mx_endpoint_t*)endpoint)->status )
return OMPI_ERROR;
if( OMPI_SUCCESS != mca_btl_mx_proc_connect( (mca_btl_mx_endpoint_t*)endpoint ) )
return OMPI_ERROR;
@ -433,3 +403,32 @@ int mca_btl_mx_finalize( struct mca_btl_base_module_t* btl )
return OMPI_SUCCESS;
}
/*
 * Static instance of the MX BTL module, filled with a positional initializer.
 * The nested aggregate starts with a pointer to the component's base
 * structure, followed by the size, exclusivity, latency and bandwidth slots
 * (all zero in this initializer), the flags word, and the table of BTL entry
 * points.
 *
 * NOTE(review): the flags advertise MCA_BTL_FLAGS_PUT while the put slot
 * below is NULL -- confirm that this combination is intended.
 */
mca_btl_mx_module_t mca_btl_mx_module = {
{
&mca_btl_mx_component.super,
0, /* max size of first fragment */
0, /* min send fragment size */
0, /* max send fragment size */
0, /* min rdma fragment size */
0, /* max rdma fragment size */
0, /* exclusivity */
0, /* latency */
0, /* bandwidth */
MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_PUT, /* flags */
mca_btl_mx_add_procs,
mca_btl_mx_del_procs,
mca_btl_mx_register,
mca_btl_mx_finalize,
mca_btl_mx_alloc,
mca_btl_mx_free,
mca_btl_mx_prepare_src,
mca_btl_mx_prepare_dst,
mca_btl_mx_send,
NULL, /* put */
NULL, /* get */
mca_btl_base_dump,
NULL, /* mpool */
NULL /* register error */
}
};

Просмотреть файл

@ -336,6 +336,8 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules,
return NULL;
if( (status = mx_get_info( NULL, MX_NIC_IDS, NULL, 0,
nic_addrs, size)) != MX_SUCCESS) {
opal_output(0, "MX BTL error (mx_get_info failed) size = %ld [%s] #cards %d\n",
size, mx_strerror(status), mca_btl_mx_component.mx_num_btls );
free(nic_addrs);
return NULL;
}

Просмотреть файл

@ -35,18 +35,17 @@
* Initialize state of the endpoint instance.
*
*/
static void mca_btl_mx_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
{
/* Clear the btl and proc pointers of the new endpoint. */
/* NOTE(review): endpoint_btl is assigned twice in this listing; the second
 * assignment is redundant -- confirm against the actual source file. */
endpoint->endpoint_btl = NULL;
endpoint->endpoint_btl = NULL;
endpoint->endpoint_proc = NULL;
/* The endpoint starts in the not-connected state. */
endpoint->status = MCA_BTL_MX_NOT_CONNECTED;
}
/*
* Destroy an endpoint
*
*/
static void mca_btl_mx_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
{
/* Empty body: no endpoint fields are released or reset here. */
}

Просмотреть файл

@ -32,6 +32,10 @@
extern "C" {
#endif
/* Values stored in an endpoint's status field. */
#define MCA_BTL_MX_NOT_CONNECTED 0x0000 /* value assigned by the endpoint constructor */
#define MCA_BTL_MX_NOT_REACHEABLE 0x0001 /* mca_btl_mx_send returns OMPI_ERROR when it sees this value */
#define MCA_BTL_MX_CONNECTED 0x0002 /* assigned in mca_btl_mx_proc_connect once a peer address is recorded */
/**
* Structure used to publish MX information to peers
*/
@ -62,6 +66,8 @@ struct mca_btl_base_endpoint_t {
mx_endpoint_addr_t mx_peer_addr;
/** the remote MX endpoint address */
int status; /**< status of the endpoint */
};
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;

Просмотреть файл

@ -159,7 +159,6 @@ int mca_btl_mx_proc_insert( mca_btl_mx_proc_t* module_proc,
return OMPI_ERROR;
}
module_proc->status = MCA_BTL_MX_NOT_CONNECTED;
module_proc->mx_peers = mx_peers;
if( NULL == module_proc->proc_endpoints ) {
@ -169,7 +168,6 @@ int mca_btl_mx_proc_insert( mca_btl_mx_proc_t* module_proc,
return OMPI_ERR_OUT_OF_RESOURCE;
}
}
/* insert into endpoint array */
module_endpoint->endpoint_proc = module_proc;
return OMPI_SUCCESS;
@ -206,13 +204,12 @@ int mca_btl_mx_proc_connect( mca_btl_mx_endpoint_t* module_endpoint )
module_endpoint->mx_peer.nic_id = module_proc->mx_peers[i].nic_id;
module_endpoint->mx_peer.endpoint_id = module_proc->mx_peers[i].endpoint_id;
module_endpoint->mx_peer_addr = mx_remote_addr;
module_endpoint->status = MCA_BTL_MX_CONNECTED;
module_proc->proc_addr_index = i;
module_proc->status = MCA_BTL_MX_CONNECTED;
break;
}
if( i == module_proc->mx_peers_count ) { /* no available connection */
module_proc->status = MCA_BTL_MX_NOT_REACHEABLE;
return OMPI_ERROR;
}

Просмотреть файл

@ -29,10 +29,6 @@
extern "C" {
#endif
/* Connection status values. */
#define MCA_BTL_MX_NOT_CONNECTED 0x0000
#define MCA_BTL_MX_NOT_REACHEABLE 0x0001
#define MCA_BTL_MX_CONNECTED 0x0002
/**
* Represents the state of a remote process and the set of addresses
* that it exports. Also cache an instance of mca_btl_base_endpoint_t for
@ -46,8 +42,6 @@ extern "C" {
ompi_proc_t *proc_ompi;
/**< pointer to corresponding ompi_proc_t */
int status; /**< status of the connection */
mca_btl_mx_addr_t *mx_peers; /**< peers addresses */
int mx_peers_count;