Checkpoint for switch to mpool work:
- Remove printing of CFLAGS in configure.m4
- Set MCA_BTL_FLAGS_SEND flag
- Improved error handling during module initialization
- Extract the address of each interface with dat_ia_query
- Start playing around with fragment stuff - probably wrong
- Misc code cleanup (removal of GM-specific code)

This commit was SVN r8801.
Этот коммит содержится в:
родитель
51ec050647
Коммит
ec995160e6
@ -44,7 +44,7 @@ mca_btl_udapl_module_t mca_btl_udapl_module = {
|
||||
0, /* exclusivity */
|
||||
0, /* latency */
|
||||
0, /* bandwidth */
|
||||
0, /* flags */
|
||||
MCA_BTL_FLAGS_SEND,
|
||||
mca_btl_udapl_add_procs,
|
||||
mca_btl_udapl_del_procs,
|
||||
mca_btl_udapl_register,
|
||||
@ -93,9 +93,11 @@ mca_btl_udapl_error(DAT_RETURN ret, char* str)
|
||||
int
|
||||
mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t * btl)
|
||||
{
|
||||
DAT_IA_ATTR attr;
|
||||
DAT_RETURN rc;
|
||||
|
||||
/* open the uDAPL interface */
|
||||
btl->udapl_evd_dflt = DAT_HANDLE_NULL;
|
||||
rc = dat_ia_open(ia_name, mca_btl_udapl_component.udapl_evd_qlen,
|
||||
&btl->udapl_evd_dflt, &btl->udapl_ia);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
@ -103,12 +105,25 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t * btl)
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* query to get address information */
|
||||
/* TODO - we only get the address, but there's other useful stuff here */
|
||||
rc = dat_ia_query(btl->udapl_ia, &btl->udapl_evd_dflt,
|
||||
DAT_IA_FIELD_IA_ADDRESS_PTR, &attr, DAT_IA_FIELD_NONE, NULL);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
mca_btl_udapl_error(rc, "dat_ia_query");
|
||||
dat_ia_close(btl->udapl_ia, DAT_CLOSE_GRACEFUL_FLAG);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
memcpy(&btl->udapl_addr.addr, attr.ia_address_ptr, sizeof(DAT_SOCK_ADDR));
|
||||
|
||||
/* set up evd's */
|
||||
rc = dat_evd_create(btl->udapl_ia,
|
||||
mca_btl_udapl_component.udapl_evd_qlen, DAT_HANDLE_NULL,
|
||||
DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG, &btl->udapl_evd_dto);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
mca_btl_udapl_error(rc, "dat_evd_create (dto)");
|
||||
dat_ia_close(btl->udapl_ia, DAT_CLOSE_GRACEFUL_FLAG);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
@ -117,9 +132,13 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t * btl)
|
||||
DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG, &btl->udapl_evd_conn);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
mca_btl_udapl_error(rc, "dat_evd_create (conn)");
|
||||
dat_evd_free(btl->udapl_evd_dto);
|
||||
dat_ia_close(btl->udapl_ia, DAT_CLOSE_GRACEFUL_FLAG);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* TODO - post some receives - involves setting up ep's, psp's, and LMR's */
|
||||
|
||||
/* initialize objects */
|
||||
OBJ_CONSTRUCT(&btl->udapl_frag_eager, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&btl->udapl_frag_max, ompi_free_list_t);
|
||||
|
@ -155,7 +155,18 @@ int mca_btl_udapl_component_open(void)
|
||||
mca_btl_udapl_module.super.btl_bandwidth =
|
||||
mca_btl_udapl_param_register_int("bandwidth", 225);
|
||||
|
||||
/* compute the eager frag size */
|
||||
/* compute the eager and max frag sizes */
|
||||
mca_btl_udapl_component.udapl_eager_frag_size =
|
||||
mca_btl_udapl_module.super.btl_eager_limit;
|
||||
/*mca_btl_udapl_component.udapl_eager_limit =
|
||||
mca_btl_udapl_module.super.btl_eager_limit -
|
||||
sizeof(mca_btl_base_header_t);*/
|
||||
|
||||
mca_btl_udapl_component.udapl_max_frag_size =
|
||||
mca_btl_udapl_module.super.btl_max_send_size;
|
||||
mca_btl_udapl_module.super.btl_max_send_size =
|
||||
mca_btl_udapl_module.super.btl_max_send_size -
|
||||
sizeof(mca_btl_base_header_t);
|
||||
#if 0
|
||||
mca_btl_udapl_component.udapl_eager_frag_size =
|
||||
udapl_min_size_for_length(mca_btl_udapl_module.super.btl_eager_limit) - 1;
|
||||
@ -208,9 +219,14 @@ mca_btl_udapl_modex_send(void)
|
||||
size_t size;
|
||||
mca_btl_udapl_addr_t *addrs = NULL;
|
||||
|
||||
opal_output(0, "udapl_modex_send\n");
|
||||
size = sizeof(mca_btl_udapl_addr_t) *
|
||||
mca_btl_udapl_component.udapl_num_btls;
|
||||
|
||||
if(mca_btl_udapl_component.udapl_debug) {
|
||||
opal_output(0, "udapl_modex_send %d addrs %d bytes\n",
|
||||
mca_btl_udapl_component.udapl_num_btls, size);
|
||||
}
|
||||
|
||||
size = mca_btl_udapl_component.udapl_num_btls * sizeof (mca_btl_udapl_addr_t);
|
||||
if (0 != size) {
|
||||
addrs = (mca_btl_udapl_addr_t *)malloc (size);
|
||||
if (NULL == addrs) {
|
||||
@ -243,38 +259,34 @@ mca_btl_udapl_component_init (int *num_btl_modules,
|
||||
DAT_PROVIDER_INFO* datinfo;
|
||||
mca_btl_base_module_t **btls;
|
||||
mca_btl_udapl_module_t *btl;
|
||||
size_t i;
|
||||
DAT_COUNT num_ias;
|
||||
int32_t i;
|
||||
|
||||
opal_output(0, "udapl_component_init\n");
|
||||
|
||||
/* enumerate uDAPL interfaces */
|
||||
datinfo = malloc(mca_btl_udapl_component.udapl_max_btls * sizeof(DAT_PROVIDER_INFO));
|
||||
datinfo = malloc(sizeof(DAT_PROVIDER_INFO) *
|
||||
mca_btl_udapl_component.udapl_max_btls);
|
||||
if(NULL == datinfo) {
|
||||
return NULL;
|
||||
}
|
||||
if(DAT_SUCCESS != dat_registry_list_providers(mca_btl_udapl_component.udapl_max_btls,
|
||||
(DAT_COUNT*)&mca_btl_udapl_component.udapl_num_btls, &datinfo)) {
|
||||
if(DAT_SUCCESS != dat_registry_list_providers(
|
||||
mca_btl_udapl_component.udapl_max_btls,
|
||||
(DAT_COUNT*)&num_ias, &datinfo)) {
|
||||
free(datinfo);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Make sure we have some interfaces */
|
||||
if(0 == mca_btl_udapl_component.udapl_num_btls) {
|
||||
mca_btl_base_error_no_nics("uDAPL", "NIC");
|
||||
free(datinfo);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* create a BTL module for each interface */
|
||||
mca_btl_udapl_component.udapl_btls =
|
||||
malloc(mca_btl_udapl_component.udapl_num_btls *
|
||||
sizeof(mca_btl_udapl_module_t *));
|
||||
/* allocate space for each possible BTL */
|
||||
mca_btl_udapl_component.udapl_btls = (mca_btl_udapl_module_t *)
|
||||
malloc(num_ias * sizeof(mca_btl_udapl_module_t *));
|
||||
if(NULL == mca_btl_udapl_component.udapl_btls) {
|
||||
free(datinfo);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for(i = 0; i < mca_btl_udapl_component.udapl_num_btls; i++) {
|
||||
/* create a BTL module for each interface */
|
||||
for(mca_btl_udapl_component.udapl_num_btls = i = 0; i < num_ias; i++) {
|
||||
opal_output(0, "udapl creating btl for %s\n", datinfo[i].ia_name);
|
||||
|
||||
btl = malloc(sizeof(mca_btl_udapl_module_t));
|
||||
@ -287,34 +299,50 @@ mca_btl_udapl_component_init (int *num_btl_modules,
|
||||
/* copy default values into the new BTL */
|
||||
memcpy(btl, &mca_btl_udapl_module, sizeof(mca_btl_udapl_module_t));
|
||||
|
||||
/* initialize this BTL */
|
||||
/* TODO - make use of the thread-safety info in datinfo also */
|
||||
if(OMPI_SUCCESS != mca_btl_udapl_init(datinfo[i].ia_name, btl)) {
|
||||
opal_output(0, "udapl module init for %s failed\n",
|
||||
datinfo[i].ia_name);
|
||||
/*TODO - how do i correctly handle an error here? */
|
||||
free(btl);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* successful btl creation */
|
||||
mca_btl_udapl_component.udapl_btls[i] = btl;
|
||||
if(++mca_btl_udapl_component.udapl_num_btls >=
|
||||
mca_btl_udapl_component.udapl_max_btls) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* finished with datinfo */
|
||||
free(datinfo);
|
||||
|
||||
/* Make sure we have some interfaces */
|
||||
if(0 == mca_btl_udapl_component.udapl_num_btls) {
|
||||
mca_btl_base_error_no_nics("uDAPL", "NIC");
|
||||
free(mca_btl_udapl_component.udapl_btls);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* publish uDAPL parameters with the MCA framework */
|
||||
if (OMPI_SUCCESS != mca_btl_udapl_modex_send()) {
|
||||
free(mca_btl_udapl_component.udapl_btls);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* return array of BTLs */
|
||||
btls = (mca_btl_base_module_t**) malloc (
|
||||
mca_btl_udapl_component.udapl_num_btls * sizeof(mca_btl_base_module_t *));
|
||||
btls = (mca_btl_base_module_t**) malloc(sizeof(mca_btl_base_module_t *) *
|
||||
mca_btl_udapl_component.udapl_num_btls);
|
||||
if (NULL == btls) {
|
||||
free(mca_btl_udapl_component.udapl_btls);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
memcpy(btls, mca_btl_udapl_component.udapl_btls,
|
||||
mca_btl_udapl_component.udapl_num_btls * sizeof(mca_btl_udapl_module_t *));
|
||||
mca_btl_udapl_component.udapl_num_btls *
|
||||
sizeof(mca_btl_udapl_module_t *));
|
||||
*num_btl_modules = mca_btl_udapl_component.udapl_num_btls;
|
||||
return btls;
|
||||
}
|
||||
@ -331,17 +359,23 @@ int mca_btl_udapl_component_progress()
|
||||
int count = 0;
|
||||
size_t i;
|
||||
|
||||
opal_output(0, "udapl_component_progress\n");
|
||||
|
||||
/* could get into deadlock in this case as we post recvs after callback completes */
|
||||
/* prevent deadlock - only one thread should be 'progressing' at a time */
|
||||
if(OPAL_THREAD_ADD32(&inprogress, 1) > 1) {
|
||||
OPAL_THREAD_ADD32(&inprogress, -1);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
opal_output(0, "udapl_component_progress\n");
|
||||
|
||||
/* check for work to do on each uDAPL btl */
|
||||
for( i = 0; i < mca_btl_udapl_component.udapl_num_btls; ) {
|
||||
mca_btl_udapl_module_t *btl = mca_btl_udapl_component.udapl_btls[i];
|
||||
|
||||
/* TODO - check the DTO EVD for events */
|
||||
i++;
|
||||
}
|
||||
|
||||
/* unlock and return */
|
||||
OPAL_THREAD_ADD32(&inprogress, -1);
|
||||
return count;
|
||||
}
|
||||
|
@ -34,7 +34,7 @@ extern "C" {
|
||||
* Structure used to publish uDAPL id information to peers.
|
||||
*/
|
||||
struct mca_btl_udapl_addr_t {
|
||||
int foo; /* placeholder to prevent division by 0 */
|
||||
DAT_SOCK_ADDR addr;
|
||||
};
|
||||
typedef struct mca_btl_udapl_addr_t mca_btl_udapl_addr_t;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -38,17 +38,18 @@ typedef enum {
|
||||
|
||||
|
||||
/**
|
||||
* UDAPL send fragment derived type.
|
||||
* uDAPL send fragment derived type.
|
||||
*/
|
||||
struct mca_btl_udapl_frag_t {
|
||||
mca_btl_base_descriptor_t base;
|
||||
mca_btl_base_segment_t segment;
|
||||
mca_btl_base_segment_t segment;
|
||||
|
||||
struct mca_btl_udapl_module_t* btl;
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
struct mca_mpool_base_registration_t* registration;
|
||||
|
||||
mca_btl_base_header_t *hdr;
|
||||
size_t size;
|
||||
/*enum gm_priority priority;*/
|
||||
mca_btl_udapl_frag_type_t type;
|
||||
};
|
||||
typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_t;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -45,7 +45,7 @@ void mca_btl_udapl_proc_construct(mca_btl_udapl_proc_t* proc)
|
||||
}
|
||||
|
||||
/*
|
||||
* Cleanup ib proc instance
|
||||
* Cleanup uDAPL proc instance
|
||||
*/
|
||||
|
||||
void mca_btl_udapl_proc_destruct(mca_btl_udapl_proc_t* proc)
|
||||
@ -130,6 +130,11 @@ mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(mca_btl_udapl_component.udapl_debug) {
|
||||
opal_output(0, "udapl_proc_create got %d addrs\n",
|
||||
size / sizeof(mca_btl_udapl_addr_t));
|
||||
}
|
||||
|
||||
if((size % sizeof(mca_btl_udapl_addr_t)) != 0) {
|
||||
opal_output(0, "[%s:%d] invalid udapl address for peer [%d,%d,%d]",
|
||||
__FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name));
|
||||
@ -161,39 +166,16 @@ int mca_btl_udapl_proc_insert(
|
||||
mca_btl_udapl_proc_t* udapl_proc,
|
||||
mca_btl_base_endpoint_t* udapl_endpoint)
|
||||
{
|
||||
/*mca_btl_udapl_module_t* udapl_btl = udapl_endpoint->endpoint_btl;*/
|
||||
|
||||
/* insert into endpoint array */
|
||||
if(udapl_proc->proc_addr_count <= udapl_proc->proc_endpoint_count)
|
||||
if(udapl_proc->proc_endpoint_count > udapl_proc->proc_addr_count)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
opal_output(0, "udapl_proc_insert\n");
|
||||
|
||||
udapl_endpoint->endpoint_proc = udapl_proc;
|
||||
udapl_endpoint->endpoint_addr = udapl_proc->proc_addrs[udapl_proc->proc_endpoint_count];
|
||||
#if 0
|
||||
#if GM_API_VERSION > 0x200
|
||||
if (GM_SUCCESS != udapl_global_id_to_node_id(
|
||||
udapl_btl->port,
|
||||
udapl_endpoint->endpoint_addr.global_id,
|
||||
&udapl_endpoint->endpoint_addr.node_id)) {
|
||||
opal_output( 0, "[%s:%d] error in converting global to local id \n",
|
||||
__FILE__, __LINE__ );
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
if(mca_btl_udapl_component.udapl_debug > 0) {
|
||||
opal_output(0, "[%d,%d,%d] mapped global id %lu to node id %lu\n",
|
||||
ORTE_NAME_ARGS(orte_process_info.my_name),
|
||||
udapl_endpoint->endpoint_addr.global_id,
|
||||
udapl_endpoint->endpoint_addr.node_id);
|
||||
}
|
||||
#else
|
||||
udapl_endpoint->udapl_addr.node_id = udapl_host_name_to_node_id( udapl_btl->udapl_port,
|
||||
udapl_endpoint->udapl_addr.global_id);
|
||||
if( GM_NO_SUCH_NODE_ID == udapl_endpoint->udapl_addr.node_id ) {
|
||||
ompi_output( 0, "[%s:%d] unable to convert the remote host name (%s) to a host id",
|
||||
__FILE__, __LINE__, udapl_endpoint->udapl_addr.global_id);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
#endif /* GM_API_VERSION > 0x200 */
|
||||
#endif
|
||||
udapl_endpoint->endpoint_addr =
|
||||
udapl_proc->proc_addrs[udapl_proc->proc_endpoint_count];
|
||||
|
||||
udapl_proc->proc_endpoints[udapl_proc->proc_endpoint_count] = udapl_endpoint;
|
||||
udapl_proc->proc_endpoint_count++;
|
||||
return OMPI_SUCCESS;
|
||||
|
@ -37,7 +37,7 @@ AC_DEFUN([MCA_btl_udapl_CONFIG],[
|
||||
btl_udapl_CFLAGS="`echo $CFLAGS | sed 's/-pedantic//g'`"
|
||||
AS_IF([test "$btl_udapl_CFLAGS" != "$CFLAGS" -a "$btl_udapl_happy" = "yes"],
|
||||
[AC_MSG_WARN([Removed -pedantic from CFLAGS for
|
||||
uDAPL component because the uDAPL headers are not fully ISO C: $btl_udapl_CFLAGS])])
|
||||
uDAPL component because the uDAPL headers are not fully ISO C])])
|
||||
|
||||
# substitute in the things needed to build udapl
|
||||
AC_SUBST([btl_udapl_CFLAGS])
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user