clean up and make consistent the reporting out from the udapl btl; report out a readable event string instead of just a number
This commit was SVN r16954.
Этот коммит содержится в:
родитель
f7812baf5b
Коммит
d05d3afaed
@ -100,8 +100,8 @@ static int udapl_reg_mr(void *reg_data, void *base, size_t size,
|
||||
&dat_size, &dat_addr);
|
||||
|
||||
if(rc != DAT_SUCCESS) {
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
"dat_lmr_create DAT_INSUFFICIENT_RESOURCES", true);
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt",
|
||||
"dat_lmr_create DAT_INSUFFICIENT_RESOURCES", true));
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -152,8 +152,8 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
|
||||
dat_strerror(rc, (const char**)&major,
|
||||
(const char**)&minor);
|
||||
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
"dat_ia_open fail", true, ia_name, major, minor);
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt",
|
||||
"dat_ia_open fail", true, ia_name, major, minor));
|
||||
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -190,21 +190,21 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
|
||||
|
||||
/* check evd qlen against adapter max */
|
||||
if (btl->udapl_dto_evd_qlen > (btl->udapl_ia_attr).max_evd_qlen) {
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt",
|
||||
"evd_qlen adapter max",
|
||||
true,
|
||||
"btl_udapl_dto_evd_qlen",
|
||||
btl->udapl_dto_evd_qlen,
|
||||
(btl->udapl_ia_attr).max_evd_qlen);
|
||||
(btl->udapl_ia_attr).max_evd_qlen));
|
||||
btl->udapl_dto_evd_qlen = btl->udapl_ia_attr.max_evd_qlen;
|
||||
}
|
||||
if (btl->udapl_conn_evd_qlen > (btl->udapl_ia_attr).max_evd_qlen) {
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt",
|
||||
"evd_qlen adapter max",
|
||||
true,
|
||||
"btl_udapl_conn_evd_qlen",
|
||||
btl->udapl_conn_evd_qlen,
|
||||
(btl->udapl_ia_attr).max_evd_qlen);
|
||||
(btl->udapl_ia_attr).max_evd_qlen));
|
||||
btl->udapl_conn_evd_qlen = btl->udapl_ia_attr.max_evd_qlen;
|
||||
}
|
||||
|
||||
@ -257,9 +257,9 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
|
||||
/* by not erroring out here we can try to continue with
|
||||
* the default endpoint parameter values
|
||||
*/
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt",
|
||||
"use default endpoint params",
|
||||
true);
|
||||
true));
|
||||
}
|
||||
|
||||
/* Save the port with the address information */
|
||||
@ -292,12 +292,12 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
|
||||
if (mca_btl_udapl_component.udapl_buffer_alignment !=
|
||||
DAT_OPTIMAL_ALIGNMENT) {
|
||||
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt",
|
||||
"optimal buffer alignment mismatch",
|
||||
true,
|
||||
DAT_OPTIMAL_ALIGNMENT,
|
||||
mca_btl_udapl_component.udapl_buffer_alignment,
|
||||
DAT_OPTIMAL_ALIGNMENT);
|
||||
DAT_OPTIMAL_ALIGNMENT));
|
||||
}
|
||||
|
||||
/* initialize free lists */
|
||||
@ -470,13 +470,14 @@ static int mca_btl_udapl_set_peer_parameters(
|
||||
udapl_btl->udapl_dto_evd_qlen) {
|
||||
|
||||
/* user modified so warn */
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
("help-mpi-btl-udapl.txt",
|
||||
"evd_qlen too low",
|
||||
true,
|
||||
"btl_udapl_dto_evd_qlen",
|
||||
udapl_btl->udapl_dto_evd_qlen,
|
||||
"btl_udapl_dto_evd_qlen",
|
||||
potential_dto_evd_qlen);
|
||||
potential_dto_evd_qlen));
|
||||
} else {
|
||||
udapl_btl->udapl_dto_evd_qlen = potential_dto_evd_qlen;
|
||||
}
|
||||
@ -540,13 +541,14 @@ static int mca_btl_udapl_set_peer_parameters(
|
||||
udapl_btl->udapl_conn_evd_qlen) {
|
||||
|
||||
/* user modified so warn */
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
("help-mpi-btl-udapl.txt",
|
||||
"evd_qlen too low",
|
||||
true,
|
||||
"btl_udapl_conn_evd_qlen",
|
||||
udapl_btl->udapl_conn_evd_qlen,
|
||||
"btl_udapl_conn_evd_qlen",
|
||||
potential_conn_evd_qlen);
|
||||
potential_conn_evd_qlen));
|
||||
} else {
|
||||
udapl_btl->udapl_conn_evd_qlen = potential_conn_evd_qlen;
|
||||
}
|
||||
@ -609,13 +611,14 @@ static int mca_btl_udapl_set_peer_parameters(
|
||||
mca_btl_udapl_component.udapl_timeout) {
|
||||
|
||||
/* user modified so warn */
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
("help-mpi-btl-udapl.txt",
|
||||
"connection timeout low",
|
||||
true,
|
||||
"btl_udapl_timeout",
|
||||
mca_btl_udapl_component.udapl_timeout,
|
||||
"btl_udapl_timeout",
|
||||
potential_udapl_timeout);
|
||||
potential_udapl_timeout));
|
||||
} else {
|
||||
mca_btl_udapl_component.udapl_timeout =
|
||||
potential_udapl_timeout;
|
||||
@ -804,7 +807,8 @@ int mca_btl_udapl_free(
|
||||
} else if(frag->size == mca_btl_udapl_component.udapl_max_frag_size) {
|
||||
MCA_BTL_UDAPL_FRAG_RETURN_MAX(btl, frag);
|
||||
} else {
|
||||
OPAL_OUTPUT((0, "[%s:%d] mca_btl_udapl_free: invalid descriptor\n", __FILE__,__LINE__));
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE,
|
||||
("mca_btl_udapl_free: invalid descriptor\n"));
|
||||
return OMPI_ERR_BAD_PARAM;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
@ -1108,7 +1112,7 @@ int mca_btl_udapl_get(
|
||||
mca_btl_base_endpoint_t* endpoint,
|
||||
mca_btl_base_descriptor_t* des)
|
||||
{
|
||||
OPAL_OUTPUT((0, "udapl_get\n"));
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DEVELOPER, ("udapl_get\n"));
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
@ -54,6 +54,8 @@ extern "C" {
|
||||
struct mca_btl_udapl_component_t {
|
||||
mca_btl_base_component_1_0_1_t super; /**< base BTL component */
|
||||
|
||||
int32_t udapl_verbosity; /**< report out level, see
|
||||
"Report Out from uDAPL BTL" below for details. */
|
||||
size_t udapl_num_btls; /**< number of hcas available to the uDAPL component */
|
||||
size_t udapl_max_btls; /**< maximum number of supported hcas */
|
||||
struct mca_btl_udapl_module_t **udapl_btls; /**< array of available BTL modules */
|
||||
@ -161,8 +163,85 @@ struct mca_btl_udapl_reg_t {
|
||||
typedef struct mca_btl_udapl_reg_t mca_btl_udapl_reg_t;
|
||||
|
||||
/**
|
||||
* Report a uDAPL error - for debugging
|
||||
* Report Out from uDAPL BTL
|
||||
*
|
||||
* - BTL_ERROR() : Use to report out errors from uDAPL BTL. These are
|
||||
* critical errors which will most likely cause the program to fail so
|
||||
* this message should always be reported to the user. Defined in
|
||||
* btl/base/btl_base_error.h.
|
||||
* Example:
|
||||
* dat_strerror(rc, (const char**)&major, (const char**)&minor);
|
||||
* BTL_ERROR(("ERROR: %s %s %s\n", "dat_cr_accept", major, minor));
|
||||
*
|
||||
* - BTL_UDAPL_VERBOSE_OUTPUT() : Use to output different levels
|
||||
* of verbosity to the user. See Note below.
|
||||
* Example:
|
||||
* BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE,
|
||||
* ("WARNING: don't %s", "jump"));
|
||||
*
|
||||
* - BTL_UDAPL_VERBOSE_HELP() : Use to output information as defined in
|
||||
* uDAPL BTL help file (help-mpi-btl-udapl.txt). See Note below.
|
||||
* Example:
|
||||
* BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
* ("help-mpi-btl-udapl.txt",
|
||||
* "invalid num rdma segments", true, 22));
|
||||
*
|
||||
* Note : - Verbose levels are defined below. These levels are
|
||||
* controlled by the mca parameter "btl_udapl_verbose".
|
||||
* The verbose level is set to 10 by default so that critical
|
||||
* error and useful help information will appear. Whichever value
|
||||
* this param is set to, those messages as well as any lower level
|
||||
* verbose messages will be reported.
|
||||
* - Setting "btl_udapl_verbose" to "-1" will turn off all
|
||||
* messages reported by the use of BTL_UDAPL_VERBOSE_*().
|
||||
* - These macros should not be used in a critical path as they
|
||||
* are always included in the compiled code.
|
||||
* - These macros rely on the use of parentheses around the "args"
|
||||
* value.
|
||||
*
|
||||
* Values used with BTL_UDAPL_VERBOSE_*():
|
||||
*
|
||||
* - 0: critical user information; should always be reported;
|
||||
* on by default
|
||||
* - 10: useful help messages that would be reported from
|
||||
* "help-mpi-btl-udapl.txt"; accessed from
|
||||
* BTL_UDAPL_VERBOSE_HELP(); on by default
|
||||
* - 20: general execution diagnostic information;
|
||||
* may be useful to user or btl developer
|
||||
* - 30: basic debugging/diagnostic information
|
||||
* - 90: useful only to developers
|
||||
* - 100: other components do not appear to go beyond 100 for verbose
|
||||
* levels so noting here as the max for future reference
|
||||
*/
|
||||
/*
 * Verbosity thresholds accepted by the BTL_UDAPL_VERBOSE_*() macros.
 * A message is reported when its level is less than or equal to the
 * configured verbosity, so a larger number means a less important message.
 */

/* critical user information; should always be reported */
#define VERBOSE_CRITICAL    0

/* help messages taken from help-mpi-btl-udapl.txt */
#define VERBOSE_SHOW_HELP   10

/* general execution diagnostic information */
#define VERBOSE_INFORM      20

/* basic debugging/diagnostic information */
#define VERBOSE_DIAGNOSE    30

/* useful only to developers */
#define VERBOSE_DEVELOPER   90
|
||||
|
||||
/*
 * Print a printf-style message when verbose_level does not exceed
 * mca_btl_udapl_component.udapl_verbosity.
 *
 * The message is prefixed with the node name, the process name and the
 * file/line/function of the call site, and is followed by a newline.
 *
 * verbose_level : threshold for this message; parenthesized in the
 *                 expansion so that any expression may be passed.
 * args          : the complete, parenthesized argument list handed to
 *                 mca_btl_base_out(), e.g. ("value is %d", value).
 *
 * The expansion deliberately has no trailing semicolon: the caller
 * supplies it, which makes the macro behave as exactly one statement and
 * keeps it safe inside an unbraced if/else.
 */
#define BTL_UDAPL_VERBOSE_OUTPUT(verbose_level, args)                       \
do {                                                                        \
    if ((verbose_level) <= mca_btl_udapl_component.udapl_verbosity) {       \
        mca_btl_base_out("[%s]%s[%s:%d:%s] ",                               \
                         orte_system_info.nodename,                         \
                         ORTE_NAME_PRINT(orte_process_info.my_name),        \
                         __FILE__, __LINE__, __func__);                     \
        mca_btl_base_out args;                                              \
        mca_btl_base_out("\n");                                             \
    }                                                                       \
} while(0)
|
||||
|
||||
/*
 * Show a help-file message through opal_show_help() when verbose_level
 * does not exceed mca_btl_udapl_component.udapl_verbosity.
 *
 * verbose_level : threshold for this message; parenthesized in the
 *                 expansion so that any expression may be passed.
 * args          : the complete, parenthesized argument list handed to
 *                 opal_show_help(), e.g.
 *                 ("help-mpi-btl-udapl.txt", "topic", true, value).
 *
 * The expansion deliberately has no trailing semicolon: the caller
 * supplies it, which makes the macro behave as exactly one statement and
 * keeps it safe inside an unbraced if/else.
 */
#define BTL_UDAPL_VERBOSE_HELP(verbose_level, args)                         \
do {                                                                        \
    if ((verbose_level) <= mca_btl_udapl_component.udapl_verbosity) {       \
        opal_show_help args;                                                \
    }                                                                       \
} while(0)
|
||||
|
||||
|
||||
/*
|
||||
* Report a uDAPL error - for debugging
|
||||
*/
|
||||
|
||||
#if OMPI_ENABLE_DEBUG
|
||||
extern void mca_btl_udapl_error(DAT_RETURN ret, char* str);
|
||||
|
@ -59,6 +59,8 @@ void mca_btl_udapl_frag_progress_pending(mca_btl_udapl_module_t* udapl_btl,
|
||||
const int connection);
|
||||
static int mca_btl_udapl_modify_ia_list(DAT_COUNT *num_info_entries,
|
||||
DAT_PROVIDER_INFO* datinfo);
|
||||
static const char*
|
||||
mca_btl_udapl_dat_event_to_string(DAT_EVENT_NUMBER event_number);
|
||||
|
||||
|
||||
mca_btl_udapl_component_t mca_btl_udapl_component = {
|
||||
@ -91,6 +93,77 @@ mca_btl_udapl_component_t mca_btl_udapl_component = {
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Predefined and fixed size structure containing DAT_EVENT values
|
||||
* and associated string as defined in: "uDAPL:User Direct Access
|
||||
* Programming Library v1.2 Sept 15, 2004", DAT Collaborative Organization.
|
||||
*/
|
||||
static struct mca_btl_udapl_dat_events {
|
||||
DAT_EVENT_NUMBER value;
|
||||
const char* name;
|
||||
} mca_btl_udapl_dat_events[] = {
|
||||
{ DAT_DTO_COMPLETION_EVENT,
|
||||
"DAT_DTO_COMPLETION_EVENT" },
|
||||
{ DAT_RMR_BIND_COMPLETION_EVENT,
|
||||
"DAT_RMR_BIND_COMPLETION_EVENT" },
|
||||
{ DAT_CONNECTION_REQUEST_EVENT,
|
||||
"DAT_CONNECTION_REQUEST_EVENT" },
|
||||
{ DAT_CONNECTION_EVENT_ESTABLISHED,
|
||||
"DAT_CONNECTION_EVENT_ESTABLISHED" },
|
||||
{ DAT_CONNECTION_EVENT_PEER_REJECTED,
|
||||
"DAT_CONNECTION_EVENT_PEER_REJECTED" },
|
||||
{ DAT_CONNECTION_EVENT_NON_PEER_REJECTED,
|
||||
"DAT_CONNECTION_EVENT_NON_PEER_REJECTED" },
|
||||
{ DAT_CONNECTION_EVENT_ACCEPT_COMPLETION_ERROR,
|
||||
"DAT_CONNECTION_EVENT_ACCEPT_COMPLETION_ERROR" },
|
||||
{ DAT_CONNECTION_EVENT_DISCONNECTED,
|
||||
"DAT_CONNECTION_EVENT_DISCONNECTED" },
|
||||
{ DAT_CONNECTION_EVENT_BROKEN,
|
||||
"DAT_CONNECTION_EVENT_BROKEN" },
|
||||
{ DAT_CONNECTION_EVENT_TIMED_OUT,
|
||||
"DAT_CONNECTION_EVENT_TIMED_OUT" },
|
||||
{ DAT_CONNECTION_EVENT_UNREACHABLE,
|
||||
"DAT_CONNECTION_EVENT_UNREACHABLE" },
|
||||
{ DAT_ASYNC_ERROR_EVD_OVERFLOW,
|
||||
"DAT_ASYNC_ERROR_EVD_OVERFLOW" },
|
||||
{ DAT_ASYNC_ERROR_IA_CATASTROPHIC,
|
||||
"DAT_ASYNC_ERROR_IA_CATASTROPHIC" },
|
||||
{ DAT_ASYNC_ERROR_EP_BROKEN,
|
||||
"DAT_ASYNC_ERROR_EP_BROKEN" },
|
||||
{ DAT_ASYNC_ERROR_TIMED_OUT,
|
||||
"DAT_ASYNC_ERROR_TIMED_OUT" },
|
||||
{ DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR,
|
||||
"DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR" },
|
||||
{ DAT_SOFTWARE_EVENT,
|
||||
"DAT_SOFTWARE_EVENT" }
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Function to convert DAT_EVENT_NUMBER into a readable string.
|
||||
*
|
||||
* @param event_number (IN) DAT_EVENT_NUMBER value
|
||||
*
|
||||
* @return event string or a string indicating
|
||||
* event number is invalid
|
||||
*/
|
||||
static const char *
|
||||
mca_btl_udapl_dat_event_to_string(DAT_EVENT_NUMBER event_number)
|
||||
{
|
||||
int i;
|
||||
int num_events = (sizeof(mca_btl_udapl_dat_events) /
|
||||
sizeof(mca_btl_udapl_dat_events[0]));
|
||||
|
||||
for (i = 0; i < num_events; i++) {
|
||||
if (mca_btl_udapl_dat_events[i].value == event_number) {
|
||||
return (mca_btl_udapl_dat_events[i].name);
|
||||
}
|
||||
}
|
||||
|
||||
return ("Unknown DAT Event Number");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Report a uDAPL error - for debugging
|
||||
*/
|
||||
@ -348,10 +421,11 @@ static int mca_btl_udapl_modify_ia_list(DAT_COUNT *num_info_entries,
|
||||
/* if if_list not NULL, either not found or user error */
|
||||
if (opal_argv_count(mca_btl_udapl_component.if_list)) {
|
||||
char *str = opal_argv_join(mca_btl_udapl_component.if_list, ',');
|
||||
opal_show_help("help-mpi-btl-udapl.txt", "nonexistent entry",
|
||||
true, orte_system_info.nodename,
|
||||
((NULL != mca_btl_udapl_component.if_include) ?
|
||||
"in" : "ex"), str);
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
("help-mpi-btl-udapl.txt", "nonexistent entry",
|
||||
true, orte_system_info.nodename,
|
||||
((NULL != mca_btl_udapl_component.if_include) ?
|
||||
"in" : "ex"), str));
|
||||
free(str);
|
||||
}
|
||||
|
||||
@ -383,11 +457,10 @@ mca_btl_udapl_component_init (int *num_btl_modules,
|
||||
mca_btl_udapl_component.if_list = NULL;
|
||||
if (NULL != mca_btl_udapl_component.if_include &&
|
||||
NULL != mca_btl_udapl_component.if_exclude) {
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
"specified include and exclude", true,
|
||||
mca_btl_udapl_component.if_include,
|
||||
mca_btl_udapl_component.if_exclude);
|
||||
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt",
|
||||
"specified include and exclude", true,
|
||||
mca_btl_udapl_component.if_include,
|
||||
mca_btl_udapl_component.if_exclude));
|
||||
mca_btl_udapl_component.udapl_num_btls = 0;
|
||||
mca_btl_udapl_modex_send();
|
||||
return NULL;
|
||||
@ -898,14 +971,17 @@ int mca_btl_udapl_component_progress()
|
||||
(mca_btl_base_descriptor_t*)frag);
|
||||
break;
|
||||
default:
|
||||
OPAL_OUTPUT((0, "WARNING unknown frag type: %d\n",
|
||||
frag->type));
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE,
|
||||
("WARNING: unknown frag type: %d\n",
|
||||
frag->type));
|
||||
}
|
||||
count++;
|
||||
break;
|
||||
default:
|
||||
OPAL_OUTPUT((0, "WARNING unknown dto event: %d\n",
|
||||
event.event_number));
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE,
|
||||
("WARNING: DTO event: %s (%d)\n",
|
||||
mca_btl_udapl_dat_event_to_string(event.event_number),
|
||||
event.event_number));
|
||||
}
|
||||
}
|
||||
|
||||
@ -947,11 +1023,14 @@ int mca_btl_udapl_component_progress()
|
||||
case DAT_CONNECTION_EVENT_UNREACHABLE:
|
||||
/* Need to set the BTL endpoint to MCA_BTL_UDAPL_FAILED
|
||||
See dat_ep_connect documentation pdf pg 198 */
|
||||
BTL_OUTPUT(("WARNING : Connection event not handled : %d\n",
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
|
||||
("WARNING: connection event not handled : %s (%d)\n",
|
||||
mca_btl_udapl_dat_event_to_string(event.event_number),
|
||||
event.event_number));
|
||||
break;
|
||||
default:
|
||||
BTL_ERROR(("ERROR: unknown connection event : %d",
|
||||
BTL_ERROR(("ERROR: connection event : %s (%d)",
|
||||
mca_btl_udapl_dat_event_to_string(event.event_number),
|
||||
event.event_number));
|
||||
}
|
||||
}
|
||||
@ -969,11 +1048,15 @@ int mca_btl_udapl_component_progress()
|
||||
case DAT_ASYNC_ERROR_EP_BROKEN:
|
||||
case DAT_ASYNC_ERROR_TIMED_OUT:
|
||||
case DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR:
|
||||
BTL_OUTPUT(("WARNING: async event ignored : %d",
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
|
||||
("WARNING: async event ignored : %s (%d)",
|
||||
mca_btl_udapl_dat_event_to_string(event.event_number),
|
||||
event.event_number));
|
||||
break;
|
||||
default:
|
||||
BTL_OUTPUT(("WARNING unknown async event: %d\n",
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
|
||||
("WARNING: %s (%d)\n",
|
||||
mca_btl_udapl_dat_event_to_string(event.event_number),
|
||||
event.event_number));
|
||||
}
|
||||
}
|
||||
|
@ -82,9 +82,9 @@ int mca_btl_udapl_endpoint_write_eager(mca_btl_base_endpoint_t* endpoint,
|
||||
int pad = 0;
|
||||
uint8_t head = endpoint->endpoint_eager_rdma_remote.head;
|
||||
size_t size_plus_align = OPAL_ALIGN(
|
||||
mca_btl_udapl_component.udapl_eager_frag_size,
|
||||
DAT_OPTIMAL_ALIGNMENT,
|
||||
size_t);
|
||||
mca_btl_udapl_component.udapl_eager_frag_size,
|
||||
DAT_OPTIMAL_ALIGNMENT,
|
||||
size_t);
|
||||
|
||||
/* now that we have the head update it */
|
||||
MCA_BTL_UDAPL_RDMA_NEXT_INDEX(endpoint->endpoint_eager_rdma_remote.head);
|
||||
@ -117,7 +117,7 @@ int mca_btl_udapl_endpoint_write_eager(mca_btl_base_endpoint_t* endpoint,
|
||||
* and then working way back
|
||||
*/
|
||||
remote_buf = (char *)(endpoint->endpoint_eager_rdma_remote.base.pval) +
|
||||
(head * size_plus_align) +
|
||||
(head * size_plus_align) +
|
||||
frag->size -
|
||||
frag->triplet.segment_length;
|
||||
|
||||
@ -169,7 +169,7 @@ int mca_btl_udapl_endpoint_send(mca_btl_base_endpoint_t* endpoint,
|
||||
/* just send it already.. */
|
||||
if(frag->size ==
|
||||
mca_btl_udapl_component.udapl_eager_frag_size) {
|
||||
|
||||
|
||||
if(OPAL_THREAD_ADD32(&endpoint->endpoint_eager_rdma_remote.tokens, -1) < 0) {
|
||||
/* no rdma segment available so either send or queue */
|
||||
OPAL_THREAD_ADD32(&endpoint->endpoint_eager_rdma_remote.tokens, 1);
|
||||
@ -348,11 +348,11 @@ int mca_btl_udapl_endpoint_get_params(mca_btl_udapl_module_t* btl,
|
||||
btl->udapl_max_recv_dtos) {
|
||||
|
||||
/* user modified, this will fail and is not acceptable */
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt",
|
||||
"max_recv_dtos too low",
|
||||
true,
|
||||
btl->udapl_max_recv_dtos,
|
||||
mca_btl_udapl_component.udapl_num_recvs);
|
||||
mca_btl_udapl_component.udapl_num_recvs));
|
||||
|
||||
btl->udapl_max_recv_dtos =
|
||||
mca_btl_udapl_component.udapl_num_recvs;
|
||||
@ -392,10 +392,11 @@ int mca_btl_udapl_endpoint_get_params(mca_btl_udapl_module_t* btl,
|
||||
mca_btl_udapl_module.udapl_max_request_dtos) {
|
||||
|
||||
/* user has modified */
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
|
||||
("help-mpi-btl-udapl.txt",
|
||||
"max_request_dtos too low",
|
||||
true,
|
||||
btl->udapl_max_request_dtos, request_dtos);
|
||||
btl->udapl_max_request_dtos, request_dtos));
|
||||
} else {
|
||||
btl->udapl_max_request_dtos = request_dtos;
|
||||
}
|
||||
@ -403,11 +404,11 @@ int mca_btl_udapl_endpoint_get_params(mca_btl_udapl_module_t* btl,
|
||||
|
||||
if (btl->udapl_max_request_dtos > btl->udapl_ia_attr.max_dto_per_ep) {
|
||||
/* do not go beyond what is allowed by the system */
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt",
|
||||
"max_request_dtos system max",
|
||||
true,
|
||||
btl->udapl_max_request_dtos,
|
||||
btl->udapl_ia_attr.max_dto_per_ep);
|
||||
btl->udapl_ia_attr.max_dto_per_ep));
|
||||
btl->udapl_max_request_dtos = btl->udapl_ia_attr.max_dto_per_ep;
|
||||
}
|
||||
|
||||
@ -675,8 +676,9 @@ int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl,
|
||||
|
||||
rc = mca_btl_udapl_endpoint_finish_max(ep);
|
||||
} else {
|
||||
OPAL_OUTPUT((0, "btl_udapl ERROR invalid EP state %d\n",
|
||||
ep->endpoint_state));
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE,
|
||||
("ERROR: invalid EP state %d\n",
|
||||
ep->endpoint_state));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
return rc;
|
||||
@ -685,7 +687,8 @@ int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl,
|
||||
}
|
||||
|
||||
/* If this point is reached, no matching endpoint was found */
|
||||
OPAL_OUTPUT((0, "btl_udapl ERROR could not match endpoint\n"));
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE,
|
||||
("btl_udapl ERROR could not match endpoint\n"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
@ -773,7 +776,7 @@ static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t* endpoint)
|
||||
frag->segment.seg_len + sizeof(mca_btl_udapl_footer_t));
|
||||
assert(frag->size ==
|
||||
mca_btl_udapl_component.udapl_eager_frag_size);
|
||||
|
||||
|
||||
rc = dat_ep_post_send(endpoint->endpoint_eager, 1,
|
||||
&frag->triplet, cookie, DAT_COMPLETION_DEFAULT_FLAG);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
@ -1058,7 +1061,7 @@ static int mca_btl_udapl_endpoint_send_eager_rdma(
|
||||
rdma_connect->rkey =
|
||||
endpoint->endpoint_eager_rdma_local.reg->rmr_context;
|
||||
rdma_connect->rdma_start.pval =
|
||||
(unsigned char*)frag->base.super.ptr;
|
||||
(unsigned char*)frag->base.super.ptr;
|
||||
|
||||
/* send fragment */
|
||||
rc = mca_btl_udapl_send((mca_btl_base_module_t *)udapl_btl, endpoint,
|
||||
@ -1102,16 +1105,16 @@ void mca_btl_udapl_endpoint_connect_eager_rdma(
|
||||
/* NOTE: Need to find a more generic way to check ranges
|
||||
* for all mca parameters.
|
||||
*/
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt",
|
||||
"invalid num rdma segments",
|
||||
true,
|
||||
mca_btl_udapl_component.udapl_eager_rdma_num);
|
||||
mca_btl_udapl_component.udapl_eager_rdma_num));
|
||||
goto unlock_rdma_local;
|
||||
}
|
||||
|
||||
/* create space for fragment structures */
|
||||
alloc_ptr = (char*)malloc(mca_btl_udapl_component.udapl_eager_rdma_num *
|
||||
sizeof(mca_btl_udapl_frag_eager_rdma_t));
|
||||
sizeof(mca_btl_udapl_frag_eager_rdma_t));
|
||||
|
||||
if(NULL == alloc_ptr) {
|
||||
goto unlock_rdma_local;
|
||||
@ -1119,13 +1122,13 @@ void mca_btl_udapl_endpoint_connect_eager_rdma(
|
||||
|
||||
/* get size of one fragment's data region */
|
||||
size_plus_align = OPAL_ALIGN(
|
||||
mca_btl_udapl_component.udapl_eager_frag_size,
|
||||
DAT_OPTIMAL_ALIGNMENT, size_t);
|
||||
mca_btl_udapl_component.udapl_eager_frag_size,
|
||||
DAT_OPTIMAL_ALIGNMENT, size_t);
|
||||
|
||||
/* create and register memory for all rdma segments */
|
||||
buf = udapl_btl->super.btl_mpool->mpool_alloc(udapl_btl->super.btl_mpool,
|
||||
(size_plus_align * mca_btl_udapl_component.udapl_eager_rdma_num),
|
||||
0, 0,
|
||||
0, 0,
|
||||
(mca_mpool_base_registration_t**)&endpoint->endpoint_eager_rdma_local.reg);
|
||||
|
||||
if(!buf)
|
||||
|
@ -236,6 +236,12 @@ int mca_btl_udapl_register_mca_params(void)
|
||||
NULL, &mca_btl_udapl_component.if_exclude,
|
||||
REGSTR_EMPTY_OK), tmp_rc, rc);
|
||||
|
||||
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("verbose",
|
||||
"Verbosity level of the uDAPL BTL (-1 thru 100)",
|
||||
VERBOSE_SHOW_HELP,
|
||||
&(mca_btl_udapl_component.udapl_verbosity),
|
||||
REGINT_NEG_ONE_OK), tmp_rc, rc);
|
||||
|
||||
/* register uDAPL module parameters */
|
||||
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("async_evd_qlen",
|
||||
"The asynchronous event dispatcher queue length.",
|
||||
|
@ -11,6 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Sandia National Laboratories. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -129,15 +130,17 @@ mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
|
||||
(void*)&udapl_proc->proc_addrs,
|
||||
&size);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
opal_output(0, "[%s:%d] ompi_modex_recv failed for peer %s",
|
||||
__FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name));
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
|
||||
("ompi_modex_recv failed for peer %s",
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name)));
|
||||
OBJ_RELEASE(udapl_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if((size % sizeof(mca_btl_udapl_addr_t)) != 0) {
|
||||
opal_output(0, "[%s:%d] invalid udapl address for peer %s",
|
||||
__FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name));
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
|
||||
("invalid udapl address for peer %s",
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name)));
|
||||
OBJ_RELEASE(udapl_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user