break evd_qlen into individual qlens (async,dto,conn); add checks based on udapl limits and number of peers
This commit was SVN r14659.
Этот коммит содержится в:
родитель
cd87b05711
Коммит
2ed72bf2e2
@ -30,9 +30,10 @@
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
|
||||
#include "btl_udapl.h"
|
||||
#include "btl_udapl_frag.h"
|
||||
#include "btl_udapl_proc.h"
|
||||
#include "btl_udapl_endpoint.h"
|
||||
#include "btl_udapl_frag.h"
|
||||
#include "btl_udapl_mca.h"
|
||||
#include "btl_udapl_proc.h"
|
||||
#include "ompi/datatype/convertor.h"
|
||||
#include "ompi/datatype/datatype.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
@ -132,12 +133,11 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
|
||||
{
|
||||
mca_mpool_base_resources_t res;
|
||||
DAT_CONN_QUAL port;
|
||||
DAT_IA_ATTR attr;
|
||||
DAT_RETURN rc;
|
||||
|
||||
/* open the uDAPL interface */
|
||||
btl->udapl_evd_async = DAT_HANDLE_NULL;
|
||||
rc = dat_ia_open(ia_name, mca_btl_udapl_module.udapl_evd_qlen,
|
||||
rc = dat_ia_open(ia_name, btl->udapl_async_evd_qlen,
|
||||
&btl->udapl_evd_async, &btl->udapl_ia);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
char* major;
|
||||
@ -164,9 +164,8 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
|
||||
}
|
||||
|
||||
/* query to get address information */
|
||||
/* TODO - we only get the address, but there's other useful stuff here */
|
||||
rc = dat_ia_query(btl->udapl_ia, &btl->udapl_evd_async,
|
||||
DAT_IA_FIELD_IA_ADDRESS_PTR, &attr, 0, NULL);
|
||||
DAT_IA_ALL, &(btl->udapl_ia_attr), 0, NULL);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
char* major;
|
||||
char* minor;
|
||||
@ -178,12 +177,33 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
|
||||
goto failure;
|
||||
}
|
||||
|
||||
memcpy(&btl->udapl_addr.addr, attr.ia_address_ptr, sizeof(DAT_SOCK_ADDR));
|
||||
memcpy(&btl->udapl_addr.addr, (btl->udapl_ia_attr).ia_address_ptr,
|
||||
sizeof(DAT_SOCK_ADDR));
|
||||
|
||||
/* check evd qlen against adapter max */
|
||||
if (btl->udapl_dto_evd_qlen > (btl->udapl_ia_attr).max_evd_qlen) {
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
"evd_qlen adapter max",
|
||||
true,
|
||||
"btl_udapl_dto_evd_qlen",
|
||||
btl->udapl_dto_evd_qlen,
|
||||
(btl->udapl_ia_attr).max_evd_qlen);
|
||||
btl->udapl_dto_evd_qlen = btl->udapl_ia_attr.max_evd_qlen;
|
||||
}
|
||||
if (btl->udapl_conn_evd_qlen > (btl->udapl_ia_attr).max_evd_qlen) {
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
"evd_qlen adapter max",
|
||||
true,
|
||||
"btl_udapl_conn_evd_qlen",
|
||||
btl->udapl_conn_evd_qlen,
|
||||
(btl->udapl_ia_attr).max_evd_qlen);
|
||||
btl->udapl_conn_evd_qlen = btl->udapl_ia_attr.max_evd_qlen;
|
||||
}
|
||||
|
||||
/* set up evd's */
|
||||
rc = dat_evd_create(btl->udapl_ia,
|
||||
mca_btl_udapl_module.udapl_evd_qlen, DAT_HANDLE_NULL,
|
||||
DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG, &btl->udapl_evd_dto);
|
||||
btl->udapl_dto_evd_qlen, DAT_HANDLE_NULL,
|
||||
DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG, &btl->udapl_evd_dto);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
char* major;
|
||||
char* minor;
|
||||
@ -196,7 +216,7 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
|
||||
}
|
||||
|
||||
rc = dat_evd_create(btl->udapl_ia,
|
||||
mca_btl_udapl_module.udapl_evd_qlen, DAT_HANDLE_NULL,
|
||||
btl->udapl_conn_evd_qlen, DAT_HANDLE_NULL,
|
||||
DAT_EVD_CR_FLAG | DAT_EVD_CONNECTION_FLAG, &btl->udapl_evd_conn);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
char* major;
|
||||
@ -251,7 +271,7 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
|
||||
So, we insert the port we used for our PSP into the DAT_SOCK_ADDR for
|
||||
this IA. uDAPL then conveniently propagates this to where we need it.
|
||||
*/
|
||||
((struct sockaddr_in*)attr.ia_address_ptr)->sin_port = htons(port);
|
||||
((struct sockaddr_in*)(btl->udapl_ia_attr.ia_address_ptr))->sin_port = htons(port);
|
||||
((struct sockaddr_in*)&btl->udapl_addr.addr)->sin_port = htons(port);
|
||||
|
||||
/* initialize the memory pool */
|
||||
@ -332,7 +352,8 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
|
||||
/* initialize miscellaneous variables */
|
||||
btl->udapl_async_events = 0;
|
||||
btl->udapl_connect_inprogress = 0;
|
||||
|
||||
btl->udapl_num_peers = 0;
|
||||
|
||||
/* TODO - Set up SRQ when it is supported */
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
@ -383,6 +404,191 @@ int mca_btl_udapl_finalize(struct mca_btl_base_module_t* base_btl)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Adjust parameters that are dependent on the number of peers.
|
||||
*
|
||||
* @param udapl_btl (IN) BTL module
|
||||
* @param nprocs (IN) number of processes handed into
|
||||
* mca_btl_udapl_add_procs()
|
||||
* @return OMPI_SUCCESS or error status on failure
|
||||
*/
|
||||
|
||||
int mca_btl_udapl_set_peer_parameters(
|
||||
struct mca_btl_udapl_module_t* udapl_btl,
|
||||
size_t nprocs)
|
||||
{
|
||||
int rc = OMPI_SUCCESS;
|
||||
DAT_RETURN dat_rc = DAT_SUCCESS;
|
||||
uint potential_udapl_timeout;
|
||||
int first_time_sizing = (udapl_btl->udapl_num_peers == 0 ? 1 : 0);
|
||||
|
||||
/* nprocs includes self so subtract 1 */
|
||||
udapl_btl->udapl_num_peers += nprocs - 1;
|
||||
|
||||
/* resize dto_evd_qlen if not already at its max */
|
||||
if (udapl_btl->udapl_dto_evd_qlen !=
|
||||
udapl_btl->udapl_ia_attr.max_evd_qlen) {
|
||||
|
||||
int potential_dto_evd_qlen;
|
||||
int max_connection_dto_events;
|
||||
int eager_connection_dto_events;
|
||||
|
||||
/* eager connection dto events already factored into
|
||||
* max_recv/request_dtos but need to calculate max connection dtos;
|
||||
* see mca_btl_udapl_get_params() for max_recv/request_dtos
|
||||
*/
|
||||
eager_connection_dto_events = udapl_btl->udapl_max_recv_dtos +
|
||||
udapl_btl->udapl_max_request_dtos;
|
||||
max_connection_dto_events = mca_btl_udapl_component.udapl_num_recvs +
|
||||
mca_btl_udapl_component.udapl_num_sends +
|
||||
(mca_btl_udapl_component.udapl_num_recvs /
|
||||
mca_btl_udapl_component.udapl_sr_win) + 1;
|
||||
potential_dto_evd_qlen = udapl_btl->udapl_num_peers *
|
||||
(eager_connection_dto_events + max_connection_dto_events);
|
||||
|
||||
/* here we use what the library calculates as the
|
||||
* potential_dto_evd_qlen unless the user has set
|
||||
*/
|
||||
if (first_time_sizing) {
|
||||
if (udapl_btl->udapl_dto_evd_qlen < potential_dto_evd_qlen) {
|
||||
if (MCA_BTL_UDAPL_DTO_EVD_QLEN_DEFAULT !=
|
||||
udapl_btl->udapl_dto_evd_qlen) {
|
||||
|
||||
/* user modified so warn */
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
"evd_qlen too low",
|
||||
true,
|
||||
"btl_udapl_dto_evd_qlen",
|
||||
udapl_btl->udapl_dto_evd_qlen,
|
||||
"btl_udapl_dto_evd_qlen",
|
||||
potential_dto_evd_qlen);
|
||||
} else {
|
||||
udapl_btl->udapl_dto_evd_qlen = potential_dto_evd_qlen;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* since this is not the first time attempting to resize the
|
||||
* evd queue length just use the potential value; this may not
|
||||
* be the best solution
|
||||
*/
|
||||
udapl_btl->udapl_dto_evd_qlen = potential_dto_evd_qlen;
|
||||
}
|
||||
|
||||
udapl_btl->udapl_dto_evd_qlen = ((udapl_btl->udapl_dto_evd_qlen >
|
||||
udapl_btl->udapl_ia_attr.max_evd_qlen) ?
|
||||
udapl_btl->udapl_ia_attr.max_evd_qlen :
|
||||
udapl_btl->udapl_dto_evd_qlen);
|
||||
|
||||
/* dat call to actually resize dto event dispatcher queue length */
|
||||
dat_rc = dat_evd_resize(udapl_btl->udapl_evd_dto,
|
||||
udapl_btl->udapl_dto_evd_qlen);
|
||||
if(DAT_SUCCESS != dat_rc) {
|
||||
char* major;
|
||||
char* minor;
|
||||
|
||||
dat_strerror(dat_rc, (const char**)&major,
|
||||
(const char**)&minor);
|
||||
|
||||
/* DAT_INVALID_STATE is actually OK for a call to dat_evd_resize(),
|
||||
* all it indicates is that you are setting to the current value
|
||||
*/
|
||||
if (strcmp(major, "DAT_INVALID_STATE")) {
|
||||
BTL_ERROR(("ERROR: %s %s %s\n", "dat_evd_resize",
|
||||
major, minor));
|
||||
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* resize connection evd qlen */
|
||||
if (udapl_btl->udapl_conn_evd_qlen !=
|
||||
udapl_btl->udapl_ia_attr.max_evd_qlen) {
|
||||
|
||||
int potential_conn_evd_qlen = 2 * udapl_btl->udapl_num_peers;
|
||||
|
||||
if (first_time_sizing) {
|
||||
if (udapl_btl->udapl_conn_evd_qlen < potential_conn_evd_qlen) {
|
||||
if (MCA_BTL_UDAPL_CONN_EVD_QLEN_DEFAULT !=
|
||||
udapl_btl->udapl_conn_evd_qlen) {
|
||||
|
||||
/* user modified so warn */
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
"evd_qlen too low",
|
||||
true,
|
||||
"btl_udapl_conn_evd_qlen",
|
||||
udapl_btl->udapl_conn_evd_qlen,
|
||||
"btl_udapl_conn_evd_qlen",
|
||||
potential_conn_evd_qlen);
|
||||
} else {
|
||||
udapl_btl->udapl_conn_evd_qlen = potential_conn_evd_qlen;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* since this is not the first time attempting to resize the
|
||||
* evd queue length just use the potential value; this may not
|
||||
* be the best solution
|
||||
*/
|
||||
udapl_btl->udapl_conn_evd_qlen = potential_conn_evd_qlen;
|
||||
}
|
||||
|
||||
udapl_btl->udapl_conn_evd_qlen = ((udapl_btl->udapl_conn_evd_qlen >
|
||||
udapl_btl->udapl_ia_attr.max_evd_qlen) ?
|
||||
udapl_btl->udapl_ia_attr.max_evd_qlen :
|
||||
udapl_btl->udapl_conn_evd_qlen);
|
||||
|
||||
/* dat call to actually resize conn evd queue length */
|
||||
dat_rc = dat_evd_resize(udapl_btl->udapl_evd_conn,
|
||||
udapl_btl->udapl_conn_evd_qlen);
|
||||
if(DAT_SUCCESS != dat_rc) {
|
||||
char* major;
|
||||
char* minor;
|
||||
|
||||
dat_strerror(dat_rc, (const char**)&major,
|
||||
(const char**)&minor);
|
||||
|
||||
/* DAT_INVALID_STATE is actually OK for a call to dat_evd_resize(),
|
||||
* all it indicates is that you are setting to the current value
|
||||
*/
|
||||
if (strcmp(major, "DAT_INVALID_STATE")) {
|
||||
BTL_ERROR(("ERROR: %s %s %s\n", "dat_evd_resize",
|
||||
major, minor));
|
||||
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* adjust connection timeout value, calculated in microseconds */
|
||||
potential_udapl_timeout = MCA_BTL_UDAPL_CONN_TIMEOUT_INC *
|
||||
udapl_btl->udapl_num_peers;
|
||||
|
||||
if (mca_btl_udapl_component.udapl_timeout <
|
||||
potential_udapl_timeout) {
|
||||
|
||||
if (MCA_BTL_UDAPL_CONN_TIMEOUT_DEFAULT !=
|
||||
mca_btl_udapl_component.udapl_timeout) {
|
||||
|
||||
/* user modified so warn */
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
"connection timeout low",
|
||||
true,
|
||||
"btl_udapl_timeout",
|
||||
mca_btl_udapl_component.udapl_timeout,
|
||||
"btl_udapl_timeout",
|
||||
potential_udapl_timeout);
|
||||
} else {
|
||||
mca_btl_udapl_component.udapl_timeout =
|
||||
potential_udapl_timeout;
|
||||
}
|
||||
}
|
||||
mca_btl_udapl_component.udapl_timeout =
|
||||
((mca_btl_udapl_component.udapl_timeout >
|
||||
MCA_BTL_UDAPL_CONN_TIMEOUT_MAX) ?
|
||||
MCA_BTL_UDAPL_CONN_TIMEOUT_MAX :
|
||||
mca_btl_udapl_component.udapl_timeout);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
@ -441,6 +647,12 @@ int mca_btl_udapl_add_procs(
|
||||
peers[i] = udapl_endpoint;
|
||||
}
|
||||
|
||||
/* resize based on number of processes */
|
||||
if (OMPI_SUCCESS !=
|
||||
mca_btl_udapl_set_peer_parameters(udapl_btl, nprocs)) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -60,7 +60,7 @@ struct mca_btl_udapl_component_t {
|
||||
int32_t udapl_num_recvs; /**< number of recv buffers to keep posted */
|
||||
int32_t udapl_num_sends; /**< number of sends to post on endpoint */
|
||||
int32_t udapl_sr_win; /**< number of fragments recieved before
|
||||
returnting credits to sendier */
|
||||
returning credits to sender */
|
||||
int32_t udapl_timeout; /**< connection timeout, in microseconds */
|
||||
size_t udapl_eager_frag_size;
|
||||
size_t udapl_max_frag_size;
|
||||
@ -103,12 +103,13 @@ struct mca_btl_udapl_module_t {
|
||||
DAT_IA_HANDLE udapl_ia;
|
||||
DAT_PZ_HANDLE udapl_pz;
|
||||
DAT_PSP_HANDLE udapl_psp;
|
||||
DAT_EP_PARAM udapl_ep_param;
|
||||
DAT_IA_ATTR udapl_ia_attr;
|
||||
|
||||
/* event dispatchers - async, data transfer, connection negotiation */
|
||||
DAT_EVD_HANDLE udapl_evd_async;
|
||||
DAT_EVD_HANDLE udapl_evd_dto;
|
||||
DAT_EVD_HANDLE udapl_evd_conn;
|
||||
DAT_EP_PARAM udapl_ep_param;
|
||||
|
||||
/* free list of fragment descriptors */
|
||||
ompi_free_list_t udapl_frag_eager;
|
||||
@ -117,7 +118,6 @@ struct mca_btl_udapl_module_t {
|
||||
ompi_free_list_t udapl_frag_control;
|
||||
|
||||
opal_mutex_t udapl_lock; /* lock for accessing module state */
|
||||
|
||||
opal_mutex_t udapl_eager_rdma_lock; /* eager rdma lock */
|
||||
int32_t udapl_eager_rdma_endpoint_count; /* count of the number of
|
||||
* endpoints in
|
||||
@ -129,9 +129,12 @@ struct mca_btl_udapl_module_t {
|
||||
*/
|
||||
int32_t udapl_async_events;
|
||||
int32_t udapl_connect_inprogress;
|
||||
int32_t udapl_num_peers;
|
||||
|
||||
/* module specific limits */
|
||||
int udapl_evd_qlen;
|
||||
int udapl_async_evd_qlen;
|
||||
int udapl_conn_evd_qlen;
|
||||
int udapl_dto_evd_qlen;
|
||||
int udapl_max_request_dtos; /**< maximum number of outstanding consumer
|
||||
submitted sends and rdma operations, see
|
||||
section 6.6.6 of uDAPL Spec */
|
||||
|
@ -111,30 +111,6 @@ mca_btl_udapl_error(DAT_RETURN ret, char* str)
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Utility routines for parameter registration
|
||||
*/
|
||||
|
||||
/*
 * Register a string parameter under the "btl" / "udapl" names and
 * look up its value.
 *
 * @param param_name    (IN)  parameter name passed to the registration call
 * @param default_value (IN)  default passed to the registration call
 * @return value stored by mca_base_param_lookup_string(), or NULL if
 *         the lookup stored nothing
 *         (NOTE(review): ownership of the returned string is not
 *         visible here - confirm whether the caller must free it)
 */
static inline char* mca_btl_udapl_param_register_string(
    const char* param_name,
    const char* default_value)
{
    /* start from NULL so a failed lookup never returns garbage */
    char *param_value = NULL;
    int id = mca_base_param_register_string("btl","udapl",param_name,NULL,default_value);
    mca_base_param_lookup_string(id, &param_value);
    return param_value;
}
|
||||
|
||||
/*
 * Register an integer parameter under the "btl" / "udapl" names and
 * look up its value.
 *
 * @param param_name    (IN)  parameter name passed to the registration call
 * @param default_value (IN)  default passed to the registration call
 * @return value stored by mca_base_param_lookup_int(); default_value
 *         if the lookup stored nothing
 */
static inline int mca_btl_udapl_param_register_int(
    const char* param_name,
    int default_value)
{
    int id = mca_base_param_register_int("btl","udapl",param_name,NULL,default_value);
    /* pre-load the default so it is returned when the lookup stores nothing */
    int param_value = default_value;
    mca_base_param_lookup_int(id,&param_value);
    return param_value;
}
|
||||
|
||||
/*
|
||||
* Called by MCA framework to open the component, registers
|
||||
* component parameters.
|
||||
@ -689,7 +665,7 @@ int mca_btl_udapl_component_progress()
|
||||
|
||||
/* setup frag ftr location and do callback */
|
||||
frag->segment.seg_len = dto->transfered_length -
|
||||
sizeof(mca_btl_udapl_footer_t);
|
||||
sizeof(mca_btl_udapl_footer_t);
|
||||
frag->ftr = (mca_btl_udapl_footer_t *)
|
||||
((char *)frag->segment.seg_addr.pval +
|
||||
frag->segment.seg_len);
|
||||
|
@ -37,9 +37,10 @@
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "btl_udapl.h"
|
||||
#include "btl_udapl_endpoint.h"
|
||||
#include "btl_udapl_proc.h"
|
||||
#include "btl_udapl_endpoint.h"
|
||||
#include "btl_udapl_frag.h"
|
||||
#include "btl_udapl_mca.h"
|
||||
#include "btl_udapl_proc.h"
|
||||
|
||||
static void mca_btl_udapl_endpoint_send_cb(int status, orte_process_name_t* endpoint,
|
||||
orte_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
@ -168,6 +169,7 @@ int mca_btl_udapl_endpoint_send(mca_btl_base_endpoint_t* endpoint,
|
||||
/* just send it already.. */
|
||||
if(frag->size ==
|
||||
mca_btl_udapl_component.udapl_eager_frag_size) {
|
||||
|
||||
if(OPAL_THREAD_ADD32(&endpoint->endpoint_eager_rdma_remote.tokens, -1) < 0) {
|
||||
/* no rdma segment available so either send or queue */
|
||||
OPAL_THREAD_ADD32(&endpoint->endpoint_eager_rdma_remote.tokens, 1);
|
||||
@ -291,6 +293,8 @@ int mca_btl_udapl_endpoint_get_params(mca_btl_udapl_module_t* btl,
|
||||
DAT_EP_PARAM* ep_param)
|
||||
{
|
||||
int rc = OMPI_SUCCESS;
|
||||
int request_dtos;
|
||||
int max_control_messages;
|
||||
DAT_EP_HANDLE dummy_ep;
|
||||
DAT_EP_ATTR* ep_attr = &((*ep_param).ep_attr);
|
||||
|
||||
@ -332,12 +336,83 @@ int mca_btl_udapl_endpoint_get_params(mca_btl_udapl_module_t* btl,
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Set values from mca parameters */
|
||||
(*ep_attr).max_recv_dtos =
|
||||
btl->udapl_max_recv_dtos;
|
||||
(*ep_attr).max_request_dtos =
|
||||
btl->udapl_max_request_dtos;
|
||||
/* Set max_recv_dtos :
|
||||
* The max_recv_dtos should be equal to the number of
|
||||
* outstanding posted receives, which for this BTL will
|
||||
* be mca_btl_udapl_component.udapl_num_recvs.
|
||||
*/
|
||||
if (btl->udapl_max_recv_dtos <
|
||||
mca_btl_udapl_component.udapl_num_recvs) {
|
||||
|
||||
if (MCA_BTL_UDAPL_MAX_RECV_DTOS_DEFAULT !=
|
||||
btl->udapl_max_recv_dtos) {
|
||||
|
||||
/* user modified, this will fail and is not acceptable */
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
"max_recv_dtos too low",
|
||||
true,
|
||||
btl->udapl_max_recv_dtos,
|
||||
mca_btl_udapl_component.udapl_num_recvs);
|
||||
|
||||
btl->udapl_max_recv_dtos =
|
||||
mca_btl_udapl_component.udapl_num_recvs;
|
||||
}
|
||||
|
||||
if (MCA_BTL_UDAPL_NUM_RECVS_DEFAULT !=
|
||||
mca_btl_udapl_component.udapl_num_recvs) {
|
||||
|
||||
/* user modified udapl_num_recvs so adjust max_recv_dtos */
|
||||
btl->udapl_max_recv_dtos =
|
||||
mca_btl_udapl_component.udapl_num_recvs;
|
||||
}
|
||||
}
|
||||
|
||||
(*ep_attr).max_recv_dtos = btl->udapl_max_recv_dtos;
|
||||
|
||||
/* Set max_request_dtos :
|
||||
* The max_request_dtos should equal the max number of
|
||||
* outstanding sends plus RDMA operations.
|
||||
*
|
||||
* Note: Using the same value for both EAGER and MAX
|
||||
* connections even though the MAX connection does not
|
||||
* have the extra RDMA operations that the EAGER
|
||||
* connection does.
|
||||
*/
|
||||
max_control_messages =
|
||||
(mca_btl_udapl_component.udapl_num_recvs /
|
||||
mca_btl_udapl_component.udapl_sr_win) + 1 +
|
||||
(mca_btl_udapl_component.udapl_eager_rdma_num /
|
||||
mca_btl_udapl_component.udapl_eager_rdma_win) + 1;
|
||||
request_dtos = mca_btl_udapl_component.udapl_num_sends +
|
||||
(2*mca_btl_udapl_component.udapl_eager_rdma_num) +
|
||||
max_control_messages;
|
||||
|
||||
if (btl->udapl_max_request_dtos < request_dtos) {
|
||||
if (MCA_BTL_UDAPL_MAX_REQUEST_DTOS_DEFAULT !=
|
||||
mca_btl_udapl_module.udapl_max_request_dtos) {
|
||||
|
||||
/* user has modified */
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
"max_request_dtos too low",
|
||||
true,
|
||||
btl->udapl_max_request_dtos, request_dtos);
|
||||
} else {
|
||||
btl->udapl_max_request_dtos = request_dtos;
|
||||
}
|
||||
}
|
||||
|
||||
if (btl->udapl_max_request_dtos > btl->udapl_ia_attr.max_dto_per_ep) {
|
||||
/* do not go beyond what is allowed by the system */
|
||||
opal_show_help("help-mpi-btl-udapl.txt",
|
||||
"max_request_dtos system max",
|
||||
true,
|
||||
btl->udapl_max_request_dtos,
|
||||
btl->udapl_ia_attr.max_dto_per_ep);
|
||||
btl->udapl_max_request_dtos = btl->udapl_ia_attr.max_dto_per_ep;
|
||||
}
|
||||
|
||||
(*ep_attr).max_request_dtos = btl->udapl_max_request_dtos;
|
||||
|
||||
/* close the dummy endpoint */
|
||||
rc = dat_ep_free(dummy_ep);
|
||||
if (rc != DAT_SUCCESS) {
|
||||
@ -695,6 +770,7 @@ static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t* endpoint)
|
||||
frag->segment.seg_len + sizeof(mca_btl_udapl_footer_t));
|
||||
assert(frag->size ==
|
||||
mca_btl_udapl_component.udapl_eager_frag_size);
|
||||
|
||||
rc = dat_ep_post_send(endpoint->endpoint_eager, 1,
|
||||
&frag->triplet, cookie, DAT_COMPLETION_DEFAULT_FLAG);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
|
@ -215,10 +215,25 @@ int mca_btl_udapl_register_mca_params(void)
|
||||
REGINT_GE_ONE), tmp_rc, rc);
|
||||
|
||||
/* register uDAPL module parameters */
|
||||
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("evd_qlen",
|
||||
"The event dispatcher queue length.",
|
||||
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("async_evd_qlen",
|
||||
"The asynchronous event dispatcher queue length.",
|
||||
MCA_BTL_UDAPL_ASYNC_EVD_QLEN_DEFAULT,
|
||||
(int*)&mca_btl_udapl_module.udapl_async_evd_qlen,
|
||||
REGINT_GE_ONE), tmp_rc, rc);
|
||||
|
||||
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("conn_evd_qlen",
|
||||
"The connection event dispatcher queue length is "
|
||||
"a function of the number of connections expected.",
|
||||
MCA_BTL_UDAPL_CONN_EVD_QLEN_DEFAULT,
|
||||
(int*)&mca_btl_udapl_module.udapl_conn_evd_qlen,
|
||||
REGINT_GE_ONE), tmp_rc, rc);
|
||||
|
||||
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("dto_evd_qlen",
|
||||
"The data transfer operation event dispatcher queue length is "
|
||||
"a function of the number of connections as well as the "
|
||||
"maximum number of outstanding data transfer operations.",
|
||||
MCA_BTL_UDAPL_DTO_EVD_QLEN_DEFAULT,
|
||||
(int*)&mca_btl_udapl_module.udapl_evd_qlen,
|
||||
(int*)&mca_btl_udapl_module.udapl_dto_evd_qlen,
|
||||
REGINT_GE_ONE), tmp_rc, rc);
|
||||
|
||||
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_request_dtos",
|
||||
|
@ -39,3 +39,45 @@ report the same DAT_OPTIMAL_ALIGNMENT value and this differs from
|
||||
BTL buffer_alignment then setting "--mca btl_udapl_buffer_alignment
|
||||
%d" may improve performance.
|
||||
|
||||
[max_recv_dtos too low]
|
||||
|
||||
WARNING: The MCA parameter max_recv_dtos has been modified to a value,
|
||||
%d, that is insufficient. This value must be greater than or equal to
|
||||
num_recvs, %d. The uDAPL BTL will adjust to allow the program to
|
||||
proceed.
|
||||
|
||||
[max_request_dtos too low]
|
||||
|
||||
WARNING: The MCA parameter max_request_dtos has been modified to a
|
||||
value, %d, which may not be sufficient. Try setting max_request_dtos
|
||||
to %d if program fails.
|
||||
|
||||
[max_recv_dtos system max]
|
||||
|
||||
WARNING: The MCA parameter max_recv_dtos is trying to be set to,
|
||||
%d, which is larger than allowable so the value will be set to maximum
|
||||
allowed, %d.
|
||||
|
||||
[max_request_dtos system max]
|
||||
|
||||
WARNING: The MCA parameter max_request_dtos is trying to be set to,
|
||||
%d, which is larger than allowable so the value will be set to maximum
|
||||
allowed, %d.
|
||||
|
||||
[evd_qlen adapter max]
|
||||
|
||||
WARNING: The MCA parameter %s is trying to be set to %d,
|
||||
which is larger than allowable so the value will be set to maximum
|
||||
allowed, %d.
|
||||
|
||||
[evd_qlen too low]
|
||||
|
||||
WARNING: The MCA parameter %s has been modified to a value,
|
||||
%d, which may not be sufficient. Try setting %s to %d if
|
||||
program fails.
|
||||
|
||||
[connection timeout low]
|
||||
|
||||
WARNING: The MCA parameter %s has been modified to a value,
|
||||
%d, which may not be sufficient. Try setting %s to %d if
|
||||
program fails.
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user