Merge branch 'topic/oshmem_spml_ikrit_hw_rdma_channel-fix1'
Этот коммит содержится в:
Коммит
a2c85c6d16
@ -7,45 +7,39 @@
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
|
||||
[unable to create endpoint]
|
||||
MXM was unable to create an endpoint. Please make sure that the network link is
|
||||
active on the node and the hardware is functioning.
|
||||
|
||||
Error: %s
|
||||
|
||||
#
|
||||
[unable to get endpoint address]
|
||||
MXM was unable to get the endpoint address.
|
||||
|
||||
Error: %s
|
||||
|
||||
#
|
||||
[mxm mq create]
|
||||
Failed to create MQ for endpoint
|
||||
|
||||
Error: %s
|
||||
|
||||
#
|
||||
[errors during mxm_progress]
|
||||
|
||||
Error %s occurred in attempting to make network progress (mxm_progress).
|
||||
|
||||
|
||||
#
|
||||
[mxm init]
|
||||
Initialization of MXM library failed.
|
||||
|
||||
Error: %s
|
||||
|
||||
#
|
||||
[mxm shm tls]
|
||||
ERROR: MXM shared memory transport can not be used
|
||||
because it is not fully compliant with the OSHMEM spec.
|
||||
|
||||
MXM transport setting: %s
|
||||
|
||||
#
|
||||
[mxm tls]
|
||||
ERROR: valid mxm transports are:
|
||||
"ud" "ud,self" "rc" or "dc"
|
||||
|
||||
transport setting is: %s=%s
|
||||
|
||||
|
||||
|
||||
#
|
||||
|
@ -74,7 +74,7 @@ static inline int check_mxm_tls(char *var)
|
||||
"%s=%s",
|
||||
var, getenv(var)
|
||||
)) {
|
||||
orte_show_help("help-oshmem-spml-ikrit.txt", "mxm tls", true,
|
||||
orte_show_help("help-oshmem-spml-ikrit.txt", "mxm shm tls", true,
|
||||
str);
|
||||
free(str);
|
||||
}
|
||||
@ -108,21 +108,40 @@ static inline int set_mxm_tls()
|
||||
setenv("MXM_OSHMEM_TLS", mca_spml_ikrit.mxm_tls, 1);
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
return check_mxm_tls("MXM_TLS");
|
||||
}
|
||||
|
||||
static inline void set_mxm_rc_tls()
|
||||
{
|
||||
char *tls;
|
||||
|
||||
tls = getenv("MXM_OSHMEM_HW_RDMA_TLS");
|
||||
if (NULL != tls) {
|
||||
return;
|
||||
if (OSHMEM_SUCCESS == check_mxm_tls("MXM_TLS")) {
|
||||
setenv("MXM_OSHMEM_TLS", tls, 1);
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
setenv("MXM_OSHMEM_HW_RDMA_TLS", "rc", 1);
|
||||
return;
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
static inline int check_mxm_hw_tls(char *v, char *tls)
|
||||
{
|
||||
if ((0 == strcmp(tls, "rc") || 0 == strcmp(tls, "dc")))
|
||||
return OSHMEM_SUCCESS;
|
||||
|
||||
if (strstr(tls, "ud") &&
|
||||
(NULL == strstr(tls, "rc") && NULL == strstr(tls, "dc") &&
|
||||
NULL == strstr(tls, "shm")))
|
||||
return OSHMEM_SUCCESS;
|
||||
|
||||
orte_show_help("help-oshmem-spml-ikrit.txt", "mxm tls", true,
|
||||
v, tls);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
static inline int set_mxm_hw_rdma_tls()
|
||||
{
|
||||
if (!mca_spml_ikrit.hw_rdma_channel) {
|
||||
return check_mxm_hw_tls("MXM_OSHMEM_TLS", getenv("MXM_OSHMEM_TLS"));
|
||||
}
|
||||
setenv("MXM_OSHMEM_HW_RDMA_RC_QP_LIMIT", "-1", 0);
|
||||
setenv("MXM_OSHMEM_HW_RDMA_TLS", "rc", 0);
|
||||
|
||||
return check_mxm_hw_tls("MXM_OSHMEM_HW_RDMA_TLS",
|
||||
getenv("MXM_OSHMEM_HW_RDMA_TLS"));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline void mca_spml_ikrit_param_register_int(const char* param_name,
|
||||
@ -157,6 +176,8 @@ static inline void mca_spml_ikrit_param_register_string(const char* param_name,
|
||||
|
||||
static int mca_spml_ikrit_component_register(void)
|
||||
{
|
||||
char *v;
|
||||
|
||||
mca_spml_ikrit_param_register_int("free_list_num", 1024,
|
||||
0,
|
||||
&mca_spml_ikrit.free_list_num);
|
||||
@ -178,8 +199,13 @@ static int mca_spml_ikrit_component_register(void)
|
||||
mca_spml_ikrit_param_register_int("hw_rdma_channel", 0,
|
||||
"create separate reliable connection channel",
|
||||
&mca_spml_ikrit.hw_rdma_channel);
|
||||
|
||||
if (!mca_spml_ikrit.hw_rdma_channel)
|
||||
v = "ud,self";
|
||||
else
|
||||
v = "rc,ud,self";
|
||||
mca_spml_ikrit_param_register_string("mxm_tls",
|
||||
"rc,ud,self",
|
||||
v,
|
||||
"[string] TL channels for MXM",
|
||||
&mca_spml_ikrit.mxm_tls);
|
||||
|
||||
@ -236,15 +262,16 @@ static int mca_spml_ikrit_component_open(void)
|
||||
|
||||
mca_spml_ikrit.ud_only = 0;
|
||||
#if MXM_API < MXM_VERSION(2,1)
|
||||
mca_spml_ikrit.rc_channel = 0;
|
||||
mca_spml_ikrit.hw_rdma_channel = 0;
|
||||
if ((MXM_OK != mxm_config_read_context_opts(&mca_spml_ikrit.mxm_ctx_opts)) ||
|
||||
(MXM_OK != mxm_config_read_ep_opts(&mca_spml_ikrit.mxm_ep_opts)))
|
||||
#else
|
||||
if (OSHMEM_SUCCESS != set_mxm_tls()) {
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
set_mxm_rc_tls();
|
||||
|
||||
if (OSHMEM_SUCCESS != set_mxm_hw_rdma_tls()) {
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
if ((mca_spml_ikrit.hw_rdma_channel && MXM_OK != mxm_config_read_opts(&mca_spml_ikrit.mxm_ctx_opts,
|
||||
&mca_spml_ikrit.mxm_ep_hw_rdma_opts,
|
||||
"OSHMEM_HW_RDMA", NULL, 0)) ||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user