Merge branch 'topic/oshmem_spml_ikrit_hw_rdma_channel-fix1'
Этот коммит содержится в:
Коммит
a2c85c6d16
@ -7,45 +7,39 @@
|
|||||||
#
|
#
|
||||||
# $HEADER$
|
# $HEADER$
|
||||||
#
|
#
|
||||||
|
|
||||||
|
|
||||||
[unable to create endpoint]
|
[unable to create endpoint]
|
||||||
MXM was unable to create an endpoint. Please make sure that the network link is
|
MXM was unable to create an endpoint. Please make sure that the network link is
|
||||||
active on the node and the hardware is functioning.
|
active on the node and the hardware is functioning.
|
||||||
|
|
||||||
Error: %s
|
Error: %s
|
||||||
|
#
|
||||||
[unable to get endpoint address]
|
[unable to get endpoint address]
|
||||||
MXM was unable to get endpoint address
|
MXM was unable to get endpoint address
|
||||||
|
|
||||||
Error: %s
|
Error: %s
|
||||||
|
#
|
||||||
[mxm mq create]
|
[mxm mq create]
|
||||||
Failed to create MQ for endpoint
|
Failed to create MQ for endpoint
|
||||||
|
|
||||||
Error: %s
|
Error: %s
|
||||||
|
#
|
||||||
[errors during mxm_progress]
|
[errors during mxm_progress]
|
||||||
|
|
||||||
Error %s occurred in attempting to make network progress (mxm_progress).
|
Error %s occurred in attempting to make network progress (mxm_progress).
|
||||||
|
#
|
||||||
|
|
||||||
[mxm init]
|
[mxm init]
|
||||||
Initialization of MXM library failed.
|
Initialization of MXM library failed.
|
||||||
|
|
||||||
Error: %s
|
Error: %s
|
||||||
|
#
|
||||||
[mxm shm tls]
|
[mxm shm tls]
|
||||||
ERROR: MXM shared memory transport can not be used
|
ERROR: MXM shared memory transport can not be used
|
||||||
because it is not fully compliant with OSHMEM spec
|
because it is not fully compliant with OSHMEM spec
|
||||||
|
|
||||||
MXM transport setting: %s
|
MXM transport setting: %s
|
||||||
|
#
|
||||||
[mxm tls]
|
[mxm tls]
|
||||||
ERROR: valid mxm transports are:
|
ERROR: valid mxm transports are:
|
||||||
"ud" "ud,self" "rc" or "dc"
|
"ud" "ud,self" "rc" or "dc"
|
||||||
|
|
||||||
transport setting is: %s=%s
|
transport setting is: %s=%s
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
@ -74,7 +74,7 @@ static inline int check_mxm_tls(char *var)
|
|||||||
"%s=%s",
|
"%s=%s",
|
||||||
var, getenv(var)
|
var, getenv(var)
|
||||||
)) {
|
)) {
|
||||||
orte_show_help("help-oshmem-spml-ikrit.txt", "mxm tls", true,
|
orte_show_help("help-oshmem-spml-ikrit.txt", "mxm shm tls", true,
|
||||||
str);
|
str);
|
||||||
free(str);
|
free(str);
|
||||||
}
|
}
|
||||||
@ -108,21 +108,40 @@ static inline int set_mxm_tls()
|
|||||||
setenv("MXM_OSHMEM_TLS", mca_spml_ikrit.mxm_tls, 1);
|
setenv("MXM_OSHMEM_TLS", mca_spml_ikrit.mxm_tls, 1);
|
||||||
return OSHMEM_SUCCESS;
|
return OSHMEM_SUCCESS;
|
||||||
}
|
}
|
||||||
return check_mxm_tls("MXM_TLS");
|
if (OSHMEM_SUCCESS == check_mxm_tls("MXM_TLS")) {
|
||||||
}
|
setenv("MXM_OSHMEM_TLS", tls, 1);
|
||||||
|
return OSHMEM_SUCCESS;
|
||||||
static inline void set_mxm_rc_tls()
|
|
||||||
{
|
|
||||||
char *tls;
|
|
||||||
|
|
||||||
tls = getenv("MXM_OSHMEM_HW_RDMA_TLS");
|
|
||||||
if (NULL != tls) {
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
return OSHMEM_ERROR;
|
||||||
setenv("MXM_OSHMEM_HW_RDMA_TLS", "rc", 1);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int check_mxm_hw_tls(char *v, char *tls)
|
||||||
|
{
|
||||||
|
if ((0 == strcmp(tls, "rc") || 0 == strcmp(tls, "dc")))
|
||||||
|
return OSHMEM_SUCCESS;
|
||||||
|
|
||||||
|
if (strstr(tls, "ud") &&
|
||||||
|
(NULL == strstr(tls, "rc") && NULL == strstr(tls, "dc") &&
|
||||||
|
NULL == strstr(tls, "shm")))
|
||||||
|
return OSHMEM_SUCCESS;
|
||||||
|
|
||||||
|
orte_show_help("help-oshmem-spml-ikrit.txt", "mxm tls", true,
|
||||||
|
v, tls);
|
||||||
|
return OSHMEM_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int set_mxm_hw_rdma_tls()
|
||||||
|
{
|
||||||
|
if (!mca_spml_ikrit.hw_rdma_channel) {
|
||||||
|
return check_mxm_hw_tls("MXM_OSHMEM_TLS", getenv("MXM_OSHMEM_TLS"));
|
||||||
|
}
|
||||||
|
setenv("MXM_OSHMEM_HW_RDMA_RC_QP_LIMIT", "-1", 0);
|
||||||
|
setenv("MXM_OSHMEM_HW_RDMA_TLS", "rc", 0);
|
||||||
|
|
||||||
|
return check_mxm_hw_tls("MXM_OSHMEM_HW_RDMA_TLS",
|
||||||
|
getenv("MXM_OSHMEM_HW_RDMA_TLS"));
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline void mca_spml_ikrit_param_register_int(const char* param_name,
|
static inline void mca_spml_ikrit_param_register_int(const char* param_name,
|
||||||
@ -157,6 +176,8 @@ static inline void mca_spml_ikrit_param_register_string(const char* param_name,
|
|||||||
|
|
||||||
static int mca_spml_ikrit_component_register(void)
|
static int mca_spml_ikrit_component_register(void)
|
||||||
{
|
{
|
||||||
|
char *v;
|
||||||
|
|
||||||
mca_spml_ikrit_param_register_int("free_list_num", 1024,
|
mca_spml_ikrit_param_register_int("free_list_num", 1024,
|
||||||
0,
|
0,
|
||||||
&mca_spml_ikrit.free_list_num);
|
&mca_spml_ikrit.free_list_num);
|
||||||
@ -178,8 +199,13 @@ static int mca_spml_ikrit_component_register(void)
|
|||||||
mca_spml_ikrit_param_register_int("hw_rdma_channel", 0,
|
mca_spml_ikrit_param_register_int("hw_rdma_channel", 0,
|
||||||
"create separate reliable connection channel",
|
"create separate reliable connection channel",
|
||||||
&mca_spml_ikrit.hw_rdma_channel);
|
&mca_spml_ikrit.hw_rdma_channel);
|
||||||
|
|
||||||
|
if (!mca_spml_ikrit.hw_rdma_channel)
|
||||||
|
v = "ud,self";
|
||||||
|
else
|
||||||
|
v = "rc,ud,self";
|
||||||
mca_spml_ikrit_param_register_string("mxm_tls",
|
mca_spml_ikrit_param_register_string("mxm_tls",
|
||||||
"rc,ud,self",
|
v,
|
||||||
"[string] TL channels for MXM",
|
"[string] TL channels for MXM",
|
||||||
&mca_spml_ikrit.mxm_tls);
|
&mca_spml_ikrit.mxm_tls);
|
||||||
|
|
||||||
@ -236,15 +262,16 @@ static int mca_spml_ikrit_component_open(void)
|
|||||||
|
|
||||||
mca_spml_ikrit.ud_only = 0;
|
mca_spml_ikrit.ud_only = 0;
|
||||||
#if MXM_API < MXM_VERSION(2,1)
|
#if MXM_API < MXM_VERSION(2,1)
|
||||||
mca_spml_ikrit.rc_channel = 0;
|
mca_spml_ikrit.hw_rdma_channel = 0;
|
||||||
if ((MXM_OK != mxm_config_read_context_opts(&mca_spml_ikrit.mxm_ctx_opts)) ||
|
if ((MXM_OK != mxm_config_read_context_opts(&mca_spml_ikrit.mxm_ctx_opts)) ||
|
||||||
(MXM_OK != mxm_config_read_ep_opts(&mca_spml_ikrit.mxm_ep_opts)))
|
(MXM_OK != mxm_config_read_ep_opts(&mca_spml_ikrit.mxm_ep_opts)))
|
||||||
#else
|
#else
|
||||||
if (OSHMEM_SUCCESS != set_mxm_tls()) {
|
if (OSHMEM_SUCCESS != set_mxm_tls()) {
|
||||||
return OSHMEM_ERROR;
|
return OSHMEM_ERROR;
|
||||||
}
|
}
|
||||||
set_mxm_rc_tls();
|
if (OSHMEM_SUCCESS != set_mxm_hw_rdma_tls()) {
|
||||||
|
return OSHMEM_ERROR;
|
||||||
|
}
|
||||||
if ((mca_spml_ikrit.hw_rdma_channel && MXM_OK != mxm_config_read_opts(&mca_spml_ikrit.mxm_ctx_opts,
|
if ((mca_spml_ikrit.hw_rdma_channel && MXM_OK != mxm_config_read_opts(&mca_spml_ikrit.mxm_ctx_opts,
|
||||||
&mca_spml_ikrit.mxm_ep_hw_rdma_opts,
|
&mca_spml_ikrit.mxm_ep_hw_rdma_opts,
|
||||||
"OSHMEM_HW_RDMA", NULL, 0)) ||
|
"OSHMEM_HW_RDMA", NULL, 0)) ||
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user