1
1

Merge branch 'topic/oshmem_spml_ikrit_hw_rdma_channel-fix1'

Этот коммит содержится в:
Alex Mikheev 2014-11-04 12:03:06 +02:00
родитель a4c0019153 1f2ab43ba9
Коммит a2c85c6d16
2 изменённых файлов: 52 добавлений и 31 удалений

Просмотреть файл

@ -7,45 +7,39 @@
#
# $HEADER$
#
[unable to create endpoint]
MXM was unable to create an endpoint. Please make sure that the network link is
active on the node and the hardware is functioning.
Error: %s
#
[unable to get endpoint address]
MXM was unable to get endpoint address
Error: %s
#
[mxm mq create]
Failed to create MQ for endpoint
Error: %s
#
[errors during mxm_progress]
Error %s occurred while attempting to make network progress (mxm_progress).
#
[mxm init]
Initialization of MXM library failed.
Error: %s
#
[mxm shm tls]
ERROR: MXM shared memory transport can not be used
because it is not fully compliant with OSHMEM spec
MXM transport setting: %s
#
[mxm tls]
ERROR: valid mxm transports are:
"ud" "ud,self" "rc" or "dc"
transport setting is: %s=%s
#

Просмотреть файл

@ -74,7 +74,7 @@ static inline int check_mxm_tls(char *var)
"%s=%s",
var, getenv(var)
)) {
orte_show_help("help-oshmem-spml-ikrit.txt", "mxm tls", true,
orte_show_help("help-oshmem-spml-ikrit.txt", "mxm shm tls", true,
str);
free(str);
}
@ -108,21 +108,40 @@ static inline int set_mxm_tls()
setenv("MXM_OSHMEM_TLS", mca_spml_ikrit.mxm_tls, 1);
return OSHMEM_SUCCESS;
}
return check_mxm_tls("MXM_TLS");
}
static inline void set_mxm_rc_tls()
{
char *tls;
tls = getenv("MXM_OSHMEM_HW_RDMA_TLS");
if (NULL != tls) {
return;
if (OSHMEM_SUCCESS == check_mxm_tls("MXM_TLS")) {
setenv("MXM_OSHMEM_TLS", tls, 1);
return OSHMEM_SUCCESS;
}
setenv("MXM_OSHMEM_HW_RDMA_TLS", "rc", 1);
return;
return OSHMEM_ERROR;
}
/*
 * Validate a transport-list setting for an MXM channel.
 *
 * v   - name of the environment variable the setting came from; it is only
 *       used in the diagnostic message.
 * tls - the transport list to validate. May be NULL when the variable is
 *       not set in the environment.
 *
 * A setting is accepted when it is exactly "rc" or exactly "dc", or when it
 * contains "ud" and contains none of "rc", "dc" or "shm". Matching is by
 * substring, not by comma-separated token.
 *
 * Returns OSHMEM_SUCCESS when the setting is accepted. Otherwise the
 * "mxm tls" help topic is shown and OSHMEM_ERROR is returned.
 */
static inline int check_mxm_hw_tls(char *v, char *tls)
{
    /* Callers hand in the raw result of getenv(), which is NULL for an
     * unset variable. strcmp()/strstr() on NULL is undefined behavior, so
     * treat an unset variable as an invalid setting. */
    if (NULL == tls) {
        orte_show_help("help-oshmem-spml-ikrit.txt", "mxm tls", true,
                       v, "(unset)");
        return OSHMEM_ERROR;
    }

    /* A single reliable transport on its own is fine. */
    if ((0 == strcmp(tls, "rc") || 0 == strcmp(tls, "dc")))
        return OSHMEM_SUCCESS;

    /* "ud" is fine as long as it is not mixed with rc, dc or shm. */
    if (strstr(tls, "ud") &&
        (NULL == strstr(tls, "rc") && NULL == strstr(tls, "dc") &&
         NULL == strstr(tls, "shm")))
        return OSHMEM_SUCCESS;

    orte_show_help("help-oshmem-spml-ikrit.txt", "mxm tls", true,
                   v, tls);
    return OSHMEM_ERROR;
}
/*
 * Pick the environment variable that holds the transport list for the
 * channel in use and validate its value with check_mxm_hw_tls().
 *
 * When mca_spml_ikrit.hw_rdma_channel is zero, MXM_OSHMEM_TLS is validated
 * as-is. Otherwise MXM_OSHMEM_HW_RDMA_RC_QP_LIMIT ("-1") and
 * MXM_OSHMEM_HW_RDMA_TLS ("rc") are given defaults first, and
 * MXM_OSHMEM_HW_RDMA_TLS is validated.
 *
 * Returns whatever check_mxm_hw_tls() returns.
 */
static inline int set_mxm_hw_rdma_tls()
{
    char *tls_var = "MXM_OSHMEM_TLS";

    if (mca_spml_ikrit.hw_rdma_channel) {
        /* overwrite == 0: values already present in the environment
         * are left untouched, these are defaults only. */
        setenv("MXM_OSHMEM_HW_RDMA_RC_QP_LIMIT", "-1", 0);
        setenv("MXM_OSHMEM_HW_RDMA_TLS", "rc", 0);
        tls_var = "MXM_OSHMEM_HW_RDMA_TLS";
    }

    return check_mxm_hw_tls(tls_var, getenv(tls_var));
}
#endif
static inline void mca_spml_ikrit_param_register_int(const char* param_name,
@ -157,6 +176,8 @@ static inline void mca_spml_ikrit_param_register_string(const char* param_name,
static int mca_spml_ikrit_component_register(void)
{
char *v;
mca_spml_ikrit_param_register_int("free_list_num", 1024,
0,
&mca_spml_ikrit.free_list_num);
@ -178,8 +199,13 @@ static int mca_spml_ikrit_component_register(void)
mca_spml_ikrit_param_register_int("hw_rdma_channel", 0,
"create separate reliable connection channel",
&mca_spml_ikrit.hw_rdma_channel);
if (!mca_spml_ikrit.hw_rdma_channel)
v = "ud,self";
else
v = "rc,ud,self";
mca_spml_ikrit_param_register_string("mxm_tls",
"rc,ud,self",
v,
"[string] TL channels for MXM",
&mca_spml_ikrit.mxm_tls);
@ -236,15 +262,16 @@ static int mca_spml_ikrit_component_open(void)
mca_spml_ikrit.ud_only = 0;
#if MXM_API < MXM_VERSION(2,1)
mca_spml_ikrit.rc_channel = 0;
mca_spml_ikrit.hw_rdma_channel = 0;
if ((MXM_OK != mxm_config_read_context_opts(&mca_spml_ikrit.mxm_ctx_opts)) ||
(MXM_OK != mxm_config_read_ep_opts(&mca_spml_ikrit.mxm_ep_opts)))
#else
if (OSHMEM_SUCCESS != set_mxm_tls()) {
return OSHMEM_ERROR;
}
set_mxm_rc_tls();
if (OSHMEM_SUCCESS != set_mxm_hw_rdma_tls()) {
return OSHMEM_ERROR;
}
if ((mca_spml_ikrit.hw_rdma_channel && MXM_OK != mxm_config_read_opts(&mca_spml_ikrit.mxm_ctx_opts,
&mca_spml_ikrit.mxm_ep_hw_rdma_opts,
"OSHMEM_HW_RDMA", NULL, 0)) ||