Add MCA parameters for the PSM MTL to specify the IB unit, port, IB service level, and PSM debug level to use. Also specify in the openib BTL params file that QLogic hardware supports only a maximum inline message size of 0.
This commit was SVN r21734.
Этот коммит содержится в:
родитель
3d265e859d
Коммит
38e48d4e2f
@ -190,18 +190,21 @@ vendor_id = 0x1fc1,0x1077,0x1175
|
||||
vendor_part_id = 13
|
||||
use_eager_rdma = 1
|
||||
mtu = 2048
|
||||
max_inline_data = 0
|
||||
|
||||
[QLogic InfiniPath 2]
|
||||
vendor_id = 0x1fc1,0x1077,0x1175
|
||||
vendor_part_id = 16,29216
|
||||
use_eager_rdma = 1
|
||||
mtu = 4096
|
||||
max_inline_data = 0
|
||||
|
||||
[QLogic InfiniPath 3]
|
||||
vendor_id = 0x1fc1,0x1077,0x1175
|
||||
vendor_part_id = 16,29474
|
||||
use_eager_rdma = 1
|
||||
mtu = 4096
|
||||
max_inline_data = 0
|
||||
|
||||
############################################################################
|
||||
|
||||
|
@ -83,6 +83,7 @@ int ompi_mtl_psm_module_init() {
|
||||
psm_mq_t mq;
|
||||
psm_epid_t epid; /* unique lid+port identifier */
|
||||
psm_uuid_t unique_job_key;
|
||||
struct psm_ep_open_opts ep_opt;
|
||||
unsigned long long *uu = (unsigned long long *) unique_job_key;
|
||||
char *generated_key;
|
||||
|
||||
@ -103,7 +104,22 @@ int ompi_mtl_psm_module_init() {
|
||||
/* Handle our own errors for opening endpoints */
|
||||
psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);
|
||||
|
||||
err = psm_ep_open(unique_job_key, NULL, &ep, &epid);
|
||||
bzero((void*) &ep_opt, sizeof(ep_opt));
|
||||
ep_opt.timeout = ompi_mtl_psm.connect_timeout * 1e9;
|
||||
ep_opt.unit = ompi_mtl_psm.ib_unit;
|
||||
ep_opt.affinity = -1; /* Let PSM choose affinity */
|
||||
ep_opt.shm_mbytes = -1; /* Choose PSM defaults */
|
||||
ep_opt.sendbufs_num = -1; /* Choose PSM defaults */
|
||||
|
||||
#if PSM_VERNO >= 0x0101
|
||||
ep_opt.network_pkey = ompi_mtl_psm.ib_pkey;
|
||||
#endif
|
||||
|
||||
ep_opt.port = ompi_mtl_psm.ib_port;
|
||||
ep_opt.outsl = ompi_mtl_psm.ib_service_level;
|
||||
|
||||
/* Open PSM endpoint */
|
||||
err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid);
|
||||
if (err) {
|
||||
opal_output(0, "Error in psm_ep_open (error %s)\n",
|
||||
psm_error_get_string(err));
|
||||
|
@ -65,11 +65,48 @@ static int
|
||||
ompi_mtl_psm_component_open(void)
|
||||
{
|
||||
|
||||
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, "connect_timeout",
|
||||
"PSM connection timeout value in seconds",
|
||||
false, false, 30, &ompi_mtl_psm.connect_timeout);
|
||||
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version,
|
||||
"connect_timeout",
|
||||
"PSM connection timeout value in seconds",
|
||||
false, false, 30, &ompi_mtl_psm.connect_timeout);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version,
|
||||
"debug",
|
||||
"PSM debug level",
|
||||
false, false, 1,
|
||||
&ompi_mtl_psm.debug_level);
|
||||
|
||||
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version,
|
||||
"ib_unit",
|
||||
"Truescale unit to use",
|
||||
false, false, -1,
|
||||
&ompi_mtl_psm.ib_unit);
|
||||
|
||||
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version,
|
||||
"ib_port",
|
||||
"Truescale port on unit to use",
|
||||
false, false, 0,
|
||||
&ompi_mtl_psm.ib_port);
|
||||
|
||||
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version,
|
||||
"ib_service_level",
|
||||
"Infiniband service level"
|
||||
"(0 <= SL <= 15)",
|
||||
false, false, 0, &ompi_mtl_psm.ib_service_level);
|
||||
|
||||
ompi_mtl_psm.ib_pkey = 0x7fffUL;
|
||||
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version,
|
||||
"ib_pkey",
|
||||
"Infiniband partition key",
|
||||
false, false, 0x7fffUL,
|
||||
&ompi_mtl_psm.ib_pkey);
|
||||
|
||||
if (ompi_mtl_psm.ib_service_level < 0)
|
||||
ompi_mtl_psm.ib_service_level = 0;
|
||||
else if (ompi_mtl_psm.ib_service_level > 15)
|
||||
ompi_mtl_psm.ib_service_level = 15;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
}
|
||||
|
||||
@ -97,6 +134,18 @@ ompi_mtl_psm_component_init(bool enable_progress_threads,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if PSM_VERNO >= 0x010c
|
||||
/* Set infinipath debug level */
|
||||
err = psm_setopt(PSM_COMPONENT_CORE, 0, PSM_CORE_OPT_DEBUG,
|
||||
(const void*) &ompi_mtl_psm.debug_level,
|
||||
sizeof(unsigned));
|
||||
if (err) {
|
||||
/* Non fatal error. Can continue */
|
||||
opal_output(0, "Unable to set infinipath debug level (error %s)\n",
|
||||
psm_error_get_string(err));
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Only allow for shm and ipath devices in 2.0 and earlier releases
|
||||
* (unless the user overrides the setting).
|
||||
*/
|
||||
@ -112,8 +161,8 @@ ompi_mtl_psm_component_init(bool enable_progress_threads,
|
||||
/*
|
||||
* Enable the 'self' device only in post-2.0 releases
|
||||
*/
|
||||
if (verno_major == 0x1 && verno_minor >= 0x04)
|
||||
setenv("PSM_DEVICES", "self,shm,ipath", 0);
|
||||
if (PSM_VERNO >= 0x0104)
|
||||
setenv("PSM_DEVICES", "self,shm,ipath", 0);
|
||||
|
||||
ompi_mtl_psm_module_init();
|
||||
|
||||
|
@ -69,9 +69,6 @@ ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl,
|
||||
mqtag,
|
||||
mtl_psm_request.buf,
|
||||
length);
|
||||
#if 0
|
||||
printf("send bits: 0x%016llx\n", mqtag);
|
||||
#endif
|
||||
|
||||
if (mtl_psm_request.free_after)
|
||||
free(mtl_psm_request.buf);
|
||||
@ -117,10 +114,6 @@ ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl,
|
||||
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS)
|
||||
flags |= PSM_MQ_FLAG_SENDSYNC;
|
||||
|
||||
#if 0
|
||||
printf("isend bits: 0x%016llx\n", mqtag);
|
||||
#endif
|
||||
|
||||
psm_error = psm_mq_isend(ompi_mtl_psm.mq,
|
||||
psm_endpoint->peer_addr,
|
||||
flags,
|
||||
|
@ -40,7 +40,13 @@ extern "C" {
|
||||
struct mca_mtl_psm_module_t {
|
||||
mca_mtl_base_module_t super; /**< base MTL interface */
|
||||
|
||||
int32_t connect_timeout;
|
||||
int32_t connect_timeout;
|
||||
|
||||
uint32_t debug_level;
|
||||
int32_t ib_unit;
|
||||
int32_t ib_port;
|
||||
int32_t ib_service_level;
|
||||
uint64_t ib_pkey;
|
||||
|
||||
psm_ep_t ep;
|
||||
psm_mq_t mq;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user