From 38e48d4e2f80777df2b2b6dcb899e063f1fc31eb Mon Sep 17 00:00:00 2001 From: Avneesh Pant Date: Fri, 24 Jul 2009 20:09:39 +0000 Subject: [PATCH] Add support for MCA parameters for PSM MTL to specify IB unit, port, IB service level and PSM debug level to use. Also specify in the openib btl params file that QLogic hardware supports a max inlined messages size of 0 only. This commit was SVN r21734. --- .../openib/mca-btl-openib-device-params.ini | 3 + ompi/mca/mtl/psm/mtl_psm.c | 18 ++++- ompi/mca/mtl/psm/mtl_psm_component.c | 65 ++++++++++++++++--- ompi/mca/mtl/psm/mtl_psm_send.c | 9 +-- ompi/mca/mtl/psm/mtl_psm_types.h | 10 ++- 5 files changed, 86 insertions(+), 19 deletions(-) diff --git a/ompi/mca/btl/openib/mca-btl-openib-device-params.ini b/ompi/mca/btl/openib/mca-btl-openib-device-params.ini index aeb4160b97..08a07ef461 100644 --- a/ompi/mca/btl/openib/mca-btl-openib-device-params.ini +++ b/ompi/mca/btl/openib/mca-btl-openib-device-params.ini @@ -190,18 +190,21 @@ vendor_id = 0x1fc1,0x1077,0x1175 vendor_part_id = 13 use_eager_rdma = 1 mtu = 2048 +max_inline_data = 0 [QLogic InfiniPath 2] vendor_id = 0x1fc1,0x1077,0x1175 vendor_part_id = 16,29216 use_eager_rdma = 1 mtu = 4096 +max_inline_data = 0 [QLogic InfiniPath 3] vendor_id = 0x1fc1,0x1077,0x1175 vendor_part_id = 16,29474 use_eager_rdma = 1 mtu = 4096 +max_inline_data = 0 ############################################################################ diff --git a/ompi/mca/mtl/psm/mtl_psm.c b/ompi/mca/mtl/psm/mtl_psm.c index bfbd612688..14e9ca03be 100644 --- a/ompi/mca/mtl/psm/mtl_psm.c +++ b/ompi/mca/mtl/psm/mtl_psm.c @@ -83,6 +83,7 @@ int ompi_mtl_psm_module_init() { psm_mq_t mq; psm_epid_t epid; /* unique lid+port identifier */ psm_uuid_t unique_job_key; + struct psm_ep_open_opts ep_opt; unsigned long long *uu = (unsigned long long *) unique_job_key; char *generated_key; @@ -103,7 +104,22 @@ int ompi_mtl_psm_module_init() { /* Handle our own errors for opening endpoints */ 
psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler); - err = psm_ep_open(unique_job_key, NULL, &ep, &epid); + bzero((void*) &ep_opt, sizeof(ep_opt)); + ep_opt.timeout = ompi_mtl_psm.connect_timeout * 1e9; + ep_opt.unit = ompi_mtl_psm.ib_unit; + ep_opt.affinity = -1; /* Let PSM choose affinity */ + ep_opt.shm_mbytes = -1; /* Choose PSM defaults */ + ep_opt.sendbufs_num = -1; /* Choose PSM defaults */ + +#if PSM_VERNO >= 0x0101 + ep_opt.network_pkey = ompi_mtl_psm.ib_pkey; +#endif + + ep_opt.port = ompi_mtl_psm.ib_port; + ep_opt.outsl = ompi_mtl_psm.ib_service_level; + + /* Open PSM endpoint */ + err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid); if (err) { opal_output(0, "Error in psm_ep_open (error %s)\n", psm_error_get_string(err)); diff --git a/ompi/mca/mtl/psm/mtl_psm_component.c b/ompi/mca/mtl/psm/mtl_psm_component.c index 348e069d88..67ca4030c6 100644 --- a/ompi/mca/mtl/psm/mtl_psm_component.c +++ b/ompi/mca/mtl/psm/mtl_psm_component.c @@ -65,11 +65,48 @@ static int ompi_mtl_psm_component_open(void) { - mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, "connect_timeout", - "PSM connection timeout value in seconds", - false, false, 30, &ompi_mtl_psm.connect_timeout); + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "connect_timeout", + "PSM connection timeout value in seconds", + false, false, 30, &ompi_mtl_psm.connect_timeout); + + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "debug", + "PSM debug level", + false, false, 1, + &ompi_mtl_psm.debug_level); + + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "ib_unit", + "Truescale unit to use", + false, false, -1, + &ompi_mtl_psm.ib_unit); - return OMPI_SUCCESS; + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "ib_port", + "Truescale port on unit to use", + false, false, 0, + &ompi_mtl_psm.ib_port); + + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "ib_service_level", 
"Infiniband service level " /* trailing space: the adjacent literal is concatenated directly onto this one */ + "(0 <= SL <= 15)", + false, false, 0, &ompi_mtl_psm.ib_service_level); + + ompi_mtl_psm.ib_pkey = 0x7fffUL; + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "ib_pkey", + "Infiniband partition key", + false, false, 0x7fffUL, + &ompi_mtl_psm.ib_pkey); + + if (ompi_mtl_psm.ib_service_level < 0) + ompi_mtl_psm.ib_service_level = 0; + else if (ompi_mtl_psm.ib_service_level > 15) + ompi_mtl_psm.ib_service_level = 15; + + return OMPI_SUCCESS; } @@ -96,7 +133,19 @@ ompi_mtl_psm_component_init(bool enable_progress_threads, psm_error_get_string(err)); return NULL; } - + +#if PSM_VERNO >= 0x010c + /* Set infinipath debug level */ + err = psm_setopt(PSM_COMPONENT_CORE, 0, PSM_CORE_OPT_DEBUG, + (const void*) &ompi_mtl_psm.debug_level, + sizeof(unsigned)); + if (err) { + /* Non fatal error. Can continue */ + opal_output(0, "Unable to set infinipath debug level (error %s)\n", + psm_error_get_string(err)); + } +#endif + /* Only allow for shm and ipath devices in 2.0 and earlier releases * (unless the user overrides the setting). */ @@ -112,9 +161,9 @@ ompi_mtl_psm_component_init(bool enable_progress_threads, /* * Enable 'self' device only in a post-2.0 release(s) */ - if (verno_major == 0x1 && verno_minor >= 0x04) - setenv("PSM_DEVICES", "self,shm,ipath", 0); - + if (PSM_VERNO >= 0x0104) + setenv("PSM_DEVICES", "self,shm,ipath", 0); + ompi_mtl_psm_module_init(); ompi_mtl_psm.super.mtl_request_size = diff --git a/ompi/mca/mtl/psm/mtl_psm_send.c b/ompi/mca/mtl/psm/mtl_psm_send.c index ddc0852ad4..f0cee56ef3 100644 --- a/ompi/mca/mtl/psm/mtl_psm_send.c +++ b/ompi/mca/mtl/psm/mtl_psm_send.c @@ -69,11 +69,8 @@ ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl, mqtag, mtl_psm_request.buf, length); -#if 0 - printf("send bits: 0x%016llx\n", mqtag); -#endif - if (mtl_psm_request.free_after) + if (mtl_psm_request.free_after) free(mtl_psm_request.buf); return err == PSM_OK ? 
OMPI_SUCCESS : OMPI_ERROR; @@ -117,10 +114,6 @@ ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl, if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) flags |= PSM_MQ_FLAG_SENDSYNC; -#if 0 - printf("isend bits: 0x%016llx\n", mqtag); -#endif - psm_error = psm_mq_isend(ompi_mtl_psm.mq, psm_endpoint->peer_addr, flags, diff --git a/ompi/mca/mtl/psm/mtl_psm_types.h b/ompi/mca/mtl/psm/mtl_psm_types.h index 25ab0b9201..7b29cef202 100644 --- a/ompi/mca/mtl/psm/mtl_psm_types.h +++ b/ompi/mca/mtl/psm/mtl_psm_types.h @@ -40,8 +40,14 @@ extern "C" { struct mca_mtl_psm_module_t { mca_mtl_base_module_t super; /**< base MTL interface */ - int32_t connect_timeout; - + int32_t connect_timeout; + + uint32_t debug_level; + int32_t ib_unit; + int32_t ib_port; + int32_t ib_service_level; + uint64_t ib_pkey; + psm_ep_t ep; psm_mq_t mq; psm_epid_t epid;