diff --git a/ompi/mca/btl/openib/mca-btl-openib-device-params.ini b/ompi/mca/btl/openib/mca-btl-openib-device-params.ini index aeb4160b97..08a07ef461 100644 --- a/ompi/mca/btl/openib/mca-btl-openib-device-params.ini +++ b/ompi/mca/btl/openib/mca-btl-openib-device-params.ini @@ -190,18 +190,21 @@ vendor_id = 0x1fc1,0x1077,0x1175 vendor_part_id = 13 use_eager_rdma = 1 mtu = 2048 +max_inline_data = 0 [QLogic InfiniPath 2] vendor_id = 0x1fc1,0x1077,0x1175 vendor_part_id = 16,29216 use_eager_rdma = 1 mtu = 4096 +max_inline_data = 0 [QLogic InfiniPath 3] vendor_id = 0x1fc1,0x1077,0x1175 vendor_part_id = 16,29474 use_eager_rdma = 1 mtu = 4096 +max_inline_data = 0 ############################################################################ diff --git a/ompi/mca/mtl/psm/mtl_psm.c b/ompi/mca/mtl/psm/mtl_psm.c index bfbd612688..14e9ca03be 100644 --- a/ompi/mca/mtl/psm/mtl_psm.c +++ b/ompi/mca/mtl/psm/mtl_psm.c @@ -83,6 +83,7 @@ int ompi_mtl_psm_module_init() { psm_mq_t mq; psm_epid_t epid; /* unique lid+port identifier */ psm_uuid_t unique_job_key; + struct psm_ep_open_opts ep_opt; unsigned long long *uu = (unsigned long long *) unique_job_key; char *generated_key; @@ -103,7 +104,22 @@ int ompi_mtl_psm_module_init() { /* Handle our own errors for opening endpoints */ psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler); - err = psm_ep_open(unique_job_key, NULL, &ep, &epid); + bzero((void*) &ep_opt, sizeof(ep_opt)); + ep_opt.timeout = ompi_mtl_psm.connect_timeout * 1e9; + ep_opt.unit = ompi_mtl_psm.ib_unit; + ep_opt.affinity = -1; /* Let PSM choose affinity */ + ep_opt.shm_mbytes = -1; /* Choose PSM defaults */ + ep_opt.sendbufs_num = -1; /* Choose PSM defaults */ + +#if PSM_VERNO >= 0x0101 + ep_opt.network_pkey = ompi_mtl_psm.ib_pkey; +#endif + + ep_opt.port = ompi_mtl_psm.ib_port; + ep_opt.outsl = ompi_mtl_psm.ib_service_level; + + /* Open PSM endpoint */ + err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid); if (err) { opal_output(0, "Error in psm_ep_open (error %s)\n", psm_error_get_string(err)); diff --git a/ompi/mca/mtl/psm/mtl_psm_component.c b/ompi/mca/mtl/psm/mtl_psm_component.c index 348e069d88..67ca4030c6 100644 --- a/ompi/mca/mtl/psm/mtl_psm_component.c +++ b/ompi/mca/mtl/psm/mtl_psm_component.c @@ -65,11 +65,48 @@ static int ompi_mtl_psm_component_open(void) { - mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, "connect_timeout", - "PSM connection timeout value in seconds", - false, false, 30, &ompi_mtl_psm.connect_timeout); + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "connect_timeout", + "PSM connection timeout value in seconds", + false, false, 30, &ompi_mtl_psm.connect_timeout); + + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "debug", + "PSM debug level", + false, false, 1, + &ompi_mtl_psm.debug_level); + + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "ib_unit", + "Truescale unit to use", + false, false, -1, + &ompi_mtl_psm.ib_unit); - return OMPI_SUCCESS; + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "ib_port", + "Truescale port on unit to use", + false, false, 0, + &ompi_mtl_psm.ib_port); + + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "ib_service_level", + "Infiniband service level" + "(0 <= SL <= 15)", + false, false, 0, &ompi_mtl_psm.ib_service_level); + + ompi_mtl_psm.ib_pkey = 0x7fffUL; + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "ib_pkey", + "Infiniband partition key", + false, false, 0x7fffUL, + &ompi_mtl_psm.ib_pkey); + + if (ompi_mtl_psm.ib_service_level < 0) + ompi_mtl_psm.ib_service_level = 0; + else if (ompi_mtl_psm.ib_service_level > 15) + ompi_mtl_psm.ib_service_level = 15; + + return OMPI_SUCCESS; } @@ -96,7 +133,19 @@ ompi_mtl_psm_component_init(bool enable_progress_threads, psm_error_get_string(err)); return NULL; } - + +#if PSM_VERNO >= 0x010c + /* Set infinipath debug level */ + err = psm_setopt(PSM_COMPONENT_CORE, 0, PSM_CORE_OPT_DEBUG, + (const void*) &ompi_mtl_psm.debug_level, + sizeof(unsigned)); + if (err) { + /* Non fatal error. Can continue */ + opal_output(0, "Unable to set infinipath debug level (error %s)\n", + psm_error_get_string(err)); + } +#endif + /* Only allow for shm and ipath devices in 2.0 and earlier releases * (unless the user overrides the setting). */ @@ -112,9 +161,9 @@ ompi_mtl_psm_component_init(bool enable_progress_threads, /* * Enable 'self' device only in a post-2.0 release(s) */ - if (verno_major == 0x1 && verno_minor >= 0x04) - setenv("PSM_DEVICES", "self,shm,ipath", 0); - + if (PSM_VERNO >= 0x0104) + setenv("PSM_DEVICES", "self,shm,ipath", 0); + ompi_mtl_psm_module_init(); ompi_mtl_psm.super.mtl_request_size = diff --git a/ompi/mca/mtl/psm/mtl_psm_send.c b/ompi/mca/mtl/psm/mtl_psm_send.c index ddc0852ad4..f0cee56ef3 100644 --- a/ompi/mca/mtl/psm/mtl_psm_send.c +++ b/ompi/mca/mtl/psm/mtl_psm_send.c @@ -69,11 +69,8 @@ ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl, mqtag, mtl_psm_request.buf, length); -#if 0 - printf("send bits: 0x%016llx\n", mqtag); -#endif - if (mtl_psm_request.free_after) + if (mtl_psm_request.free_after) free(mtl_psm_request.buf); return err == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR; @@ -117,10 +114,6 @@ ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl, if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) flags |= PSM_MQ_FLAG_SENDSYNC; -#if 0 - printf("isend bits: 0x%016llx\n", mqtag); -#endif - psm_error = psm_mq_isend(ompi_mtl_psm.mq, psm_endpoint->peer_addr, flags, diff --git a/ompi/mca/mtl/psm/mtl_psm_types.h b/ompi/mca/mtl/psm/mtl_psm_types.h index 25ab0b9201..7b29cef202 100644 --- a/ompi/mca/mtl/psm/mtl_psm_types.h +++ b/ompi/mca/mtl/psm/mtl_psm_types.h @@ -40,8 +40,14 @@ extern "C" { struct mca_mtl_psm_module_t { mca_mtl_base_module_t super; /**< base MTL interface */ - int32_t connect_timeout; - + int32_t connect_timeout; + + uint32_t debug_level; + int32_t ib_unit; + int32_t ib_port; + int32_t ib_service_level; + uint64_t ib_pkey; + psm_ep_t ep; psm_mq_t mq; psm_epid_t epid;