From 774b9657842e7999fdd41279b9ca251fc6f61a1a Mon Sep 17 00:00:00 2001 From: Avneesh Pant Date: Wed, 13 Jan 2010 18:58:00 +0000 Subject: [PATCH] Add in support to specify IB path record query mechanism and IB Application/Service ID for PSM MTL. Also fix a minor bug in calculating the minimum connection timeout. This commit was SVN r22397. --- ompi/mca/mtl/psm/help-mtl-psm.txt | 3 ++ ompi/mca/mtl/psm/mtl_psm.c | 16 +++++++--- ompi/mca/mtl/psm/mtl_psm_component.c | 46 +++++++++++++++++++++++++--- ompi/mca/mtl/psm/mtl_psm_types.h | 5 +++ 4 files changed, 61 insertions(+), 9 deletions(-) diff --git a/ompi/mca/mtl/psm/help-mtl-psm.txt b/ompi/mca/mtl/psm/help-mtl-psm.txt index 9514def12b..7ca60a5396 100644 --- a/ompi/mca/mtl/psm/help-mtl-psm.txt +++ b/ompi/mca/mtl/psm/help-mtl-psm.txt @@ -38,3 +38,6 @@ Unable to post application receive buffer (psm_mq_irecv). Error: %s Buffer: %p Length: %d +# +[path query mechanism unknown] +Unknown path record query mechanism %s. Supported mechanisms are %s. diff --git a/ompi/mca/mtl/psm/mtl_psm.c b/ompi/mca/mtl/psm/mtl_psm.c index 04f4b9bcc6..cc0844c7b0 100644 --- a/ompi/mca/mtl/psm/mtl_psm.c +++ b/ompi/mca/mtl/psm/mtl_psm.c @@ -108,7 +108,7 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { /* Handle our own errors for opening endpoints */ psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler); - + /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware * contexts correctly. */ @@ -134,6 +134,11 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { ep_opt.outsl = ompi_mtl_psm.ib_service_level; #endif +#if PSM_VERNO >= 0x010d + ep_opt.service_id = ompi_mtl_psm.ib_service_id; + ep_opt.path_res_type = ompi_mtl_psm.path_res_type; +#endif + /* Open PSM endpoint */ err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid); if (err) { @@ -232,6 +237,10 @@ ompi_mtl_psm_connect_error_msg(psm_error_t err) # define min(a,b) ((a) < (b) ? (a) : (b)) #endif +#ifndef max +# define max(a,b) ((a) > (b) ? (a) : (b)) +#endif + int ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, @@ -275,10 +284,7 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, epids_in[i] = *epid; } - timeout_in_secs = min(180, 0.5 * nprocs); - if (ompi_mtl_psm.connect_timeout < timeout_in_secs) { - timeout_in_secs = ompi_mtl_psm.connect_timeout; - } + timeout_in_secs = max(ompi_mtl_psm.connect_timeout, 0.5 * nprocs); psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_NOP); diff --git a/ompi/mca/mtl/psm/mtl_psm_component.c b/ompi/mca/mtl/psm/mtl_psm_component.c index b8399c50f2..5e8650b4d8 100644 --- a/ompi/mca/mtl/psm/mtl_psm_component.c +++ b/ompi/mca/mtl/psm/mtl_psm_component.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,6 +33,7 @@ static int ompi_mtl_psm_component_open(void); static int ompi_mtl_psm_component_close(void); +static int ompi_mtl_psm_component_register(void); static mca_mtl_base_module_t* ompi_mtl_psm_component_init( bool enable_progress_threads, bool enable_mpi_threads ); @@ -51,7 +52,9 @@ mca_mtl_psm_component_t mca_mtl_psm_component = { OMPI_MINOR_VERSION, /* MCA component minor version */ OMPI_RELEASE_VERSION, /* MCA component release version */ ompi_mtl_psm_component_open, /* component open */ - ompi_mtl_psm_component_close /* component close */ + ompi_mtl_psm_component_close, /* component close */ + NULL, + ompi_mtl_psm_component_register }, { /* The component is not checkpoint ready */ @@ -64,14 +67,16 @@ mca_mtl_psm_component_t mca_mtl_psm_component = { static int -ompi_mtl_psm_component_open(void) +ompi_mtl_psm_component_register(void) { int value; + char *service_id = NULL; + char *path_res = NULL; mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, "connect_timeout", "PSM connection timeout value in seconds", - false, false, 30, &ompi_mtl_psm.connect_timeout); + false, false, 180, &ompi_mtl_psm.connect_timeout); mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, "debug", @@ -106,6 +111,34 @@ ompi_mtl_psm_component_open(void) &value); ompi_mtl_psm.ib_pkey = value; +#if PSM_VERNO >= 0x010d + mca_base_param_reg_string(&mca_mtl_psm_component.super.mtl_version, + "ib_service_id", + "Infiniband service ID to use for application (default is 0)", + false, false, "0x1000117500000000", + &service_id); + ompi_mtl_psm.ib_service_id = (uint64_t) strtoull(service_id, NULL, 0); + + mca_base_param_reg_string(&mca_mtl_psm_component.super.mtl_version, + "path_query", + "Path record query mechanisms (valid values: opp, none)", + false, false, NULL, &path_res); + if ((NULL != path_res) && strcasecmp(path_res, "none")) { + if (!strcasecmp(path_res, "opp")) + ompi_mtl_psm.path_res_type = PSM_PATH_RES_OPP; + else { + orte_show_help("help-mtl-psm.txt", + "path query mechanism unknown", true, + path_res, "OfedPlus (opp) | Static Routes (none)"); + return OMPI_ERR_NOT_FOUND; + } + } + else { + /* Default is "static/none" path record queries */ + ompi_mtl_psm.path_res_type = PSM_PATH_RES_NONE; + } +#endif + if (ompi_mtl_psm.ib_service_level < 0) { ompi_mtl_psm.ib_service_level = 0; } else if (ompi_mtl_psm.ib_service_level > 15) { @@ -116,6 +149,11 @@ ompi_mtl_psm_component_open(void) } +static int +ompi_mtl_psm_component_open(void) +{ + return OMPI_SUCCESS; +} static int ompi_mtl_psm_component_close(void) diff --git a/ompi/mca/mtl/psm/mtl_psm_types.h b/ompi/mca/mtl/psm/mtl_psm_types.h index fd58d4b357..ceedf8fd26 100644 --- a/ompi/mca/mtl/psm/mtl_psm_types.h +++ b/ompi/mca/mtl/psm/mtl_psm_types.h @@ -46,6 +46,11 @@ struct mca_mtl_psm_module_t { int32_t ib_service_level; uint64_t ib_pkey; +#if PSM_VERNO >= 0x010d + uint64_t ib_service_id; + psm_path_res_t path_res_type; +#endif + psm_ep_t ep; psm_mq_t mq; psm_epid_t epid;