1
1

Add support for specifying the IB path record query mechanism and the IB Application/Service ID for the PSM MTL. Also fix a minor bug in calculating the minimum connection timeout.

This commit was SVN r22397.
Этот коммит содержится в:
Avneesh Pant 2010-01-13 18:58:00 +00:00
родитель ae1719306b
Коммит 774b965784
4 изменённых файлов: 61 добавлений и 9 удалений

Просмотреть файл

@ -38,3 +38,6 @@ Unable to post application receive buffer (psm_mq_irecv).
Error: %s Error: %s
Buffer: %p Buffer: %p
Length: %d Length: %d
#
[path query mechanism unknown]
Unknown path record query mechanism %s. Supported mechanisms are %s.

Просмотреть файл

@ -108,7 +108,7 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) {
/* Handle our own errors for opening endpoints */ /* Handle our own errors for opening endpoints */
psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler); psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);
/* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware
* contexts correctly. * contexts correctly.
*/ */
@ -134,6 +134,11 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) {
ep_opt.outsl = ompi_mtl_psm.ib_service_level; ep_opt.outsl = ompi_mtl_psm.ib_service_level;
#endif #endif
#if PSM_VERNO >= 0x010d
ep_opt.service_id = ompi_mtl_psm.ib_service_id;
ep_opt.path_res_type = ompi_mtl_psm.path_res_type;
#endif
/* Open PSM endpoint */ /* Open PSM endpoint */
err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid); err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid);
if (err) { if (err) {
@ -232,6 +237,10 @@ ompi_mtl_psm_connect_error_msg(psm_error_t err)
# define min(a,b) ((a) < (b) ? (a) : (b)) # define min(a,b) ((a) < (b) ? (a) : (b))
#endif #endif
#ifndef max
# define max(a,b) ((a) > (b) ? (a) : (b))
#endif
int int
ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
size_t nprocs, size_t nprocs,
@ -275,10 +284,7 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
epids_in[i] = *epid; epids_in[i] = *epid;
} }
timeout_in_secs = min(180, 0.5 * nprocs); timeout_in_secs = max(ompi_mtl_psm.connect_timeout, 0.5 * nprocs);
if (ompi_mtl_psm.connect_timeout < timeout_in_secs) {
timeout_in_secs = ompi_mtl_psm.connect_timeout;
}
psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_NOP); psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_NOP);

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2009 QLogic Corporation. All rights reserved. * Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -33,6 +33,7 @@
static int ompi_mtl_psm_component_open(void); static int ompi_mtl_psm_component_open(void);
static int ompi_mtl_psm_component_close(void); static int ompi_mtl_psm_component_close(void);
static int ompi_mtl_psm_component_register(void);
static mca_mtl_base_module_t* ompi_mtl_psm_component_init( bool enable_progress_threads, static mca_mtl_base_module_t* ompi_mtl_psm_component_init( bool enable_progress_threads,
bool enable_mpi_threads ); bool enable_mpi_threads );
@ -51,7 +52,9 @@ mca_mtl_psm_component_t mca_mtl_psm_component = {
OMPI_MINOR_VERSION, /* MCA component minor version */ OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */ OMPI_RELEASE_VERSION, /* MCA component release version */
ompi_mtl_psm_component_open, /* component open */ ompi_mtl_psm_component_open, /* component open */
ompi_mtl_psm_component_close /* component close */ ompi_mtl_psm_component_close, /* component close */
NULL,
ompi_mtl_psm_component_register
}, },
{ {
/* The component is not checkpoint ready */ /* The component is not checkpoint ready */
@ -64,14 +67,16 @@ mca_mtl_psm_component_t mca_mtl_psm_component = {
static int static int
ompi_mtl_psm_component_open(void) ompi_mtl_psm_component_register(void)
{ {
int value; int value;
char *service_id = NULL;
char *path_res = NULL;
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version,
"connect_timeout", "connect_timeout",
"PSM connection timeout value in seconds", "PSM connection timeout value in seconds",
false, false, 30, &ompi_mtl_psm.connect_timeout); false, false, 180, &ompi_mtl_psm.connect_timeout);
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version,
"debug", "debug",
@ -106,6 +111,34 @@ ompi_mtl_psm_component_open(void)
&value); &value);
ompi_mtl_psm.ib_pkey = value; ompi_mtl_psm.ib_pkey = value;
#if PSM_VERNO >= 0x010d
mca_base_param_reg_string(&mca_mtl_psm_component.super.mtl_version,
"ib_service_id",
"Infiniband service ID to use for application (default is 0)",
false, false, "0x1000117500000000",
&service_id);
ompi_mtl_psm.ib_service_id = (uint64_t) strtoull(service_id, NULL, 0);
mca_base_param_reg_string(&mca_mtl_psm_component.super.mtl_version,
"path_query",
"Path record query mechanisms (valid values: opp, none)",
false, false, NULL, &path_res);
if ((NULL != path_res) && strcasecmp(path_res, "none")) {
if (!strcasecmp(path_res, "opp"))
ompi_mtl_psm.path_res_type = PSM_PATH_RES_OPP;
else {
orte_show_help("help-mtl-psm.txt",
"path query mechanism unknown", true,
path_res, "OfedPlus (opp) | Static Routes (none)");
return OMPI_ERR_NOT_FOUND;
}
}
else {
/* Default is "static/none" path record queries */
ompi_mtl_psm.path_res_type = PSM_PATH_RES_NONE;
}
#endif
if (ompi_mtl_psm.ib_service_level < 0) { if (ompi_mtl_psm.ib_service_level < 0) {
ompi_mtl_psm.ib_service_level = 0; ompi_mtl_psm.ib_service_level = 0;
} else if (ompi_mtl_psm.ib_service_level > 15) { } else if (ompi_mtl_psm.ib_service_level > 15) {
@ -116,6 +149,11 @@ ompi_mtl_psm_component_open(void)
} }
/*
 * Component open hook for the PSM MTL.
 *
 * Performs no work and always returns OMPI_SUCCESS.
 * NOTE(review): MCA parameter registration appears to live in
 * ompi_mtl_psm_component_register() instead -- confirm against the
 * component structure initializer.
 */
static int
ompi_mtl_psm_component_open(void)
{
return OMPI_SUCCESS;
}
static int static int
ompi_mtl_psm_component_close(void) ompi_mtl_psm_component_close(void)

Просмотреть файл

@ -46,6 +46,11 @@ struct mca_mtl_psm_module_t {
int32_t ib_service_level; int32_t ib_service_level;
uint64_t ib_pkey; uint64_t ib_pkey;
#if PSM_VERNO >= 0x010d
uint64_t ib_service_id;
psm_path_res_t path_res_type;
#endif
psm_ep_t ep; psm_ep_t ep;
psm_mq_t mq; psm_mq_t mq;
psm_epid_t epid; psm_epid_t epid;