Add support for specifying the IB path record query mechanism and the IB Application/Service ID for the PSM MTL. Also fix a minor bug in calculating the minimum connection timeout.
This commit was SVN r22397.
Этот коммит содержится в:
родитель
ae1719306b
Коммит
774b965784
@ -38,3 +38,6 @@ Unable to post application receive buffer (psm_mq_irecv).
|
|||||||
Error: %s
|
Error: %s
|
||||||
Buffer: %p
|
Buffer: %p
|
||||||
Length: %d
|
Length: %d
|
||||||
|
#
|
||||||
|
[path query mechanism unknown]
|
||||||
|
Unknown path record query mechanism %s. Supported mechanisms are %s.
|
||||||
|
@ -108,7 +108,7 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) {
|
|||||||
|
|
||||||
/* Handle our own errors for opening endpoints */
|
/* Handle our own errors for opening endpoints */
|
||||||
psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);
|
psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);
|
||||||
|
|
||||||
/* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware
|
/* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware
|
||||||
* contexts correctly.
|
* contexts correctly.
|
||||||
*/
|
*/
|
||||||
@ -134,6 +134,11 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) {
|
|||||||
ep_opt.outsl = ompi_mtl_psm.ib_service_level;
|
ep_opt.outsl = ompi_mtl_psm.ib_service_level;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if PSM_VERNO >= 0x010d
|
||||||
|
ep_opt.service_id = ompi_mtl_psm.ib_service_id;
|
||||||
|
ep_opt.path_res_type = ompi_mtl_psm.path_res_type;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Open PSM endpoint */
|
/* Open PSM endpoint */
|
||||||
err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid);
|
err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid);
|
||||||
if (err) {
|
if (err) {
|
||||||
@ -232,6 +237,10 @@ ompi_mtl_psm_connect_error_msg(psm_error_t err)
|
|||||||
# define min(a,b) ((a) < (b) ? (a) : (b))
|
# define min(a,b) ((a) < (b) ? (a) : (b))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef max
|
||||||
|
# define max(a,b) ((a) > (b) ? (a) : (b))
|
||||||
|
#endif
|
||||||
|
|
||||||
int
|
int
|
||||||
ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
|
ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
|
||||||
size_t nprocs,
|
size_t nprocs,
|
||||||
@ -275,10 +284,7 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
|
|||||||
epids_in[i] = *epid;
|
epids_in[i] = *epid;
|
||||||
}
|
}
|
||||||
|
|
||||||
timeout_in_secs = min(180, 0.5 * nprocs);
|
timeout_in_secs = max(ompi_mtl_psm.connect_timeout, 0.5 * nprocs);
|
||||||
if (ompi_mtl_psm.connect_timeout < timeout_in_secs) {
|
|
||||||
timeout_in_secs = ompi_mtl_psm.connect_timeout;
|
|
||||||
}
|
|
||||||
|
|
||||||
psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_NOP);
|
psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_NOP);
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
* University of Stuttgart. All rights reserved.
|
* University of Stuttgart. All rights reserved.
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2006-2009 QLogic Corporation. All rights reserved.
|
* Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -33,6 +33,7 @@
|
|||||||
|
|
||||||
static int ompi_mtl_psm_component_open(void);
|
static int ompi_mtl_psm_component_open(void);
|
||||||
static int ompi_mtl_psm_component_close(void);
|
static int ompi_mtl_psm_component_close(void);
|
||||||
|
static int ompi_mtl_psm_component_register(void);
|
||||||
|
|
||||||
static mca_mtl_base_module_t* ompi_mtl_psm_component_init( bool enable_progress_threads,
|
static mca_mtl_base_module_t* ompi_mtl_psm_component_init( bool enable_progress_threads,
|
||||||
bool enable_mpi_threads );
|
bool enable_mpi_threads );
|
||||||
@ -51,7 +52,9 @@ mca_mtl_psm_component_t mca_mtl_psm_component = {
|
|||||||
OMPI_MINOR_VERSION, /* MCA component minor version */
|
OMPI_MINOR_VERSION, /* MCA component minor version */
|
||||||
OMPI_RELEASE_VERSION, /* MCA component release version */
|
OMPI_RELEASE_VERSION, /* MCA component release version */
|
||||||
ompi_mtl_psm_component_open, /* component open */
|
ompi_mtl_psm_component_open, /* component open */
|
||||||
ompi_mtl_psm_component_close /* component close */
|
ompi_mtl_psm_component_close, /* component close */
|
||||||
|
NULL,
|
||||||
|
ompi_mtl_psm_component_register
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
/* The component is not checkpoint ready */
|
/* The component is not checkpoint ready */
|
||||||
@ -64,14 +67,16 @@ mca_mtl_psm_component_t mca_mtl_psm_component = {
|
|||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
ompi_mtl_psm_component_open(void)
|
ompi_mtl_psm_component_register(void)
|
||||||
{
|
{
|
||||||
int value;
|
int value;
|
||||||
|
char *service_id = NULL;
|
||||||
|
char *path_res = NULL;
|
||||||
|
|
||||||
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version,
|
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version,
|
||||||
"connect_timeout",
|
"connect_timeout",
|
||||||
"PSM connection timeout value in seconds",
|
"PSM connection timeout value in seconds",
|
||||||
false, false, 30, &ompi_mtl_psm.connect_timeout);
|
false, false, 180, &ompi_mtl_psm.connect_timeout);
|
||||||
|
|
||||||
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version,
|
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version,
|
||||||
"debug",
|
"debug",
|
||||||
@ -106,6 +111,34 @@ ompi_mtl_psm_component_open(void)
|
|||||||
&value);
|
&value);
|
||||||
ompi_mtl_psm.ib_pkey = value;
|
ompi_mtl_psm.ib_pkey = value;
|
||||||
|
|
||||||
|
#if PSM_VERNO >= 0x010d
|
||||||
|
mca_base_param_reg_string(&mca_mtl_psm_component.super.mtl_version,
|
||||||
|
"ib_service_id",
|
||||||
|
"Infiniband service ID to use for application (default is 0)",
|
||||||
|
false, false, "0x1000117500000000",
|
||||||
|
&service_id);
|
||||||
|
ompi_mtl_psm.ib_service_id = (uint64_t) strtoull(service_id, NULL, 0);
|
||||||
|
|
||||||
|
mca_base_param_reg_string(&mca_mtl_psm_component.super.mtl_version,
|
||||||
|
"path_query",
|
||||||
|
"Path record query mechanisms (valid values: opp, none)",
|
||||||
|
false, false, NULL, &path_res);
|
||||||
|
if ((NULL != path_res) && strcasecmp(path_res, "none")) {
|
||||||
|
if (!strcasecmp(path_res, "opp"))
|
||||||
|
ompi_mtl_psm.path_res_type = PSM_PATH_RES_OPP;
|
||||||
|
else {
|
||||||
|
orte_show_help("help-mtl-psm.txt",
|
||||||
|
"path query mechanism unknown", true,
|
||||||
|
path_res, "OfedPlus (opp) | Static Routes (none)");
|
||||||
|
return OMPI_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* Default is "static/none" path record queries */
|
||||||
|
ompi_mtl_psm.path_res_type = PSM_PATH_RES_NONE;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (ompi_mtl_psm.ib_service_level < 0) {
|
if (ompi_mtl_psm.ib_service_level < 0) {
|
||||||
ompi_mtl_psm.ib_service_level = 0;
|
ompi_mtl_psm.ib_service_level = 0;
|
||||||
} else if (ompi_mtl_psm.ib_service_level > 15) {
|
} else if (ompi_mtl_psm.ib_service_level > 15) {
|
||||||
@ -116,6 +149,11 @@ ompi_mtl_psm_component_open(void)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
ompi_mtl_psm_component_open(void)
|
||||||
|
{
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
ompi_mtl_psm_component_close(void)
|
ompi_mtl_psm_component_close(void)
|
||||||
|
@ -46,6 +46,11 @@ struct mca_mtl_psm_module_t {
|
|||||||
int32_t ib_service_level;
|
int32_t ib_service_level;
|
||||||
uint64_t ib_pkey;
|
uint64_t ib_pkey;
|
||||||
|
|
||||||
|
#if PSM_VERNO >= 0x010d
|
||||||
|
uint64_t ib_service_id;
|
||||||
|
psm_path_res_t path_res_type;
|
||||||
|
#endif
|
||||||
|
|
||||||
psm_ep_t ep;
|
psm_ep_t ep;
|
||||||
psm_mq_t mq;
|
psm_mq_t mq;
|
||||||
psm_epid_t epid;
|
psm_epid_t epid;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user