366 строки
13 KiB
C
366 строки
13 KiB
C
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation. All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation. All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
 * Copyright (c) 2012-2015 Los Alamos National Security, LLC.
 *                         All rights reserved.
 * Copyright (c) 2014 Intel Corporation. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
|
||
|
|
||
|
#include "ompi_config.h"
|
||
|
|
||
|
#include "opal/mca/event/event.h"
|
||
|
#include "opal/util/output.h"
|
||
|
#include "opal/util/show_help.h"
|
||
|
#include "ompi/proc/proc.h"
|
||
|
|
||
|
#include "mtl_psm.h"
|
||
|
#include "mtl_psm_types.h"
|
||
|
#include "mtl_psm_request.h"
|
||
|
|
||
|
#include "psm.h"
|
||
|
|
||
|
#include <sys/types.h>
|
||
|
#include <sys/stat.h>
|
||
|
#include <unistd.h>
|
||
|
#include <glob.h>
|
||
|
|
||
|
static int param_priority;
|
||
|
|
||
|
static int ompi_mtl_psm_component_open(void);
|
||
|
static int ompi_mtl_psm_component_close(void);
|
||
|
static int ompi_mtl_psm_component_query(mca_base_module_t **module, int *priority);
|
||
|
static int ompi_mtl_psm_component_register(void);
|
||
|
|
||
|
static mca_mtl_base_module_t* ompi_mtl_psm_component_init( bool enable_progress_threads,
|
||
|
bool enable_mpi_threads );
|
||
|
|
||
|
mca_mtl_psm_component_t mca_mtl_psm_component = {
|
||
|
|
||
|
{
|
||
|
/* First, the mca_base_component_t struct containing meta
|
||
|
* information about the component itself */
|
||
|
|
||
|
.mtl_version = {
|
||
|
MCA_MTL_BASE_VERSION_2_0_0,
|
||
|
|
||
|
.mca_component_name = "psm",
|
||
|
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
|
||
|
OMPI_RELEASE_VERSION),
|
||
|
.mca_open_component = ompi_mtl_psm_component_open,
|
||
|
.mca_close_component = ompi_mtl_psm_component_close,
|
||
|
.mca_query_component = ompi_mtl_psm_component_query,
|
||
|
.mca_register_component_params = ompi_mtl_psm_component_register,
|
||
|
},
|
||
|
.mtl_data = {
|
||
|
/* The component is not checkpoint ready */
|
||
|
MCA_BASE_METADATA_PARAM_NONE
|
||
|
},
|
||
|
|
||
|
.mtl_init = ompi_mtl_psm_component_init,
|
||
|
}
|
||
|
};
|
||
|
|
||
|
#if PSM_VERNO >= 0x010d
/* Value/name pairs used to build the "mtl_psm_path_query" enumerator in
 * ompi_mtl_psm_component_register(); the table ends with a {0, NULL}
 * entry. */
static mca_base_var_enum_value_t path_query_values[] = {
    { PSM_PATH_RES_NONE, "none" },
    { PSM_PATH_RES_OPP,  "opp"  },
    { 0,                 NULL   }
};
#endif
|
||
|
|
||
|
static int
|
||
|
ompi_mtl_psm_component_register(void)
|
||
|
{
|
||
|
#if PSM_VERNO >= 0x010d
|
||
|
mca_base_var_enum_t *new_enum;
|
||
|
#endif
|
||
|
|
||
|
|
||
|
/* set priority high enough to beat ob1's default */
|
||
|
param_priority = 30;
|
||
|
(void) mca_base_component_var_register (&mca_mtl_psm_component.super.mtl_version,
|
||
|
"priority", "Priority of the PSM MTL component",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||
|
OPAL_INFO_LVL_9,
|
||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||
|
¶m_priority);
|
||
|
|
||
|
ompi_mtl_psm.connect_timeout = 180;
|
||
|
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
||
|
"connect_timeout",
|
||
|
"PSM connection timeout value in seconds",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||
|
OPAL_INFO_LVL_9,
|
||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||
|
&ompi_mtl_psm.connect_timeout);
|
||
|
|
||
|
ompi_mtl_psm.debug_level = 1;
|
||
|
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
||
|
"debug", "PSM debug level",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||
|
OPAL_INFO_LVL_9,
|
||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||
|
&ompi_mtl_psm.debug_level);
|
||
|
|
||
|
ompi_mtl_psm.ib_unit = -1;
|
||
|
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
||
|
"ib_unit", "Truescale unit to use",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||
|
OPAL_INFO_LVL_9,
|
||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||
|
&ompi_mtl_psm.ib_unit);
|
||
|
|
||
|
ompi_mtl_psm.ib_port = 0;
|
||
|
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
||
|
"ib_port", "Truescale port on unit to use",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||
|
OPAL_INFO_LVL_9,
|
||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||
|
&ompi_mtl_psm.ib_port);
|
||
|
|
||
|
ompi_mtl_psm.ib_service_level = 0;
|
||
|
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
||
|
"ib_service_level", "Infiniband service level"
|
||
|
"(0 <= SL <= 15)", MCA_BASE_VAR_TYPE_INT,
|
||
|
NULL, 0, 0, OPAL_INFO_LVL_9,
|
||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||
|
&ompi_mtl_psm.ib_service_level);
|
||
|
|
||
|
ompi_mtl_psm.ib_pkey = 0x7fffUL;
|
||
|
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
||
|
"ib_pkey", "Infiniband partition key",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||
|
OPAL_INFO_LVL_9,
|
||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||
|
&ompi_mtl_psm.ib_pkey);
|
||
|
|
||
|
#if PSM_VERNO >= 0x010d
|
||
|
ompi_mtl_psm.ib_service_id = 0x1000117500000000ull;
|
||
|
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
||
|
"ib_service_id",
|
||
|
"Infiniband service ID to use for application (default is 0)",
|
||
|
MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, 0, 0,
|
||
|
OPAL_INFO_LVL_9,
|
||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||
|
&ompi_mtl_psm.ib_service_id);
|
||
|
|
||
|
ompi_mtl_psm.path_res_type = PSM_PATH_RES_NONE;
|
||
|
mca_base_var_enum_create("mtl_psm_path_query", path_query_values, &new_enum);
|
||
|
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
||
|
"path_query",
|
||
|
"Path record query mechanisms",
|
||
|
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
|
||
|
OPAL_INFO_LVL_9,
|
||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||
|
&ompi_mtl_psm.path_res_type);
|
||
|
OBJ_RELEASE(new_enum);
|
||
|
#endif
|
||
|
|
||
|
return OMPI_SUCCESS;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
ompi_mtl_psm_component_open(void)
|
||
|
{
|
||
|
struct stat st;
|
||
|
|
||
|
if (ompi_mtl_psm.ib_service_level < 0) {
|
||
|
ompi_mtl_psm.ib_service_level = 0;
|
||
|
} else if (ompi_mtl_psm.ib_service_level > 15) {
|
||
|
ompi_mtl_psm.ib_service_level = 15;
|
||
|
}
|
||
|
|
||
|
/* Component available only if Truescale hardware is present */
|
||
|
if (0 != stat("/dev/ipath", &st)) {
|
||
|
return OPAL_ERR_NOT_AVAILABLE;
|
||
|
}
|
||
|
|
||
|
/* Component available only if at least one qib port is ACTIVE */
|
||
|
bool foundOnlineQibPort = false;
|
||
|
size_t i;
|
||
|
char portState[128];
|
||
|
FILE *devFile;
|
||
|
glob_t globbuf;
|
||
|
globbuf.gl_offs = 0;
|
||
|
if (glob("/sys/class/infiniband/qib*/ports/*/state",
|
||
|
GLOB_DOOFFS, NULL, &globbuf) != 0) {
|
||
|
return OPAL_ERR_NOT_AVAILABLE;
|
||
|
}
|
||
|
|
||
|
for (i=0;i < globbuf.gl_pathc; i++) {
|
||
|
devFile = fopen(globbuf.gl_pathv[i], "r");
|
||
|
fgets(portState, sizeof(portState), devFile);
|
||
|
fclose(devFile);
|
||
|
|
||
|
if (strstr(portState, "ACTIVE") != NULL) {
|
||
|
/* Found at least one ACTIVE port */
|
||
|
foundOnlineQibPort = true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
globfree(&globbuf);
|
||
|
|
||
|
if (!foundOnlineQibPort) {
|
||
|
return OPAL_ERR_NOT_AVAILABLE;
|
||
|
}
|
||
|
|
||
|
return OMPI_SUCCESS;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
ompi_mtl_psm_component_query(mca_base_module_t **module, int *priority)
|
||
|
{
|
||
|
/*
|
||
|
* if we get here it means that PSM is available so give high priority
|
||
|
*/
|
||
|
|
||
|
*priority = param_priority;
|
||
|
*module = (mca_base_module_t *)&ompi_mtl_psm.super;
|
||
|
return OMPI_SUCCESS;
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
ompi_mtl_psm_component_close(void)
|
||
|
{
|
||
|
return OMPI_SUCCESS;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
get_num_total_procs(int *out_ntp)
|
||
|
{
|
||
|
*out_ntp = (int)ompi_process_info.num_procs;
|
||
|
return OMPI_SUCCESS;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
get_num_local_procs(int *out_nlp)
|
||
|
{
|
||
|
/* num_local_peers does not include us in
|
||
|
* its calculation, so adjust for that */
|
||
|
*out_nlp = (int)(1 + ompi_process_info.num_local_peers);
|
||
|
return OMPI_SUCCESS;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
get_local_rank(int *out_rank)
|
||
|
{
|
||
|
ompi_node_rank_t my_node_rank;
|
||
|
|
||
|
*out_rank = 0;
|
||
|
|
||
|
if (OMPI_NODE_RANK_INVALID == (my_node_rank =
|
||
|
ompi_process_info.my_node_rank)) {
|
||
|
return OMPI_ERROR;
|
||
|
}
|
||
|
*out_rank = (int)my_node_rank;
|
||
|
return OMPI_SUCCESS;
|
||
|
}
|
||
|
|
||
|
static mca_mtl_base_module_t *
|
||
|
ompi_mtl_psm_component_init(bool enable_progress_threads,
|
||
|
bool enable_mpi_threads)
|
||
|
{
|
||
|
psm_error_t err;
|
||
|
int verno_major = PSM_VERNO_MAJOR;
|
||
|
int verno_minor = PSM_VERNO_MINOR;
|
||
|
int local_rank = -1, num_local_procs = 0;
|
||
|
int num_total_procs = 0;
|
||
|
|
||
|
/* Compute the total number of processes on this host and our local rank
|
||
|
* on that node. We need to provide PSM with these values so it can
|
||
|
* allocate hardware contexts appropriately across processes.
|
||
|
*/
|
||
|
if (OMPI_SUCCESS != get_num_local_procs(&num_local_procs)) {
|
||
|
opal_output(0, "Cannot determine number of local processes. "
|
||
|
"Cannot continue.\n");
|
||
|
return NULL;
|
||
|
}
|
||
|
if (OMPI_SUCCESS != get_local_rank(&local_rank)) {
|
||
|
opal_output(0, "Cannot determine local rank. Cannot continue.\n");
|
||
|
return NULL;
|
||
|
}
|
||
|
if (OMPI_SUCCESS != get_num_total_procs(&num_total_procs)) {
|
||
|
opal_output(0, "Cannot determine total number of processes. "
|
||
|
"Cannot continue.\n");
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
|
||
|
#if PSM_VERNO >= 0x010c
|
||
|
/* Set infinipath debug level */
|
||
|
err = psm_setopt(PSM_COMPONENT_CORE, 0, PSM_CORE_OPT_DEBUG,
|
||
|
(const void*) &ompi_mtl_psm.debug_level,
|
||
|
sizeof(unsigned));
|
||
|
if (err) {
|
||
|
/* Non fatal error. Can continue */
|
||
|
opal_show_help("help-mtl-psm.txt",
|
||
|
"psm init", false,
|
||
|
psm_error_get_string(err));
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
if (getenv("PSM_DEVICES") == NULL) {
|
||
|
/* Only allow for shm and ipath devices in 2.0 and earlier releases
|
||
|
* (unless the user overrides the setting).
|
||
|
*/
|
||
|
if (PSM_VERNO >= 0x0104) {
|
||
|
if (num_local_procs == num_total_procs) {
|
||
|
setenv("PSM_DEVICES", "self,shm", 0);
|
||
|
} else {
|
||
|
setenv("PSM_DEVICES", "self,shm,ipath", 0);
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
if (num_local_procs == num_total_procs) {
|
||
|
setenv("PSM_DEVICES", "shm", 0);
|
||
|
} else {
|
||
|
setenv("PSM_DEVICES", "shm,ipath", 0);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
err = psm_init(&verno_major, &verno_minor);
|
||
|
if (err) {
|
||
|
opal_show_help("help-mtl-psm.txt",
|
||
|
"psm init", true,
|
||
|
psm_error_get_string(err));
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
/* Complete PSM initialization */
|
||
|
ompi_mtl_psm_module_init(local_rank, num_local_procs);
|
||
|
|
||
|
ompi_mtl_psm.super.mtl_request_size =
|
||
|
sizeof(mca_mtl_psm_request_t) -
|
||
|
sizeof(struct mca_mtl_request_t);
|
||
|
|
||
|
/* don't register the err handler until we know we will be active */
|
||
|
err = psm_error_register_handler(NULL /* no ep */,
|
||
|
PSM_ERRHANDLER_NOP);
|
||
|
if (err) {
|
||
|
opal_output(0, "Error in psm_error_register_handler (error %s)\n",
|
||
|
psm_error_get_string(err));
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
return &ompi_mtl_psm.super;
|
||
|
}
|
||
|
|