diff --git a/ompi/mca/mtl/psm/Makefile.am b/ompi/mca/mtl/psm/Makefile.am index 7398fa5f9c..570effa236 100644 --- a/ompi/mca/mtl/psm/Makefile.am +++ b/ompi/mca/mtl/psm/Makefile.am @@ -20,6 +20,8 @@ EXTRA_DIST = post_configure.sh AM_CPPFLAGS = $(mtl_psm_CPPFLAGS) +dist_pkgdata_DATA = help-mtl-psm.txt + mtl_psm_sources = \ mtl_psm.c \ mtl_psm.h \ diff --git a/ompi/mca/mtl/psm/help-mtl-psm.txt b/ompi/mca/mtl/psm/help-mtl-psm.txt new file mode 100644 index 0000000000..9514def12b --- /dev/null +++ b/ompi/mca/mtl/psm/help-mtl-psm.txt @@ -0,0 +1,40 @@ +# -*- text -*- +# +# Copyright (C) 2009. QLogic Corporation. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +[psm init] +Initialization of PSM library failed. + + Error: %s +# +[debug level] +Unable to set PSM debug level. + + Error: %s +# +[unable to open endpoint] +PSM was unable to open an endpoint. Please make sure that the network link is +active on the node and the hardware is functioning. + + Error: %s +# +[no uuid present] +Error obtaining unique transport key from ORTE (orte_precondition_transports %s +the environment). + + Local host: %s +# +[error polling network] +Error %s occurred in attempting to make network progress (psm_mq_ipeek). +# +[error posting receive] +Unable to post application receive buffer (psm_mq_irecv). 
+ + Error: %s + Buffer: %p + Length: %d diff --git a/ompi/mca/mtl/psm/mtl_psm.c b/ompi/mca/mtl/psm/mtl_psm.c index 14e9ca03be..136da33ab5 100644 --- a/ompi/mca/mtl/psm/mtl_psm.c +++ b/ompi/mca/mtl/psm/mtl_psm.c @@ -19,6 +19,8 @@ #include "ompi_config.h" +#include "orte/util/show_help.h" +#include "orte/util/proc_info.h" #include "ompi/mca/mtl/mtl.h" #include "ompi/runtime/ompi_module_exchange.h" #include "ompi/mca/mtl/base/mtl_base_datatype.h" @@ -62,7 +64,9 @@ ompi_mtl_psm_errhandler(psm_ep_t ep, const psm_error_t error, case PSM_EP_NO_PORTS_AVAIL: case PSM_EP_NO_NETWORK: case PSM_EP_INVALID_UUID_KEY: - opal_output(0, "Open MPI failed to open a PSM endpoint: %s\n", error_string); + orte_show_help("help-mtl-psm.txt", + "unable to open endpoint", true, + psm_error_get_string(error)); break; /* We can't handle any other errors than the ones above */ @@ -77,7 +81,7 @@ ompi_mtl_psm_errhandler(psm_ep_t ep, const psm_error_t error, int ompi_mtl_psm_progress( void ); -int ompi_mtl_psm_module_init() { +int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { psm_error_t err; psm_ep_t ep; /* endpoint handle */ psm_mq_t mq; @@ -86,6 +90,7 @@ int ompi_mtl_psm_module_init() { struct psm_ep_open_opts ep_opt; unsigned long long *uu = (unsigned long long *) unique_job_key; char *generated_key; + char env_string[256]; generated_key = getenv("OMPI_MCA_orte_precondition_transports"); memset(uu, 0, sizeof(psm_uuid_t)); @@ -93,17 +98,26 @@ int ompi_mtl_psm_module_init() { if (!generated_key || (strlen(generated_key) != 33) || sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2) { - opal_output(0, "Error obtaining unique transport key from ORTE " - "(orte_precondition_transpots %s the environment)\n", - generated_key ? "could not be parsed from" : - "not present in"); - return OMPI_ERROR; - + orte_show_help("help-mtl-psm.txt", + "no uuid present", true, + generated_key ? 
"could not be parsed from" : + "not present in", orte_process_info.nodename); + return OMPI_ERROR; + } /* Handle our own errors for opening endpoints */ psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler); - + + /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware + * contexts correctly. + */ + snprintf(env_string, sizeof(env_string), "%d", local_rank); + setenv("MPI_LOCALRANKID", env_string, 0); + snprintf(env_string, sizeof(env_string), "%d", num_local_procs); + setenv("MPI_LOCALNRANKS", env_string, 0); + + /* Setup the endpoint options. */ bzero((void*) &ep_opt, sizeof(ep_opt)); ep_opt.timeout = ompi_mtl_psm.connect_timeout * 1e9; ep_opt.unit = ompi_mtl_psm.ib_unit; @@ -121,9 +135,10 @@ int ompi_mtl_psm_module_init() { /* Open PSM endpoint */ err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid); if (err) { - opal_output(0, "Error in psm_ep_open (error %s)\n", - psm_error_get_string(err)); - return OMPI_ERROR; + orte_show_help("help-mtl-psm.txt", + "unable to open endpoint", true, + psm_error_get_string(err)); + return OMPI_ERROR; } /* Future errors are handled by the default error handler */ @@ -135,9 +150,10 @@ int ompi_mtl_psm_module_init() { 0, &mq); if (err) { - opal_output(0, "Error in psm_mq_init (error %s)\n", - psm_error_get_string(err)); - return OMPI_ERROR; + orte_show_help("help-mtl-psm.txt", + "psm init", true, + psm_error_get_string(err)); + return OMPI_ERROR; } ompi_mtl_psm.ep = ep; @@ -151,7 +167,7 @@ int ompi_mtl_psm_module_init() { opal_output(0, "Open MPI couldn't send PSM epid to head node process"); return OMPI_ERROR; } - + /* register the psm progress function */ opal_progress_register(ompi_mtl_psm_progress); @@ -228,35 +244,39 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, psm_error_t *errs_out = NULL, err; size_t size; int proc_errors[PSM_ERROR_LAST] = { 0 }; - int timeout_in_secs; + int proc, my_local_rank = -1, num_local_procs = 0, timeout_in_secs; assert(mtl == &ompi_mtl_psm.super); 
rc = OMPI_ERR_OUT_OF_RESOURCE; errs_out = (psm_error_t *) malloc(nprocs * sizeof(psm_error_t)); - if (errs_out == NULL) + if (errs_out == NULL) { goto bail; + } epids_in = (psm_epid_t *) malloc(nprocs * sizeof(psm_epid_t)); - if (epids_in == NULL) + if (epids_in == NULL) { goto bail; + } epaddrs_out = (psm_epaddr_t *) malloc(nprocs * sizeof(psm_epaddr_t)); - if (epaddrs_out == NULL) + if (epaddrs_out == NULL) { goto bail; - + } rc = OMPI_SUCCESS; /* Get the epids for all the processes from modex */ for (i = 0; i < (int) nprocs; i++) { rc = ompi_modex_recv(&mca_mtl_psm_component.super.mtl_version, procs[i], (void**)&epid, &size); - if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t)) - return OMPI_ERROR; + if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t)) { + return OMPI_ERROR; + } epids_in[i] = *epid; } timeout_in_secs = min(180, 0.5 * nprocs); - if (ompi_mtl_psm.connect_timeout < timeout_in_secs) + if (ompi_mtl_psm.connect_timeout < timeout_in_secs) { timeout_in_secs = ompi_mtl_psm.connect_timeout; + } psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_NOP); @@ -281,8 +301,9 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, opal_output(0, "PSM EP connect error (%s):", errstr ? 
errstr : "unknown connect error"); for (j = 0; j < (int) nprocs; j++) { - if (errs_out[j] == thiserr) - opal_output(0, " %s", procs[j]->proc_hostname); + if (errs_out[j] == thiserr) { + opal_output(0, " %s", procs[j]->proc_hostname); + } } opal_output(0, "\n"); } @@ -308,12 +329,16 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, } bail: - if (epids_in != NULL) + if (epids_in != NULL) { free(epids_in); - if (errs_out != NULL) + } + if (errs_out != NULL) { free(errs_out); - if (epaddrs_out != NULL) + } + if (epaddrs_out != NULL) { free(epaddrs_out); + } + return rc; } @@ -336,16 +361,18 @@ int ompi_mtl_psm_progress( void ) { do { err = psm_mq_ipeek(ompi_mtl_psm.mq, &req, NULL); - if (err == PSM_MQ_INCOMPLETE) + if (err == PSM_MQ_INCOMPLETE) { return completed; - else if (err != PSM_OK) + } else if (err != PSM_OK) { goto error; - + } + completed++; err = psm_mq_test(&req, &psm_status); - if (err != PSM_OK) + if (err != PSM_OK) { goto error; + } mtl_psm_request = (mca_mtl_psm_request_t*) psm_status.context; @@ -363,8 +390,9 @@ int ompi_mtl_psm_progress( void ) { } if(mtl_psm_request->type == OMPI_MTL_PSM_ISEND) { - if (mtl_psm_request->free_after) + if (mtl_psm_request->free_after) { free(mtl_psm_request->buf); + } } switch (psm_status.error_code) { @@ -387,8 +415,9 @@ int ompi_mtl_psm_progress( void ) { while (1); error: - opal_output(0, "Error in psm progress function: %s\n", - psm_error_get_string(err)); + orte_show_help("help-mtl-psm.txt", + "error polling network", true, + psm_error_get_string(err)); return 1; } diff --git a/ompi/mca/mtl/psm/mtl_psm.h b/ompi/mca/mtl/psm/mtl_psm.h index 9cb49106ae..cd699b326a 100644 --- a/ompi/mca/mtl/psm/mtl_psm.h +++ b/ompi/mca/mtl/psm/mtl_psm.h @@ -81,7 +81,7 @@ extern int ompi_mtl_psm_cancel(struct mca_mtl_base_module_t* mtl, extern int ompi_mtl_psm_finalize(struct mca_mtl_base_module_t* mtl); -int ompi_mtl_psm_module_init(void); +int ompi_mtl_psm_module_init(int local_rank, int num_local_procs); diff --git 
a/ompi/mca/mtl/psm/mtl_psm_component.c b/ompi/mca/mtl/psm/mtl_psm_component.c index 67ca4030c6..12020278a5 100644 --- a/ompi/mca/mtl/psm/mtl_psm_component.c +++ b/ompi/mca/mtl/psm/mtl_psm_component.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2006-2009 QLogic Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -19,9 +19,11 @@ #include "ompi_config.h" +#include "orte/util/show_help.h" #include "opal/event/event.h" #include "opal/util/output.h" #include "opal/mca/base/mca_base_param.h" +#include "ompi/proc/proc.h" #include "mtl_psm.h" #include "mtl_psm_types.h" @@ -65,46 +67,47 @@ static int ompi_mtl_psm_component_open(void) { - mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, - "connect_timeout", - "PSM connection timeout value in seconds", - false, false, 30, &ompi_mtl_psm.connect_timeout); + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "connect_timeout", + "PSM connection timeout value in seconds", + false, false, 30, &ompi_mtl_psm.connect_timeout); - mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, - "debug", - "PSM debug level", - false, false, 1, - &ompi_mtl_psm.debug_level); + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "debug", + "PSM debug level", + false, false, 1, + &ompi_mtl_psm.debug_level); - mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, - "ib_unit", - "Truescale unit to use", - false, false, -1, - &ompi_mtl_psm.ib_unit); + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "ib_unit", + "Truescale unit to use", + false, false, -1, + &ompi_mtl_psm.ib_unit); - mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, - "ib_port", - "Truescale port on unit to use", - false, false, 0, - &ompi_mtl_psm.ib_port); + 
mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "ib_port", + "Truescale port on unit to use", + false, false, 0, + &ompi_mtl_psm.ib_port); - mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, - "ib_service_level", - "Infiniband service level" - "(0 <= SL <= 15)", - false, false, 0, &ompi_mtl_psm.ib_service_level); + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "ib_service_level", + "Infiniband service level" + "(0 <= SL <= 15)", + false, false, 0, &ompi_mtl_psm.ib_service_level); - ompi_mtl_psm.ib_pkey = 0x7fffUL; - mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, - "ib_pkey", - "Infiniband partition key", - false, false, 0x7fffUL, - &ompi_mtl_psm.ib_pkey); + ompi_mtl_psm.ib_pkey = 0x7fffUL; + mca_base_param_reg_int(&mca_mtl_psm_component.super.mtl_version, + "ib_pkey", + "Infiniband partition key", + false, false, 0x7fffUL, + &ompi_mtl_psm.ib_pkey); - if (ompi_mtl_psm.ib_service_level < 0) - ompi_mtl_psm.ib_service_level = 0; - else if (ompi_mtl_psm.ib_service_level > 15) - ompi_mtl_psm.ib_service_level = 15; + if (ompi_mtl_psm.ib_service_level < 0) { + ompi_mtl_psm.ib_service_level = 0; + } else if (ompi_mtl_psm.ib_service_level > 15) { + ompi_mtl_psm.ib_service_level = 15; + } return OMPI_SUCCESS; @@ -123,9 +126,40 @@ ompi_mtl_psm_component_init(bool enable_progress_threads, bool enable_mpi_threads) { psm_error_t err; + int rc; int verno_major = PSM_VERNO_MAJOR; int verno_minor = PSM_VERNO_MINOR; - + ompi_proc_t *my_proc, **procs; + size_t num_total_procs; + int local_rank = -1, num_local_procs = 0, proc; + + /* Compute the total number of processes on this host and our local rank + * on that node. We need to provide PSM with these values so it can + * allocate hardware contexts appropriately across processes. 
+ */ + if ((rc = ompi_proc_refresh()) != OMPI_SUCCESS) { + return NULL; + } + + my_proc = ompi_proc_local(); + if (NULL == (procs = ompi_proc_world(&num_total_procs))) { + return NULL; + } + + for (proc = 0; proc < num_total_procs; proc++) { + if (my_proc == procs[proc]) { + local_rank = num_local_procs++; + continue; + } + + if (OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags)) { + num_local_procs++; + } + } + + assert(local_rank >= 0 && num_local_procs > 0); + free(procs); + err = psm_error_register_handler(NULL /* no ep */, PSM_ERRHANDLER_NOP); if (err) { @@ -141,34 +175,37 @@ ompi_mtl_psm_component_init(bool enable_progress_threads, sizeof(unsigned)); if (err) { /* Non fatal error. Can continue */ - opal_output(0, "Unable to set infinipath debug level (error %s)\n", - psm_error_get_string(err)); + orte_show_help("help-mtl-psm.txt", + "debug level", false, + psm_error_get_string(err)); } #endif /* Only allow for shm and ipath devices in 2.0 and earlier releases * (unless the user overrides the setting). 
*/ - setenv("PSM_DEVICES", "shm,ipath", 0); - + + if (PSM_VERNO >= 0x0104) { + setenv("PSM_DEVICES", "self,shm,ipath", 0); + } + else { + setenv("PSM_DEVICES", "shm,ipath", 0); + } + err = psm_init(&verno_major, &verno_minor); if (err) { - opal_output(0, "Error in psm_init (error %s)\n", - psm_error_get_string(err)); - return NULL; + orte_show_help("help-mtl-psm.txt", + "psm init", true, + psm_error_get_string(err)); + return NULL; } - - /* - * Enable 'self' device only in a post-2.0 release(s) - */ - if (PSM_VERNO >= 0x0104) - setenv("PSM_DEVICES", "self,shm,ipath", 0); - - ompi_mtl_psm_module_init(); + /* Complete PSM initialization */ + ompi_mtl_psm_module_init(local_rank, num_local_procs); + ompi_mtl_psm.super.mtl_request_size = - sizeof(mca_mtl_psm_request_t) - - sizeof(struct mca_mtl_request_t); + sizeof(mca_mtl_psm_request_t) - + sizeof(struct mca_mtl_request_t); return &ompi_mtl_psm.super; } diff --git a/ompi/mca/mtl/psm/mtl_psm_recv.c b/ompi/mca/mtl/psm/mtl_psm_recv.c index c615548ea8..4b4f02e08a 100644 --- a/ompi/mca/mtl/psm/mtl_psm_recv.c +++ b/ompi/mca/mtl/psm/mtl_psm_recv.c @@ -68,8 +68,11 @@ ompi_mtl_psm_irecv(struct mca_mtl_base_module_t* mtl, &mtl_psm_request->psm_request); if (err) { - opal_output(0, "Error in psm_mq_irecv (error %s)\n", psm_error_get_string(err)); - return OMPI_ERROR; + orte_show_help("help-mtl-psm.txt", + "error posting receive", true, + psm_error_get_string(err), + mtl_psm_request->buf, length); + return OMPI_ERROR; } return OMPI_SUCCESS; diff --git a/ompi/mca/mtl/psm/mtl_psm_send.c b/ompi/mca/mtl/psm/mtl_psm_send.c index f0cee56ef3..e9cc308cac 100644 --- a/ompi/mca/mtl/psm/mtl_psm_send.c +++ b/ompi/mca/mtl/psm/mtl_psm_send.c @@ -70,8 +70,9 @@ ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl, mtl_psm_request.buf, length); - if (mtl_psm_request.free_after) + if (mtl_psm_request.free_after) { free(mtl_psm_request.buf); + } return err == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR; }