1
1

Fix the PSM MTL in trunk by gathering node locality information differently.

This commit was SVN r27063.
Этот коммит содержится в:
Samuel Gutierrez 2012-08-16 00:50:24 +00:00
родитель bcf6a41243
Коммит 7867330dcc

Просмотреть файл

@ -10,6 +10,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -20,6 +22,7 @@
#include "ompi_config.h"
#include "orte/util/show_help.h"
#include "orte/util/proc_info.h"
#include "opal/mca/event/event.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
@ -173,46 +176,53 @@ ompi_mtl_psm_component_close(void)
return OMPI_SUCCESS;
}
static int
get_num_local_procs(int *out_nlp)
{
/* num_local_peers does not include us in
* its calculation, so adjust for that */
*out_nlp = (int)(1 + orte_process_info.num_local_peers);
return OMPI_SUCCESS;
}
static mca_mtl_base_module_t*
static int
get_local_rank(int *out_rank)
{
orte_node_rank_t my_node_rank;
*out_rank = 0;
if (ORTE_NODE_RANK_INVALID == (my_node_rank =
orte_process_info.my_node_rank)) {
return OMPI_ERROR;
}
*out_rank = (int)my_node_rank;
return OMPI_SUCCESS;
}
static mca_mtl_base_module_t *
ompi_mtl_psm_component_init(bool enable_progress_threads,
bool enable_mpi_threads)
bool enable_mpi_threads)
{
psm_error_t err;
int rc;
int verno_major = PSM_VERNO_MAJOR;
int verno_minor = PSM_VERNO_MINOR;
ompi_proc_t *my_proc, **procs;
size_t num_total_procs, proc;
int local_rank = -1, num_local_procs = 0;
/* Compute the total number of processes on this host and our local rank
* on that node. We need to provide PSM with these values so it can
* allocate hardware contexts appropriately across processes.
*/
if ((rc = ompi_proc_refresh()) != OMPI_SUCCESS) {
return NULL;
if (OMPI_SUCCESS != get_num_local_procs(&num_local_procs)) {
opal_output(0, "Cannot determine number of local processes. "
"Cannot continue.\n");
return NULL;
}
my_proc = ompi_proc_local();
if (NULL == (procs = ompi_proc_world(&num_total_procs))) {
return NULL;
if (OMPI_SUCCESS != get_local_rank(&local_rank)) {
opal_output(0, "Cannot determine local rank. Cannot continue.\n");
return NULL;
}
for (proc = 0; proc < num_total_procs; proc++) {
if (my_proc == procs[proc]) {
local_rank = num_local_procs++;
continue;
}
if (OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags)) {
num_local_procs++;
}
}
assert(local_rank >= 0 && num_local_procs > 0);
free(procs);
err = psm_error_register_handler(NULL /* no ep */,
PSM_ERRHANDLER_NOP);
if (err) {