1
1

Clean up some cruft resulting from the move of the BTLs to opal. We had created the ability to delay modex operations, which included a need to delay retrieving hostname info for remote procs. This allowed us to not retrieve the modex info until the first message unless required - the hostname is generally only required for debug and error messages.

Properly set up the opal_process_info structure early in the initialization procedure. Define the local hostname right at the beginning of opal_init so all parts of opal can use it. Overlay that during orte_init as the user may choose to remove fqdn and strip prefixes during that time. Set up the job_session_dir and other such info immediately when it becomes available during orte_init.
Этот коммит содержится в:
Ralph Castain 2014-10-03 14:19:48 -07:00 коммит произвёл Howard Pritchard
родитель b44a244fbc
Коммит fd6a044b7f
26 изменённых файлов: 260 добавлений и 199 удалений

Просмотреть файл

@ -476,7 +476,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
}
/* no select is required as this is a static framework */
/* Setup RTE - note that we are an MPI process */
/* Setup RTE */
if (OMPI_SUCCESS != (ret = ompi_rte_init(NULL, NULL))) {
error = "ompi_mpi_init: ompi_rte_init failed";
goto error;
@ -498,15 +498,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
}
#endif
opal_process_info.nodename = ompi_process_info.nodename;
opal_process_info.job_session_dir = ompi_process_info.job_session_dir;
opal_process_info.proc_session_dir = ompi_process_info.proc_session_dir;
opal_process_info.num_local_peers = (int32_t)ompi_process_info.num_local_peers;
opal_process_info.my_local_rank = (int32_t)ompi_process_info.my_local_rank;
#if OPAL_HAVE_HWLOC
opal_process_info.cpuset = ompi_process_info.cpuset;
#endif /* OPAL_HAVE_HWLOC */
/* Register the default errhandler callback - RTE will ignore if it
* doesn't support this capability
*/

Просмотреть файл

@ -12,6 +12,7 @@
* Copyright (c) 2006-2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -25,6 +26,7 @@
#include <stdarg.h>
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "base.h"
#include "btl_base_error.h"
@ -64,7 +66,7 @@ void mca_btl_base_error_no_nics(const char* transport,
asprintf(&procid, "%s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_show_help("help-mpi-btl-base.txt", "btl:no-nics",
true, procid, transport, opal_proc_local_get()->proc_hostname,
true, procid, transport, opal_process_info.nodename,
nic_name);
free(procid);
}

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -36,53 +36,54 @@ OPAL_DECLSPEC extern int mca_btl_base_verbose;
OPAL_DECLSPEC extern int mca_btl_base_err(const char*, ...) __opal_attribute_format__(__printf__, 1, 2);
OPAL_DECLSPEC extern int mca_btl_base_out(const char*, ...) __opal_attribute_format__(__printf__, 1, 2);
/*
 * BTL_OUTPUT(args): emit one message through mca_btl_base_out().
 *
 * Prints a "[hostname]procname[file:line:func] " prefix, where the hostname
 * is read from opal_proc_local_get()->proc_hostname, then the caller's
 * message, then a newline.
 *
 * args is pasted directly after the function name, so it must be a
 * parenthesized printf-style argument list, e.g. BTL_OUTPUT(("x=%d", x)).
 *
 * NOTE(review): the expansion already ends in "while(0);", so a call site
 * that adds its own ';' yields an extra empty statement; that breaks an
 * unbraced if/else that uses this macro as the if-body.
 */
#define BTL_OUTPUT(args) \
do { \
mca_btl_base_out("[%s]%s[%s:%d:%s] ", \
opal_proc_local_get()->proc_hostname, \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_out args; \
mca_btl_base_out("\n"); \
} while(0);
/*
 * BTL_OUTPUT(args): emit one message through mca_btl_base_out().
 *
 * Prints a "[hostname]procname[file:line:func] " prefix, where the hostname
 * is read from opal_process_info.nodename, then the caller's message, then
 * a newline.
 *
 * args is pasted directly after the function name, so it must be a
 * parenthesized printf-style argument list, e.g. BTL_OUTPUT(("x=%d", x)).
 *
 * NOTE(review): the expansion already ends in "while(0);", so a call site
 * that adds its own ';' yields an extra empty statement; that breaks an
 * unbraced if/else that uses this macro as the if-body.
 */
#define BTL_OUTPUT(args) \
do { \
mca_btl_base_out("[%s]%s[%s:%d:%s] ", \
opal_process_info.nodename, \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_out args; \
mca_btl_base_out("\n"); \
} while(0);
/*
 * BTL_ERROR(args): emit one message through mca_btl_base_err().
 *
 * Prints a "[hostname]procname[file:line:func] " prefix, where the hostname
 * is read from opal_proc_local_get()->proc_hostname, then the caller's
 * message, then a newline.
 *
 * args must be a parenthesized printf-style argument list, because it is
 * pasted directly after "mca_btl_base_err".
 *
 * NOTE(review): the expansion already ends in "while(0);" - see the effect
 * on unbraced if/else call sites before relying on it there.
 */
#define BTL_ERROR(args) \
do { \
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
opal_proc_local_get()->proc_hostname, \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_err args; \
mca_btl_base_err("\n"); \
} while(0);
/*
 * BTL_ERROR(args): emit one message through mca_btl_base_err().
 *
 * Prints a "[hostname]procname[file:line:func] " prefix, where the hostname
 * is read from opal_process_info.nodename, then the caller's message, then
 * a newline.
 *
 * args must be a parenthesized printf-style argument list, because it is
 * pasted directly after "mca_btl_base_err".
 *
 * NOTE(review): the expansion already ends in "while(0);" - see the effect
 * on unbraced if/else call sites before relying on it there.
 */
#define BTL_ERROR(args) \
do { \
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
opal_process_info.nodename, \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_err args; \
mca_btl_base_err("\n"); \
} while(0);
/*
 * BTL_PEER_ERROR(proc, args): emit an error message that names a peer.
 *
 * Prints "procname[file:line:func] from <local hostname> " through
 * mca_btl_base_err(), with the local hostname read from
 * opal_proc_local_get()->proc_hostname. If proc is non-NULL it then prints
 * "to: <peer hostname> " by reading proc->proc_hostname directly (that
 * field is passed to "%s" without a NULL check here). The caller's message
 * and a newline follow.
 *
 * proc is used unparenthesized in "proc->proc_hostname", so pass a simple
 * pointer expression. args must be a parenthesized printf-style argument
 * list.
 *
 * NOTE(review): the expansion already ends in "while(0);".
 */
#define BTL_PEER_ERROR(proc, args) \
do { \
mca_btl_base_err("%s[%s:%d:%s] from %s ", \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, __func__, \
opal_proc_local_get()->proc_hostname); \
if(proc) { \
mca_btl_base_err("to: %s ", proc->proc_hostname); \
} \
mca_btl_base_err args; \
mca_btl_base_err("\n"); \
} while(0);
/*
 * BTL_PEER_ERROR(proc, args): emit an error message that names a peer.
 *
 * Prints "procname[file:line:func] from <local hostname> " through
 * mca_btl_base_err(), with the local hostname read from
 * opal_process_info.nodename. If proc is non-NULL it then prints
 * "to: <peer hostname> ", obtaining the peer name from
 * opal_get_proc_hostname(proc). The caller's message and a newline follow.
 *
 * args must be a parenthesized printf-style argument list, because it is
 * pasted directly after "mca_btl_base_err".
 *
 * NOTE(review): the expansion already ends in "while(0);".
 */
#define BTL_PEER_ERROR(proc, args) \
do { \
mca_btl_base_err("%s[%s:%d:%s] from %s ", \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, __func__, \
opal_process_info.nodename); \
if (proc) { \
mca_btl_base_err("to: %s ", \
opal_get_proc_hostname(proc)); \
} \
mca_btl_base_err args; \
mca_btl_base_err("\n"); \
} while(0);
#if OPAL_ENABLE_DEBUG
/*
 * BTL_VERBOSE(args): debug-build diagnostic message (this definition sits
 * under #if OPAL_ENABLE_DEBUG).
 *
 * Does nothing unless mca_btl_base_verbose > 0. When enabled, prints a
 * "[hostname]procname[file:line:func] " prefix through mca_btl_base_err(),
 * with the hostname read from opal_proc_local_get()->proc_hostname, then
 * the caller's message, then a newline.
 *
 * args must be a parenthesized printf-style argument list.
 *
 * NOTE(review): the expansion already ends in "while(0);".
 */
#define BTL_VERBOSE(args) \
do { \
if(mca_btl_base_verbose > 0) { \
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
opal_proc_local_get()->proc_hostname, \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_err args; \
mca_btl_base_err("\n"); \
} \
} while(0);
/*
 * BTL_VERBOSE(args): debug-build diagnostic message (this definition sits
 * under #if OPAL_ENABLE_DEBUG).
 *
 * Does nothing unless mca_btl_base_verbose > 0. When enabled, prints a
 * "[hostname]procname[file:line:func] " prefix through mca_btl_base_err(),
 * with the hostname read from opal_process_info.nodename, then the caller's
 * message, then a newline.
 *
 * args must be a parenthesized printf-style argument list.
 *
 * NOTE(review): the expansion already ends in "while(0);".
 */
#define BTL_VERBOSE(args) \
do { \
if(mca_btl_base_verbose > 0) { \
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
opal_process_info.nodename, \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_err args; \
mca_btl_base_err("\n"); \
} \
} while(0);
#else
#define BTL_VERBOSE(args)
#endif

Просмотреть файл

@ -17,7 +17,7 @@
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -34,6 +34,7 @@
#include "opal/class/opal_bitmap.h"
#include "opal/util/output.h"
#include "opal/util/arch.h"
#include "opal/util/proc.h"
#include "opal/include/opal_stdint.h"
#include "opal/util/show_help.h"
#include "opal/mca/btl/btl.h"
@ -142,13 +143,13 @@ void mca_btl_openib_show_init_error(const char *file, int line,
}
opal_show_help("help-mpi-btl-openib.txt", "init-fail-no-mem",
true, opal_proc_local_get()->proc_hostname,
true, opal_process_info.nodename,
file, line, func, dev, str_limit);
if (NULL != str_limit) free(str_limit);
} else {
opal_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
true, opal_proc_local_get()->proc_hostname,
true, opal_process_info.nodename,
file, line, func, strerror(errno), errno, dev);
}
}
@ -473,13 +474,12 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
if(mca_btl_openib_get_transport_type(openib_btl) != endpoint->rem_info.rem_transport_type) {
opal_show_help("help-mpi-btl-openib.txt",
"conflicting transport types", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
ibv_get_device_name(openib_btl->device->ib_dev),
(openib_btl->device->ib_dev_attr).vendor_id,
(openib_btl->device->ib_dev_attr).vendor_part_id,
mca_btl_openib_transport_name_strings[mca_btl_openib_get_transport_type(openib_btl)],
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname,
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
endpoint->rem_info.rem_vendor_id,
endpoint->rem_info.rem_vendor_part_id,
mca_btl_openib_transport_name_strings[endpoint->rem_info.rem_transport_type]);
@ -495,7 +495,7 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
OPAL_ERR_NOT_FOUND != ret) {
opal_show_help("help-mpi-btl-openib.txt",
"error in device init", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
ibv_get_device_name(openib_btl->device->ib_dev));
return ret;
}
@ -539,13 +539,12 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
recv_qps)) {
opal_show_help("help-mpi-btl-openib.txt",
"unsupported queues configuration", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
ibv_get_device_name(openib_btl->device->ib_dev),
(openib_btl->device->ib_dev_attr).vendor_id,
(openib_btl->device->ib_dev_attr).vendor_part_id,
mca_btl_openib_component.receive_queues,
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown": endpoint->endpoint_proc->proc_opal->proc_hostname,
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
endpoint->rem_info.rem_vendor_id,
endpoint->rem_info.rem_vendor_part_id,
recv_qps);
@ -562,13 +561,12 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
values.receive_queues)) {
opal_show_help("help-mpi-btl-openib.txt",
"unsupported queues configuration", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
ibv_get_device_name(openib_btl->device->ib_dev),
(openib_btl->device->ib_dev_attr).vendor_id,
(openib_btl->device->ib_dev_attr).vendor_part_id,
mca_btl_openib_component.receive_queues,
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown": endpoint->endpoint_proc->proc_opal->proc_hostname,
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
endpoint->rem_info.rem_vendor_id,
endpoint->rem_info.rem_vendor_part_id,
values.receive_queues);
@ -679,7 +677,7 @@ static uint64_t calculate_max_reg (void)
action = "Your MPI job will continue, but may be behave poorly and/or hang.";
}
opal_show_help("help-mpi-btl-openib.txt", "reg mem limit low", true,
opal_proc_local_get()->proc_hostname, (unsigned long)(max_reg >> 20),
opal_process_info.nodename, (unsigned long)(max_reg >> 20),
(unsigned long)(mem_total >> 20), action);
return 0; /* signal that we can't have enough memory */
}

Просмотреть файл

@ -6,6 +6,7 @@
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -24,6 +25,8 @@
#include <errno.h>
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "opal/mca/btl/base/base.h"
#include "btl_openib.h"
#include "btl_openib_mca.h"
@ -405,14 +408,14 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
case IBV_EVENT_PATH_MIG_ERR:
case IBV_EVENT_SRQ_ERR:
opal_show_help("help-mpi-btl-openib.txt", "of error event",
true,opal_proc_local_get()->proc_hostname, (int)getpid(),
true,opal_process_info.nodename, (int)getpid(),
event_type,
openib_event_to_str((enum ibv_event_type)event_type),
xrc_event ? "true" : "false");
break;
case IBV_EVENT_PORT_ERR:
opal_show_help("help-mpi-btl-openib.txt", "of error event",
true,opal_proc_local_get()->proc_hostname, (int)getpid(),
true,opal_process_info.nodename, (int)getpid(),
event_type,
openib_event_to_str((enum ibv_event_type)event_type),
xrc_event ? "true" : "false");
@ -442,7 +445,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
break;
default:
opal_show_help("help-mpi-btl-openib.txt", "of unknown event",
true,opal_proc_local_get()->proc_hostname, (int)getpid(),
true,opal_process_info.nodename, (int)getpid(),
event_type, xrc_event ? "true" : "false");
}
ibv_ack_async_event(&event);

Просмотреть файл

@ -84,6 +84,7 @@
#include "opal/runtime/opal_params.h"
#include "opal/runtime/opal.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/util/proc.h"
#include "btl_openib.h"
#include "btl_openib_frag.h"
@ -540,8 +541,7 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
break;
case MCA_BTL_OPENIB_CONTROL_CTS:
OPAL_OUTPUT((-1, "received CTS from %s (buffer %p): posted recvs %d, sent cts %d",
(NULL == ep->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : ep->endpoint_proc->proc_opal->proc_hostname,
opal_get_proc_hostname(ep->endpoint_proc->proc_opal),
(void*) ctl_hdr,
ep->endpoint_posted_recvs, ep->endpoint_cts_sent));
ep->endpoint_cts_received = true;
@ -676,7 +676,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
if (mca_btl_openib_component.gid_index >
ib_port_attr->gid_tbl_len) {
opal_show_help("help-mpi-btl-openib.txt", "gid index too large",
true, opal_proc_local_get()->proc_hostname,
true, opal_process_info.nodename,
ibv_get_device_name(device->ib_dev), port_num,
mca_btl_openib_component.gid_index,
ib_port_attr->gid_tbl_len);
@ -734,7 +734,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
IB_DEFAULT_GID_PREFIX == subnet_id &&
mca_btl_openib_component.warn_default_gid_prefix) {
opal_show_help("help-mpi-btl-openib.txt", "default subnet prefix",
true, opal_proc_local_get()->proc_hostname);
true, opal_process_info.nodename);
}
lmc = (1 << ib_port_attr->lmc);
@ -1200,7 +1200,7 @@ static int setup_qps(void)
if (0 == opal_argv_count(queues)) {
opal_show_help("help-mpi-btl-openib.txt",
"no qps in receive_queues", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
mca_btl_openib_component.receive_queues);
ret = OPAL_ERROR;
goto error;
@ -1219,7 +1219,7 @@ static int setup_qps(void)
num_xrc_qps++;
#else
opal_show_help("help-mpi-btl-openib.txt", "No XRC support", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
mca_btl_openib_component.receive_queues);
ret = OPAL_ERR_NOT_AVAILABLE;
goto error;
@ -1227,7 +1227,7 @@ static int setup_qps(void)
} else {
opal_show_help("help-mpi-btl-openib.txt",
"invalid qp type in receive_queues", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
mca_btl_openib_component.receive_queues,
queues[qp]);
ret = OPAL_ERR_BAD_PARAM;
@ -1239,7 +1239,7 @@ static int setup_qps(void)
and SRQ */
if (num_xrc_qps > 0 && (num_pp_qps > 0 || num_srq_qps > 0)) {
opal_show_help("help-mpi-btl-openib.txt", "XRC with PP or SRQ", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
mca_btl_openib_component.receive_queues);
ret = OPAL_ERR_BAD_PARAM;
goto error;
@ -1248,7 +1248,7 @@ static int setup_qps(void)
/* Current XRC implementation can't used with btls_per_lid > 1 */
if (num_xrc_qps > 0 && mca_btl_openib_component.btls_per_lid > 1) {
opal_show_help("help-mpi-btl-openib.txt", "XRC with BTLs per LID",
true, opal_proc_local_get()->proc_hostname,
true, opal_process_info.nodename,
mca_btl_openib_component.receive_queues, num_xrc_qps);
ret = OPAL_ERR_BAD_PARAM;
goto error;
@ -1279,7 +1279,7 @@ static int setup_qps(void)
if (count < 3 || count > 6) {
opal_show_help("help-mpi-btl-openib.txt",
"invalid pp qp specification", true,
opal_proc_local_get()->proc_hostname, queues[qp]);
opal_process_info.nodename, queues[qp]);
ret = OPAL_ERR_BAD_PARAM;
goto error;
}
@ -1310,7 +1310,7 @@ static int setup_qps(void)
if (count < 3 || count > 7) {
opal_show_help("help-mpi-btl-openib.txt",
"invalid srq specification", true,
opal_proc_local_get()->proc_hostname, queues[qp]);
opal_process_info.nodename, queues[qp]);
ret = OPAL_ERR_BAD_PARAM;
goto error;
}
@ -1346,14 +1346,14 @@ static int setup_qps(void)
if (rd_num < rd_init) {
opal_show_help("help-mpi-btl-openib.txt", "rd_num must be >= rd_init",
true, opal_proc_local_get()->proc_hostname, queues[qp]);
true, opal_process_info.nodename, queues[qp]);
ret = OPAL_ERR_BAD_PARAM;
goto error;
}
if (rd_num < srq_limit) {
opal_show_help("help-mpi-btl-openib.txt", "srq_limit must be > rd_num",
true, opal_proc_local_get()->proc_hostname, queues[qp]);
true, opal_process_info.nodename, queues[qp]);
ret = OPAL_ERR_BAD_PARAM;
goto error;
}
@ -1365,7 +1365,7 @@ static int setup_qps(void)
if (rd_num <= rd_low) {
opal_show_help("help-mpi-btl-openib.txt", "rd_num must be > rd_low",
true, opal_proc_local_get()->proc_hostname, queues[qp]);
true, opal_process_info.nodename, queues[qp]);
ret = OPAL_ERR_BAD_PARAM;
goto error;
}
@ -1386,21 +1386,21 @@ static int setup_qps(void)
if (max_qp_size < max_size_needed) {
opal_show_help("help-mpi-btl-openib.txt",
"biggest qp size is too small", true,
opal_proc_local_get()->proc_hostname, max_qp_size,
opal_process_info.nodename, max_qp_size,
max_size_needed);
ret = OPAL_ERR_BAD_PARAM;
goto error;
} else if (max_qp_size > max_size_needed) {
opal_show_help("help-mpi-btl-openib.txt",
"biggest qp size is too big", true,
opal_proc_local_get()->proc_hostname, max_qp_size,
opal_process_info.nodename, max_qp_size,
max_size_needed);
}
if (mca_btl_openib_component.ib_free_list_max > 0 &&
min_freelist_size > mca_btl_openib_component.ib_free_list_max) {
opal_show_help("help-mpi-btl-openib.txt", "freelist too small", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
mca_btl_openib_component.ib_free_list_max,
min_freelist_size);
ret = OPAL_ERR_BAD_PARAM;
@ -1514,7 +1514,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
if (mca_btl_openib_component.warn_no_device_params_found) {
opal_show_help("help-mpi-btl-openib.txt",
"no device params found", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
ibv_get_device_name(device->ib_dev),
device->ib_dev_attr.vendor_id,
device->ib_dev_attr.vendor_part_id);
@ -1997,7 +1997,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
opal_show_help("help-mpi-btl-openib.txt",
"locally conflicting receive_queues", true,
opal_install_dirs.opaldatadir,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
ibv_get_device_name(receive_queues_device->ib_dev),
receive_queues_device->ib_dev_attr.vendor_id,
receive_queues_device->ib_dev_attr.vendor_part_id,
@ -2021,7 +2021,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
opal_show_help("help-mpi-btl-openib.txt",
"locally conflicting receive_queues", true,
opal_install_dirs.opaldatadir,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
ibv_get_device_name(receive_queues_device->ib_dev),
receive_queues_device->ib_dev_attr.vendor_id,
receive_queues_device->ib_dev_attr.vendor_part_id,
@ -2059,7 +2059,7 @@ error:
if (OPAL_SUCCESS != ret) {
opal_show_help("help-mpi-btl-openib.txt",
"error in device init", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
ibv_get_device_name(device->ib_dev));
}
@ -2402,7 +2402,7 @@ btl_openib_component_init(int *num_btl_modules,
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
opal_show_help("help-mpi-btl-openib.txt",
"ptmalloc2 with no threads", true,
opal_proc_local_get()->proc_hostname);
opal_process_info.nodename);
goto no_btls;
}
#endif
@ -2517,7 +2517,7 @@ btl_openib_component_init(int *num_btl_modules,
if (mca_btl_openib_component.want_fork_support > 0) {
opal_show_help("help-mpi-btl-openib.txt",
"ibv_fork_init fail", true,
opal_proc_local_get()->proc_hostname);
opal_process_info.nodename);
goto no_btls;
}
}
@ -2636,7 +2636,7 @@ btl_openib_component_init(int *num_btl_modules,
free(dev_sorted);
if (!found) {
opal_show_help("help-mpi-btl-openib.txt", "no devices right type",
true, opal_proc_local_get()->proc_hostname,
true, opal_process_info.nodename,
((BTL_OPENIB_DT_IB == mca_btl_openib_component.device_type) ?
"InfiniBand" :
(BTL_OPENIB_DT_IWARP == mca_btl_openib_component.device_type) ?
@ -2653,7 +2653,7 @@ btl_openib_component_init(int *num_btl_modules,
mca_btl_openib_component.warn_nonexistent_if) {
char *str = opal_argv_join(mca_btl_openib_component.if_list, ',');
opal_show_help("help-mpi-btl-openib.txt", "nonexistent port",
true, opal_proc_local_get()->proc_hostname,
true, opal_process_info.nodename,
((NULL != mca_btl_openib_component.if_include) ?
"in" : "ex"), str);
free(str);
@ -2665,7 +2665,7 @@ btl_openib_component_init(int *num_btl_modules,
if (num_devices_intentionally_ignored < num_devs) {
opal_show_help("help-mpi-btl-openib.txt",
"no active ports found", true,
opal_proc_local_get()->proc_hostname);
opal_process_info.nodename);
}
goto no_btls;
}
@ -3394,11 +3394,7 @@ error:
if (IBV_WC_RNR_RETRY_EXC_ERR == wc->status ||
IBV_WC_RETRY_EXC_ERR == wc->status) {
const char *peer_hostname;
if (endpoint->endpoint_proc->proc_opal && endpoint->endpoint_proc->proc_opal->proc_hostname) {
peer_hostname = endpoint->endpoint_proc->proc_opal->proc_hostname;
} else {
peer_hostname = "<unknown -- please run with mpi_keep_peer_hostnames=1>";
}
peer_hostname = opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal);
const char *device_name =
ibv_get_device_name(endpoint->qps[qp].qp->lcl_qp->context->device);
@ -3410,21 +3406,21 @@ error:
opal_show_help("help-mpi-btl-openib.txt",
"pp rnr retry exceeded",
true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
device_name,
peer_hostname);
} else {
opal_show_help("help-mpi-btl-openib.txt",
"srq rnr retry exceeded",
true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
device_name,
peer_hostname);
}
} else if (IBV_WC_RETRY_EXC_ERR == wc->status) {
opal_show_help("help-mpi-btl-openib.txt",
"pp retry exceeded", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
device_name, peer_hostname);
}
}

Просмотреть файл

@ -17,7 +17,7 @@
* Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2010-2011 IBM Corporation. All rights reserved.
* Copyright (c) 2010-2011 Oracle and/or its affiliates. All rights reserved
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
*
* $COPYRIGHT$
@ -38,6 +38,7 @@
#include "opal_stdint.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
#include "opal/class/ompi_free_list.h"
@ -507,8 +508,7 @@ static void cts_sent(mca_btl_base_module_t* btl,
/* Nothing to do/empty function (we can't pass in a NULL pointer
for the des_cbfunc) */
OPAL_OUTPUT((-1, "CTS send to %s completed",
(NULL == ep->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : ep->endpoint_proc->proc_opal->proc_hostname));
opal_get_proc_hostname(ep->endpoint_proc->proc_opal)));
}
/*
@ -523,8 +523,7 @@ void mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t *endpoint)
mca_btl_openib_control_header_t *ctl_hdr;
OPAL_OUTPUT((-1, "SENDING CTS to %s on qp index %d (QP num %d)",
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname,
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
mca_btl_openib_component.credits_qp,
endpoint->qps[mca_btl_openib_component.credits_qp].qp->lcl_qp->qp_num));
sc_frag = alloc_control_frag(endpoint->endpoint_btl);
@ -594,8 +593,7 @@ void mca_btl_openib_endpoint_cpc_complete(mca_btl_openib_endpoint_t *endpoint)
transport_type_ib_p = (IBV_TRANSPORT_IB == endpoint->endpoint_btl->device->ib_dev->transport_type);
#endif
OPAL_OUTPUT((-1, "cpc_complete to peer %s: is IB %d, initiatior %d, cts received: %d",
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname,
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
transport_type_ib_p,
endpoint->endpoint_initiator,
endpoint->endpoint_cts_received));
@ -608,15 +606,13 @@ void mca_btl_openib_endpoint_cpc_complete(mca_btl_openib_endpoint_t *endpoint)
mark us as connected */
if (endpoint->endpoint_cts_received) {
OPAL_OUTPUT((-1, "cpc_complete to %s -- already got CTS, so marking endpoint as complete",
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
mca_btl_openib_endpoint_connected(endpoint);
}
}
OPAL_OUTPUT((-1, "cpc_complete to %s -- done",
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
return;
}
@ -1054,7 +1050,7 @@ void *mca_btl_openib_endpoint_invoke_error(void *context)
if (NULL == btl || NULL == btl->error_cb) {
opal_show_help("help-mpi-btl-openib.txt",
"cannot raise btl error", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
__FILE__, __LINE__);
exit(1);
}

Просмотреть файл

@ -2,6 +2,7 @@
* Copyright (c) 2008 Chelsio, Inc. All rights reserved.
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
*
* Additional copyrights may follow
*
@ -21,6 +22,7 @@
#include "opal/util/argv.h"
#include "opal/util/if.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
#include "connect/connect.h"
@ -198,7 +200,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
NULL != temp[2]) {
opal_show_help("help-mpi-btl-openib.txt",
"invalid ipaddr_inexclude", true, "include",
opal_proc_local_get()->proc_hostname, list[i],
opal_process_info.nodename, list[i],
"Invalid specification (missing \"/\")");
if (NULL != temp) {
opal_argv_free(temp);
@ -209,7 +211,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
if (1 != inet_pton(ipaddr->sin_family, temp[0], &ipae)) {
opal_show_help("help-mpi-btl-openib.txt",
"invalid ipaddr_inexclude", true, "include",
opal_proc_local_get()->proc_hostname, list[i],
opal_process_info.nodename, list[i],
"Invalid specification (inet_pton() failed)");
opal_argv_free(temp);
continue;
@ -240,7 +242,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
NULL != temp[2]) {
opal_show_help("help-mpi-btl-openib.txt",
"invalid ipaddr_inexclude", true, "exclude",
opal_proc_local_get()->proc_hostname, list[i],
opal_process_info.nodename, list[i],
"Invalid specification (missing \"/\")");
if (NULL != temp) {
opal_argv_free(temp);
@ -251,7 +253,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
if (1 != inet_pton(ipaddr->sin_family, temp[0], &ipae)) {
opal_show_help("help-mpi-btl-openib.txt",
"invalid ipaddr_inexclude", true, "exclude",
opal_proc_local_get()->proc_hostname, list[i],
opal_process_info.nodename, list[i],
"Invalid specification (inet_pton() failed)");
opal_argv_free(temp);
continue;

Просмотреть файл

@ -18,6 +18,7 @@
* Copyright (c) 2013-2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -34,6 +35,8 @@
#include "opal/util/os_dirpath.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "btl_openib.h"
#include "btl_openib_mca.h"
#include "btl_openib_ini.h"
@ -621,13 +624,13 @@ int btl_openib_register_mca_params(void)
if (mca_btl_openib_component.cuda_want_gdr && !mca_btl_openib_component.cuda_have_gdr) {
opal_show_help("help-mpi-btl-openib.txt",
"CUDA_no_gdr_support", true,
opal_proc_local_get()->proc_hostname);
opal_process_info.nodename);
return OPAL_ERROR;
}
if (mca_btl_openib_component.cuda_want_gdr && !mca_btl_openib_component.driver_have_gdr) {
opal_show_help("help-mpi-btl-openib.txt",
"driver_no_gdr_support", true,
opal_proc_local_get()->proc_hostname);
opal_process_info.nodename);
return OPAL_ERROR;
}
#if OPAL_CUDA_GDR_SUPPORT
@ -733,7 +736,7 @@ int btl_openib_verify_mca_params (void)
if (1 == mca_btl_openib_component.want_fork_support) {
opal_show_help("help-mpi-btl-openib.txt",
"ibv_fork requested but not supported", true,
opal_proc_local_get()->proc_hostname);
opal_process_info.nodename);
return OPAL_ERR_BAD_PARAM;
}
#endif
@ -778,7 +781,7 @@ int btl_openib_verify_mca_params (void)
if(mca_btl_openib_component.buffer_alignment <= 1 ||
(mca_btl_openib_component.buffer_alignment & (mca_btl_openib_component.buffer_alignment - 1))) {
opal_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment",
true, mca_btl_openib_component.buffer_alignment, opal_proc_local_get()->proc_hostname, 64);
true, mca_btl_openib_component.buffer_alignment, opal_process_info.nodename, 64);
mca_btl_openib_component.buffer_alignment = 64;
}
@ -801,7 +804,7 @@ int btl_openib_verify_mca_params (void)
mca_btl_openib_component.driver_have_gdr) {
if (1 == mca_btl_openib_component.want_fork_support) {
opal_show_help("help-mpi-btl-openib.txt", "no_fork_with_gdr",
true, opal_proc_local_get()->proc_hostname);
true, opal_process_info.nodename);
return OPAL_ERR_BAD_PARAM;
}
if (-1 == mca_btl_openib_component.want_fork_support) {

Просмотреть файл

@ -4,7 +4,7 @@
* Copyright (c) 2007 Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
*
* $COPYRIGHT$
*
@ -27,6 +27,7 @@
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
/*
@ -127,7 +128,7 @@ int opal_btl_openib_connect_base_register(void)
if (NULL == all[i]) {
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"cpc name not found", true,
"include", opal_proc_local_get()->proc_hostname,
"include", opal_process_info.nodename,
"include", btl_openib_cpc_include, temp[j],
all_cpc_names);
opal_argv_free(temp);
@ -153,7 +154,7 @@ int opal_btl_openib_connect_base_register(void)
if (NULL == all[i]) {
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"cpc name not found", true,
"exclude", opal_proc_local_get()->proc_hostname,
"exclude", opal_process_info.nodename,
"exclude", btl_openib_cpc_exclude, temp[j],
all_cpc_names);
opal_argv_free(temp);
@ -299,7 +300,7 @@ int opal_btl_openib_connect_base_select_for_local_port(mca_btl_openib_module_t *
if (0 == cpc_index) {
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"no cpcs for port", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
ibv_get_device_name(btl->device->ib_dev),
btl->port_num, msg);
free(cpcs);
@ -454,8 +455,7 @@ int opal_btl_openib_connect_base_alloc_cts(mca_btl_base_endpoint_t *endpoint)
mca_btl_openib_component.credits_qp;
endpoint->endpoint_cts_frag.super.endpoint = endpoint;
OPAL_OUTPUT((-1, "Got a CTS frag for peer %s, addr %p, length %d, lkey %d",
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname,
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
(void*) endpoint->endpoint_cts_frag.super.sg_entry.addr,
endpoint->endpoint_cts_frag.super.sg_entry.length,
endpoint->endpoint_cts_frag.super.sg_entry.lkey));

Просмотреть файл

@ -6,7 +6,7 @@
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
*
* $COPYRIGHT$
*
@ -50,6 +50,7 @@
#include "opal/util/output.h"
#include "opal/util/error.h"
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "btl_openib_fd.h"
#include "btl_openib_proc.h"
@ -532,7 +533,7 @@ static int rdmacm_setup_qp(rdmacm_contents_t *contents,
endpoint->qps[qpnum].ib_inline_max = attr.cap.max_inline_data;
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"inline truncated", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
ibv_get_device_name(contents->openib_btl->device->ib_dev),
contents->openib_btl->port_num,
req_inline, attr.cap.max_inline_data);
@ -888,8 +889,7 @@ static int rdmacm_module_start_connect(opal_btl_openib_connect_base_module_t *cp
(void*) endpoint,
(void*) endpoint->endpoint_local_cpc,
endpoint->endpoint_initiator ? "am" : "am NOT",
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
/* If we're the initiator, then open all the QPs */
if (contents->endpoint->endpoint_initiator) {
@ -942,14 +942,14 @@ static void *show_help_cant_find_endpoint(void *context)
msg = stringify(c->peer_ip_addr);
opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
"could not find matching endpoint", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
c->device_name,
c->peer_tcp_port);
free(msg);
} else {
opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
"could not find matching endpoint", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
"<unknown>", "<unknown>", -1);
}
free(context);
@ -1032,8 +1032,7 @@ static int handle_connect_request(struct rdma_cm_event *event)
(void*) endpoint,
(void*) endpoint->endpoint_local_cpc,
endpoint->endpoint_initiator ? "am" : "am NOT",
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
if (endpoint->endpoint_initiator) {
reject_reason_t reason = REJECT_WRONG_DIRECTION;
@ -1094,8 +1093,7 @@ static int handle_connect_request(struct rdma_cm_event *event)
}
OPAL_OUTPUT((-1, "Posted CTS receiver buffer (%p) for peer %s, qp index %d (QP num %d), WR ID %p, SG addr %p, len %d, lkey %d",
(void*)((uintptr_t*) wr->sg_list[0].addr),
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname,
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
qpnum,
endpoint->qps[qpnum].qp->lcl_qp->qp_num,
(void*)((uintptr_t*) wr->wr_id),
@ -1286,8 +1284,7 @@ static void *local_endpoint_cpc_complete(void *context)
mca_btl_openib_endpoint_t *endpoint = (mca_btl_openib_endpoint_t *)context;
OPAL_OUTPUT((-1, "MAIN local_endpoint_cpc_complete to %s",
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
mca_btl_openib_endpoint_cpc_complete(endpoint);
return NULL;
@ -1307,8 +1304,7 @@ static int rdmacm_connect_endpoint(id_context_t *context,
if (contents->server) {
endpoint = context->endpoint;
OPAL_OUTPUT((-1, "SERVICE Server CPC complete to %s",
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
} else {
endpoint = contents->endpoint;
endpoint->rem_info.rem_index =
@ -1323,8 +1319,7 @@ static int rdmacm_connect_endpoint(id_context_t *context,
contents->on_client_list = true;
}
OPAL_OUTPUT((-1, "SERVICE Client CPC complete to %s",
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
}
if (NULL == endpoint) {
BTL_ERROR(("Can't find endpoint"));
@ -1337,11 +1332,9 @@ static int rdmacm_connect_endpoint(id_context_t *context,
connected */
if (++data->rdmacm_counter < mca_btl_openib_component.num_qps) {
BTL_VERBOSE(("%s to peer %s, count == %d", contents->server?"server":"client",
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname, data->rdmacm_counter));
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), data->rdmacm_counter));
OPAL_OUTPUT((-1, "%s to peer %s, count == %d", contents->server?"server":"client",
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname, data->rdmacm_counter));
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), data->rdmacm_counter));
return OPAL_SUCCESS;
}
@ -1578,8 +1571,7 @@ static int finish_connect(id_context_t *context)
OPAL_OUTPUT((-1, "Posted initiator CTS buffer (%p, length %d) for peer %s, qp index %d (QP num %d)",
(void*)((uintptr_t*) wr->sg_list[0].addr),
wr->sg_list[0].length,
(NULL == contents->endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : contents->endpoint->endpoint_proc->proc_opal->proc_hostname,
opal_get_proc_hostname(contents->endpoint->endpoint_proc->proc_opal),
context->qpnum,
contents->endpoint->qps[context->qpnum].qp->lcl_qp->qp_num));
}
@ -1651,8 +1643,7 @@ static int finish_connect(id_context_t *context)
(void*) contents->endpoint,
(void*) contents->endpoint->endpoint_local_cpc,
contents->endpoint->endpoint_initiator ? "am" : "am NOT",
(NULL == contents->endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : contents->endpoint->endpoint_proc->proc_opal->proc_hostname));
opal_get_proc_hostname(contents->endpoint->endpoint_proc->proc_opal)));
rc = rdma_connect(context->id, &conn_param);
if (0 != rc) {
BTL_ERROR(("rdma_connect Failed with %d", rc));
@ -1680,7 +1671,7 @@ static void *show_help_rdmacm_event_error(void *c)
if (RDMA_CM_EVENT_DEVICE_REMOVAL == event->event) {
opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
"rdma cm device removal", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
ibv_get_device_name(event->id->verbs->device));
} else {
const char *device = "Unknown";
@ -1691,11 +1682,10 @@ static void *show_help_rdmacm_event_error(void *c)
}
opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
"rdma cm event error", true,
opal_proc_local_get()->proc_hostname,
opal_process_info.nodename,
device,
rdma_event_str(event->event),
(NULL == context->endpoint->endpoint_proc->proc_opal->proc_hostname) ?
"unknown" : context->endpoint->endpoint_proc->proc_opal->proc_hostname);
opal_get_proc_hostname(context->endpoint->endpoint_proc->proc_opal));
}
return NULL;

Просмотреть файл

@ -1,9 +1,10 @@
/*
* Copyright (c) 2011 Mellanox Technologies. All rights reserved.
*
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -15,6 +16,8 @@
#include "opal/util/show_help.h"
#include "opal/util/sys_limits.h"
#include "opal/util/proc.h"
#include "connect/btl_openib_connect_sl.h"
#include <infiniband/iba/ib_types.h>
@ -113,7 +116,7 @@ static int init_ud_qp(struct ibv_context *context_arg,
if (NULL == cache->cq) {
BTL_ERROR(("error creating cq, errno says %s", strerror(errno)));
opal_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
true, opal_proc_local_get()->proc_hostname,
true, opal_process_info.nodename,
__FILE__, __LINE__, "ibv_create_cq",
strerror(errno), errno,
ibv_get_device_name(context_arg->device));

Просмотреть файл

@ -7,6 +7,7 @@
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -59,6 +60,7 @@
#include <pthread.h>
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "opal/util/output.h"
#include "opal/util/error.h"
#include "opal/util/alfg.h"
@ -1246,7 +1248,7 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
if (NULL == lcl_ep->qps[qp].qp->lcl_qp) {
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"ibv_create_qp failed", true, opal_proc_local_get()->proc_hostname,
"ibv_create_qp failed", true, opal_process_info.nodename,
ibv_get_device_name(m->btl->device->ib_dev),
"Reliable connected (RC)");
@ -1256,7 +1258,7 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
if (init_attr.cap.max_inline_data < req_inline) {
lcl_ep->qps[qp].ib_inline_max = init_attr.cap.max_inline_data;
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"inline truncated", true, opal_proc_local_get()->proc_hostname,
"inline truncated", true, opal_process_info.nodename,
ibv_get_device_name(m->btl->device->ib_dev),
m->btl->port_num, req_inline,
init_attr.cap.max_inline_data);

Просмотреть файл

@ -16,6 +16,7 @@
* All rights reserved.
* Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2010-2012 IBM Corporation. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -513,28 +514,28 @@ set_uniq_paths_for_init_rndv(mca_btl_sm_component_t *comp_ptr)
if (asprintf(&comp_ptr->sm_mpool_ctl_file_name,
"%s"OPAL_PATH_SEP"shared_mem_pool.%s",
opal_process_info.job_session_dir,
opal_proc_local_get()->proc_hostname) < 0) {
opal_process_info.nodename) < 0) {
/* rc set */
goto out;
}
if (asprintf(&comp_ptr->sm_mpool_rndv_file_name,
"%s"OPAL_PATH_SEP"shared_mem_pool_rndv.%s",
opal_process_info.job_session_dir,
opal_proc_local_get()->proc_hostname) < 0) {
opal_process_info.nodename) < 0) {
/* rc set */
goto out;
}
if (asprintf(&comp_ptr->sm_ctl_file_name,
"%s"OPAL_PATH_SEP"shared_mem_btl_module.%s",
opal_process_info.job_session_dir,
opal_proc_local_get()->proc_hostname) < 0) {
opal_process_info.nodename) < 0) {
/* rc set */
goto out;
}
if (asprintf(&comp_ptr->sm_rndv_file_name,
"%s"OPAL_PATH_SEP"shared_mem_btl_rndv.%s",
opal_process_info.job_session_dir,
opal_proc_local_get()->proc_hostname) < 0) {
opal_process_info.nodename) < 0) {
/* rc set */
goto out;
}
@ -806,10 +807,10 @@ mca_btl_sm_component_init(int *num_btls,
sbuf.st_mode = 0;
}
opal_show_help("help-mpi-btl-sm.txt", "knem permission denied",
true, opal_proc_local_get()->proc_hostname, sbuf.st_mode);
true, opal_process_info.nodename, sbuf.st_mode);
} else {
opal_show_help("help-mpi-btl-sm.txt", "knem fail open",
true, opal_proc_local_get()->proc_hostname, errno,
true, opal_process_info.nodename, errno,
strerror(errno));
}
goto no_knem;
@ -821,13 +822,13 @@ mca_btl_sm_component_init(int *num_btls,
&mca_btl_sm_component.knem_info);
if (rc < 0) {
opal_show_help("help-mpi-btl-sm.txt", "knem get ABI fail",
true, opal_proc_local_get()->proc_hostname, errno,
true, opal_process_info.nodename, errno,
strerror(errno));
goto no_knem;
}
if (KNEM_ABI_VERSION != mca_btl_sm_component.knem_info.abi) {
opal_show_help("help-mpi-btl-sm.txt", "knem ABI mismatch",
true, opal_proc_local_get()->proc_hostname, KNEM_ABI_VERSION,
true, opal_process_info.nodename, KNEM_ABI_VERSION,
mca_btl_sm_component.knem_info.abi);
goto no_knem;
}
@ -849,7 +850,7 @@ mca_btl_sm_component_init(int *num_btls,
KNEM_STATUS_ARRAY_FILE_OFFSET);
if (MAP_FAILED == mca_btl_sm.knem_status_array) {
opal_show_help("help-mpi-btl-sm.txt", "knem mmap fail",
true, opal_proc_local_get()->proc_hostname, errno,
true, opal_process_info.nodename, errno,
strerror(errno));
goto no_knem;
}

Просмотреть файл

@ -15,6 +15,7 @@
* Copyright (c) 2010-2014 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -442,28 +443,28 @@ set_uniq_paths_for_init_rndv(mca_btl_smcuda_component_t *comp_ptr)
if (asprintf(&comp_ptr->sm_mpool_ctl_file_name,
"%s"OPAL_PATH_SEP"shared_mem_cuda_pool.%s",
opal_process_info.job_session_dir,
opal_proc_local_get()->proc_hostname) < 0) {
opal_process_info.nodename) < 0) {
/* rc set */
goto out;
}
if (asprintf(&comp_ptr->sm_mpool_rndv_file_name,
"%s"OPAL_PATH_SEP"shared_mem_cuda_pool_rndv.%s",
opal_process_info.job_session_dir,
opal_proc_local_get()->proc_hostname) < 0) {
opal_process_info.nodename) < 0) {
/* rc set */
goto out;
}
if (asprintf(&comp_ptr->sm_ctl_file_name,
"%s"OPAL_PATH_SEP"shared_mem_cuda_btl_module.%s",
opal_process_info.job_session_dir,
opal_proc_local_get()->proc_hostname) < 0) {
opal_process_info.nodename) < 0) {
/* rc set */
goto out;
}
if (asprintf(&comp_ptr->sm_rndv_file_name,
"%s"OPAL_PATH_SEP"shared_mem_cuda_btl_rndv.%s",
opal_process_info.job_session_dir,
opal_proc_local_get()->proc_hostname) < 0) {
opal_process_info.nodename) < 0) {
/* rc set */
goto out;
}

Просмотреть файл

@ -58,6 +58,7 @@
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "opal/util/net.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
#include "opal/constants.h"
#include "opal/mca/btl/btl.h"
@ -193,14 +194,14 @@ static int mca_btl_tcp_component_verify(void)
{
if( mca_btl_tcp_component.tcp_port_min > USHRT_MAX ) {
opal_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
true, "v4", opal_proc_local_get()->proc_hostname,
true, "v4", opal_process_info.nodename,
mca_btl_tcp_component.tcp_port_min );
mca_btl_tcp_component.tcp_port_min = 1024;
}
#if OPAL_ENABLE_IPV6
if( mca_btl_tcp_component.tcp6_port_min > USHRT_MAX ) {
opal_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
true, "v6", opal_proc_local_get()->proc_hostname,
true, "v6", opal_process_info.nodename,
mca_btl_tcp_component.tcp6_port_min );
mca_btl_tcp_component.tcp6_port_min = 1024;
}
@ -479,7 +480,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd)
str = strchr(argv[i], '/');
if (NULL == str) {
opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
true, name, opal_proc_local_get()->proc_hostname,
true, name, opal_process_info.nodename,
tmp, "Invalid specification (missing \"/\")");
free(argv[i]);
free(tmp);
@ -496,7 +497,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd)
if (1 != ret) {
opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
true, name, opal_proc_local_get()->proc_hostname, tmp,
true, name, opal_process_info.nodename, tmp,
"Invalid specification (inet_pton() failed)");
free(tmp);
continue;
@ -524,7 +525,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd)
if (if_index < 0) {
if (reqd || mca_btl_tcp_component.report_all_unfound_interfaces) {
opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
true, name, opal_proc_local_get()->proc_hostname, tmp,
true, name, opal_process_info.nodename, tmp,
"Did not find interface matching this subnet");
}
free(tmp);

Просмотреть файл

@ -53,6 +53,7 @@
#include "opal/mca/event/event.h"
#include "opal/util/net.h"
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "opal/mca/btl/base/btl_base_error.h"
#include "btl_tcp.h"
@ -505,7 +506,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
return OPAL_ERROR;
}
opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail",
true, opal_proc_local_get()->proc_hostname,
true, opal_process_info.nodename,
getpid(),
"did not receive entire connect ACK from peer");
return OPAL_ERR_UNREACH;

Просмотреть файл

@ -35,6 +35,7 @@
#include "opal/util/argv.h"
#include "opal/util/if.h"
#include "opal/util/net.h"
#include "opal/util/proc.h"
#include "btl_tcp.h"
#include "btl_tcp_proc.h"
@ -379,7 +380,7 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
int rc, *a = NULL;
size_t i, j;
if (NULL == (proc_hostname = btl_proc->proc_opal->proc_hostname)) {
if (NULL == (proc_hostname = opal_get_proc_hostname(btl_proc->proc_opal))) {
return OPAL_ERR_UNREACH;
}

Просмотреть файл

@ -1,5 +1,6 @@
/*
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -13,6 +14,7 @@
#include "opal_config.h"
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "btl_usnic_util.h"
#include "btl_usnic_proc.h"
@ -284,7 +286,7 @@ opal_btl_usnic_check_connectivity(opal_btl_usnic_module_t *module,
endpoint->endpoint_remote_addr.cidrmask,
endpoint->endpoint_remote_addr.connectivity_udp_port,
endpoint->endpoint_remote_addr.mac,
endpoint->endpoint_proc->proc_opal->proc_hostname,
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
endpoint->endpoint_remote_addr.mtu);
endpoint->endpoint_connectivity_checked = true;
}

Просмотреть файл

@ -1,5 +1,6 @@
/*
* Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -14,6 +15,7 @@
#include <unistd.h>
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "btl_usnic.h"
#include "btl_usnic_util.h"
@ -205,11 +207,7 @@ static void map_output_procs(FILE *fp)
/* Loop over and print the sorted module device information */
for (i = 0; i < num_procs; ++i) {
fprintf(fp, "peer=%d,", opal_process_name_vpid(procs[i]->proc_opal->proc_name));
if (procs[i]->proc_opal->proc_hostname) {
fprintf(fp, "hostname=%s,",
procs[i]->proc_opal->proc_hostname);
}
fprintf(fp, "hostname=%s,", opal_get_proc_hostname(procs[i]->proc_opal));
map_output_endpoints(fp, procs[i]);
}
@ -235,7 +233,7 @@ void opal_btl_usnic_connectivity_map(void)
rank>.txt */
asprintf(&filename, "%s-%s.pid%d.job%d.mcwrank%d.txt",
mca_btl_usnic_component.connectivity_map_prefix,
opal_process_info.nodename,
opal_get_proc_hostname(opal_proc_local_get()),
getpid(),
opal_process_name_jobid(opal_proc_local_get()->proc_name),
opal_process_name_vpid(opal_proc_local_get()->proc_name));

Просмотреть файл

@ -15,6 +15,7 @@
* Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -36,6 +37,7 @@
#include "opal/datatype/opal_convertor.h"
#include "opal/include/opal_stdint.h"
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/btl_base_error.h"
@ -195,7 +197,7 @@ static void add_procs_warn_ah_fail(opal_btl_usnic_module_t *module,
local,
module->if_name,
ibv_get_device_name(module->device),
endpoint->endpoint_proc->proc_opal->proc_hostname,
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
remote);
}

Просмотреть файл

@ -29,6 +29,7 @@
#include "opal/util/show_help.h"
#include "opal/constants.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/util/proc.h"
#include "btl_usnic.h"
#include "btl_usnic_proc.h"
@ -248,7 +249,7 @@ static int create_proc(opal_proc_t *opal_proc,
"transport mismatch",
true,
opal_process_info.nodename,
proc->proc_opal->proc_hostname);
opal_get_proc_hostname(proc->proc_opal));
OBJ_RELEASE(proc);
return OPAL_ERR_BAD_PARAM;
}
@ -628,8 +629,7 @@ static int match_modex(opal_btl_usnic_module_t *module,
ibv_get_device_name(module->device),
module->if_name,
module->if_mtu,
(NULL == proc->proc_opal->proc_hostname) ?
"unknown" : proc->proc_opal->proc_hostname,
opal_get_proc_hostname(proc->proc_opal),
proc->proc_modex[*index_out].mtu);
*index_out = -1;
return OPAL_ERR_UNREACH;

Просмотреть файл

@ -255,6 +255,7 @@ opal_init_util(int* pargc, char*** pargv)
{
int ret;
char *error = NULL;
char hostname[512];
if( ++opal_util_initialized != 1 ) {
if( opal_util_initialized < 1 ) {
@ -263,6 +264,13 @@ opal_init_util(int* pargc, char*** pargv)
return OPAL_SUCCESS;
}
/* set the nodename right away so anyone who needs it has it. Note
* that we don't bother with fqdn and prefix issues here - we let
* the RTE later replace this with a modified name if the user
* requests it */
gethostname(hostname, 512);
opal_process_info.nodename = strdup(hostname);
/* initialize the memory allocator */
opal_malloc_init();

Просмотреть файл

@ -3,6 +3,7 @@
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2013 Inria. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -17,9 +18,9 @@
#include "opal/mca/pmix/pmix.h"
opal_process_info_t opal_process_info = {
.nodename = "not yet named",
.job_session_dir = "not yet defined",
.proc_session_dir = "not yet defined",
.nodename = NULL,
.job_session_dir = NULL,
.proc_session_dir = NULL,
.num_local_peers = 0, /* there is nobody else but me */
.my_local_rank = 0, /* I'm the only process around here */
#if OPAL_HAVE_HWLOC
@ -34,7 +35,7 @@ static opal_proc_t opal_local_proc = {
0,
0,
NULL,
"localhost - unnamed"
NULL
};
static opal_proc_t* opal_proc_my_name = &opal_local_proc;
@ -120,3 +121,34 @@ char* (*opal_process_name_print)(const opal_process_name_t) = opal_process_name_
uint32_t (*opal_process_name_vpid)(const opal_process_name_t) = opal_process_name_vpid_should_never_be_called;
uint32_t (*opal_process_name_jobid)(const opal_process_name_t) = opal_process_name_vpid_should_never_be_called;
/*
 * Return a printable hostname for the given proc without ever handing
 * back a NULL pointer, so the result can be passed straight to a "%s"
 * conversion in debug/error output.
 *
 * proc - the process whose hostname is wanted; may be NULL.
 *
 * Returns:
 *   - "unknown" if proc is NULL, if the local nodename has not been
 *     set yet, or if the hostname lookup fails;
 *   - opal_process_info.nodename if proc is the local proc;
 *   - proc->proc_hostname otherwise (looked up on first use via
 *     OPAL_MODEX_RECV_VALUE when it is not already set).
 *
 * The caller must NOT free the returned string.
 */
char* opal_get_proc_hostname(const opal_proc_t *proc)
{
    int ret;

    /* if the proc is NULL, then we can't know */
    if (NULL == proc) {
        return "unknown";
    }

    /* if it is my own hostname we are after, then just hand back
     * the value in opal_process_info. That field is statically
     * initialized to NULL and only filled in during init, so guard
     * against handing a NULL pointer back to a caller that is going
     * to print it */
    if (proc == opal_proc_my_name) {
        if (NULL == opal_process_info.nodename) {
            return "unknown";
        }
        return opal_process_info.nodename;
    }

    /* see if we already have the data - if so, pass it back */
    if (NULL != proc->proc_hostname) {
        return proc->proc_hostname;
    }

    /* if we don't already have it, then try to get it. The macro is
     * handed the address of proc_hostname (const is cast away on
     * purpose) - presumably so the retrieved string is stored on the
     * proc for subsequent calls; verify against the macro definition */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_HOSTNAME, proc,
                          (char**)&(proc->proc_hostname), OPAL_STRING);
    if (OPAL_SUCCESS != ret) {
        OPAL_ERROR_LOG(ret);
        /* return something so the caller doesn't segfault */
        return "unknown";
    }

    /* user is not allowed to release the data */
    return proc->proc_hostname;
}

Просмотреть файл

@ -108,4 +108,10 @@ OPAL_DECLSPEC extern uint32_t (*opal_process_name_jobid)(const opal_process_name
#define OPAL_PROC_MY_HOSTNAME (opal_proc_local_get()->proc_hostname)
#define OPAL_NAME_INVALID 0xffffffffffffffff
/* provide a safe way to retrieve the hostname of a proc, including
* our own. This is to be used by all BTLs so we don't retrieve hostnames
* unless needed. The returned value MUST NOT be free'd as it is
* owned by the proc_t */
OPAL_DECLSPEC char* opal_get_proc_hostname(const opal_proc_t *proc);
#endif /* OPAL_PROC_H */

Просмотреть файл

@ -170,6 +170,15 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
goto error;
}
/* we may have modified the local nodename according to
* request to retain/strip the FQDN and prefix, so update
* it here. The OPAL layer will strdup the hostname, so
* we have to free it first to avoid a memory leak */
if (NULL != opal_process_info.nodename) {
free(opal_process_info.nodename);
}
opal_process_info.nodename = orte_process_info.nodename;
/* setup the dstore framework */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_dstore_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
@ -232,6 +241,18 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
goto error;
}
/* set the remaining opal_process_info fields. Note that
* the OPAL layer will have initialized these to NULL, and
* anyone between us would not have strdup'd the string, so
* we cannot free it here */
opal_process_info.job_session_dir = orte_process_info.job_session_dir;
opal_process_info.proc_session_dir = orte_process_info.proc_session_dir;
opal_process_info.num_local_peers = (int32_t)orte_process_info.num_local_peers;
opal_process_info.my_local_rank = (int32_t)orte_process_info.my_local_rank;
#if OPAL_HAVE_HWLOC
opal_process_info.cpuset = orte_process_info.cpuset;
#endif /* OPAL_HAVE_HWLOC */
#if OPAL_ENABLE_TIMING
opal_timing_set_jobid(ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
#endif