Clean up some cruft resulting from the move of the BTLs to OPAL. We had created the ability to delay modex operations, which included a need to delay retrieving hostname info for remote procs. This allowed us to avoid retrieving the modex info until the first message unless required - the hostname is generally only needed for debug and error messages.
Properly set up the opal_process_info structure early in the initialization procedure. Define the local hostname right at the beginning of opal_init so all parts of OPAL can use it. Overlay that during orte_init, as the user may choose to remove the FQDN and strip prefixes during that time. Set up the job_session_dir and other such info immediately when it becomes available during orte_init.
Этот коммит содержится в:
родитель
b44a244fbc
Коммит
fd6a044b7f
@ -476,7 +476,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
}
|
||||
/* no select is required as this is a static framework */
|
||||
|
||||
/* Setup RTE - note that we are an MPI process */
|
||||
/* Setup RTE */
|
||||
if (OMPI_SUCCESS != (ret = ompi_rte_init(NULL, NULL))) {
|
||||
error = "ompi_mpi_init: ompi_rte_init failed";
|
||||
goto error;
|
||||
@ -498,15 +498,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
}
|
||||
#endif
|
||||
|
||||
opal_process_info.nodename = ompi_process_info.nodename;
|
||||
opal_process_info.job_session_dir = ompi_process_info.job_session_dir;
|
||||
opal_process_info.proc_session_dir = ompi_process_info.proc_session_dir;
|
||||
opal_process_info.num_local_peers = (int32_t)ompi_process_info.num_local_peers;
|
||||
opal_process_info.my_local_rank = (int32_t)ompi_process_info.my_local_rank;
|
||||
#if OPAL_HAVE_HWLOC
|
||||
opal_process_info.cpuset = ompi_process_info.cpuset;
|
||||
#endif /* OPAL_HAVE_HWLOC */
|
||||
|
||||
/* Register the default errhandler callback - RTE will ignore if it
|
||||
* doesn't support this capability
|
||||
*/
|
||||
|
@ -12,6 +12,7 @@
|
||||
* Copyright (c) 2006-2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -25,6 +26,7 @@
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "base.h"
|
||||
#include "btl_base_error.h"
|
||||
@ -64,7 +66,7 @@ void mca_btl_base_error_no_nics(const char* transport,
|
||||
asprintf(&procid, "%s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
|
||||
opal_show_help("help-mpi-btl-base.txt", "btl:no-nics",
|
||||
true, procid, transport, opal_proc_local_get()->proc_hostname,
|
||||
true, procid, transport, opal_process_info.nodename,
|
||||
nic_name);
|
||||
free(procid);
|
||||
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -36,53 +36,54 @@ OPAL_DECLSPEC extern int mca_btl_base_verbose;
|
||||
OPAL_DECLSPEC extern int mca_btl_base_err(const char*, ...) __opal_attribute_format__(__printf__, 1, 2);
|
||||
OPAL_DECLSPEC extern int mca_btl_base_out(const char*, ...) __opal_attribute_format__(__printf__, 1, 2);
|
||||
|
||||
#define BTL_OUTPUT(args) \
|
||||
do { \
|
||||
mca_btl_base_out("[%s]%s[%s:%d:%s] ", \
|
||||
opal_proc_local_get()->proc_hostname, \
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__); \
|
||||
mca_btl_base_out args; \
|
||||
mca_btl_base_out("\n"); \
|
||||
} while(0);
|
||||
#define BTL_OUTPUT(args) \
|
||||
do { \
|
||||
mca_btl_base_out("[%s]%s[%s:%d:%s] ", \
|
||||
opal_process_info.nodename, \
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__); \
|
||||
mca_btl_base_out args; \
|
||||
mca_btl_base_out("\n"); \
|
||||
} while(0);
|
||||
|
||||
|
||||
#define BTL_ERROR(args) \
|
||||
do { \
|
||||
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
|
||||
opal_proc_local_get()->proc_hostname, \
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__); \
|
||||
mca_btl_base_err args; \
|
||||
mca_btl_base_err("\n"); \
|
||||
} while(0);
|
||||
#define BTL_ERROR(args) \
|
||||
do { \
|
||||
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
|
||||
opal_process_info.nodename, \
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__); \
|
||||
mca_btl_base_err args; \
|
||||
mca_btl_base_err("\n"); \
|
||||
} while(0);
|
||||
|
||||
#define BTL_PEER_ERROR(proc, args) \
|
||||
do { \
|
||||
mca_btl_base_err("%s[%s:%d:%s] from %s ", \
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__, \
|
||||
opal_proc_local_get()->proc_hostname); \
|
||||
if(proc) { \
|
||||
mca_btl_base_err("to: %s ", proc->proc_hostname); \
|
||||
} \
|
||||
mca_btl_base_err args; \
|
||||
mca_btl_base_err("\n"); \
|
||||
} while(0);
|
||||
#define BTL_PEER_ERROR(proc, args) \
|
||||
do { \
|
||||
mca_btl_base_err("%s[%s:%d:%s] from %s ", \
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__, \
|
||||
opal_process_info.nodename); \
|
||||
if (proc) { \
|
||||
mca_btl_base_err("to: %s ", \
|
||||
opal_get_proc_hostname(proc)); \
|
||||
} \
|
||||
mca_btl_base_err args; \
|
||||
mca_btl_base_err("\n"); \
|
||||
} while(0);
|
||||
|
||||
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
#define BTL_VERBOSE(args) \
|
||||
do { \
|
||||
if(mca_btl_base_verbose > 0) { \
|
||||
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
|
||||
opal_proc_local_get()->proc_hostname, \
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__); \
|
||||
mca_btl_base_err args; \
|
||||
mca_btl_base_err("\n"); \
|
||||
} \
|
||||
} while(0);
|
||||
#define BTL_VERBOSE(args) \
|
||||
do { \
|
||||
if(mca_btl_base_verbose > 0) { \
|
||||
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
|
||||
opal_process_info.nodename, \
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__); \
|
||||
mca_btl_base_err args; \
|
||||
mca_btl_base_err("\n"); \
|
||||
} \
|
||||
} while(0);
|
||||
#else
|
||||
#define BTL_VERBOSE(args)
|
||||
#endif
|
||||
|
@ -17,7 +17,7 @@
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -34,6 +34,7 @@
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/arch.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/include/opal_stdint.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/mca/btl/btl.h"
|
||||
@ -142,13 +143,13 @@ void mca_btl_openib_show_init_error(const char *file, int line,
|
||||
}
|
||||
|
||||
opal_show_help("help-mpi-btl-openib.txt", "init-fail-no-mem",
|
||||
true, opal_proc_local_get()->proc_hostname,
|
||||
true, opal_process_info.nodename,
|
||||
file, line, func, dev, str_limit);
|
||||
|
||||
if (NULL != str_limit) free(str_limit);
|
||||
} else {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
|
||||
true, opal_proc_local_get()->proc_hostname,
|
||||
true, opal_process_info.nodename,
|
||||
file, line, func, strerror(errno), errno, dev);
|
||||
}
|
||||
}
|
||||
@ -473,13 +474,12 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
|
||||
if(mca_btl_openib_get_transport_type(openib_btl) != endpoint->rem_info.rem_transport_type) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"conflicting transport types", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
(openib_btl->device->ib_dev_attr).vendor_id,
|
||||
(openib_btl->device->ib_dev_attr).vendor_part_id,
|
||||
mca_btl_openib_transport_name_strings[mca_btl_openib_get_transport_type(openib_btl)],
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname,
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
|
||||
endpoint->rem_info.rem_vendor_id,
|
||||
endpoint->rem_info.rem_vendor_part_id,
|
||||
mca_btl_openib_transport_name_strings[endpoint->rem_info.rem_transport_type]);
|
||||
@ -495,7 +495,7 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
|
||||
OPAL_ERR_NOT_FOUND != ret) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"error in device init", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
ibv_get_device_name(openib_btl->device->ib_dev));
|
||||
return ret;
|
||||
}
|
||||
@ -539,13 +539,12 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
|
||||
recv_qps)) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"unsupported queues configuration", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
(openib_btl->device->ib_dev_attr).vendor_id,
|
||||
(openib_btl->device->ib_dev_attr).vendor_part_id,
|
||||
mca_btl_openib_component.receive_queues,
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown": endpoint->endpoint_proc->proc_opal->proc_hostname,
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
|
||||
endpoint->rem_info.rem_vendor_id,
|
||||
endpoint->rem_info.rem_vendor_part_id,
|
||||
recv_qps);
|
||||
@ -562,13 +561,12 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
|
||||
values.receive_queues)) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"unsupported queues configuration", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
(openib_btl->device->ib_dev_attr).vendor_id,
|
||||
(openib_btl->device->ib_dev_attr).vendor_part_id,
|
||||
mca_btl_openib_component.receive_queues,
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown": endpoint->endpoint_proc->proc_opal->proc_hostname,
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
|
||||
endpoint->rem_info.rem_vendor_id,
|
||||
endpoint->rem_info.rem_vendor_part_id,
|
||||
values.receive_queues);
|
||||
@ -679,7 +677,7 @@ static uint64_t calculate_max_reg (void)
|
||||
action = "Your MPI job will continue, but may be behave poorly and/or hang.";
|
||||
}
|
||||
opal_show_help("help-mpi-btl-openib.txt", "reg mem limit low", true,
|
||||
opal_proc_local_get()->proc_hostname, (unsigned long)(max_reg >> 20),
|
||||
opal_process_info.nodename, (unsigned long)(max_reg >> 20),
|
||||
(unsigned long)(mem_total >> 20), action);
|
||||
return 0; /* signal that we can't have enough memory */
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -24,6 +25,8 @@
|
||||
#include <errno.h>
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "opal/mca/btl/base/base.h"
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_mca.h"
|
||||
@ -405,14 +408,14 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
|
||||
case IBV_EVENT_PATH_MIG_ERR:
|
||||
case IBV_EVENT_SRQ_ERR:
|
||||
opal_show_help("help-mpi-btl-openib.txt", "of error event",
|
||||
true,opal_proc_local_get()->proc_hostname, (int)getpid(),
|
||||
true,opal_process_info.nodename, (int)getpid(),
|
||||
event_type,
|
||||
openib_event_to_str((enum ibv_event_type)event_type),
|
||||
xrc_event ? "true" : "false");
|
||||
break;
|
||||
case IBV_EVENT_PORT_ERR:
|
||||
opal_show_help("help-mpi-btl-openib.txt", "of error event",
|
||||
true,opal_proc_local_get()->proc_hostname, (int)getpid(),
|
||||
true,opal_process_info.nodename, (int)getpid(),
|
||||
event_type,
|
||||
openib_event_to_str((enum ibv_event_type)event_type),
|
||||
xrc_event ? "true" : "false");
|
||||
@ -442,7 +445,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
|
||||
break;
|
||||
default:
|
||||
opal_show_help("help-mpi-btl-openib.txt", "of unknown event",
|
||||
true,opal_proc_local_get()->proc_hostname, (int)getpid(),
|
||||
true,opal_process_info.nodename, (int)getpid(),
|
||||
event_type, xrc_event ? "true" : "false");
|
||||
}
|
||||
ibv_ack_async_event(&event);
|
||||
|
@ -84,6 +84,7 @@
|
||||
#include "opal/runtime/opal_params.h"
|
||||
#include "opal/runtime/opal.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_frag.h"
|
||||
@ -540,8 +541,7 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
|
||||
break;
|
||||
case MCA_BTL_OPENIB_CONTROL_CTS:
|
||||
OPAL_OUTPUT((-1, "received CTS from %s (buffer %p): posted recvs %d, sent cts %d",
|
||||
(NULL == ep->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : ep->endpoint_proc->proc_opal->proc_hostname,
|
||||
opal_get_proc_hostname(ep->endpoint_proc->proc_opal),
|
||||
(void*) ctl_hdr,
|
||||
ep->endpoint_posted_recvs, ep->endpoint_cts_sent));
|
||||
ep->endpoint_cts_received = true;
|
||||
@ -676,7 +676,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
|
||||
if (mca_btl_openib_component.gid_index >
|
||||
ib_port_attr->gid_tbl_len) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "gid index too large",
|
||||
true, opal_proc_local_get()->proc_hostname,
|
||||
true, opal_process_info.nodename,
|
||||
ibv_get_device_name(device->ib_dev), port_num,
|
||||
mca_btl_openib_component.gid_index,
|
||||
ib_port_attr->gid_tbl_len);
|
||||
@ -734,7 +734,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
|
||||
IB_DEFAULT_GID_PREFIX == subnet_id &&
|
||||
mca_btl_openib_component.warn_default_gid_prefix) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "default subnet prefix",
|
||||
true, opal_proc_local_get()->proc_hostname);
|
||||
true, opal_process_info.nodename);
|
||||
}
|
||||
|
||||
lmc = (1 << ib_port_attr->lmc);
|
||||
@ -1200,7 +1200,7 @@ static int setup_qps(void)
|
||||
if (0 == opal_argv_count(queues)) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"no qps in receive_queues", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
mca_btl_openib_component.receive_queues);
|
||||
ret = OPAL_ERROR;
|
||||
goto error;
|
||||
@ -1219,7 +1219,7 @@ static int setup_qps(void)
|
||||
num_xrc_qps++;
|
||||
#else
|
||||
opal_show_help("help-mpi-btl-openib.txt", "No XRC support", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
mca_btl_openib_component.receive_queues);
|
||||
ret = OPAL_ERR_NOT_AVAILABLE;
|
||||
goto error;
|
||||
@ -1227,7 +1227,7 @@ static int setup_qps(void)
|
||||
} else {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid qp type in receive_queues", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
mca_btl_openib_component.receive_queues,
|
||||
queues[qp]);
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
@ -1239,7 +1239,7 @@ static int setup_qps(void)
|
||||
and SRQ */
|
||||
if (num_xrc_qps > 0 && (num_pp_qps > 0 || num_srq_qps > 0)) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "XRC with PP or SRQ", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
mca_btl_openib_component.receive_queues);
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
@ -1248,7 +1248,7 @@ static int setup_qps(void)
|
||||
/* Current XRC implementation can't used with btls_per_lid > 1 */
|
||||
if (num_xrc_qps > 0 && mca_btl_openib_component.btls_per_lid > 1) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "XRC with BTLs per LID",
|
||||
true, opal_proc_local_get()->proc_hostname,
|
||||
true, opal_process_info.nodename,
|
||||
mca_btl_openib_component.receive_queues, num_xrc_qps);
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
@ -1279,7 +1279,7 @@ static int setup_qps(void)
|
||||
if (count < 3 || count > 6) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid pp qp specification", true,
|
||||
opal_proc_local_get()->proc_hostname, queues[qp]);
|
||||
opal_process_info.nodename, queues[qp]);
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
@ -1310,7 +1310,7 @@ static int setup_qps(void)
|
||||
if (count < 3 || count > 7) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid srq specification", true,
|
||||
opal_proc_local_get()->proc_hostname, queues[qp]);
|
||||
opal_process_info.nodename, queues[qp]);
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
@ -1346,14 +1346,14 @@ static int setup_qps(void)
|
||||
|
||||
if (rd_num < rd_init) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "rd_num must be >= rd_init",
|
||||
true, opal_proc_local_get()->proc_hostname, queues[qp]);
|
||||
true, opal_process_info.nodename, queues[qp]);
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (rd_num < srq_limit) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "srq_limit must be > rd_num",
|
||||
true, opal_proc_local_get()->proc_hostname, queues[qp]);
|
||||
true, opal_process_info.nodename, queues[qp]);
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
@ -1365,7 +1365,7 @@ static int setup_qps(void)
|
||||
|
||||
if (rd_num <= rd_low) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "rd_num must be > rd_low",
|
||||
true, opal_proc_local_get()->proc_hostname, queues[qp]);
|
||||
true, opal_process_info.nodename, queues[qp]);
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
@ -1386,21 +1386,21 @@ static int setup_qps(void)
|
||||
if (max_qp_size < max_size_needed) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"biggest qp size is too small", true,
|
||||
opal_proc_local_get()->proc_hostname, max_qp_size,
|
||||
opal_process_info.nodename, max_qp_size,
|
||||
max_size_needed);
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
} else if (max_qp_size > max_size_needed) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"biggest qp size is too big", true,
|
||||
opal_proc_local_get()->proc_hostname, max_qp_size,
|
||||
opal_process_info.nodename, max_qp_size,
|
||||
max_size_needed);
|
||||
}
|
||||
|
||||
if (mca_btl_openib_component.ib_free_list_max > 0 &&
|
||||
min_freelist_size > mca_btl_openib_component.ib_free_list_max) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "freelist too small", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
mca_btl_openib_component.ib_free_list_max,
|
||||
min_freelist_size);
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
@ -1514,7 +1514,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
if (mca_btl_openib_component.warn_no_device_params_found) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"no device params found", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
ibv_get_device_name(device->ib_dev),
|
||||
device->ib_dev_attr.vendor_id,
|
||||
device->ib_dev_attr.vendor_part_id);
|
||||
@ -1997,7 +1997,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"locally conflicting receive_queues", true,
|
||||
opal_install_dirs.opaldatadir,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
ibv_get_device_name(receive_queues_device->ib_dev),
|
||||
receive_queues_device->ib_dev_attr.vendor_id,
|
||||
receive_queues_device->ib_dev_attr.vendor_part_id,
|
||||
@ -2021,7 +2021,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"locally conflicting receive_queues", true,
|
||||
opal_install_dirs.opaldatadir,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
ibv_get_device_name(receive_queues_device->ib_dev),
|
||||
receive_queues_device->ib_dev_attr.vendor_id,
|
||||
receive_queues_device->ib_dev_attr.vendor_part_id,
|
||||
@ -2059,7 +2059,7 @@ error:
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"error in device init", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
ibv_get_device_name(device->ib_dev));
|
||||
}
|
||||
|
||||
@ -2402,7 +2402,7 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"ptmalloc2 with no threads", true,
|
||||
opal_proc_local_get()->proc_hostname);
|
||||
opal_process_info.nodename);
|
||||
goto no_btls;
|
||||
}
|
||||
#endif
|
||||
@ -2517,7 +2517,7 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
if (mca_btl_openib_component.want_fork_support > 0) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"ibv_fork_init fail", true,
|
||||
opal_proc_local_get()->proc_hostname);
|
||||
opal_process_info.nodename);
|
||||
goto no_btls;
|
||||
}
|
||||
}
|
||||
@ -2636,7 +2636,7 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
free(dev_sorted);
|
||||
if (!found) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "no devices right type",
|
||||
true, opal_proc_local_get()->proc_hostname,
|
||||
true, opal_process_info.nodename,
|
||||
((BTL_OPENIB_DT_IB == mca_btl_openib_component.device_type) ?
|
||||
"InfiniBand" :
|
||||
(BTL_OPENIB_DT_IWARP == mca_btl_openib_component.device_type) ?
|
||||
@ -2653,7 +2653,7 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
mca_btl_openib_component.warn_nonexistent_if) {
|
||||
char *str = opal_argv_join(mca_btl_openib_component.if_list, ',');
|
||||
opal_show_help("help-mpi-btl-openib.txt", "nonexistent port",
|
||||
true, opal_proc_local_get()->proc_hostname,
|
||||
true, opal_process_info.nodename,
|
||||
((NULL != mca_btl_openib_component.if_include) ?
|
||||
"in" : "ex"), str);
|
||||
free(str);
|
||||
@ -2665,7 +2665,7 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
if (num_devices_intentionally_ignored < num_devs) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"no active ports found", true,
|
||||
opal_proc_local_get()->proc_hostname);
|
||||
opal_process_info.nodename);
|
||||
}
|
||||
goto no_btls;
|
||||
}
|
||||
@ -3394,11 +3394,7 @@ error:
|
||||
if (IBV_WC_RNR_RETRY_EXC_ERR == wc->status ||
|
||||
IBV_WC_RETRY_EXC_ERR == wc->status) {
|
||||
const char *peer_hostname;
|
||||
if (endpoint->endpoint_proc->proc_opal && endpoint->endpoint_proc->proc_opal->proc_hostname) {
|
||||
peer_hostname = endpoint->endpoint_proc->proc_opal->proc_hostname;
|
||||
} else {
|
||||
peer_hostname = "<unknown -- please run with mpi_keep_peer_hostnames=1>";
|
||||
}
|
||||
peer_hostname = opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal);
|
||||
const char *device_name =
|
||||
ibv_get_device_name(endpoint->qps[qp].qp->lcl_qp->context->device);
|
||||
|
||||
@ -3410,21 +3406,21 @@ error:
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"pp rnr retry exceeded",
|
||||
true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
device_name,
|
||||
peer_hostname);
|
||||
} else {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"srq rnr retry exceeded",
|
||||
true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
device_name,
|
||||
peer_hostname);
|
||||
}
|
||||
} else if (IBV_WC_RETRY_EXC_ERR == wc->status) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"pp retry exceeded", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
device_name, peer_hostname);
|
||||
}
|
||||
}
|
||||
|
@ -17,7 +17,7 @@
|
||||
* Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2011 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2010-2011 Oracle and/or its affiliates. All rights reserved
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
@ -38,6 +38,7 @@
|
||||
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/class/ompi_free_list.h"
|
||||
|
||||
@ -507,8 +508,7 @@ static void cts_sent(mca_btl_base_module_t* btl,
|
||||
/* Nothing to do/empty function (we can't pass in a NULL pointer
|
||||
for the des_cbfunc) */
|
||||
OPAL_OUTPUT((-1, "CTS send to %s completed",
|
||||
(NULL == ep->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : ep->endpoint_proc->proc_opal->proc_hostname));
|
||||
opal_get_proc_hostname(ep->endpoint_proc->proc_opal)));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -523,8 +523,7 @@ void mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t *endpoint)
|
||||
mca_btl_openib_control_header_t *ctl_hdr;
|
||||
|
||||
OPAL_OUTPUT((-1, "SENDING CTS to %s on qp index %d (QP num %d)",
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname,
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
|
||||
mca_btl_openib_component.credits_qp,
|
||||
endpoint->qps[mca_btl_openib_component.credits_qp].qp->lcl_qp->qp_num));
|
||||
sc_frag = alloc_control_frag(endpoint->endpoint_btl);
|
||||
@ -594,8 +593,7 @@ void mca_btl_openib_endpoint_cpc_complete(mca_btl_openib_endpoint_t *endpoint)
|
||||
transport_type_ib_p = (IBV_TRANSPORT_IB == endpoint->endpoint_btl->device->ib_dev->transport_type);
|
||||
#endif
|
||||
OPAL_OUTPUT((-1, "cpc_complete to peer %s: is IB %d, initiatior %d, cts received: %d",
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname,
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
|
||||
transport_type_ib_p,
|
||||
endpoint->endpoint_initiator,
|
||||
endpoint->endpoint_cts_received));
|
||||
@ -608,15 +606,13 @@ void mca_btl_openib_endpoint_cpc_complete(mca_btl_openib_endpoint_t *endpoint)
|
||||
mark us as connected */
|
||||
if (endpoint->endpoint_cts_received) {
|
||||
OPAL_OUTPUT((-1, "cpc_complete to %s -- already got CTS, so marking endpoint as complete",
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
|
||||
mca_btl_openib_endpoint_connected(endpoint);
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_OUTPUT((-1, "cpc_complete to %s -- done",
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1054,7 +1050,7 @@ void *mca_btl_openib_endpoint_invoke_error(void *context)
|
||||
if (NULL == btl || NULL == btl->error_cb) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"cannot raise btl error", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
__FILE__, __LINE__);
|
||||
exit(1);
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
* Copyright (c) 2008 Chelsio, Inc. All rights reserved.
|
||||
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
@ -21,6 +22,7 @@
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/if.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "connect/connect.h"
|
||||
@ -198,7 +200,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
|
||||
NULL != temp[2]) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid ipaddr_inexclude", true, "include",
|
||||
opal_proc_local_get()->proc_hostname, list[i],
|
||||
opal_process_info.nodename, list[i],
|
||||
"Invalid specification (missing \"/\")");
|
||||
if (NULL != temp) {
|
||||
opal_argv_free(temp);
|
||||
@ -209,7 +211,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
|
||||
if (1 != inet_pton(ipaddr->sin_family, temp[0], &ipae)) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid ipaddr_inexclude", true, "include",
|
||||
opal_proc_local_get()->proc_hostname, list[i],
|
||||
opal_process_info.nodename, list[i],
|
||||
"Invalid specification (inet_pton() failed)");
|
||||
opal_argv_free(temp);
|
||||
continue;
|
||||
@ -240,7 +242,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
|
||||
NULL != temp[2]) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid ipaddr_inexclude", true, "exclude",
|
||||
opal_proc_local_get()->proc_hostname, list[i],
|
||||
opal_process_info.nodename, list[i],
|
||||
"Invalid specification (missing \"/\")");
|
||||
if (NULL != temp) {
|
||||
opal_argv_free(temp);
|
||||
@ -251,7 +253,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
|
||||
if (1 != inet_pton(ipaddr->sin_family, temp[0], &ipae)) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid ipaddr_inexclude", true, "exclude",
|
||||
opal_proc_local_get()->proc_hostname, list[i],
|
||||
opal_process_info.nodename, list[i],
|
||||
"Invalid specification (inet_pton() failed)");
|
||||
opal_argv_free(temp);
|
||||
continue;
|
||||
|
@ -18,6 +18,7 @@
|
||||
* Copyright (c) 2013-2014 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -34,6 +35,8 @@
|
||||
#include "opal/util/os_dirpath.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_mca.h"
|
||||
#include "btl_openib_ini.h"
|
||||
@ -621,13 +624,13 @@ int btl_openib_register_mca_params(void)
|
||||
if (mca_btl_openib_component.cuda_want_gdr && !mca_btl_openib_component.cuda_have_gdr) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"CUDA_no_gdr_support", true,
|
||||
opal_proc_local_get()->proc_hostname);
|
||||
opal_process_info.nodename);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
if (mca_btl_openib_component.cuda_want_gdr && !mca_btl_openib_component.driver_have_gdr) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"driver_no_gdr_support", true,
|
||||
opal_proc_local_get()->proc_hostname);
|
||||
opal_process_info.nodename);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
#if OPAL_CUDA_GDR_SUPPORT
|
||||
@ -733,7 +736,7 @@ int btl_openib_verify_mca_params (void)
|
||||
if (1 == mca_btl_openib_component.want_fork_support) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"ibv_fork requested but not supported", true,
|
||||
opal_proc_local_get()->proc_hostname);
|
||||
opal_process_info.nodename);
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
#endif
|
||||
@ -778,7 +781,7 @@ int btl_openib_verify_mca_params (void)
|
||||
if(mca_btl_openib_component.buffer_alignment <= 1 ||
|
||||
(mca_btl_openib_component.buffer_alignment & (mca_btl_openib_component.buffer_alignment - 1))) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment",
|
||||
true, mca_btl_openib_component.buffer_alignment, opal_proc_local_get()->proc_hostname, 64);
|
||||
true, mca_btl_openib_component.buffer_alignment, opal_process_info.nodename, 64);
|
||||
mca_btl_openib_component.buffer_alignment = 64;
|
||||
}
|
||||
|
||||
@ -801,7 +804,7 @@ int btl_openib_verify_mca_params (void)
|
||||
mca_btl_openib_component.driver_have_gdr) {
|
||||
if (1 == mca_btl_openib_component.want_fork_support) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "no_fork_with_gdr",
|
||||
true, opal_proc_local_get()->proc_hostname);
|
||||
true, opal_process_info.nodename);
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
if (-1 == mca_btl_openib_component.want_fork_support) {
|
||||
|
@ -4,7 +4,7 @@
|
||||
* Copyright (c) 2007 Mellanox Technologies, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -27,6 +27,7 @@
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
/*
|
||||
@ -127,7 +128,7 @@ int opal_btl_openib_connect_base_register(void)
|
||||
if (NULL == all[i]) {
|
||||
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"cpc name not found", true,
|
||||
"include", opal_proc_local_get()->proc_hostname,
|
||||
"include", opal_process_info.nodename,
|
||||
"include", btl_openib_cpc_include, temp[j],
|
||||
all_cpc_names);
|
||||
opal_argv_free(temp);
|
||||
@ -153,7 +154,7 @@ int opal_btl_openib_connect_base_register(void)
|
||||
if (NULL == all[i]) {
|
||||
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"cpc name not found", true,
|
||||
"exclude", opal_proc_local_get()->proc_hostname,
|
||||
"exclude", opal_process_info.nodename,
|
||||
"exclude", btl_openib_cpc_exclude, temp[j],
|
||||
all_cpc_names);
|
||||
opal_argv_free(temp);
|
||||
@ -299,7 +300,7 @@ int opal_btl_openib_connect_base_select_for_local_port(mca_btl_openib_module_t *
|
||||
if (0 == cpc_index) {
|
||||
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"no cpcs for port", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
ibv_get_device_name(btl->device->ib_dev),
|
||||
btl->port_num, msg);
|
||||
free(cpcs);
|
||||
@ -454,8 +455,7 @@ int opal_btl_openib_connect_base_alloc_cts(mca_btl_base_endpoint_t *endpoint)
|
||||
mca_btl_openib_component.credits_qp;
|
||||
endpoint->endpoint_cts_frag.super.endpoint = endpoint;
|
||||
OPAL_OUTPUT((-1, "Got a CTS frag for peer %s, addr %p, length %d, lkey %d",
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname,
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
|
||||
(void*) endpoint->endpoint_cts_frag.super.sg_entry.addr,
|
||||
endpoint->endpoint_cts_frag.super.sg_entry.length,
|
||||
endpoint->endpoint_cts_frag.super.sg_entry.lkey));
|
||||
|
@ -6,7 +6,7 @@
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -50,6 +50,7 @@
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "btl_openib_fd.h"
|
||||
#include "btl_openib_proc.h"
|
||||
@ -532,7 +533,7 @@ static int rdmacm_setup_qp(rdmacm_contents_t *contents,
|
||||
endpoint->qps[qpnum].ib_inline_max = attr.cap.max_inline_data;
|
||||
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"inline truncated", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
ibv_get_device_name(contents->openib_btl->device->ib_dev),
|
||||
contents->openib_btl->port_num,
|
||||
req_inline, attr.cap.max_inline_data);
|
||||
@ -888,8 +889,7 @@ static int rdmacm_module_start_connect(opal_btl_openib_connect_base_module_t *cp
|
||||
(void*) endpoint,
|
||||
(void*) endpoint->endpoint_local_cpc,
|
||||
endpoint->endpoint_initiator ? "am" : "am NOT",
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
|
||||
|
||||
/* If we're the initiator, then open all the QPs */
|
||||
if (contents->endpoint->endpoint_initiator) {
|
||||
@ -942,14 +942,14 @@ static void *show_help_cant_find_endpoint(void *context)
|
||||
msg = stringify(c->peer_ip_addr);
|
||||
opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
"could not find matching endpoint", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
c->device_name,
|
||||
c->peer_tcp_port);
|
||||
free(msg);
|
||||
} else {
|
||||
opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
"could not find matching endpoint", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
"<unknown>", "<unknown>", -1);
|
||||
}
|
||||
free(context);
|
||||
@ -1032,8 +1032,7 @@ static int handle_connect_request(struct rdma_cm_event *event)
|
||||
(void*) endpoint,
|
||||
(void*) endpoint->endpoint_local_cpc,
|
||||
endpoint->endpoint_initiator ? "am" : "am NOT",
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
|
||||
if (endpoint->endpoint_initiator) {
|
||||
reject_reason_t reason = REJECT_WRONG_DIRECTION;
|
||||
|
||||
@ -1094,8 +1093,7 @@ static int handle_connect_request(struct rdma_cm_event *event)
|
||||
}
|
||||
OPAL_OUTPUT((-1, "Posted CTS receiver buffer (%p) for peer %s, qp index %d (QP num %d), WR ID %p, SG addr %p, len %d, lkey %d",
|
||||
(void*)((uintptr_t*) wr->sg_list[0].addr),
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname,
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
|
||||
qpnum,
|
||||
endpoint->qps[qpnum].qp->lcl_qp->qp_num,
|
||||
(void*)((uintptr_t*) wr->wr_id),
|
||||
@ -1286,8 +1284,7 @@ static void *local_endpoint_cpc_complete(void *context)
|
||||
mca_btl_openib_endpoint_t *endpoint = (mca_btl_openib_endpoint_t *)context;
|
||||
|
||||
OPAL_OUTPUT((-1, "MAIN local_endpoint_cpc_complete to %s",
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
|
||||
mca_btl_openib_endpoint_cpc_complete(endpoint);
|
||||
|
||||
return NULL;
|
||||
@ -1307,8 +1304,7 @@ static int rdmacm_connect_endpoint(id_context_t *context,
|
||||
if (contents->server) {
|
||||
endpoint = context->endpoint;
|
||||
OPAL_OUTPUT((-1, "SERVICE Server CPC complete to %s",
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
|
||||
} else {
|
||||
endpoint = contents->endpoint;
|
||||
endpoint->rem_info.rem_index =
|
||||
@ -1323,8 +1319,7 @@ static int rdmacm_connect_endpoint(id_context_t *context,
|
||||
contents->on_client_list = true;
|
||||
}
|
||||
OPAL_OUTPUT((-1, "SERVICE Client CPC complete to %s",
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname));
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
|
||||
}
|
||||
if (NULL == endpoint) {
|
||||
BTL_ERROR(("Can't find endpoint"));
|
||||
@ -1337,11 +1332,9 @@ static int rdmacm_connect_endpoint(id_context_t *context,
|
||||
connected */
|
||||
if (++data->rdmacm_counter < mca_btl_openib_component.num_qps) {
|
||||
BTL_VERBOSE(("%s to peer %s, count == %d", contents->server?"server":"client",
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname, data->rdmacm_counter));
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), data->rdmacm_counter));
|
||||
OPAL_OUTPUT((-1, "%s to peer %s, count == %d", contents->server?"server":"client",
|
||||
(NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname, data->rdmacm_counter));
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), data->rdmacm_counter));
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -1578,8 +1571,7 @@ static int finish_connect(id_context_t *context)
|
||||
OPAL_OUTPUT((-1, "Posted initiator CTS buffer (%p, length %d) for peer %s, qp index %d (QP num %d)",
|
||||
(void*)((uintptr_t*) wr->sg_list[0].addr),
|
||||
wr->sg_list[0].length,
|
||||
(NULL == contents->endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : contents->endpoint->endpoint_proc->proc_opal->proc_hostname,
|
||||
opal_get_proc_hostname(contents->endpoint->endpoint_proc->proc_opal),
|
||||
context->qpnum,
|
||||
contents->endpoint->qps[context->qpnum].qp->lcl_qp->qp_num));
|
||||
}
|
||||
@ -1651,8 +1643,7 @@ static int finish_connect(id_context_t *context)
|
||||
(void*) contents->endpoint,
|
||||
(void*) contents->endpoint->endpoint_local_cpc,
|
||||
contents->endpoint->endpoint_initiator ? "am" : "am NOT",
|
||||
(NULL == contents->endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : contents->endpoint->endpoint_proc->proc_opal->proc_hostname));
|
||||
opal_get_proc_hostname(contents->endpoint->endpoint_proc->proc_opal)));
|
||||
rc = rdma_connect(context->id, &conn_param);
|
||||
if (0 != rc) {
|
||||
BTL_ERROR(("rdma_connect Failed with %d", rc));
|
||||
@ -1680,7 +1671,7 @@ static void *show_help_rdmacm_event_error(void *c)
|
||||
if (RDMA_CM_EVENT_DEVICE_REMOVAL == event->event) {
|
||||
opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
"rdma cm device removal", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
ibv_get_device_name(event->id->verbs->device));
|
||||
} else {
|
||||
const char *device = "Unknown";
|
||||
@ -1691,11 +1682,10 @@ static void *show_help_rdmacm_event_error(void *c)
|
||||
}
|
||||
opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
|
||||
"rdma cm event error", true,
|
||||
opal_proc_local_get()->proc_hostname,
|
||||
opal_process_info.nodename,
|
||||
device,
|
||||
rdma_event_str(event->event),
|
||||
(NULL == context->endpoint->endpoint_proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : context->endpoint->endpoint_proc->proc_opal->proc_hostname);
|
||||
opal_get_proc_hostname(context->endpoint->endpoint_proc->proc_opal));
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
@ -1,9 +1,10 @@
|
||||
/*
|
||||
* Copyright (c) 2011 Mellanox Technologies. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -15,6 +16,8 @@
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/sys_limits.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "connect/btl_openib_connect_sl.h"
|
||||
#include <infiniband/iba/ib_types.h>
|
||||
|
||||
@ -113,7 +116,7 @@ static int init_ud_qp(struct ibv_context *context_arg,
|
||||
if (NULL == cache->cq) {
|
||||
BTL_ERROR(("error creating cq, errno says %s", strerror(errno)));
|
||||
opal_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
|
||||
true, opal_proc_local_get()->proc_hostname,
|
||||
true, opal_process_info.nodename,
|
||||
__FILE__, __LINE__, "ibv_create_cq",
|
||||
strerror(errno), errno,
|
||||
ibv_get_device_name(context_arg->device));
|
||||
|
@ -7,6 +7,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -59,6 +60,7 @@
|
||||
#include <pthread.h>
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/alfg.h"
|
||||
@ -1246,7 +1248,7 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
|
||||
|
||||
if (NULL == lcl_ep->qps[qp].qp->lcl_qp) {
|
||||
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"ibv_create_qp failed", true, opal_proc_local_get()->proc_hostname,
|
||||
"ibv_create_qp failed", true, opal_process_info.nodename,
|
||||
ibv_get_device_name(m->btl->device->ib_dev),
|
||||
"Reliable connected (RC)");
|
||||
|
||||
@ -1256,7 +1258,7 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
|
||||
if (init_attr.cap.max_inline_data < req_inline) {
|
||||
lcl_ep->qps[qp].ib_inline_max = init_attr.cap.max_inline_data;
|
||||
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"inline truncated", true, opal_proc_local_get()->proc_hostname,
|
||||
"inline truncated", true, opal_process_info.nodename,
|
||||
ibv_get_device_name(m->btl->device->ib_dev),
|
||||
m->btl->port_num, req_inline,
|
||||
init_attr.cap.max_inline_data);
|
||||
|
@ -16,6 +16,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2010-2012 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -513,28 +514,28 @@ set_uniq_paths_for_init_rndv(mca_btl_sm_component_t *comp_ptr)
|
||||
if (asprintf(&comp_ptr->sm_mpool_ctl_file_name,
|
||||
"%s"OPAL_PATH_SEP"shared_mem_pool.%s",
|
||||
opal_process_info.job_session_dir,
|
||||
opal_proc_local_get()->proc_hostname) < 0) {
|
||||
opal_process_info.nodename) < 0) {
|
||||
/* rc set */
|
||||
goto out;
|
||||
}
|
||||
if (asprintf(&comp_ptr->sm_mpool_rndv_file_name,
|
||||
"%s"OPAL_PATH_SEP"shared_mem_pool_rndv.%s",
|
||||
opal_process_info.job_session_dir,
|
||||
opal_proc_local_get()->proc_hostname) < 0) {
|
||||
opal_process_info.nodename) < 0) {
|
||||
/* rc set */
|
||||
goto out;
|
||||
}
|
||||
if (asprintf(&comp_ptr->sm_ctl_file_name,
|
||||
"%s"OPAL_PATH_SEP"shared_mem_btl_module.%s",
|
||||
opal_process_info.job_session_dir,
|
||||
opal_proc_local_get()->proc_hostname) < 0) {
|
||||
opal_process_info.nodename) < 0) {
|
||||
/* rc set */
|
||||
goto out;
|
||||
}
|
||||
if (asprintf(&comp_ptr->sm_rndv_file_name,
|
||||
"%s"OPAL_PATH_SEP"shared_mem_btl_rndv.%s",
|
||||
opal_process_info.job_session_dir,
|
||||
opal_proc_local_get()->proc_hostname) < 0) {
|
||||
opal_process_info.nodename) < 0) {
|
||||
/* rc set */
|
||||
goto out;
|
||||
}
|
||||
@ -806,10 +807,10 @@ mca_btl_sm_component_init(int *num_btls,
|
||||
sbuf.st_mode = 0;
|
||||
}
|
||||
opal_show_help("help-mpi-btl-sm.txt", "knem permission denied",
|
||||
true, opal_proc_local_get()->proc_hostname, sbuf.st_mode);
|
||||
true, opal_process_info.nodename, sbuf.st_mode);
|
||||
} else {
|
||||
opal_show_help("help-mpi-btl-sm.txt", "knem fail open",
|
||||
true, opal_proc_local_get()->proc_hostname, errno,
|
||||
true, opal_process_info.nodename, errno,
|
||||
strerror(errno));
|
||||
}
|
||||
goto no_knem;
|
||||
@ -821,13 +822,13 @@ mca_btl_sm_component_init(int *num_btls,
|
||||
&mca_btl_sm_component.knem_info);
|
||||
if (rc < 0) {
|
||||
opal_show_help("help-mpi-btl-sm.txt", "knem get ABI fail",
|
||||
true, opal_proc_local_get()->proc_hostname, errno,
|
||||
true, opal_process_info.nodename, errno,
|
||||
strerror(errno));
|
||||
goto no_knem;
|
||||
}
|
||||
if (KNEM_ABI_VERSION != mca_btl_sm_component.knem_info.abi) {
|
||||
opal_show_help("help-mpi-btl-sm.txt", "knem ABI mismatch",
|
||||
true, opal_proc_local_get()->proc_hostname, KNEM_ABI_VERSION,
|
||||
true, opal_process_info.nodename, KNEM_ABI_VERSION,
|
||||
mca_btl_sm_component.knem_info.abi);
|
||||
goto no_knem;
|
||||
}
|
||||
@ -849,7 +850,7 @@ mca_btl_sm_component_init(int *num_btls,
|
||||
KNEM_STATUS_ARRAY_FILE_OFFSET);
|
||||
if (MAP_FAILED == mca_btl_sm.knem_status_array) {
|
||||
opal_show_help("help-mpi-btl-sm.txt", "knem mmap fail",
|
||||
true, opal_proc_local_get()->proc_hostname, errno,
|
||||
true, opal_process_info.nodename, errno,
|
||||
strerror(errno));
|
||||
goto no_knem;
|
||||
}
|
||||
|
@ -15,6 +15,7 @@
|
||||
* Copyright (c) 2010-2014 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -442,28 +443,28 @@ set_uniq_paths_for_init_rndv(mca_btl_smcuda_component_t *comp_ptr)
|
||||
if (asprintf(&comp_ptr->sm_mpool_ctl_file_name,
|
||||
"%s"OPAL_PATH_SEP"shared_mem_cuda_pool.%s",
|
||||
opal_process_info.job_session_dir,
|
||||
opal_proc_local_get()->proc_hostname) < 0) {
|
||||
opal_process_info.nodename) < 0) {
|
||||
/* rc set */
|
||||
goto out;
|
||||
}
|
||||
if (asprintf(&comp_ptr->sm_mpool_rndv_file_name,
|
||||
"%s"OPAL_PATH_SEP"shared_mem_cuda_pool_rndv.%s",
|
||||
opal_process_info.job_session_dir,
|
||||
opal_proc_local_get()->proc_hostname) < 0) {
|
||||
opal_process_info.nodename) < 0) {
|
||||
/* rc set */
|
||||
goto out;
|
||||
}
|
||||
if (asprintf(&comp_ptr->sm_ctl_file_name,
|
||||
"%s"OPAL_PATH_SEP"shared_mem_cuda_btl_module.%s",
|
||||
opal_process_info.job_session_dir,
|
||||
opal_proc_local_get()->proc_hostname) < 0) {
|
||||
opal_process_info.nodename) < 0) {
|
||||
/* rc set */
|
||||
goto out;
|
||||
}
|
||||
if (asprintf(&comp_ptr->sm_rndv_file_name,
|
||||
"%s"OPAL_PATH_SEP"shared_mem_cuda_btl_rndv.%s",
|
||||
opal_process_info.job_session_dir,
|
||||
opal_proc_local_get()->proc_hostname) < 0) {
|
||||
opal_process_info.nodename) < 0) {
|
||||
/* rc set */
|
||||
goto out;
|
||||
}
|
||||
|
@ -58,6 +58,7 @@
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/constants.h"
|
||||
#include "opal/mca/btl/btl.h"
|
||||
@ -193,14 +194,14 @@ static int mca_btl_tcp_component_verify(void)
|
||||
{
|
||||
if( mca_btl_tcp_component.tcp_port_min > USHRT_MAX ) {
|
||||
opal_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
|
||||
true, "v4", opal_proc_local_get()->proc_hostname,
|
||||
true, "v4", opal_process_info.nodename,
|
||||
mca_btl_tcp_component.tcp_port_min );
|
||||
mca_btl_tcp_component.tcp_port_min = 1024;
|
||||
}
|
||||
#if OPAL_ENABLE_IPV6
|
||||
if( mca_btl_tcp_component.tcp6_port_min > USHRT_MAX ) {
|
||||
opal_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
|
||||
true, "v6", opal_proc_local_get()->proc_hostname,
|
||||
true, "v6", opal_process_info.nodename,
|
||||
mca_btl_tcp_component.tcp6_port_min );
|
||||
mca_btl_tcp_component.tcp6_port_min = 1024;
|
||||
}
|
||||
@ -479,7 +480,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd)
|
||||
str = strchr(argv[i], '/');
|
||||
if (NULL == str) {
|
||||
opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
|
||||
true, name, opal_proc_local_get()->proc_hostname,
|
||||
true, name, opal_process_info.nodename,
|
||||
tmp, "Invalid specification (missing \"/\")");
|
||||
free(argv[i]);
|
||||
free(tmp);
|
||||
@ -496,7 +497,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd)
|
||||
|
||||
if (1 != ret) {
|
||||
opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
|
||||
true, name, opal_proc_local_get()->proc_hostname, tmp,
|
||||
true, name, opal_process_info.nodename, tmp,
|
||||
"Invalid specification (inet_pton() failed)");
|
||||
free(tmp);
|
||||
continue;
|
||||
@ -524,7 +525,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd)
|
||||
if (if_index < 0) {
|
||||
if (reqd || mca_btl_tcp_component.report_all_unfound_interfaces) {
|
||||
opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
|
||||
true, name, opal_proc_local_get()->proc_hostname, tmp,
|
||||
true, name, opal_process_info.nodename, tmp,
|
||||
"Did not find interface matching this subnet");
|
||||
}
|
||||
free(tmp);
|
||||
|
@ -53,6 +53,7 @@
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/mca/btl/base/btl_base_error.h"
|
||||
|
||||
#include "btl_tcp.h"
|
||||
@ -505,7 +506,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail",
|
||||
true, opal_proc_local_get()->proc_hostname,
|
||||
true, opal_process_info.nodename,
|
||||
getpid(),
|
||||
"did not receive entire connect ACK from peer");
|
||||
return OPAL_ERR_UNREACH;
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/if.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "btl_tcp.h"
|
||||
#include "btl_tcp_proc.h"
|
||||
@ -379,7 +380,7 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
|
||||
int rc, *a = NULL;
|
||||
size_t i, j;
|
||||
|
||||
if (NULL == (proc_hostname = btl_proc->proc_opal->proc_hostname)) {
|
||||
if (NULL == (proc_hostname = opal_get_proc_hostname(btl_proc->proc_opal))) {
|
||||
return OPAL_ERR_UNREACH;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -13,6 +14,7 @@
|
||||
#include "opal_config.h"
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "btl_usnic_util.h"
|
||||
#include "btl_usnic_proc.h"
|
||||
@ -284,7 +286,7 @@ opal_btl_usnic_check_connectivity(opal_btl_usnic_module_t *module,
|
||||
endpoint->endpoint_remote_addr.cidrmask,
|
||||
endpoint->endpoint_remote_addr.connectivity_udp_port,
|
||||
endpoint->endpoint_remote_addr.mac,
|
||||
endpoint->endpoint_proc->proc_opal->proc_hostname,
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
|
||||
endpoint->endpoint_remote_addr.mtu);
|
||||
endpoint->endpoint_connectivity_checked = true;
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -14,6 +15,7 @@
|
||||
#include <unistd.h>
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "btl_usnic.h"
|
||||
#include "btl_usnic_util.h"
|
||||
@ -205,11 +207,7 @@ static void map_output_procs(FILE *fp)
|
||||
/* Loop over and print the sorted module device information */
|
||||
for (i = 0; i < num_procs; ++i) {
|
||||
fprintf(fp, "peer=%d,", opal_process_name_vpid(procs[i]->proc_opal->proc_name));
|
||||
if (procs[i]->proc_opal->proc_hostname) {
|
||||
fprintf(fp, "hostname=%s,",
|
||||
procs[i]->proc_opal->proc_hostname);
|
||||
}
|
||||
|
||||
fprintf(fp, "hostname=%s,", opal_get_proc_hostname(procs[i]->proc_opal));
|
||||
map_output_endpoints(fp, procs[i]);
|
||||
}
|
||||
|
||||
@ -235,7 +233,7 @@ void opal_btl_usnic_connectivity_map(void)
|
||||
rank>.txt */
|
||||
asprintf(&filename, "%s-%s.pid%d.job%d.mcwrank%d.txt",
|
||||
mca_btl_usnic_component.connectivity_map_prefix,
|
||||
opal_process_info.nodename,
|
||||
opal_get_proc_hostname(opal_proc_local_get()),
|
||||
getpid(),
|
||||
opal_process_name_jobid(opal_proc_local_get()->proc_name),
|
||||
opal_process_name_vpid(opal_proc_local_get()->proc_name));
|
||||
|
@ -15,6 +15,7 @@
|
||||
* Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -36,6 +37,7 @@
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "opal/include/opal_stdint.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "opal/mca/btl/btl.h"
|
||||
#include "opal/mca/btl/base/btl_base_error.h"
|
||||
@ -195,7 +197,7 @@ static void add_procs_warn_ah_fail(opal_btl_usnic_module_t *module,
|
||||
local,
|
||||
module->if_name,
|
||||
ibv_get_device_name(module->device),
|
||||
endpoint->endpoint_proc->proc_opal->proc_hostname,
|
||||
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
|
||||
remote);
|
||||
}
|
||||
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/constants.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "btl_usnic.h"
|
||||
#include "btl_usnic_proc.h"
|
||||
@ -248,7 +249,7 @@ static int create_proc(opal_proc_t *opal_proc,
|
||||
"transport mismatch",
|
||||
true,
|
||||
opal_process_info.nodename,
|
||||
proc->proc_opal->proc_hostname);
|
||||
opal_get_proc_hostname(proc->proc_opal));
|
||||
OBJ_RELEASE(proc);
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
@ -628,8 +629,7 @@ static int match_modex(opal_btl_usnic_module_t *module,
|
||||
ibv_get_device_name(module->device),
|
||||
module->if_name,
|
||||
module->if_mtu,
|
||||
(NULL == proc->proc_opal->proc_hostname) ?
|
||||
"unknown" : proc->proc_opal->proc_hostname,
|
||||
opal_get_proc_hostname(proc->proc_opal),
|
||||
proc->proc_modex[*index_out].mtu);
|
||||
*index_out = -1;
|
||||
return OPAL_ERR_UNREACH;
|
||||
|
@ -255,6 +255,7 @@ opal_init_util(int* pargc, char*** pargv)
|
||||
{
|
||||
int ret;
|
||||
char *error = NULL;
|
||||
char hostname[512];
|
||||
|
||||
if( ++opal_util_initialized != 1 ) {
|
||||
if( opal_util_initialized < 1 ) {
|
||||
@ -263,6 +264,13 @@ opal_init_util(int* pargc, char*** pargv)
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/* set the nodename right away so anyone who needs it has it. Note
|
||||
* that we don't bother with fqdn and prefix issues here - we let
|
||||
* the RTE later replace this with a modified name if the user
|
||||
* requests it */
|
||||
gethostname(hostname, 512);
|
||||
opal_process_info.nodename = strdup(hostname);
|
||||
|
||||
/* initialize the memory allocator */
|
||||
opal_malloc_init();
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013 Inria. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -17,9 +18,9 @@
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
opal_process_info_t opal_process_info = {
|
||||
.nodename = "not yet named",
|
||||
.job_session_dir = "not yet defined",
|
||||
.proc_session_dir = "not yet defined",
|
||||
.nodename = NULL,
|
||||
.job_session_dir = NULL,
|
||||
.proc_session_dir = NULL,
|
||||
.num_local_peers = 0, /* there is nobody else but me */
|
||||
.my_local_rank = 0, /* I'm the only process around here */
|
||||
#if OPAL_HAVE_HWLOC
|
||||
@ -34,7 +35,7 @@ static opal_proc_t opal_local_proc = {
|
||||
0,
|
||||
0,
|
||||
NULL,
|
||||
"localhost - unnamed"
|
||||
NULL
|
||||
};
|
||||
static opal_proc_t* opal_proc_my_name = &opal_local_proc;
|
||||
|
||||
@ -120,3 +121,34 @@ char* (*opal_process_name_print)(const opal_process_name_t) = opal_process_name_
|
||||
uint32_t (*opal_process_name_vpid)(const opal_process_name_t) = opal_process_name_vpid_should_never_be_called;
|
||||
uint32_t (*opal_process_name_jobid)(const opal_process_name_t) = opal_process_name_vpid_should_never_be_called;
|
||||
|
||||
char* opal_get_proc_hostname(const opal_proc_t *proc)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* if the proc is NULL, then we can't know */
|
||||
if (NULL == proc) {
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
/* if it is my own hostname we are after, then just hand back
|
||||
* the value in opal_process_info */
|
||||
if (proc == opal_proc_my_name) {
|
||||
return opal_process_info.nodename;
|
||||
}
|
||||
|
||||
/* see if we already have the data - if so, pass it back */
|
||||
if (NULL != proc->proc_hostname) {
|
||||
return proc->proc_hostname;
|
||||
}
|
||||
|
||||
/* if we don't already have it, then try to get it */
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_HOSTNAME, proc,
|
||||
(char**)&(proc->proc_hostname), OPAL_STRING);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
OPAL_ERROR_LOG(ret);
|
||||
return "unknown"; // return something so the caller doesn't segfault
|
||||
}
|
||||
|
||||
/* user is not allowed to release the data */
|
||||
return proc->proc_hostname;
|
||||
}
|
||||
|
@ -108,4 +108,10 @@ OPAL_DECLSPEC extern uint32_t (*opal_process_name_jobid)(const opal_process_name
|
||||
#define OPAL_PROC_MY_HOSTNAME (opal_proc_local_get()->proc_hostname)
|
||||
#define OPAL_NAME_INVALID 0xffffffffffffffff
|
||||
|
||||
/* provide a safe way to retrieve the hostname of a proc, including
|
||||
* our own. This is to be used by all BTLs so we don't retrieve hostnames
|
||||
* unless needed. The returned value MUST NOT be free'd as it is
|
||||
* owned by the proc_t */
|
||||
OPAL_DECLSPEC char* opal_get_proc_hostname(const opal_proc_t *proc);
|
||||
|
||||
#endif /* OPAL_PROC_H */
|
||||
|
@ -170,6 +170,15 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* we may have modified the local nodename according to
|
||||
* request to retain/strip the FQDN and prefix, so update
|
||||
* it here. The OPAL layer will strdup the hostname, so
|
||||
* we have to free it first to avoid a memory leak */
|
||||
if (NULL != opal_process_info.nodename) {
|
||||
free(opal_process_info.nodename);
|
||||
}
|
||||
opal_process_info.nodename = orte_process_info.nodename;
|
||||
|
||||
/* setup the dstore framework */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_dstore_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -232,6 +241,18 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* set the remaining opal_process_info fields. Note that
|
||||
* the OPAL layer will have initialized these to NULL, and
|
||||
* anyone between us would not have strdup'd the string, so
|
||||
* we cannot free it here */
|
||||
opal_process_info.job_session_dir = orte_process_info.job_session_dir;
|
||||
opal_process_info.proc_session_dir = orte_process_info.proc_session_dir;
|
||||
opal_process_info.num_local_peers = (int32_t)orte_process_info.num_local_peers;
|
||||
opal_process_info.my_local_rank = (int32_t)orte_process_info.my_local_rank;
|
||||
#if OPAL_HAVE_HWLOC
|
||||
opal_process_info.cpuset = orte_process_info.cpuset;
|
||||
#endif /* OPAL_HAVE_HWLOC */
|
||||
|
||||
#if OPAL_ENABLE_TIMING
|
||||
opal_timing_set_jobid(ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
#endif
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user