From fb1f487d85295f334983ac672bb7200767f5b0fe Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 3 Oct 2014 14:19:48 -0700 Subject: [PATCH] Clean up some cruft resulting from the move of the BTLs to opal. We had created the ability to delay modex operations, which included a need to delay retrieving hostname info for remote procs. This allowed us to not retrieve the modex info until the first message unless required - the hostname is generally only required for debug and error messages. Properly set up the opal_process_info structure early in the initialization procedure. Define the local hostname right at the beginning of opal_init so all parts of opal can use it. Overlay that during orte_init as the user may choose to remove fqdn and strip prefixes during that time. Set up the job_session_dir and other such info immediately when it becomes available during orte_init. --- ompi/runtime/ompi_mpi_init.c | 11 +-- opal/mca/btl/base/btl_base_error.c | 4 +- opal/mca/btl/base/btl_base_error.h | 85 ++++++++++--------- opal/mca/btl/openib/btl_openib.c | 26 +++--- opal/mca/btl/openib/btl_openib_async.c | 9 +- opal/mca/btl/openib/btl_openib_component.c | 64 +++++++------- opal/mca/btl/openib/btl_openib_endpoint.c | 20 ++--- opal/mca/btl/openib/btl_openib_ip.c | 10 ++- opal/mca/btl/openib/btl_openib_mca.c | 13 +-- .../openib/connect/btl_openib_connect_base.c | 12 +-- .../connect/btl_openib_connect_rdmacm.c | 46 ++++------ .../openib/connect/btl_openib_connect_sl.c | 7 +- .../openib/connect/btl_openib_connect_udcm.c | 6 +- opal/mca/btl/sm/btl_sm_component.c | 19 +++-- opal/mca/btl/smcuda/btl_smcuda_component.c | 9 +- opal/mca/btl/tcp/btl_tcp_component.c | 11 +-- opal/mca/btl/tcp/btl_tcp_endpoint.c | 3 +- opal/mca/btl/tcp/btl_tcp_proc.c | 3 +- opal/mca/btl/usnic/btl_usnic_connectivity.h | 6 +- opal/mca/btl/usnic/btl_usnic_map.c | 10 +-- opal/mca/btl/usnic/btl_usnic_module.c | 4 +- opal/mca/btl/usnic/btl_usnic_proc.c | 6 +- opal/runtime/opal_init.c | 8 ++ opal/util/proc.c | 40 
++++++++- opal/util/proc.h | 6 ++ orte/runtime/orte_init.c | 21 +++++ 26 files changed, 260 insertions(+), 199 deletions(-) diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 958286a3c3..12f9b63568 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -476,7 +476,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } /* no select is required as this is a static framework */ - /* Setup RTE - note that we are an MPI process */ + /* Setup RTE */ if (OMPI_SUCCESS != (ret = ompi_rte_init(NULL, NULL))) { error = "ompi_mpi_init: ompi_rte_init failed"; goto error; @@ -498,15 +498,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } #endif - opal_process_info.nodename = ompi_process_info.nodename; - opal_process_info.job_session_dir = ompi_process_info.job_session_dir; - opal_process_info.proc_session_dir = ompi_process_info.proc_session_dir; - opal_process_info.num_local_peers = (int32_t)ompi_process_info.num_local_peers; - opal_process_info.my_local_rank = (int32_t)ompi_process_info.my_local_rank; -#if OPAL_HAVE_HWLOC - opal_process_info.cpuset = ompi_process_info.cpuset; -#endif /* OPAL_HAVE_HWLOC */ - /* Register the default errhandler callback - RTE will ignore if it * doesn't support this capability */ diff --git a/opal/mca/btl/base/btl_base_error.c b/opal/mca/btl/base/btl_base_error.c index 5bd81a3877..ec437b1afa 100644 --- a/opal/mca/btl/base/btl_base_error.c +++ b/opal/mca/btl/base/btl_base_error.c @@ -12,6 +12,7 @@ * Copyright (c) 2006-2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,6 +26,7 @@ #include #include "opal/util/show_help.h" +#include "opal/util/proc.h" #include "base.h" #include "btl_base_error.h" @@ -64,7 +66,7 @@ void mca_btl_base_error_no_nics(const char* transport, asprintf(&procid, "%s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); opal_show_help("help-mpi-btl-base.txt", "btl:no-nics", - true, procid, transport, opal_proc_local_get()->proc_hostname, + true, procid, transport, opal_process_info.nodename, nic_name); free(procid); } diff --git a/opal/mca/btl/base/btl_base_error.h b/opal/mca/btl/base/btl_base_error.h index 2f91510bbd..58ac9378cf 100644 --- a/opal/mca/btl/base/btl_base_error.h +++ b/opal/mca/btl/base/btl_base_error.h @@ -13,7 +13,7 @@ * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved + * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -36,53 +36,54 @@ OPAL_DECLSPEC extern int mca_btl_base_verbose; OPAL_DECLSPEC extern int mca_btl_base_err(const char*, ...) __opal_attribute_format__(__printf__, 1, 2); OPAL_DECLSPEC extern int mca_btl_base_out(const char*, ...) 
__opal_attribute_format__(__printf__, 1, 2); -#define BTL_OUTPUT(args) \ -do { \ - mca_btl_base_out("[%s]%s[%s:%d:%s] ", \ - opal_proc_local_get()->proc_hostname, \ - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_btl_base_out args; \ - mca_btl_base_out("\n"); \ -} while(0); +#define BTL_OUTPUT(args) \ + do { \ + mca_btl_base_out("[%s]%s[%s:%d:%s] ", \ + opal_process_info.nodename, \ + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ + __FILE__, __LINE__, __func__); \ + mca_btl_base_out args; \ + mca_btl_base_out("\n"); \ + } while(0); -#define BTL_ERROR(args) \ -do { \ - mca_btl_base_err("[%s]%s[%s:%d:%s] ", \ - opal_proc_local_get()->proc_hostname, \ - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_btl_base_err args; \ - mca_btl_base_err("\n"); \ -} while(0); +#define BTL_ERROR(args) \ + do { \ + mca_btl_base_err("[%s]%s[%s:%d:%s] ", \ + opal_process_info.nodename, \ + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ + __FILE__, __LINE__, __func__); \ + mca_btl_base_err args; \ + mca_btl_base_err("\n"); \ + } while(0); -#define BTL_PEER_ERROR(proc, args) \ -do { \ - mca_btl_base_err("%s[%s:%d:%s] from %s ", \ - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__, \ - opal_proc_local_get()->proc_hostname); \ - if(proc) { \ - mca_btl_base_err("to: %s ", proc->proc_hostname); \ - } \ - mca_btl_base_err args; \ - mca_btl_base_err("\n"); \ -} while(0); +#define BTL_PEER_ERROR(proc, args) \ + do { \ + mca_btl_base_err("%s[%s:%d:%s] from %s ", \ + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ + __FILE__, __LINE__, __func__, \ + opal_process_info.nodename); \ + if (proc) { \ + mca_btl_base_err("to: %s ", \ + opal_get_proc_hostname(proc)); \ + } \ + mca_btl_base_err args; \ + mca_btl_base_err("\n"); \ + } while(0); #if OPAL_ENABLE_DEBUG -#define BTL_VERBOSE(args) \ -do { \ - if(mca_btl_base_verbose > 0) { \ - mca_btl_base_err("[%s]%s[%s:%d:%s] ", \ - opal_proc_local_get()->proc_hostname, \ - 
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_btl_base_err args; \ - mca_btl_base_err("\n"); \ - } \ -} while(0); +#define BTL_VERBOSE(args) \ + do { \ + if(mca_btl_base_verbose > 0) { \ + mca_btl_base_err("[%s]%s[%s:%d:%s] ", \ + opal_process_info.nodename, \ + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ + __FILE__, __LINE__, __func__); \ + mca_btl_base_err args; \ + mca_btl_base_err("\n"); \ + } \ + } while(0); #else #define BTL_VERBOSE(args) #endif diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index 3927f0f8e7..de83da447c 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -17,7 +17,7 @@ * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved + * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -34,6 +34,7 @@ #include "opal/class/opal_bitmap.h" #include "opal/util/output.h" #include "opal/util/arch.h" +#include "opal/util/proc.h" #include "opal/include/opal_stdint.h" #include "opal/util/show_help.h" #include "opal/mca/btl/btl.h" @@ -142,13 +143,13 @@ void mca_btl_openib_show_init_error(const char *file, int line, } opal_show_help("help-mpi-btl-openib.txt", "init-fail-no-mem", - true, opal_proc_local_get()->proc_hostname, + true, opal_process_info.nodename, file, line, func, dev, str_limit); if (NULL != str_limit) free(str_limit); } else { opal_show_help("help-mpi-btl-openib.txt", "init-fail-create-q", - true, opal_proc_local_get()->proc_hostname, + true, opal_process_info.nodename, file, line, func, strerror(errno), errno, dev); } } @@ -473,13 +474,12 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl, if(mca_btl_openib_get_transport_type(openib_btl) != endpoint->rem_info.rem_transport_type) { opal_show_help("help-mpi-btl-openib.txt", "conflicting transport types", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, ibv_get_device_name(openib_btl->device->ib_dev), (openib_btl->device->ib_dev_attr).vendor_id, (openib_btl->device->ib_dev_attr).vendor_part_id, mca_btl_openib_transport_name_strings[mca_btl_openib_get_transport_type(openib_btl)], - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? 
- "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname, + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), endpoint->rem_info.rem_vendor_id, endpoint->rem_info.rem_vendor_part_id, mca_btl_openib_transport_name_strings[endpoint->rem_info.rem_transport_type]); @@ -495,7 +495,7 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl, OPAL_ERR_NOT_FOUND != ret) { opal_show_help("help-mpi-btl-openib.txt", "error in device init", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, ibv_get_device_name(openib_btl->device->ib_dev)); return ret; } @@ -539,13 +539,12 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl, recv_qps)) { opal_show_help("help-mpi-btl-openib.txt", "unsupported queues configuration", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, ibv_get_device_name(openib_btl->device->ib_dev), (openib_btl->device->ib_dev_attr).vendor_id, (openib_btl->device->ib_dev_attr).vendor_part_id, mca_btl_openib_component.receive_queues, - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown": endpoint->endpoint_proc->proc_opal->proc_hostname, + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), endpoint->rem_info.rem_vendor_id, endpoint->rem_info.rem_vendor_part_id, recv_qps); @@ -562,13 +561,12 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl, values.receive_queues)) { opal_show_help("help-mpi-btl-openib.txt", "unsupported queues configuration", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, ibv_get_device_name(openib_btl->device->ib_dev), (openib_btl->device->ib_dev_attr).vendor_id, (openib_btl->device->ib_dev_attr).vendor_part_id, mca_btl_openib_component.receive_queues, - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? 
- "unknown": endpoint->endpoint_proc->proc_opal->proc_hostname, + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), endpoint->rem_info.rem_vendor_id, endpoint->rem_info.rem_vendor_part_id, values.receive_queues); @@ -679,7 +677,7 @@ static uint64_t calculate_max_reg (void) action = "Your MPI job will continue, but may be behave poorly and/or hang."; } opal_show_help("help-mpi-btl-openib.txt", "reg mem limit low", true, - opal_proc_local_get()->proc_hostname, (unsigned long)(max_reg >> 20), + opal_process_info.nodename, (unsigned long)(max_reg >> 20), (unsigned long)(mem_total >> 20), action); return 0; /* signal that we can't have enough memory */ } diff --git a/opal/mca/btl/openib/btl_openib_async.c b/opal/mca/btl/openib/btl_openib_async.c index 7f9e1327f9..0e19ffdf2e 100644 --- a/opal/mca/btl/openib/btl_openib_async.c +++ b/opal/mca/btl/openib/btl_openib_async.c @@ -6,6 +6,7 @@ * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +25,8 @@ #include #include "opal/util/show_help.h" +#include "opal/util/proc.h" + #include "opal/mca/btl/base/base.h" #include "btl_openib.h" #include "btl_openib_mca.h" @@ -405,14 +408,14 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po case IBV_EVENT_PATH_MIG_ERR: case IBV_EVENT_SRQ_ERR: opal_show_help("help-mpi-btl-openib.txt", "of error event", - true,opal_proc_local_get()->proc_hostname, (int)getpid(), + true,opal_process_info.nodename, (int)getpid(), event_type, openib_event_to_str((enum ibv_event_type)event_type), xrc_event ? 
"true" : "false"); break; case IBV_EVENT_PORT_ERR: opal_show_help("help-mpi-btl-openib.txt", "of error event", - true,opal_proc_local_get()->proc_hostname, (int)getpid(), + true,opal_process_info.nodename, (int)getpid(), event_type, openib_event_to_str((enum ibv_event_type)event_type), xrc_event ? "true" : "false"); @@ -442,7 +445,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po break; default: opal_show_help("help-mpi-btl-openib.txt", "of unknown event", - true,opal_proc_local_get()->proc_hostname, (int)getpid(), + true,opal_process_info.nodename, (int)getpid(), event_type, xrc_event ? "true" : "false"); } ibv_ack_async_event(&event); diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index caa62f90e1..d876e215d5 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -84,6 +84,7 @@ #include "opal/runtime/opal_params.h" #include "opal/runtime/opal.h" #include "opal/mca/pmix/pmix.h" +#include "opal/util/proc.h" #include "btl_openib.h" #include "btl_openib_frag.h" @@ -540,8 +541,7 @@ static void btl_openib_control(mca_btl_base_module_t* btl, break; case MCA_BTL_OPENIB_CONTROL_CTS: OPAL_OUTPUT((-1, "received CTS from %s (buffer %p): posted recvs %d, sent cts %d", - (NULL == ep->endpoint_proc->proc_opal->proc_hostname) ? 
- "unknown" : ep->endpoint_proc->proc_opal->proc_hostname, + opal_get_proc_hostname(ep->endpoint_proc->proc_opal), (void*) ctl_hdr, ep->endpoint_posted_recvs, ep->endpoint_cts_sent)); ep->endpoint_cts_received = true; @@ -676,7 +676,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, if (mca_btl_openib_component.gid_index > ib_port_attr->gid_tbl_len) { opal_show_help("help-mpi-btl-openib.txt", "gid index too large", - true, opal_proc_local_get()->proc_hostname, + true, opal_process_info.nodename, ibv_get_device_name(device->ib_dev), port_num, mca_btl_openib_component.gid_index, ib_port_attr->gid_tbl_len); @@ -734,7 +734,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, IB_DEFAULT_GID_PREFIX == subnet_id && mca_btl_openib_component.warn_default_gid_prefix) { opal_show_help("help-mpi-btl-openib.txt", "default subnet prefix", - true, opal_proc_local_get()->proc_hostname); + true, opal_process_info.nodename); } lmc = (1 << ib_port_attr->lmc); @@ -1200,7 +1200,7 @@ static int setup_qps(void) if (0 == opal_argv_count(queues)) { opal_show_help("help-mpi-btl-openib.txt", "no qps in receive_queues", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, mca_btl_openib_component.receive_queues); ret = OPAL_ERROR; goto error; @@ -1219,7 +1219,7 @@ static int setup_qps(void) num_xrc_qps++; #else opal_show_help("help-mpi-btl-openib.txt", "No XRC support", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, mca_btl_openib_component.receive_queues); ret = OPAL_ERR_NOT_AVAILABLE; goto error; @@ -1227,7 +1227,7 @@ static int setup_qps(void) } else { opal_show_help("help-mpi-btl-openib.txt", "invalid qp type in receive_queues", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, mca_btl_openib_component.receive_queues, queues[qp]); ret = OPAL_ERR_BAD_PARAM; @@ -1239,7 +1239,7 @@ static int setup_qps(void) and SRQ */ if (num_xrc_qps > 0 && 
(num_pp_qps > 0 || num_srq_qps > 0)) { opal_show_help("help-mpi-btl-openib.txt", "XRC with PP or SRQ", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, mca_btl_openib_component.receive_queues); ret = OPAL_ERR_BAD_PARAM; goto error; @@ -1248,7 +1248,7 @@ static int setup_qps(void) /* Current XRC implementation can't used with btls_per_lid > 1 */ if (num_xrc_qps > 0 && mca_btl_openib_component.btls_per_lid > 1) { opal_show_help("help-mpi-btl-openib.txt", "XRC with BTLs per LID", - true, opal_proc_local_get()->proc_hostname, + true, opal_process_info.nodename, mca_btl_openib_component.receive_queues, num_xrc_qps); ret = OPAL_ERR_BAD_PARAM; goto error; @@ -1279,7 +1279,7 @@ static int setup_qps(void) if (count < 3 || count > 6) { opal_show_help("help-mpi-btl-openib.txt", "invalid pp qp specification", true, - opal_proc_local_get()->proc_hostname, queues[qp]); + opal_process_info.nodename, queues[qp]); ret = OPAL_ERR_BAD_PARAM; goto error; } @@ -1310,7 +1310,7 @@ static int setup_qps(void) if (count < 3 || count > 7) { opal_show_help("help-mpi-btl-openib.txt", "invalid srq specification", true, - opal_proc_local_get()->proc_hostname, queues[qp]); + opal_process_info.nodename, queues[qp]); ret = OPAL_ERR_BAD_PARAM; goto error; } @@ -1346,14 +1346,14 @@ static int setup_qps(void) if (rd_num < rd_init) { opal_show_help("help-mpi-btl-openib.txt", "rd_num must be >= rd_init", - true, opal_proc_local_get()->proc_hostname, queues[qp]); + true, opal_process_info.nodename, queues[qp]); ret = OPAL_ERR_BAD_PARAM; goto error; } if (rd_num < srq_limit) { opal_show_help("help-mpi-btl-openib.txt", "srq_limit must be > rd_num", - true, opal_proc_local_get()->proc_hostname, queues[qp]); + true, opal_process_info.nodename, queues[qp]); ret = OPAL_ERR_BAD_PARAM; goto error; } @@ -1365,7 +1365,7 @@ static int setup_qps(void) if (rd_num <= rd_low) { opal_show_help("help-mpi-btl-openib.txt", "rd_num must be > rd_low", - true, opal_proc_local_get()->proc_hostname, 
queues[qp]); + true, opal_process_info.nodename, queues[qp]); ret = OPAL_ERR_BAD_PARAM; goto error; } @@ -1386,21 +1386,21 @@ static int setup_qps(void) if (max_qp_size < max_size_needed) { opal_show_help("help-mpi-btl-openib.txt", "biggest qp size is too small", true, - opal_proc_local_get()->proc_hostname, max_qp_size, + opal_process_info.nodename, max_qp_size, max_size_needed); ret = OPAL_ERR_BAD_PARAM; goto error; } else if (max_qp_size > max_size_needed) { opal_show_help("help-mpi-btl-openib.txt", "biggest qp size is too big", true, - opal_proc_local_get()->proc_hostname, max_qp_size, + opal_process_info.nodename, max_qp_size, max_size_needed); } if (mca_btl_openib_component.ib_free_list_max > 0 && min_freelist_size > mca_btl_openib_component.ib_free_list_max) { opal_show_help("help-mpi-btl-openib.txt", "freelist too small", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, mca_btl_openib_component.ib_free_list_max, min_freelist_size); ret = OPAL_ERR_BAD_PARAM; @@ -1514,7 +1514,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) if (mca_btl_openib_component.warn_no_device_params_found) { opal_show_help("help-mpi-btl-openib.txt", "no device params found", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, ibv_get_device_name(device->ib_dev), device->ib_dev_attr.vendor_id, device->ib_dev_attr.vendor_part_id); @@ -1997,7 +1997,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) opal_show_help("help-mpi-btl-openib.txt", "locally conflicting receive_queues", true, opal_install_dirs.opaldatadir, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, ibv_get_device_name(receive_queues_device->ib_dev), receive_queues_device->ib_dev_attr.vendor_id, receive_queues_device->ib_dev_attr.vendor_part_id, @@ -2021,7 +2021,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) opal_show_help("help-mpi-btl-openib.txt", "locally 
conflicting receive_queues", true, opal_install_dirs.opaldatadir, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, ibv_get_device_name(receive_queues_device->ib_dev), receive_queues_device->ib_dev_attr.vendor_id, receive_queues_device->ib_dev_attr.vendor_part_id, @@ -2059,7 +2059,7 @@ error: if (OPAL_SUCCESS != ret) { opal_show_help("help-mpi-btl-openib.txt", "error in device init", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, ibv_get_device_name(device->ib_dev)); } @@ -2402,7 +2402,7 @@ btl_openib_component_init(int *num_btl_modules, ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) { opal_show_help("help-mpi-btl-openib.txt", "ptmalloc2 with no threads", true, - opal_proc_local_get()->proc_hostname); + opal_process_info.nodename); goto no_btls; } #endif @@ -2517,7 +2517,7 @@ btl_openib_component_init(int *num_btl_modules, if (mca_btl_openib_component.want_fork_support > 0) { opal_show_help("help-mpi-btl-openib.txt", "ibv_fork_init fail", true, - opal_proc_local_get()->proc_hostname); + opal_process_info.nodename); goto no_btls; } } @@ -2636,7 +2636,7 @@ btl_openib_component_init(int *num_btl_modules, free(dev_sorted); if (!found) { opal_show_help("help-mpi-btl-openib.txt", "no devices right type", - true, opal_proc_local_get()->proc_hostname, + true, opal_process_info.nodename, ((BTL_OPENIB_DT_IB == mca_btl_openib_component.device_type) ? "InfiniBand" : (BTL_OPENIB_DT_IWARP == mca_btl_openib_component.device_type) ? @@ -2653,7 +2653,7 @@ btl_openib_component_init(int *num_btl_modules, mca_btl_openib_component.warn_nonexistent_if) { char *str = opal_argv_join(mca_btl_openib_component.if_list, ','); opal_show_help("help-mpi-btl-openib.txt", "nonexistent port", - true, opal_proc_local_get()->proc_hostname, + true, opal_process_info.nodename, ((NULL != mca_btl_openib_component.if_include) ? 
"in" : "ex"), str); free(str); @@ -2665,7 +2665,7 @@ btl_openib_component_init(int *num_btl_modules, if (num_devices_intentionally_ignored < num_devs) { opal_show_help("help-mpi-btl-openib.txt", "no active ports found", true, - opal_proc_local_get()->proc_hostname); + opal_process_info.nodename); } goto no_btls; } @@ -3394,11 +3394,7 @@ error: if (IBV_WC_RNR_RETRY_EXC_ERR == wc->status || IBV_WC_RETRY_EXC_ERR == wc->status) { const char *peer_hostname; - if (endpoint->endpoint_proc->proc_opal && endpoint->endpoint_proc->proc_opal->proc_hostname) { - peer_hostname = endpoint->endpoint_proc->proc_opal->proc_hostname; - } else { - peer_hostname = ""; - } + peer_hostname = opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal); const char *device_name = ibv_get_device_name(endpoint->qps[qp].qp->lcl_qp->context->device); @@ -3410,21 +3406,21 @@ error: opal_show_help("help-mpi-btl-openib.txt", "pp rnr retry exceeded", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, device_name, peer_hostname); } else { opal_show_help("help-mpi-btl-openib.txt", "srq rnr retry exceeded", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, device_name, peer_hostname); } } else if (IBV_WC_RETRY_EXC_ERR == wc->status) { opal_show_help("help-mpi-btl-openib.txt", "pp retry exceeded", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, device_name, peer_hostname); } } diff --git a/opal/mca/btl/openib/btl_openib_endpoint.c b/opal/mca/btl/openib/btl_openib_endpoint.c index e1e7af4293..e0e43ef286 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.c +++ b/opal/mca/btl/openib/btl_openib_endpoint.c @@ -17,7 +17,7 @@ * Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2010-2011 IBM Corporation. All rights reserved. * Copyright (c) 2010-2011 Oracle and/or its affiliates. All rights reserved - * Copyright (c) 2013 Intel, Inc. All rights reserved + * Copyright (c) 2013-2014 Intel, Inc. 
All rights reserved * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * * $COPYRIGHT$ @@ -38,6 +38,7 @@ #include "opal_stdint.h" #include "opal/util/output.h" +#include "opal/util/proc.h" #include "opal/util/show_help.h" #include "opal/class/ompi_free_list.h" @@ -507,8 +508,7 @@ static void cts_sent(mca_btl_base_module_t* btl, /* Nothing to do/empty function (we can't pass in a NULL pointer for the des_cbfunc) */ OPAL_OUTPUT((-1, "CTS send to %s completed", - (NULL == ep->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : ep->endpoint_proc->proc_opal->proc_hostname)); + opal_get_proc_hostname(ep->endpoint_proc->proc_opal))); } /* @@ -523,8 +523,7 @@ void mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t *endpoint) mca_btl_openib_control_header_t *ctl_hdr; OPAL_OUTPUT((-1, "SENDING CTS to %s on qp index %d (QP num %d)", - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname, + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), mca_btl_openib_component.credits_qp, endpoint->qps[mca_btl_openib_component.credits_qp].qp->lcl_qp->qp_num)); sc_frag = alloc_control_frag(endpoint->endpoint_btl); @@ -594,8 +593,7 @@ void mca_btl_openib_endpoint_cpc_complete(mca_btl_openib_endpoint_t *endpoint) transport_type_ib_p = (IBV_TRANSPORT_IB == endpoint->endpoint_btl->device->ib_dev->transport_type); #endif OPAL_OUTPUT((-1, "cpc_complete to peer %s: is IB %d, initiatior %d, cts received: %d", - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? 
- "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname, + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), transport_type_ib_p, endpoint->endpoint_initiator, endpoint->endpoint_cts_received)); @@ -608,15 +606,13 @@ void mca_btl_openib_endpoint_cpc_complete(mca_btl_openib_endpoint_t *endpoint) mark us as connected */ if (endpoint->endpoint_cts_received) { OPAL_OUTPUT((-1, "cpc_complete to %s -- already got CTS, so marking endpoint as complete", - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname)); + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal))); mca_btl_openib_endpoint_connected(endpoint); } } OPAL_OUTPUT((-1, "cpc_complete to %s -- done", - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname)); + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal))); return; } @@ -1054,7 +1050,7 @@ void *mca_btl_openib_endpoint_invoke_error(void *context) if (NULL == btl || NULL == btl->error_cb) { opal_show_help("help-mpi-btl-openib.txt", "cannot raise btl error", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, __FILE__, __LINE__); exit(1); } diff --git a/opal/mca/btl/openib/btl_openib_ip.c b/opal/mca/btl/openib/btl_openib_ip.c index 0c86cb434a..2589890153 100644 --- a/opal/mca/btl/openib/btl_openib_ip.c +++ b/opal/mca/btl/openib/btl_openib_ip.c @@ -2,6 +2,7 @@ * Copyright (c) 2008 Chelsio, Inc. All rights reserved. * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* * Additional copyrights may follow * @@ -21,6 +22,7 @@ #include "opal/util/argv.h" #include "opal/util/if.h" +#include "opal/util/proc.h" #include "opal/util/show_help.h" #include "connect/connect.h" @@ -198,7 +200,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask) NULL != temp[2]) { opal_show_help("help-mpi-btl-openib.txt", "invalid ipaddr_inexclude", true, "include", - opal_proc_local_get()->proc_hostname, list[i], + opal_process_info.nodename, list[i], "Invalid specification (missing \"/\")"); if (NULL != temp) { opal_argv_free(temp); @@ -209,7 +211,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask) if (1 != inet_pton(ipaddr->sin_family, temp[0], &ipae)) { opal_show_help("help-mpi-btl-openib.txt", "invalid ipaddr_inexclude", true, "include", - opal_proc_local_get()->proc_hostname, list[i], + opal_process_info.nodename, list[i], "Invalid specification (inet_pton() failed)"); opal_argv_free(temp); continue; @@ -240,7 +242,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask) NULL != temp[2]) { opal_show_help("help-mpi-btl-openib.txt", "invalid ipaddr_inexclude", true, "exclude", - opal_proc_local_get()->proc_hostname, list[i], + opal_process_info.nodename, list[i], "Invalid specification (missing \"/\")"); if (NULL != temp) { opal_argv_free(temp); @@ -251,7 +253,7 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask) if (1 != inet_pton(ipaddr->sin_family, temp[0], &ipae)) { opal_show_help("help-mpi-btl-openib.txt", "invalid ipaddr_inexclude", true, "exclude", - opal_proc_local_get()->proc_hostname, list[i], + opal_process_info.nodename, list[i], "Invalid specification (inet_pton() failed)"); opal_argv_free(temp); continue; diff --git a/opal/mca/btl/openib/btl_openib_mca.c b/opal/mca/btl/openib/btl_openib_mca.c index 19e5ccc808..42ea77f9ff 100644 --- a/opal/mca/btl/openib/btl_openib_mca.c +++ b/opal/mca/btl/openib/btl_openib_mca.c @@ -18,6 +18,7 @@ * Copyright 
(c) 2013-2014 NVIDIA Corporation. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,6 +35,8 @@ #include "opal/util/os_dirpath.h" #include "opal/util/output.h" #include "opal/util/show_help.h" +#include "opal/util/proc.h" + #include "btl_openib.h" #include "btl_openib_mca.h" #include "btl_openib_ini.h" @@ -621,13 +624,13 @@ int btl_openib_register_mca_params(void) if (mca_btl_openib_component.cuda_want_gdr && !mca_btl_openib_component.cuda_have_gdr) { opal_show_help("help-mpi-btl-openib.txt", "CUDA_no_gdr_support", true, - opal_proc_local_get()->proc_hostname); + opal_process_info.nodename); return OPAL_ERROR; } if (mca_btl_openib_component.cuda_want_gdr && !mca_btl_openib_component.driver_have_gdr) { opal_show_help("help-mpi-btl-openib.txt", "driver_no_gdr_support", true, - opal_proc_local_get()->proc_hostname); + opal_process_info.nodename); return OPAL_ERROR; } #if OPAL_CUDA_GDR_SUPPORT @@ -733,7 +736,7 @@ int btl_openib_verify_mca_params (void) if (1 == mca_btl_openib_component.want_fork_support) { opal_show_help("help-mpi-btl-openib.txt", "ibv_fork requested but not supported", true, - opal_proc_local_get()->proc_hostname); + opal_process_info.nodename); return OPAL_ERR_BAD_PARAM; } #endif @@ -778,7 +781,7 @@ int btl_openib_verify_mca_params (void) if(mca_btl_openib_component.buffer_alignment <= 1 || (mca_btl_openib_component.buffer_alignment & (mca_btl_openib_component.buffer_alignment - 1))) { opal_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment", - true, mca_btl_openib_component.buffer_alignment, opal_proc_local_get()->proc_hostname, 64); + true, mca_btl_openib_component.buffer_alignment, opal_process_info.nodename, 64); mca_btl_openib_component.buffer_alignment = 64; } @@ -801,7 +804,7 @@ int btl_openib_verify_mca_params (void) 
mca_btl_openib_component.driver_have_gdr) { if (1 == mca_btl_openib_component.want_fork_support) { opal_show_help("help-mpi-btl-openib.txt", "no_fork_with_gdr", - true, opal_proc_local_get()->proc_hostname); + true, opal_process_info.nodename); return OPAL_ERR_BAD_PARAM; } if (-1 == mca_btl_openib_component.want_fork_support) { diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_base.c b/opal/mca/btl/openib/connect/btl_openib_connect_base.c index 1a93ff873b..e92105cd42 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_base.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_base.c @@ -4,7 +4,7 @@ * Copyright (c) 2007 Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved + * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * * $COPYRIGHT$ * @@ -27,6 +27,7 @@ #include "opal/util/argv.h" #include "opal/util/output.h" +#include "opal/util/proc.h" #include "opal/util/show_help.h" /* @@ -127,7 +128,7 @@ int opal_btl_openib_connect_base_register(void) if (NULL == all[i]) { opal_show_help("help-mpi-btl-openib-cpc-base.txt", "cpc name not found", true, - "include", opal_proc_local_get()->proc_hostname, + "include", opal_process_info.nodename, "include", btl_openib_cpc_include, temp[j], all_cpc_names); opal_argv_free(temp); @@ -153,7 +154,7 @@ int opal_btl_openib_connect_base_register(void) if (NULL == all[i]) { opal_show_help("help-mpi-btl-openib-cpc-base.txt", "cpc name not found", true, - "exclude", opal_proc_local_get()->proc_hostname, + "exclude", opal_process_info.nodename, "exclude", btl_openib_cpc_exclude, temp[j], all_cpc_names); opal_argv_free(temp); @@ -299,7 +300,7 @@ int opal_btl_openib_connect_base_select_for_local_port(mca_btl_openib_module_t * if (0 == cpc_index) { opal_show_help("help-mpi-btl-openib-cpc-base.txt", "no cpcs for port", true, - opal_proc_local_get()->proc_hostname, + 
opal_process_info.nodename, ibv_get_device_name(btl->device->ib_dev), btl->port_num, msg); free(cpcs); @@ -454,8 +455,7 @@ int opal_btl_openib_connect_base_alloc_cts(mca_btl_base_endpoint_t *endpoint) mca_btl_openib_component.credits_qp; endpoint->endpoint_cts_frag.super.endpoint = endpoint; OPAL_OUTPUT((-1, "Got a CTS frag for peer %s, addr %p, length %d, lkey %d", - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname, + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), (void*) endpoint->endpoint_cts_frag.super.sg_entry.addr, endpoint->endpoint_cts_frag.super.sg_entry.length, endpoint->endpoint_cts_frag.super.sg_entry.lkey)); diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c b/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c index ae8efb380a..4f69531d60 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c @@ -6,7 +6,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved + * Copyright (c) 2013-2014 Intel, Inc. 
All rights reserved * * $COPYRIGHT$ * @@ -50,6 +50,7 @@ #include "opal/util/output.h" #include "opal/util/error.h" #include "opal/util/show_help.h" +#include "opal/util/proc.h" #include "btl_openib_fd.h" #include "btl_openib_proc.h" @@ -532,7 +533,7 @@ static int rdmacm_setup_qp(rdmacm_contents_t *contents, endpoint->qps[qpnum].ib_inline_max = attr.cap.max_inline_data; opal_show_help("help-mpi-btl-openib-cpc-base.txt", "inline truncated", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, ibv_get_device_name(contents->openib_btl->device->ib_dev), contents->openib_btl->port_num, req_inline, attr.cap.max_inline_data); @@ -888,8 +889,7 @@ static int rdmacm_module_start_connect(opal_btl_openib_connect_base_module_t *cp (void*) endpoint, (void*) endpoint->endpoint_local_cpc, endpoint->endpoint_initiator ? "am" : "am NOT", - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname)); + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal))); /* If we're the initiator, then open all the QPs */ if (contents->endpoint->endpoint_initiator) { @@ -942,14 +942,14 @@ static void *show_help_cant_find_endpoint(void *context) msg = stringify(c->peer_ip_addr); opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt", "could not find matching endpoint", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, c->device_name, c->peer_tcp_port); free(msg); } else { opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt", "could not find matching endpoint", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, "", "", -1); } free(context); @@ -1032,8 +1032,7 @@ static int handle_connect_request(struct rdma_cm_event *event) (void*) endpoint, (void*) endpoint->endpoint_local_cpc, endpoint->endpoint_initiator ? "am" : "am NOT", - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? 
- "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname)); + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal))); if (endpoint->endpoint_initiator) { reject_reason_t reason = REJECT_WRONG_DIRECTION; @@ -1094,8 +1093,7 @@ static int handle_connect_request(struct rdma_cm_event *event) } OPAL_OUTPUT((-1, "Posted CTS receiver buffer (%p) for peer %s, qp index %d (QP num %d), WR ID %p, SG addr %p, len %d, lkey %d", (void*)((uintptr_t*) wr->sg_list[0].addr), - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname, + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), qpnum, endpoint->qps[qpnum].qp->lcl_qp->qp_num, (void*)((uintptr_t*) wr->wr_id), @@ -1286,8 +1284,7 @@ static void *local_endpoint_cpc_complete(void *context) mca_btl_openib_endpoint_t *endpoint = (mca_btl_openib_endpoint_t *)context; OPAL_OUTPUT((-1, "MAIN local_endpoint_cpc_complete to %s", - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname)); + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal))); mca_btl_openib_endpoint_cpc_complete(endpoint); return NULL; @@ -1307,8 +1304,7 @@ static int rdmacm_connect_endpoint(id_context_t *context, if (contents->server) { endpoint = context->endpoint; OPAL_OUTPUT((-1, "SERVICE Server CPC complete to %s", - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname)); + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal))); } else { endpoint = contents->endpoint; endpoint->rem_info.rem_index = @@ -1323,8 +1319,7 @@ static int rdmacm_connect_endpoint(id_context_t *context, contents->on_client_list = true; } OPAL_OUTPUT((-1, "SERVICE Client CPC complete to %s", - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? 
- "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname)); + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal))); } if (NULL == endpoint) { BTL_ERROR(("Can't find endpoint")); @@ -1337,11 +1332,9 @@ static int rdmacm_connect_endpoint(id_context_t *context, connected */ if (++data->rdmacm_counter < mca_btl_openib_component.num_qps) { BTL_VERBOSE(("%s to peer %s, count == %d", contents->server?"server":"client", - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname, data->rdmacm_counter)); + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), data->rdmacm_counter)); OPAL_OUTPUT((-1, "%s to peer %s, count == %d", contents->server?"server":"client", - (NULL == endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : endpoint->endpoint_proc->proc_opal->proc_hostname, data->rdmacm_counter)); + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), data->rdmacm_counter)); return OPAL_SUCCESS; } @@ -1578,8 +1571,7 @@ static int finish_connect(id_context_t *context) OPAL_OUTPUT((-1, "Posted initiator CTS buffer (%p, length %d) for peer %s, qp index %d (QP num %d)", (void*)((uintptr_t*) wr->sg_list[0].addr), wr->sg_list[0].length, - (NULL == contents->endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : contents->endpoint->endpoint_proc->proc_opal->proc_hostname, + opal_get_proc_hostname(contents->endpoint->endpoint_proc->proc_opal), context->qpnum, contents->endpoint->qps[context->qpnum].qp->lcl_qp->qp_num)); } @@ -1651,8 +1643,7 @@ static int finish_connect(id_context_t *context) (void*) contents->endpoint, (void*) contents->endpoint->endpoint_local_cpc, contents->endpoint->endpoint_initiator ? "am" : "am NOT", - (NULL == contents->endpoint->endpoint_proc->proc_opal->proc_hostname) ? 
- "unknown" : contents->endpoint->endpoint_proc->proc_opal->proc_hostname)); + opal_get_proc_hostname(contents->endpoint->endpoint_proc->proc_opal))); rc = rdma_connect(context->id, &conn_param); if (0 != rc) { BTL_ERROR(("rdma_connect Failed with %d", rc)); @@ -1680,7 +1671,7 @@ static void *show_help_rdmacm_event_error(void *c) if (RDMA_CM_EVENT_DEVICE_REMOVAL == event->event) { opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt", "rdma cm device removal", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, ibv_get_device_name(event->id->verbs->device)); } else { const char *device = "Unknown"; @@ -1691,11 +1682,10 @@ static void *show_help_rdmacm_event_error(void *c) } opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt", "rdma cm event error", true, - opal_proc_local_get()->proc_hostname, + opal_process_info.nodename, device, rdma_event_str(event->event), - (NULL == context->endpoint->endpoint_proc->proc_opal->proc_hostname) ? - "unknown" : context->endpoint->endpoint_proc->proc_opal->proc_hostname); + opal_get_proc_hostname(context->endpoint->endpoint_proc->proc_opal)); } return NULL; diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_sl.c b/opal/mca/btl/openib/connect/btl_openib_connect_sl.c index 19481a34c1..0d7b01b6d7 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_sl.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_sl.c @@ -1,9 +1,10 @@ /* * Copyright (c) 2011 Mellanox Technologies. All rights reserved. * - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -15,6 +16,8 @@ #include "opal/util/show_help.h" #include "opal/util/sys_limits.h" +#include "opal/util/proc.h" + #include "connect/btl_openib_connect_sl.h" #include @@ -113,7 +116,7 @@ static int init_ud_qp(struct ibv_context *context_arg, if (NULL == cache->cq) { BTL_ERROR(("error creating cq, errno says %s", strerror(errno))); opal_show_help("help-mpi-btl-openib.txt", "init-fail-create-q", - true, opal_proc_local_get()->proc_hostname, + true, opal_process_info.nodename, __FILE__, __LINE__, "ibv_create_cq", strerror(errno), errno, ibv_get_device_name(context_arg->device)); diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c index 11161998da..8d78b40114 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c @@ -7,6 +7,7 @@ * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* * $COPYRIGHT$ * @@ -59,6 +60,7 @@ #include #include "opal/util/show_help.h" +#include "opal/util/proc.h" #include "opal/util/output.h" #include "opal/util/error.h" #include "opal/util/alfg.h" @@ -1246,7 +1248,7 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_ if (NULL == lcl_ep->qps[qp].qp->lcl_qp) { opal_show_help("help-mpi-btl-openib-cpc-base.txt", - "ibv_create_qp failed", true, opal_proc_local_get()->proc_hostname, + "ibv_create_qp failed", true, opal_process_info.nodename, ibv_get_device_name(m->btl->device->ib_dev), "Reliable connected (RC)"); @@ -1256,7 +1258,7 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_ if (init_attr.cap.max_inline_data < req_inline) { lcl_ep->qps[qp].ib_inline_max = init_attr.cap.max_inline_data; opal_show_help("help-mpi-btl-openib-cpc-base.txt", - "inline truncated", true, opal_proc_local_get()->proc_hostname, + "inline truncated", true, opal_process_info.nodename, ibv_get_device_name(m->btl->device->ib_dev), m->btl->port_num, req_inline, init_attr.cap.max_inline_data); diff --git a/opal/mca/btl/sm/btl_sm_component.c b/opal/mca/btl/sm/btl_sm_component.c index bda89a4de1..c7cb46c965 100644 --- a/opal/mca/btl/sm/btl_sm_component.c +++ b/opal/mca/btl/sm/btl_sm_component.c @@ -16,6 +16,7 @@ * All rights reserved. * Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. * Copyright (c) 2010-2012 IBM Corporation. All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -513,28 +514,28 @@ set_uniq_paths_for_init_rndv(mca_btl_sm_component_t *comp_ptr) if (asprintf(&comp_ptr->sm_mpool_ctl_file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s", opal_process_info.job_session_dir, - opal_proc_local_get()->proc_hostname) < 0) { + opal_process_info.nodename) < 0) { /* rc set */ goto out; } if (asprintf(&comp_ptr->sm_mpool_rndv_file_name, "%s"OPAL_PATH_SEP"shared_mem_pool_rndv.%s", opal_process_info.job_session_dir, - opal_proc_local_get()->proc_hostname) < 0) { + opal_process_info.nodename) < 0) { /* rc set */ goto out; } if (asprintf(&comp_ptr->sm_ctl_file_name, "%s"OPAL_PATH_SEP"shared_mem_btl_module.%s", opal_process_info.job_session_dir, - opal_proc_local_get()->proc_hostname) < 0) { + opal_process_info.nodename) < 0) { /* rc set */ goto out; } if (asprintf(&comp_ptr->sm_rndv_file_name, "%s"OPAL_PATH_SEP"shared_mem_btl_rndv.%s", opal_process_info.job_session_dir, - opal_proc_local_get()->proc_hostname) < 0) { + opal_process_info.nodename) < 0) { /* rc set */ goto out; } @@ -806,10 +807,10 @@ mca_btl_sm_component_init(int *num_btls, sbuf.st_mode = 0; } opal_show_help("help-mpi-btl-sm.txt", "knem permission denied", - true, opal_proc_local_get()->proc_hostname, sbuf.st_mode); + true, opal_process_info.nodename, sbuf.st_mode); } else { opal_show_help("help-mpi-btl-sm.txt", "knem fail open", - true, opal_proc_local_get()->proc_hostname, errno, + true, opal_process_info.nodename, errno, strerror(errno)); } goto no_knem; @@ -821,13 +822,13 @@ mca_btl_sm_component_init(int *num_btls, &mca_btl_sm_component.knem_info); if (rc < 0) { opal_show_help("help-mpi-btl-sm.txt", "knem get ABI fail", - true, opal_proc_local_get()->proc_hostname, errno, + true, opal_process_info.nodename, errno, strerror(errno)); goto no_knem; } if (KNEM_ABI_VERSION != mca_btl_sm_component.knem_info.abi) { opal_show_help("help-mpi-btl-sm.txt", "knem ABI mismatch", - true, opal_proc_local_get()->proc_hostname, 
KNEM_ABI_VERSION, + true, opal_process_info.nodename, KNEM_ABI_VERSION, mca_btl_sm_component.knem_info.abi); goto no_knem; } @@ -849,7 +850,7 @@ mca_btl_sm_component_init(int *num_btls, KNEM_STATUS_ARRAY_FILE_OFFSET); if (MAP_FAILED == mca_btl_sm.knem_status_array) { opal_show_help("help-mpi-btl-sm.txt", "knem mmap fail", - true, opal_proc_local_get()->proc_hostname, errno, + true, opal_process_info.nodename, errno, strerror(errno)); goto no_knem; } diff --git a/opal/mca/btl/smcuda/btl_smcuda_component.c b/opal/mca/btl/smcuda/btl_smcuda_component.c index ad9d84b87d..3a6281a674 100644 --- a/opal/mca/btl/smcuda/btl_smcuda_component.c +++ b/opal/mca/btl/smcuda/btl_smcuda_component.c @@ -15,6 +15,7 @@ * Copyright (c) 2010-2014 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -442,28 +443,28 @@ set_uniq_paths_for_init_rndv(mca_btl_smcuda_component_t *comp_ptr) if (asprintf(&comp_ptr->sm_mpool_ctl_file_name, "%s"OPAL_PATH_SEP"shared_mem_cuda_pool.%s", opal_process_info.job_session_dir, - opal_proc_local_get()->proc_hostname) < 0) { + opal_process_info.nodename) < 0) { /* rc set */ goto out; } if (asprintf(&comp_ptr->sm_mpool_rndv_file_name, "%s"OPAL_PATH_SEP"shared_mem_cuda_pool_rndv.%s", opal_process_info.job_session_dir, - opal_proc_local_get()->proc_hostname) < 0) { + opal_process_info.nodename) < 0) { /* rc set */ goto out; } if (asprintf(&comp_ptr->sm_ctl_file_name, "%s"OPAL_PATH_SEP"shared_mem_cuda_btl_module.%s", opal_process_info.job_session_dir, - opal_proc_local_get()->proc_hostname) < 0) { + opal_process_info.nodename) < 0) { /* rc set */ goto out; } if (asprintf(&comp_ptr->sm_rndv_file_name, "%s"OPAL_PATH_SEP"shared_mem_cuda_btl_rndv.%s", opal_process_info.job_session_dir, - opal_proc_local_get()->proc_hostname) < 0) { + opal_process_info.nodename) < 0) { /* rc set */ goto 
out; } diff --git a/opal/mca/btl/tcp/btl_tcp_component.c b/opal/mca/btl/tcp/btl_tcp_component.c index b5dd5fcc7e..3cff5d0e19 100644 --- a/opal/mca/btl/tcp/btl_tcp_component.c +++ b/opal/mca/btl/tcp/btl_tcp_component.c @@ -58,6 +58,7 @@ #include "opal/util/output.h" #include "opal/util/argv.h" #include "opal/util/net.h" +#include "opal/util/proc.h" #include "opal/util/show_help.h" #include "opal/constants.h" #include "opal/mca/btl/btl.h" @@ -193,14 +194,14 @@ static int mca_btl_tcp_component_verify(void) { if( mca_btl_tcp_component.tcp_port_min > USHRT_MAX ) { opal_show_help("help-mpi-btl-tcp.txt", "invalid minimum port", - true, "v4", opal_proc_local_get()->proc_hostname, + true, "v4", opal_process_info.nodename, mca_btl_tcp_component.tcp_port_min ); mca_btl_tcp_component.tcp_port_min = 1024; } #if OPAL_ENABLE_IPV6 if( mca_btl_tcp_component.tcp6_port_min > USHRT_MAX ) { opal_show_help("help-mpi-btl-tcp.txt", "invalid minimum port", - true, "v6", opal_proc_local_get()->proc_hostname, + true, "v6", opal_process_info.nodename, mca_btl_tcp_component.tcp6_port_min ); mca_btl_tcp_component.tcp6_port_min = 1024; } @@ -479,7 +480,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd) str = strchr(argv[i], '/'); if (NULL == str) { opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude", - true, name, opal_proc_local_get()->proc_hostname, + true, name, opal_process_info.nodename, tmp, "Invalid specification (missing \"/\")"); free(argv[i]); free(tmp); @@ -496,7 +497,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd) if (1 != ret) { opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude", - true, name, opal_proc_local_get()->proc_hostname, tmp, + true, name, opal_process_info.nodename, tmp, "Invalid specification (inet_pton() failed)"); free(tmp); continue; @@ -524,7 +525,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd) if (if_index < 0) { if (reqd || 
mca_btl_tcp_component.report_all_unfound_interfaces) { opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude", - true, name, opal_proc_local_get()->proc_hostname, tmp, + true, name, opal_process_info.nodename, tmp, "Did not find interface matching this subnet"); } free(tmp); diff --git a/opal/mca/btl/tcp/btl_tcp_endpoint.c b/opal/mca/btl/tcp/btl_tcp_endpoint.c index dc85cd98df..48d3e38434 100644 --- a/opal/mca/btl/tcp/btl_tcp_endpoint.c +++ b/opal/mca/btl/tcp/btl_tcp_endpoint.c @@ -53,6 +53,7 @@ #include "opal/mca/event/event.h" #include "opal/util/net.h" #include "opal/util/show_help.h" +#include "opal/util/proc.h" #include "opal/mca/btl/base/btl_base_error.h" #include "btl_tcp.h" @@ -505,7 +506,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en return OPAL_ERROR; } opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail", - true, opal_proc_local_get()->proc_hostname, + true, opal_process_info.nodename, getpid(), "did not receive entire connect ACK from peer"); return OPAL_ERR_UNREACH; diff --git a/opal/mca/btl/tcp/btl_tcp_proc.c b/opal/mca/btl/tcp/btl_tcp_proc.c index 1af130980f..4ae02169b0 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.c +++ b/opal/mca/btl/tcp/btl_tcp_proc.c @@ -35,6 +35,7 @@ #include "opal/util/argv.h" #include "opal/util/if.h" #include "opal/util/net.h" +#include "opal/util/proc.h" #include "btl_tcp.h" #include "btl_tcp_proc.h" @@ -379,7 +380,7 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, int rc, *a = NULL; size_t i, j; - if (NULL == (proc_hostname = btl_proc->proc_opal->proc_hostname)) { + if (NULL == (proc_hostname = opal_get_proc_hostname(btl_proc->proc_opal))) { return OPAL_ERR_UNREACH; } diff --git a/opal/mca/btl/usnic/btl_usnic_connectivity.h b/opal/mca/btl/usnic/btl_usnic_connectivity.h index db54ae5ff5..59d8b7682a 100644 --- a/opal/mca/btl/usnic/btl_usnic_connectivity.h +++ b/opal/mca/btl/usnic/btl_usnic_connectivity.h @@ -1,5 +1,6 @@ /* - * Copyright (c) 2014 Cisco Systems, 
Inc. All rights reserved. + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -13,6 +14,7 @@ #include "opal_config.h" #include "opal/util/show_help.h" +#include "opal/util/proc.h" #include "btl_usnic_util.h" #include "btl_usnic_proc.h" @@ -284,7 +286,7 @@ opal_btl_usnic_check_connectivity(opal_btl_usnic_module_t *module, endpoint->endpoint_remote_addr.cidrmask, endpoint->endpoint_remote_addr.connectivity_udp_port, endpoint->endpoint_remote_addr.mac, - endpoint->endpoint_proc->proc_opal->proc_hostname, + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), endpoint->endpoint_remote_addr.mtu); endpoint->endpoint_connectivity_checked = true; } diff --git a/opal/mca/btl/usnic/btl_usnic_map.c b/opal/mca/btl/usnic/btl_usnic_map.c index f3668564bf..c86a935be0 100644 --- a/opal/mca/btl/usnic/btl_usnic_map.c +++ b/opal/mca/btl/usnic/btl_usnic_map.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,6 +15,7 @@ #include #include "opal/util/show_help.h" +#include "opal/util/proc.h" #include "btl_usnic.h" #include "btl_usnic_util.h" @@ -205,11 +207,7 @@ static void map_output_procs(FILE *fp) /* Loop over and print the sorted module device information */ for (i = 0; i < num_procs; ++i) { fprintf(fp, "peer=%d,", opal_process_name_vpid(procs[i]->proc_opal->proc_name)); - if (procs[i]->proc_opal->proc_hostname) { - fprintf(fp, "hostname=%s,", - procs[i]->proc_opal->proc_hostname); - } - + fprintf(fp, "hostname=%s,", opal_get_proc_hostname(procs[i]->proc_opal)); map_output_endpoints(fp, procs[i]); } @@ -235,7 +233,7 @@ void opal_btl_usnic_connectivity_map(void) rank>.txt */ asprintf(&filename, "%s-%s.pid%d.job%d.mcwrank%d.txt", mca_btl_usnic_component.connectivity_map_prefix, - opal_process_info.nodename, + opal_get_proc_hostname(opal_proc_local_get()), getpid(), opal_process_name_jobid(opal_proc_local_get()->proc_name), opal_process_name_vpid(opal_proc_local_get()->proc_name)); diff --git a/opal/mca/btl/usnic/btl_usnic_module.c b/opal/mca/btl/usnic/btl_usnic_module.c index d4be977b55..f37893dfbb 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.c +++ b/opal/mca/btl/usnic/btl_usnic_module.c @@ -15,6 +15,7 @@ * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2014 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -36,6 +37,7 @@ #include "opal/datatype/opal_convertor.h" #include "opal/include/opal_stdint.h" #include "opal/util/show_help.h" +#include "opal/util/proc.h" #include "opal/mca/btl/btl.h" #include "opal/mca/btl/base/btl_base_error.h" @@ -195,7 +197,7 @@ static void add_procs_warn_ah_fail(opal_btl_usnic_module_t *module, local, module->if_name, ibv_get_device_name(module->device), - endpoint->endpoint_proc->proc_opal->proc_hostname, + opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), remote); } diff --git a/opal/mca/btl/usnic/btl_usnic_proc.c b/opal/mca/btl/usnic/btl_usnic_proc.c index e252ed0822..19164e3d87 100644 --- a/opal/mca/btl/usnic/btl_usnic_proc.c +++ b/opal/mca/btl/usnic/btl_usnic_proc.c @@ -29,6 +29,7 @@ #include "opal/util/show_help.h" #include "opal/constants.h" #include "opal/mca/pmix/pmix.h" +#include "opal/util/proc.h" #include "btl_usnic.h" #include "btl_usnic_proc.h" @@ -248,7 +249,7 @@ static int create_proc(opal_proc_t *opal_proc, "transport mismatch", true, opal_process_info.nodename, - proc->proc_opal->proc_hostname); + opal_get_proc_hostname(proc->proc_opal)); OBJ_RELEASE(proc); return OPAL_ERR_BAD_PARAM; } @@ -628,8 +629,7 @@ static int match_modex(opal_btl_usnic_module_t *module, ibv_get_device_name(module->device), module->if_name, module->if_mtu, - (NULL == proc->proc_opal->proc_hostname) ? 
- "unknown" : proc->proc_opal->proc_hostname, + opal_get_proc_hostname(proc->proc_opal), proc->proc_modex[*index_out].mtu); *index_out = -1; return OPAL_ERR_UNREACH; diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 7ad6e4adde..4be54a7bc2 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -255,6 +255,7 @@ opal_init_util(int* pargc, char*** pargv) { int ret; char *error = NULL; + char hostname[512]; if( ++opal_util_initialized != 1 ) { if( opal_util_initialized < 1 ) { @@ -263,6 +264,13 @@ opal_init_util(int* pargc, char*** pargv) return OPAL_SUCCESS; } + /* set the nodename right away so anyone who needs it has it. Note + * that we don't bother with fqdn and prefix issues here - we let + * the RTE later replace this with a modified name if the user + * requests it */ + gethostname(hostname, 512); + opal_process_info.nodename = strdup(hostname); + /* initialize the memory allocator */ opal_malloc_init(); diff --git a/opal/util/proc.c b/opal/util/proc.c index 198f29dac3..045dd567a9 100644 --- a/opal/util/proc.c +++ b/opal/util/proc.c @@ -3,6 +3,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2013 Inria. All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -17,9 +18,9 @@ #include "opal/mca/pmix/pmix.h" opal_process_info_t opal_process_info = { - .nodename = "not yet named", - .job_session_dir = "not yet defined", - .proc_session_dir = "not yet defined", + .nodename = NULL, + .job_session_dir = NULL, + .proc_session_dir = NULL, .num_local_peers = 0, /* there is nobody else but me */ .my_local_rank = 0, /* I'm the only process around here */ #if OPAL_HAVE_HWLOC @@ -34,7 +35,7 @@ static opal_proc_t opal_local_proc = { 0, 0, NULL, - "localhost - unnamed" + NULL }; static opal_proc_t* opal_proc_my_name = &opal_local_proc; @@ -120,3 +121,34 @@ char* (*opal_process_name_print)(const opal_process_name_t) = opal_process_name_ uint32_t (*opal_process_name_vpid)(const opal_process_name_t) = opal_process_name_vpid_should_never_be_called; uint32_t (*opal_process_name_jobid)(const opal_process_name_t) = opal_process_name_vpid_should_never_be_called; +char* opal_get_proc_hostname(const opal_proc_t *proc) +{ + int ret; + + /* if the proc is NULL, then we can't know */ + if (NULL == proc) { + return "unknown"; + } + + /* if it is my own hostname we are after, then just hand back + * the value in opal_process_info */ + if (proc == opal_proc_my_name) { + return opal_process_info.nodename; + } + + /* see if we already have the data - if so, pass it back */ + if (NULL != proc->proc_hostname) { + return proc->proc_hostname; + } + + /* if we don't already have it, then try to get it */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_HOSTNAME, proc, + (char**)&(proc->proc_hostname), OPAL_STRING); + if (OPAL_SUCCESS != ret) { + OPAL_ERROR_LOG(ret); + return "unknown"; // return something so the caller doesn't segfault + } + + /* user is not allowed to release the data */ + return proc->proc_hostname; +} diff --git a/opal/util/proc.h b/opal/util/proc.h index 74397c4f32..8a52a08101 100644 --- a/opal/util/proc.h +++ b/opal/util/proc.h @@ -108,4 +108,10 @@ OPAL_DECLSPEC extern uint32_t 
(*opal_process_name_jobid)(const opal_process_name #define OPAL_PROC_MY_HOSTNAME (opal_proc_local_get()->proc_hostname) #define OPAL_NAME_INVALID 0xffffffffffffffff +/* provide a safe way to retrieve the hostname of a proc, including + * our own. This is to be used by all BTLs so we don't retrieve hostnames + * unless needed. The returned value MUST NOT be free'd as it is + * owned by the proc_t */ +OPAL_DECLSPEC char* opal_get_proc_hostname(const opal_proc_t *proc); + #endif /* OPAL_PROC_H */ diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index 28b8104dcd..0300da4448 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -170,6 +170,15 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) goto error; } + /* we may have modified the local nodename according to + * request to retain/strip the FQDN and prefix, so update + * it here. The OPAL layer will strdup the hostname, so + * we have to free it first to avoid a memory leak */ + if (NULL != opal_process_info.nodename) { + free(opal_process_info.nodename); + } + opal_process_info.nodename = orte_process_info.nodename; + /* setup the dstore framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_dstore_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -232,6 +241,18 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) goto error; } + /* set the remaining opal_process_info fields. 
Note that + * the OPAL layer will have initialized these to NULL, and + * anyone between us would not have strdup'd the string, so + * we cannot free it here */ + opal_process_info.job_session_dir = orte_process_info.job_session_dir; + opal_process_info.proc_session_dir = orte_process_info.proc_session_dir; + opal_process_info.num_local_peers = (int32_t)orte_process_info.num_local_peers; + opal_process_info.my_local_rank = (int32_t)orte_process_info.my_local_rank; +#if OPAL_HAVE_HWLOC + opal_process_info.cpuset = orte_process_info.cpuset; +#endif /* OPAL_HAVE_HWLOC */ + #if OPAL_ENABLE_TIMING opal_timing_set_jobid(ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); #endif