From 45e695928f0a7f445ef2ed6d78ded5303a21ed05 Mon Sep 17 00:00:00 2001
From: Ralph Castain
Date: Tue, 20 Aug 2013 18:59:36 +0000
Subject: [PATCH] As per the email discussion, revise the sparse handling of
 hostnames so that we avoid potential infinite loops while allowing
 large-scale users to improve their startup time:

* add a new MCA param orte_hostname_cutoff to specify the number of nodes
  at which we stop including hostnames. This defaults to UINT_MAX =>
  always include hostnames. If a value is given, then we will include
  hostnames for any allocation smaller than the given limit.

* remove ompi_proc_get_hostname. Replace all occurrences with a direct
  link to ompi_proc_t's proc_hostname, protected by appropriate "if NULL"
  checks.

* modify the OMPI-ORTE integration component so that any call to
  modex_recv automatically loads the ompi_proc_t->proc_hostname field as
  well as returning the requested info. Thus, any process whose modex
  info you retrieve will automatically have its hostname available. Note
  that on-demand retrieval is still enabled - i.e., if we are running
  under direct launch with PMI, the hostname will be fetched upon first
  call to modex_recv, and the ompi_proc_t->proc_hostname field will then
  be loaded.

* remove the stale MCA param "mpi_keep_peer_hostnames" that was no longer
  used anywhere in the code base.

* add an envar lookup in ess/pmi for the number of nodes in the
  allocation. Sadly, PMI itself doesn't provide that info, so we have to
  get it a different way. Currently, we support PBS-based systems and
  SLURM - for any other, rank0 will emit a warning and we assume the max
  number of daemons, in which case hostnames are not pre-loaded but are
  still fetched on demand.

This commit was SVN r29052.
---
 ompi/mca/bml/r2/bml_r2.c                      |   9 +-
 ompi/mca/btl/base/btl_base_error.h            |   8 +-
 ompi/mca/btl/openib/btl_openib.c              |  28 +-
 ompi/mca/btl/openib/btl_openib_component.c    |  17 +-
 ompi/mca/btl/openib/btl_openib_endpoint.c     |  13 +-
 .../openib/connect/btl_openib_connect_base.c  |   4 +-
 .../connect/btl_openib_connect_rdmacm.c       |  36 ++-
 ompi/mca/btl/tcp/btl_tcp_proc.c               |   5 +-
 ompi/mca/btl/udapl/btl_udapl_proc.c           |   4 +-
 ompi/mca/btl/usnic/btl_usnic_proc.c           |  24 +-
 ompi/mca/common/ofacm/common_ofacm_base.c     |   4 +-
 ompi/mca/mtl/mxm/mtl_mxm.c                    |   5 +-
 ompi/mca/mtl/psm/mtl_psm.c                    |   4 +-
 ompi/mca/pml/base/pml_base_select.c           |  18 +-
 ompi/mca/pml/bfo/pml_bfo_failover.c           |   5 +-
 ompi/mca/rte/orte/rte_orte.h                  |   2 +
 ompi/mca/rte/orte/rte_orte_module.c           |  41 ++-
 ompi/proc/proc.c                              |  60 ++--
 ompi/proc/proc.h                              |  10 +-
 ompi/runtime/ompi_mpi_params.c                |  12 +-
 ompi/runtime/params.h                         |   7 +-
 orte/mca/ess/pmi/ess_pmi_module.c             |  16 ++
 orte/runtime/help-orte-runtime.txt            |   5 +
 orte/runtime/orte_globals.c                   |   2 +
 orte/runtime/orte_globals.h                   |   2 +
 orte/runtime/orte_mca_params.c                |  10 +
 orte/util/nidmap.c                            | 268 ++++++++++--------
 27 files changed, 365 insertions(+), 254 deletions(-)

diff --git a/ompi/mca/bml/r2/bml_r2.c b/ompi/mca/bml/r2/bml_r2.c
index 66def6abc1..3bbce04827 100644
--- a/ompi/mca/bml/r2/bml_r2.c
+++ b/ompi/mca/bml/r2/bml_r2.c
@@ -13,6 +13,7 @@
  * Copyright (c) 2007-2012 Los Alamos National Security, LLC.  All rights
  *                         reserved.
  * Copyright (c) 2008-2009 Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2013      Intel, Inc. All rights reserved
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -408,11 +409,11 @@ static int mca_bml_r2_add_procs( size_t nprocs,
                        "unreachable proc",
                        true,
                        OMPI_NAME_PRINT(&(ompi_proc_local_proc->proc_name)),
-                       (ompi_proc_get_hostname(ompi_proc_local_proc) ?
-                        ompi_proc_get_hostname(ompi_proc_local_proc) : "unknown!"),
+                       (NULL != ompi_proc_local_proc->proc_hostname ?
+                        ompi_proc_local_proc->proc_hostname : "unknown!"),
                        OMPI_NAME_PRINT(&(unreach_proc->proc_name)),
-                       (ompi_proc_get_hostname(unreach_proc) ?
-                        ompi_proc_get_hostname(unreach_proc) : "unknown!"),
+                       (NULL != unreach_proc->proc_hostname ?
+                        unreach_proc->proc_hostname : "unknown!"),
                        btl_names);
     }
 
diff --git a/ompi/mca/btl/base/btl_base_error.h b/ompi/mca/btl/base/btl_base_error.h
index 406d2d256e..ad5c583cbd 100644
--- a/ompi/mca/btl/base/btl_base_error.h
+++ b/ompi/mca/btl/base/btl_base_error.h
@@ -13,7 +13,8 @@
  * Copyright (c) 2007      Sun Microsystems, Inc.  All rights reserved.
  * Copyright (c) 2012      Los Alamos National Security, LLC.
  *                         All rights reserved.
- * $COPYRIGHT$
+ * Copyright (c) 2013      Intel, Inc. All rights reserved
+ * $COPYRIGHT$
  *
  * Additional copyrights may follow
  *
@@ -62,8 +63,9 @@ do { \
             OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
             __FILE__, __LINE__, __func__, \
             ompi_process_info.nodename); \
-    if(proc && ompi_proc_get_hostname(proc)) { \
-        mca_btl_base_err("to: %s ", ompi_proc_get_hostname(proc)); \
+    if(proc) { \
+        mca_btl_base_err("to: %s ", (NULL == proc->proc_hostname) ? \
+                         "unknown" : proc->proc_hostname); \
     } \
     mca_btl_base_err args; \
     mca_btl_base_err("\n"); \
diff --git a/ompi/mca/btl/openib/btl_openib.c b/ompi/mca/btl/openib/btl_openib.c
index 3aeda3a981..3a0790019e 100644
--- a/ompi/mca/btl/openib/btl_openib.c
+++ b/ompi/mca/btl/openib/btl_openib.c
@@ -17,6 +17,7 @@
  * Copyright (c) 2006-2007 Voltaire All rights reserved.
  * Copyright (c) 2008-2012 Oracle and/or its affiliates.  All rights reserved.
  * Copyright (c) 2009      IBM Corporation.  All rights reserved.
+ * Copyright (c) 2013      Intel, Inc. All rights reserved
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -483,16 +484,17 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
 
     if(mca_btl_openib_get_transport_type(openib_btl) != endpoint->rem_info.rem_transport_type) {
         opal_show_help("help-mpi-btl-openib.txt",
-                       "conflicting transport types", true,
-                       ompi_process_info.nodename,
-                       ibv_get_device_name(openib_btl->device->ib_dev),
-                       (openib_btl->device->ib_dev_attr).vendor_id,
-                       (openib_btl->device->ib_dev_attr).vendor_part_id,
-                       mca_btl_openib_transport_name_strings[mca_btl_openib_get_transport_type(openib_btl)],
-                       ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi),
-                       endpoint->rem_info.rem_vendor_id,
-                       endpoint->rem_info.rem_vendor_part_id,
-                       mca_btl_openib_transport_name_strings[endpoint->rem_info.rem_transport_type]);
+                       "conflicting transport types", true,
+                       ompi_process_info.nodename,
+                       ibv_get_device_name(openib_btl->device->ib_dev),
+                       (openib_btl->device->ib_dev_attr).vendor_id,
+                       (openib_btl->device->ib_dev_attr).vendor_part_id,
+                       mca_btl_openib_transport_name_strings[mca_btl_openib_get_transport_type(openib_btl)],
+                       (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ?
+ "unknown" : NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname, + endpoint->rem_info.rem_vendor_id, + endpoint->rem_info.rem_vendor_part_id, + mca_btl_openib_transport_name_strings[endpoint->rem_info.rem_transport_type]); return OMPI_ERROR; } @@ -551,7 +553,8 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl, (openib_btl->device->ib_dev_attr).vendor_id, (openib_btl->device->ib_dev_attr).vendor_part_id, mca_btl_openib_component.receive_queues, - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi), + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown", endpoint->endpoint_proc->proc_ompi->proc_hostname, endpoint->rem_info.rem_vendor_id, endpoint->rem_info.rem_vendor_part_id, recv_qps); @@ -573,7 +576,8 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl, (openib_btl->device->ib_dev_attr).vendor_id, (openib_btl->device->ib_dev_attr).vendor_part_id, mca_btl_openib_component.receive_queues, - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi), + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown", endpoint->endpoint_proc->proc_ompi->proc_hostname, endpoint->rem_info.rem_vendor_id, endpoint->rem_info.rem_vendor_part_id, values.receive_queues); diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c index a20d503d96..7c88be1e89 100644 --- a/ompi/mca/btl/openib/btl_openib_component.c +++ b/ompi/mca/btl/openib/btl_openib_component.c @@ -18,6 +18,7 @@ * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved + * Copyright (c) 2013 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -535,7 +536,8 @@ static void btl_openib_control(mca_btl_base_module_t* btl, break; case MCA_BTL_OPENIB_CONTROL_CTS: OPAL_OUTPUT((-1, "received CTS from %s (buffer %p): posted recvs %d, sent cts %d", - ompi_proc_get_hostname(ep->endpoint_proc->proc_ompi), + (NULL == ep->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : ep->endpoint_proc->proc_ompi->proc_hostname, (void*) ctl_hdr, ep->endpoint_posted_recvs, ep->endpoint_cts_sent)); ep->endpoint_cts_received = true; @@ -3530,9 +3532,9 @@ error: if (IBV_WC_RNR_RETRY_EXC_ERR == wc->status || IBV_WC_RETRY_EXC_ERR == wc->status) { - char *peer_hostname = - (NULL != ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi)) ? - (char*)ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi) : + const char *peer_hostname = + (NULL != endpoint->endpoint_proc->proc_ompi) ? + endpoint->endpoint_proc->proc_ompi) : ""; const char *device_name = ibv_get_device_name(endpoint->qps[qp].qp->lcl_qp->context->device); @@ -3543,12 +3545,15 @@ error: "pp rnr retry exceeded" : "srq rnr retry exceeded", true, ompi_process_info.nodename, device_name, - peer_hostname); + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname); } else if (IBV_WC_RETRY_EXC_ERR == wc->status) { opal_show_help("help-mpi-btl-openib.txt", "pp retry exceeded", true, ompi_process_info.nodename, - device_name, peer_hostname); + device_name, + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? 
+ "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname); } } diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.c b/ompi/mca/btl/openib/btl_openib_endpoint.c index 81016cce24..fa8f5cf377 100644 --- a/ompi/mca/btl/openib/btl_openib_endpoint.c +++ b/ompi/mca/btl/openib/btl_openib_endpoint.c @@ -17,6 +17,7 @@ * Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2010-2011 IBM Corporation. All rights reserved. * Copyright (c) 2010-2011 Oracle and/or its affiliates. All rights reserved + * Copyright (c) 2013 Intel, Inc. All rights reserved * * $COPYRIGHT$ * @@ -507,7 +508,8 @@ static void cts_sent(mca_btl_base_module_t* btl, /* Nothing to do/empty function (we can't pass in a NULL pointer for the des_cbfunc) */ OPAL_OUTPUT((-1, "CTS send to %s completed", - ompi_proc_get_hostname(ep->endpoint_proc->proc_ompi))); + (NULL == ep->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : ep->endpoint_proc->proc_ompi->proc_hostname)); } /* @@ -522,7 +524,8 @@ void mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t *endpoint) mca_btl_openib_control_header_t *ctl_hdr; OPAL_OUTPUT((-1, "SENDING CTS to %s on qp index %d (QP num %d)", - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi), + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname, mca_btl_openib_component.credits_qp, endpoint->qps[mca_btl_openib_component.credits_qp].qp->lcl_qp->qp_num)); sc_frag = alloc_control_frag(endpoint->endpoint_btl); @@ -592,7 +595,8 @@ void mca_btl_openib_endpoint_cpc_complete(mca_btl_openib_endpoint_t *endpoint) transport_type_ib_p = (IBV_TRANSPORT_IB == endpoint->endpoint_btl->device->ib_dev->transport_type); #endif OPAL_OUTPUT((-1, "cpc_complete to peer %s: is IB %d, initiatior %d, cts received: %d", - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi), + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname, transport_type_ib_p, endpoint->endpoint_initiator, endpoint->endpoint_cts_received)); @@ -605,7 +609,8 @@ void mca_btl_openib_endpoint_cpc_complete(mca_btl_openib_endpoint_t *endpoint) mark us as connected */ if (endpoint->endpoint_cts_received) { OPAL_OUTPUT((-1, "cpc_complete to %s -- already got CTS, so marking endpoint as complete", - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi))); + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname)); mca_btl_openib_endpoint_connected(endpoint); } } diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_base.c b/ompi/mca/btl/openib/connect/btl_openib_connect_base.c index a132d7a4b0..170812fd3a 100644 --- a/ompi/mca/btl/openib/connect/btl_openib_connect_base.c +++ b/ompi/mca/btl/openib/connect/btl_openib_connect_base.c @@ -3,6 +3,7 @@ * Copyright (c) 2007 Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * * $COPYRIGHT$ * @@ -457,7 +458,8 @@ int ompi_btl_openib_connect_base_alloc_cts(mca_btl_base_endpoint_t *endpoint) mca_btl_openib_component.credits_qp; endpoint->endpoint_cts_frag.super.endpoint = endpoint; OPAL_OUTPUT((-1, "Got a CTS frag for peer %s, addr %p, length %d, lkey %d", - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi), + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? 
+ "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname, (void*) endpoint->endpoint_cts_frag.super.sg_entry.addr, endpoint->endpoint_cts_frag.super.sg_entry.length, endpoint->endpoint_cts_frag.super.sg_entry.lkey)); diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_rdmacm.c b/ompi/mca/btl/openib/connect/btl_openib_connect_rdmacm.c index 0b5ba6cb6e..4f42b24e3f 100644 --- a/ompi/mca/btl/openib/connect/btl_openib_connect_rdmacm.c +++ b/ompi/mca/btl/openib/connect/btl_openib_connect_rdmacm.c @@ -6,6 +6,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * * $COPYRIGHT$ * @@ -716,7 +717,8 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp (void*) endpoint, (void*) endpoint->endpoint_local_cpc, endpoint->endpoint_initiator ? "am" : "am NOT", - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi))); + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname)); /* If we're the initiator, then open all the QPs */ if (contents->endpoint->endpoint_initiator) { @@ -845,7 +847,8 @@ static int handle_connect_request(struct rdma_cm_event *event) (void*) endpoint, (void*) endpoint->endpoint_local_cpc, endpoint->endpoint_initiator ? "am" : "am NOT", - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi))); + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname)); if (endpoint->endpoint_initiator) { reject_reason_t reason = REJECT_WRONG_DIRECTION; @@ -906,7 +909,8 @@ static int handle_connect_request(struct rdma_cm_event *event) } OPAL_OUTPUT((-1, "Posted CTS receiver buffer (%p) for peer %s, qp index %d (QP num %d), WR ID %p, SG addr %p, len %d, lkey %d", (void*) wr->sg_list[0].addr, - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi), + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname, qpnum, endpoint->qps[qpnum].qp->lcl_qp->qp_num, (void*) wr->wr_id, @@ -1097,7 +1101,8 @@ static void *local_endpoint_cpc_complete(void *context) mca_btl_openib_endpoint_t *endpoint = (mca_btl_openib_endpoint_t *)context; OPAL_OUTPUT((-1, "MAIN local_endpoint_cpc_complete to %s", - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi))); + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname)); mca_btl_openib_endpoint_cpc_complete(endpoint); return NULL; @@ -1117,7 +1122,8 @@ static int rdmacm_connect_endpoint(id_context_t *context, if (contents->server) { endpoint = context->endpoint; OPAL_OUTPUT((-1, "SERVICE Server CPC complete to %s", - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi))); + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname)); } else { endpoint = contents->endpoint; endpoint->rem_info.rem_index = @@ -1132,7 +1138,8 @@ static int rdmacm_connect_endpoint(id_context_t *context, contents->on_client_list = true; } OPAL_OUTPUT((-1, "SERVICE Client CPC complete to %s", - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi))); + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? 
+ "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname)); } if (NULL == endpoint) { BTL_ERROR(("Can't find endpoint")); @@ -1144,8 +1151,12 @@ static int rdmacm_connect_endpoint(id_context_t *context, /* Only notify the upper layers after the last QP has been connected */ if (++data->rdmacm_counter < mca_btl_openib_component.num_qps) { - BTL_VERBOSE(("%s to peer %s, count == %d", contents->server?"server":"client", ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi), data->rdmacm_counter)); - OPAL_OUTPUT((-1, "%s to peer %s, count == %d", contents->server?"server":"client", ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi), data->rdmacm_counter)); + BTL_VERBOSE(("%s to peer %s, count == %d", contents->server?"server":"client", + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname, data->rdmacm_counter)); + OPAL_OUTPUT((-1, "%s to peer %s, count == %d", contents->server?"server":"client", + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname, data->rdmacm_counter)); return OMPI_SUCCESS; } @@ -1376,7 +1387,8 @@ static int finish_connect(id_context_t *context) OPAL_OUTPUT((-1, "Posted initiator CTS buffer (%p, length %d) for peer %s, qp index %d (QP num %d)", (void*) wr->sg_list[0].addr, wr->sg_list[0].length, - ompi_proc_get_hostname(contents->endpoint->endpoint_proc->proc_ompi), + (NULL == contents->endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : contents->endpoint->endpoint_proc->proc_ompi->proc_hostname, context->qpnum, contents->endpoint->qps[context->qpnum].qp->lcl_qp->qp_num)); } @@ -1443,7 +1455,8 @@ static int finish_connect(id_context_t *context) (void*) contents->endpoint, (void*) contents->endpoint->endpoint_local_cpc, contents->endpoint->endpoint_initiator ? "am" : "am NOT", - ompi_proc_get_hostname(contents->endpoint->endpoint_proc->proc_ompi))); + (NULL == contents->endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : contents->endpoint->endpoint_proc->proc_ompi->proc_hostname)); rc = rdma_connect(context->id, &conn_param); if (0 != rc) { BTL_ERROR(("rdma_connect Failed with %d", rc)); @@ -1485,7 +1498,8 @@ static void *show_help_rdmacm_event_error(void *c) ompi_process_info.nodename, device, rdma_event_str(event->event), - ompi_proc_get_hostname(context->endpoint->endpoint_proc->proc_ompi)); + (NULL == context->endpoint->endpoint_proc->proc_ompi->proc_hostname) ? + "unknown" : context->endpoint->endpoint_proc->proc_ompi->proc_hostname)); } return NULL; diff --git a/ompi/mca/btl/tcp/btl_tcp_proc.c b/ompi/mca/btl/tcp/btl_tcp_proc.c index bf679c15cc..6be44d8832 100644 --- a/ompi/mca/btl/tcp/btl_tcp_proc.c +++ b/ompi/mca/btl/tcp/btl_tcp_proc.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2010 Oracle and/or its affiliates. All rights reserved + * Copyright (c) 2013 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -377,7 +378,9 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, int rc, *a = NULL; size_t i, j; - proc_hostname = ompi_proc_get_hostname(btl_proc->proc_ompi); + if (NULL == (proc_hostname = btl_proc->proc_ompi->proc_hostname)) { + return OMPI_ERR_UNREACH; + } #ifndef WORDS_BIGENDIAN /* if we are little endian and our peer is not so lucky, then we diff --git a/ompi/mca/btl/udapl/btl_udapl_proc.c b/ompi/mca/btl/udapl/btl_udapl_proc.c index d78368c72c..95cb8101d9 100644 --- a/ompi/mca/btl/udapl/btl_udapl_proc.c +++ b/ompi/mca/btl/udapl/btl_udapl_proc.c @@ -12,6 +12,7 @@ * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -259,7 +260,8 @@ static int mca_btl_udapl_proc_address_match( BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt", "no network match", true, btl_addr_string, ompi_process_info.nodename, - ompi_proc_get_hostname(peer_proc->proc_ompi))); + (NULL == peer_proc->proc_ompi->proc_hostname) ? + "unknown" : peer_proc->proc_ompi->proc_hostname)); return OMPI_ERR_OUT_OF_RESOURCE; } diff --git a/ompi/mca/btl/usnic/btl_usnic_proc.c b/ompi/mca/btl/usnic/btl_usnic_proc.c index e4dff8c309..c3d65eeca9 100644 --- a/ompi/mca/btl/usnic/btl_usnic_proc.c +++ b/ompi/mca/btl/usnic/btl_usnic_proc.c @@ -12,6 +12,7 @@ * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -295,22 +296,15 @@ static int match_modex(ompi_btl_usnic_module_t *module, /* If MTU does not match, throw an error */ if (proc->proc_modex[i].mtu != module->if_mtu) { - const char *peer_hostname; - - if (NULL != ompi_proc_get_hostname(proc->proc_ompi)) { - peer_hostname = ompi_proc_get_hostname(proc->proc_ompi); - } else { - peer_hostname = - ""; - } opal_show_help("help-mpi-btl-usnic.txt", "MTU mismatch", - true, - ompi_process_info.nodename, - ibv_get_device_name(module->device), - module->port_num, - module->if_mtu, - peer_hostname, - proc->proc_modex[i].mtu); + true, + ompi_process_info.nodename, + ibv_get_device_name(module->device), + module->port_num, + module->if_mtu, + (NULL == proc->proc_ompi->proc_hostname) ? + "unknown" : proc->proc_ompi->proc_hostname, + proc->proc_modex[i].mtu); return -1; } diff --git a/ompi/mca/common/ofacm/common_ofacm_base.c b/ompi/mca/common/ofacm/common_ofacm_base.c index 3898fd2db9..9d7c9e427a 100644 --- a/ompi/mca/common/ofacm/common_ofacm_base.c +++ b/ompi/mca/common/ofacm/common_ofacm_base.c @@ -3,6 +3,7 @@ * Copyright (c) 2007-2012 Mellanox Technologies. All rights reserved. * * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -621,7 +622,8 @@ int ompi_common_ofacm_base_alloc_cts(mca_btl_base_endpoint_t *endpoint) mca_btl_openib_component.credits_qp; endpoint->endpoint_cts_frag.super.endpoint = endpoint; OPAL_OUTPUT((-1, "Got a CTS frag for peer %s, addr %p, length %d, lkey %d", - ompi_proc_get_hostname(endpoint->endpoint_proc->proc_ompi), + (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ? 
+ "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname, (void*) endpoint->endpoint_cts_frag.super.sg_entry.addr, endpoint->endpoint_cts_frag.super.sg_entry.length, endpoint->endpoint_cts_frag.super.sg_entry.lkey)); diff --git a/ompi/mca/mtl/mxm/mtl_mxm.c b/ompi/mca/mtl/mxm/mtl_mxm.c index d97f521eab..3853035e7c 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm.c +++ b/ompi/mca/mtl/mxm/mtl_mxm.c @@ -1,5 +1,6 @@ /* * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. + * Copyright (c) 2013 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -508,7 +509,9 @@ int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err)); for (i = 0; i < nprocs; ++i) { if (MXM_OK != conn_reqs[i].error) { - MXM_ERROR("MXM EP connect to %s error: %s\n", ompi_proc_get_hostname(procs[i]), + MXM_ERROR("MXM EP connect to %s error: %s\n", + (NULL == procs[i]->proc_hostname) ? + "unknown" : procs[i]->proc_hostname, mxm_error_string(conn_reqs[i].error)); } } diff --git a/ompi/mca/mtl/psm/mtl_psm.c b/ompi/mca/mtl/psm/mtl_psm.c index 3812910d58..5ba38f37ba 100644 --- a/ompi/mca/mtl/psm/mtl_psm.c +++ b/ompi/mca/mtl/psm/mtl_psm.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -313,7 +314,8 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, errstr ? errstr : "unknown connect error"); for (j = 0; j < (int) nprocs; j++) { if (errs_out[j] == thiserr) { - opal_output(0, " %s", ompi_proc_get_hostname(procs[j])); + opal_output(0, " %s", (NULL == procs[j]->proc_hostname) ? + "unknown" : procs[j]->proc_hostname); } } opal_output(0, "\n"); diff --git a/ompi/mca/pml/base/pml_base_select.c b/ompi/mca/pml/base/pml_base_select.c index 226554be2e..469999626c 100644 --- a/ompi/mca/pml/base/pml_base_select.c +++ b/ompi/mca/pml/base/pml_base_select.c @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -368,18 +369,11 @@ mca_pml_base_pml_check_selected(const char *my_pml, /* if that module doesn't match my own, return an error */ if ((size != strlen(my_pml) + 1) || (0 != strcmp(my_pml, remote_pml))) { - if (ompi_proc_get_hostname(procs[0])) { - opal_output(0, "%s selected pml %s, but peer %s on %s selected pml %s", - OMPI_NAME_PRINT(&ompi_proc_local()->proc_name), - my_pml, OMPI_NAME_PRINT(&procs[0]->proc_name), - ompi_proc_get_hostname(procs[0]), - remote_pml); - } else { - opal_output(0, "%s selected pml %s, but peer %s selected pml %s", - OMPI_NAME_PRINT(&ompi_proc_local()->proc_name), - my_pml, OMPI_NAME_PRINT(&procs[0]->proc_name), - remote_pml); - } + opal_output(0, "%s selected pml %s, but peer %s on %s selected pml %s", + OMPI_NAME_PRINT(&ompi_proc_local()->proc_name), + my_pml, OMPI_NAME_PRINT(&procs[0]->proc_name), + (NULL == procs[0]->proc_hostname) ? 
"unknown" : procs[0]->proc_hostname, + remote_pml); free(remote_pml); /* cleanup before returning */ return OMPI_ERR_UNREACH; } diff --git a/ompi/mca/pml/bfo/pml_bfo_failover.c b/ompi/mca/pml/bfo/pml_bfo_failover.c index fed93a925f..cb3f20440d 100644 --- a/ompi/mca/pml/bfo/pml_bfo_failover.c +++ b/ompi/mca/pml/bfo/pml_bfo_failover.c @@ -2,6 +2,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -408,7 +409,7 @@ void mca_pml_bfo_recv_frag_callback_rndvrestartnotify(mca_btl_base_module_t* btl recvreq->remote_req_send.pval, (void *)recvreq, recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, hdr->hdr_restart.hdr_jobid, hdr->hdr_restart.hdr_vpid, - ompi_proc_get_hostname(ompi_proc)); + (NULL == ompi_proc->proc_hostname) ? "unknown" : ompi_proc->proc_hostname); mca_pml_bfo_recv_request_rndvrestartnack(des, ompi_proc, false); return; } @@ -1415,7 +1416,7 @@ void mca_pml_bfo_map_out_btl(struct mca_btl_base_module_t* btl, btl->btl_component->btl_version.mca_component_name, OMPI_PROC_MY_NAME->vpid, btlname, errproc->proc_name.vpid, - ompi_proc_get_hostname(errproc)); + (NULL == errproc->proc_hostname) ? "unknown" : errproc->proc_hostname); /* Need to search for any pending packets associated * with this endpoint and remove them. We may also diff --git a/ompi/mca/rte/orte/rte_orte.h b/ompi/mca/rte/orte/rte_orte.h index 6c3dca66d4..3d3e43c44b 100644 --- a/ompi/mca/rte/orte/rte_orte.h +++ b/ompi/mca/rte/orte/rte_orte.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * * $COPYRIGHT$ * @@ -65,6 +66,7 @@ typedef orte_node_rank_t ompi_node_rank_t; typedef orte_local_rank_t ompi_local_rank_t; #define ompi_process_info orte_process_info #define ompi_rte_proc_is_bound orte_proc_is_bound +#define ompi_rte_hostname_cutoff orte_hostname_cutoff /* Error handling objects and operations */ OMPI_DECLSPEC void ompi_rte_abort(int error_code, char *fmt, ...); diff --git a/ompi/mca/rte/orte/rte_orte_module.c b/ompi/mca/rte/orte/rte_orte_module.c index 9917e962cd..8c28126d73 100644 --- a/ompi/mca/rte/orte/rte_orte_module.c +++ b/ompi/mca/rte/orte/rte_orte_module.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved */ #include "ompi_config.h" #include "ompi/constants.h" @@ -35,6 +36,7 @@ #include "ompi/mca/rte/base/base.h" #include "ompi/mca/rte/rte.h" #include "ompi/debuggers/debuggers.h" +#include "ompi/proc/proc.h" void ompi_rte_abort(int error_code, char *fmt, ...) 
 {
@@ -149,21 +151,54 @@
 int ompi_rte_db_fetch(const orte_process_name_t *nm,
                       const char *key,
                       void **data, opal_data_type_t type)
 {
-    return opal_db.fetch((opal_identifier_t*)nm, key, data, type);
+    ompi_proc_t *proct;
+    int rc;
+
+    if (OPAL_SUCCESS != (rc = opal_db.fetch((opal_identifier_t*)nm, key, data, type))) {
+        return rc;
+    }
+    /* update the hostname */
+    proct = ompi_proc_find(nm);
+    if (NULL != proct && NULL == proct->proc_hostname) {
+        opal_db.fetch_pointer((opal_identifier_t*)nm, ORTE_DB_HOSTNAME, (void**)&proct->proc_hostname, OPAL_STRING);
+    }
+    return OMPI_SUCCESS;
 }
 
 int ompi_rte_db_fetch_pointer(const orte_process_name_t *nm,
                               const char *key,
                               void **data, opal_data_type_t type)
 {
-    return opal_db.fetch_pointer((opal_identifier_t*)nm, key, data, type);
+    ompi_proc_t *proct;
+    int rc;
+
+    if (OPAL_SUCCESS != (rc = opal_db.fetch_pointer((opal_identifier_t*)nm, key, data, type))) {
+        return rc;
+    }
+    /* update the hostname */
+    proct = ompi_proc_find(nm);
+    if (NULL != proct && NULL == proct->proc_hostname) {
+        opal_db.fetch_pointer((opal_identifier_t*)nm, ORTE_DB_HOSTNAME, (void**)&proct->proc_hostname, OPAL_STRING);
+    }
+    return OMPI_SUCCESS;
 }
 
 int ompi_rte_db_fetch_multiple(const orte_process_name_t *nm,
                                const char *key,
                                opal_list_t *kvs)
 {
-    return opal_db.fetch_multiple((opal_identifier_t*)nm, key, kvs);
+    ompi_proc_t *proct;
+    int rc;
+
+    if (OPAL_SUCCESS != (rc = opal_db.fetch_multiple((opal_identifier_t*)nm, key, kvs))) {
+        return rc;
+    }
+    /* update the hostname */
+    proct = ompi_proc_find(nm);
+    if (NULL != proct && NULL == proct->proc_hostname) {
+        opal_db.fetch_pointer((opal_identifier_t*)nm, ORTE_DB_HOSTNAME, (void**)&proct->proc_hostname, OPAL_STRING);
+    }
+    return OMPI_SUCCESS;
 }
 
 int ompi_rte_db_remove(const orte_process_name_t *nm,
diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c
index 18e73ff638..6c2bb9fdf5 100644
--- a/ompi/proc/proc.c
+++ b/ompi/proc/proc.c
@@ -12,6 +12,7 @@
  * Copyright (c) 2006-2007 Cisco Systems, Inc.  All rights reserved.
  * Copyright (c) 2012      Los Alamos National Security, LLC.  All rights
  *                         reserved.
+ * Copyright (c) 2013      Intel, Inc. All rights reserved
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -156,8 +157,20 @@ int ompi_proc_complete_init(void)
             break;
         }
 
-        /* get the remote architecture */
+        if (ompi_process_info.num_daemons < ompi_rte_hostname_cutoff) {
+            /* retrieve the hostname */
+            ret = ompi_modex_recv_string_pointer(OMPI_DB_HOSTNAME, proc, (void**)&(proc->proc_hostname), OPAL_STRING);
+            if (OMPI_SUCCESS != ret) {
+                break;
+            }
+        } else {
+            /* just set the hostname to NULL for now - we'll fill it in
+             * as modex_recv's are called for procs we will talk to
+             */
+            proc->proc_hostname = NULL;
+        }
 #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
+        /* get the remote architecture */
         {
             uint32_t *ui32ptr;
             ui32ptr = &(proc->proc_arch);
@@ -185,21 +198,6 @@ int ompi_proc_complete_init(void)
     return errcode;
 }
 
-const char *ompi_proc_get_hostname (ompi_proc_t *proc)
-{
-    int ret;
-
-    if (NULL == proc->proc_hostname) {
-        /* get a pointer to the name of the node it is on */
-        ret = ompi_modex_recv_string_pointer(OMPI_DB_HOSTNAME, proc, (void**)&(proc->proc_hostname), OPAL_STRING);
-        if (OMPI_SUCCESS != ret) {
-            return NULL;
-        }
-    }
-
-    return proc->proc_hostname;
-}
-
 int ompi_proc_finalize (void)
 {
     opal_list_item_t *item;
@@ -397,25 +394,31 @@ int ompi_proc_refresh(void) {
             if (OMPI_SUCCESS != ret) {
                 break;
             }
-            proc->proc_hostname = NULL;
+            if (ompi_process_info.num_daemons < ompi_rte_hostname_cutoff) {
+                /* retrieve the hostname */
+                ret = ompi_modex_recv_string_pointer(OMPI_DB_HOSTNAME, proc, (void**)&(proc->proc_hostname), OPAL_STRING);
+                if (OMPI_SUCCESS != ret) {
+                    break;
+                }
+            } else {
+                /* just set the hostname to NULL for now - we'll fill it in
+                 * as modex_recv's are called for procs we will talk to
+                 */
+                proc->proc_hostname = NULL;
+            }
+#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
             /* get the remote architecture */
             uiptr = &(proc->proc_arch);
             ret = ompi_modex_recv_key_value("OMPI_ARCH", proc, (void**)&uiptr, OPAL_UINT32);
             /* if arch is different than mine, create a new convertor for this proc */
             if (proc->proc_arch != opal_local_arch) {
-#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
                 OBJ_RELEASE(proc->proc_convertor);
                 proc->proc_convertor = opal_convertor_create(proc->proc_arch, 0);
-#else
-                opal_show_help("help-mpi-runtime",
-                               "heterogeneous-support-unavailable",
-                               true, ompi_process_info.nodename,
-                               proc->proc_hostname == NULL ? "" :
-                               proc->proc_hostname);
-                OPAL_THREAD_UNLOCK(&ompi_proc_lock);
-                return OMPI_ERR_NOT_SUPPORTED;
-#endif
             }
+#else
+            /* must be same arch as my own */
+            proc->proc_arch = opal_local_arch;
+#endif
         }
     }
 
@@ -456,7 +459,6 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, opal_buffer_t* buf)
             OPAL_THREAD_UNLOCK(&ompi_proc_lock);
             return rc;
         }
-        (void) ompi_proc_get_hostname (proclist[i]);
         rc = opal_dss.pack(buf, &(proclist[i]->proc_hostname), 1, OPAL_STRING);
         if(rc != OPAL_SUCCESS) {
             OMPI_ERROR_LOG(rc);
diff --git a/ompi/proc/proc.h b/ompi/proc/proc.h
index 18642678c3..da3d52870a 100644
--- a/ompi/proc/proc.h
+++ b/ompi/proc/proc.h
@@ -12,6 +12,7 @@
  * Copyright (c) 2006-2012 Cisco Systems, Inc.  All rights reserved.
  * Copyright (c) 2007-2012 Los Alamos National Security, LLC.  All rights
  *                         reserved.
+ * Copyright (c) 2013      Intel, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -304,15 +305,6 @@ OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf, */ OMPI_DECLSPEC int ompi_proc_refresh(void); -/** - * Retrieve the hostname for a process - * - * @note Retrieving the hostname may require communication. - * - * @param proc process to retrieve hostname from - */ -OMPI_DECLSPEC const char *ompi_proc_get_hostname (ompi_proc_t *proc); - END_C_DECLS #endif /* OMPI_PROC_PROC_H */ diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c index 67a243801f..7219b84d07 100644 --- a/ompi/runtime/ompi_mpi_params.c +++ b/ompi/runtime/ompi_mpi_params.c @@ -14,6 +14,7 @@ * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -57,7 +58,6 @@ bool ompi_mpi_show_mca_params = false; char *ompi_mpi_show_mca_params_file = NULL; bool ompi_mpi_abort_print_stack = false; int ompi_mpi_abort_delay = 0; -bool ompi_mpi_keep_peer_hostnames = true; bool ompi_mpi_keep_fqdn_hostnames = false; int ompi_mpi_leave_pinned = -1; bool ompi_mpi_leave_pinned_pipeline = false; @@ -211,16 +211,6 @@ int ompi_mpi_register_params(void) /* User-level process pinning controls */ - /* Do we want to save hostnames for debugging messages? This can - eat quite a bit of memory... */ - ompi_mpi_keep_peer_hostnames = true; - (void) mca_base_var_register("ompi", "mpi", NULL, "keep_peer_hostnames", - "If nonzero, save the string hostnames of all MPI peer processes (mostly for error / debugging output messages). This can add quite a bit of memory usage to each MPI process.", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_mpi_keep_peer_hostnames); - /* MPI_ABORT controls */ ompi_mpi_abort_delay = 0; (void) mca_base_var_register("ompi", "mpi", NULL, "abort_delay", diff --git a/ompi/runtime/params.h b/ompi/runtime/params.h index 24bb5b60f8..41970ac542 100644 --- a/ompi/runtime/params.h +++ b/ompi/runtime/params.h @@ -13,6 +13,7 @@ * reserved. * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -96,12 +97,6 @@ OMPI_DECLSPEC extern bool ompi_mpi_show_mca_params; */ OMPI_DECLSPEC extern char * ompi_mpi_show_mca_params_file; -/** - * Whether we should keep the string hostnames of all the MPI - * process peers around or not (eats up a good bit of memory). - */ -OMPI_DECLSPEC extern bool ompi_mpi_keep_peer_hostnames; - /** * Whether an MPI_ABORT should print out a stack trace or not. 
 */
diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c
index 506b3ccf8a..6fa0f2db29 100644
--- a/orte/mca/ess/pmi/ess_pmi_module.c
+++ b/orte/mca/ess/pmi/ess_pmi_module.c
@@ -97,6 +97,7 @@ static int rte_init(void)
     orte_node_rank_t node_rank;
     char *rmluri;
     opal_hwloc_locality_t locality;
+    char *tmp;
 
     /* run the prolog */
     if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
@@ -346,6 +347,21 @@ static int rte_init(void)
         orte_process_info.max_procs = orte_process_info.num_procs;
     }
 
+    /* set the number of nodes - have to test as it could be
+     * one of multiple environments
+     */
+    if (NULL != (tmp = getenv("SLURM_NNODES"))) {
+        orte_process_info.num_daemons = strtol(tmp, NULL, 10);
+    } else if (NULL != (tmp = getenv("PBS_NUM_NODES"))) {
+        orte_process_info.num_daemons = strtol(tmp, NULL, 10);
+    } else {
+        if (0 == ORTE_PROC_MY_NAME->vpid) {
+            orte_show_help("help-orte-runtime.txt",
+                           "orte_init:startup:num_daemons", true);
+        }
+        orte_process_info.num_daemons = UINT_MAX;
+    }
+
     /* construct the PMI RTE string */
     rmluri = orte_rml.get_contact_info();
diff --git a/orte/runtime/help-orte-runtime.txt b/orte/runtime/help-orte-runtime.txt
index 86aa1dbcee..d3a9068ed8 100644
--- a/orte/runtime/help-orte-runtime.txt
+++ b/orte/runtime/help-orte-runtime.txt
@@ -56,3 +56,8 @@
 again.
 An error occurred while trying to pack the information about the job.
 More nodes have been found than the %d expected. Please
 check your configuration files such as the mapping.
+#
+[orte_init:startup:num_daemons]
+Open MPI was unable to determine the number of nodes in your allocation. We
+are therefore assuming a very large number; peer hostnames will then be
+retrieved on demand rather than pre-loaded at startup.
diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c
index e52320efb0..d728a8dd8b 100644
--- a/orte/runtime/orte_globals.c
+++ b/orte/runtime/orte_globals.c
@@ -13,6 +13,7 @@
  * Copyright (c) 2009-2010 Oracle and/or its affiliates.  All rights reserved.
  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
  *                         All rights reserved.
+ * Copyright (c) 2013      Intel, Inc. All rights reserved
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -78,6 +79,7 @@ bool orte_have_fqdn_allocation = false;
 bool orte_show_resolved_nodenames;
 bool orte_retain_aliases;
 int orte_use_hostname_alias;
+orte_vpid_t orte_hostname_cutoff;
 
 int orted_debug_failure;
 int orted_debug_failure_delay;
diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h
index 2d1a6ae8c1..1d95bd8d18 100644
--- a/orte/runtime/orte_globals.h
+++ b/orte/runtime/orte_globals.h
@@ -13,6 +13,7 @@
  * Copyright (c) 2007-2012 Cisco Systems, Inc.  All rights reserved.
  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
  *                         All rights reserved.
+ * Copyright (c) 2013      Intel, Inc. All rights reserved
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -606,6 +607,7 @@ ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;
 ORTE_DECLSPEC extern bool orte_show_resolved_nodenames;
 ORTE_DECLSPEC extern bool orte_retain_aliases;
 ORTE_DECLSPEC extern int orte_use_hostname_alias;
+ORTE_DECLSPEC extern orte_vpid_t orte_hostname_cutoff;
 
 /* debug flags */
 ORTE_DECLSPEC extern int orted_debug_failure;
diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c
index 2b102576fb..465de5b9e6 100644
--- a/orte/runtime/orte_mca_params.c
+++ b/orte/runtime/orte_mca_params.c
@@ -13,6 +13,7 @@
  * Copyright (c) 2009-2010 Oracle and/or its affiliates.  All rights reserved.
  * Copyright (c) 2012-2013 Los Alamos National Security, LLC.
  *                         All rights reserved
+ * Copyright (c) 2013      Intel, Inc. All rights reserved
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -435,6 +436,15 @@ int orte_register_params(void)
                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                   &orte_use_hostname_alias);
 
+    /* cutoff for including hostnames in modex */
+    orte_hostname_cutoff = UINT_MAX;
+    (void) mca_base_var_register ("orte", "orte", NULL, "hostname_cutoff",
+                                  "If the number of nodes in the allocation exceeds the provided value, "
+                                  "hostnames for remote processes will not be supplied to applications [default: UINT_MAX]",
+                                  MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
+                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
+                                  &orte_hostname_cutoff);
+
     orte_xml_output = false;
     (void) mca_base_var_register ("orte", "orte", NULL, "xml_output",
                                   "Display all output in XML format (default: false)",
diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c
index 7d0ecdde86..9b5b91e2a0 100644
--- a/orte/util/nidmap.c
+++ b/orte/util/nidmap.c
@@ -11,6 +11,7 @@
  *                         All rights reserved.
  * Copyright (c) 2012-2013 Los Alamos National Security, LLC.
  *                         All rights reserved.
+ * Copyright (c) 2013      Intel, Inc. All rights reserved
  *
  * $COPYRIGHT$
  *
@@ -272,6 +273,12 @@ int orte_util_encode_nodemap(opal_byte_object_t *boptr, bool update)
     /* setup a buffer for tmp use */
     OBJ_CONSTRUCT(&buf, opal_buffer_t);
 
+    /* send the number of nodes */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &daemons->num_procs, 1, ORTE_VPID))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
     /* only send info on nodes that have daemons on them, and
      * only regarding daemons that have changed - i.e., new
      * daemons since the last time we sent the info - so we
@@ -299,40 +306,42 @@ int orte_util_encode_nodemap(opal_byte_object_t *boptr, bool update)
             ORTE_ERROR_LOG(rc);
             return rc;
         }
-        /* pack the name of the node */
-        if (!orte_keep_fqdn_hostnames) {
-            nodename = strdup(node->name);
-            /* if the nodename is an IP address, do not mess with it! */
-            if (!opal_net_isaddr(nodename)) {
-                /* not an IP address */
-                if (NULL != (ptr = strchr(nodename, '.'))) {
-                    *ptr = '\0';
+        if (daemons->num_procs < orte_hostname_cutoff) {
+            /* pack the name of the node */
+            if (!orte_keep_fqdn_hostnames) {
+                nodename = strdup(node->name);
+                /* if the nodename is an IP address, do not mess with it! 
*/ + if (!opal_net_isaddr(nodename)) { + /* not an IP address */ + if (NULL != (ptr = strchr(nodename, '.'))) { + *ptr = '\0'; + } } - } - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &nodename, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - free(nodename); - } else { - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &node->name, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - /* if requested, pack any aliases */ - if (orte_retain_aliases) { - uint8_t naliases, ni; - naliases = opal_argv_count(node->alias); - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &naliases, 1, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - for (ni=0; ni < naliases; ni++) { - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &node->alias[ni], 1, OPAL_STRING))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &nodename, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); return rc; } + free(nodename); + } else { + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &node->name, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + /* if requested, pack any aliases */ + if (orte_retain_aliases) { + uint8_t naliases, ni; + naliases = opal_argv_count(node->alias); + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &naliases, 1, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + return rc; + } + for (ni=0; ni < naliases; ni++) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &node->alias[ni], 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } } } @@ -358,7 +367,7 @@ int orte_util_encode_nodemap(opal_byte_object_t *boptr, bool update) int orte_util_decode_nodemap(opal_byte_object_t *bo) { int n; - int32_t num_daemons; + orte_vpid_t num_daemons; orte_process_name_t daemon; opal_buffer_t buf; int rc=ORTE_SUCCESS; @@ -378,75 +387,82 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo) OBJ_CONSTRUCT(&buf, opal_buffer_t); opal_dss.load(&buf, bo->bytes, bo->size); + /* unpack the number of daemons */ + n=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &num_daemons, &n, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* set the daemon jobid */ daemon.jobid = ORTE_DAEMON_JOBID(ORTE_PROC_MY_NAME->jobid); - num_daemons = 0; n=1; while (OPAL_SUCCESS == (rc = opal_dss.unpack(&buf, &daemon.vpid, &n, ORTE_VPID))) { - ++num_daemons; - /* unpack and store the node's name */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &nodename, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)&daemon, OPAL_DB_INTERNAL, ORTE_DB_HOSTNAME, nodename, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* now store a direct reference so we can quickly lookup the daemon from a hostname */ - opal_output_verbose(2, orte_nidmap_output, - "%s storing nodename %s for daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - nodename, ORTE_VPID_PRINT(daemon.vpid)); - if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)ORTE_NAME_WILDCARD, OPAL_DB_INTERNAL, nodename, &daemon.vpid, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - OPAL_OUTPUT_VERBOSE((2, orte_nidmap_output, - "%s orte:util:decode:nidmap daemon %s node %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_VPID_PRINT(daemon.vpid), nodename)); - - /* if this is my daemon, then store the data for me too */ - if (daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)ORTE_PROC_MY_NAME, OPAL_DB_INTERNAL, ORTE_DB_HOSTNAME, nodename, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (ORTE_SUCCESS != (rc = 
opal_db.store((opal_identifier_t*)ORTE_PROC_MY_NAME, OPAL_DB_INTERNAL, ORTE_DB_DAEMON_VPID, &daemon.vpid, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /* if requested, unpack any aliases */ - if (orte_retain_aliases) { - char *alias; - uint8_t naliases, ni; + if (num_daemons < orte_hostname_cutoff) { + /* unpack and store the node's name */ n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &naliases, &n, OPAL_UINT8))) { + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &nodename, &n, OPAL_STRING))) { ORTE_ERROR_LOG(rc); return rc; } - for (ni=0; ni < naliases; ni++) { + if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)&daemon, OPAL_DB_INTERNAL, ORTE_DB_HOSTNAME, nodename, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* now store a direct reference so we can quickly lookup the daemon from a hostname */ + opal_output_verbose(2, orte_nidmap_output, + "%s storing nodename %s for daemon %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + nodename, ORTE_VPID_PRINT(daemon.vpid)); + if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)ORTE_NAME_WILDCARD, OPAL_DB_INTERNAL, nodename, &daemon.vpid, OPAL_UINT32))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + OPAL_OUTPUT_VERBOSE((2, orte_nidmap_output, + "%s orte:util:decode:nidmap daemon %s node %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_VPID_PRINT(daemon.vpid), nodename)); + + /* if this is my daemon, then store the data for me too */ + if (daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { + if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)ORTE_PROC_MY_NAME, OPAL_DB_INTERNAL, ORTE_DB_HOSTNAME, nodename, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)ORTE_PROC_MY_NAME, OPAL_DB_INTERNAL, ORTE_DB_DAEMON_VPID, &daemon.vpid, OPAL_UINT32))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + + /* if requested, unpack any aliases */ + if (orte_retain_aliases) { + char *alias; + uint8_t naliases, ni; n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &alias, &n, OPAL_STRING))) { + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &naliases, &n, OPAL_UINT8))) { ORTE_ERROR_LOG(rc); return rc; } - /* store a cross-reference to the daemon for this nodename */ - opal_output_verbose(2, orte_nidmap_output, - "%s storing alias %s for daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - alias, ORTE_VPID_PRINT(daemon.vpid)); - if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)ORTE_NAME_WILDCARD, OPAL_DB_INTERNAL, alias, &daemon.vpid, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - return rc; + for (ni=0; ni < naliases; ni++) { + n=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &alias, &n, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* store a cross-reference to the daemon for this nodename */ + opal_output_verbose(2, orte_nidmap_output, + "%s storing alias %s for daemon %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + alias, ORTE_VPID_PRINT(daemon.vpid)); + if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)ORTE_NAME_WILDCARD, OPAL_DB_INTERNAL, alias, &daemon.vpid, OPAL_UINT32))) { + ORTE_ERROR_LOG(rc); + return rc; + } + free(alias); } - free(alias); } } @@ -482,6 +498,7 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo) char *name; orte_job_t *daemons; orte_proc_t *dptr; + orte_vpid_t num_daemons; OPAL_OUTPUT_VERBOSE((1, orte_nidmap_output, "%s decode:nidmap decoding daemon nodemap", @@ -496,43 +513,51 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo) OBJ_CONSTRUCT(&buf, opal_buffer_t); 
opal_dss.load(&buf, bo->bytes, bo->size); - /* transfer the data to the nodes, counting the number of - * daemons in the system - */ + /* unpack the number of procs */ + n=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &num_daemons, &n, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* transfer the data to the nodes */ daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); + daemons->num_procs = num_daemons; n=1; while (OPAL_SUCCESS == (rc = opal_dss.unpack(&buf, &vpid, &n, ORTE_VPID))) { - /* unpack and store the node's name */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &name, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* do we already have this node? */ - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, vpid))) { - node = OBJ_NEW(orte_node_t); - node->name = name; - opal_pointer_array_set_item(orte_node_pool, vpid, node); - } else { - free(name); - } - /* if requested, unpack any aliases */ - if (orte_retain_aliases) { - char *alias; - uint8_t naliases, ni; + if (daemons->num_procs < orte_hostname_cutoff) { + /* unpack and store the node's name */ n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &naliases, &n, OPAL_UINT8))) { + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &name, &n, OPAL_STRING))) { ORTE_ERROR_LOG(rc); return rc; } - for (ni=0; ni < naliases; ni++) { + /* do we already have this node? */ + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, vpid))) { + node = OBJ_NEW(orte_node_t); + node->name = name; + opal_pointer_array_set_item(orte_node_pool, vpid, node); + } else { + free(name); + } + /* if requested, unpack any aliases */ + if (orte_retain_aliases) { + char *alias; + uint8_t naliases, ni; n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &alias, &n, OPAL_STRING))) { + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &naliases, &n, OPAL_UINT8))) { ORTE_ERROR_LOG(rc); return rc; } - opal_argv_append_nosize(&node->alias, alias); - free(alias); + for (ni=0; ni < naliases; ni++) { + n=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &alias, &n, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + opal_argv_append_nosize(&node->alias, alias); + free(alias); + } } } /* unpack the oversubscribed flag */ @@ -546,7 +571,6 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo) dptr->name.jobid = ORTE_PROC_MY_NAME->jobid; dptr->name.vpid = vpid; opal_pointer_array_set_item(daemons->procs, vpid, dptr); - daemons->num_procs++; } if (NULL != node->daemon) { OBJ_RELEASE(node->daemon); @@ -904,14 +928,16 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo) ORTE_ERROR_LOG(rc); goto cleanup; } - /* lookup and store the hostname for this proc */ - if (ORTE_SUCCESS != (rc = opal_db.fetch_pointer((opal_identifier_t*)&dmn, ORTE_DB_HOSTNAME, (void**)&hostname, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)&proc, OPAL_DB_INTERNAL, ORTE_DB_HOSTNAME, hostname, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - goto cleanup; + if (orte_process_info.num_daemons < orte_hostname_cutoff) { + /* lookup and store the hostname for this proc */ + if (ORTE_SUCCESS != (rc = opal_db.fetch_pointer((opal_identifier_t*)&dmn, ORTE_DB_HOSTNAME, (void**)&hostname, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)&proc, OPAL_DB_INTERNAL, ORTE_DB_HOSTNAME, hostname, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } } } }
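
A note on the recurring pattern: every former ompi_proc_get_hostname() call
site above now uses the same NULL-guard idiom. Reduced to a minimal,
self-contained C sketch (the trimmed struct and print_peer() below are
illustrative stand-ins, not symbols from the code base), it looks like this:

#include <stdio.h>

/* reduced stand-in for ompi_proc_t: only the field this patch touches */
typedef struct {
    char *proc_hostname;   /* NULL until a modex_recv fills it in */
} my_proc_t;

/* the guard used throughout the patch: never dereference proc_hostname
 * without an explicit NULL check, since hostnames are now loaded lazily
 * (or omitted entirely when the allocation exceeds orte_hostname_cutoff) */
static void print_peer(const my_proc_t *proc)
{
    printf("peer host: %s\n",
           (NULL == proc->proc_hostname) ? "unknown" : proc->proc_hostname);
}

int main(void)
{
    my_proc_t sparse = { NULL };        /* above the cutoff: not yet known */
    my_proc_t filled = { "node042" };   /* below the cutoff, or after modex_recv */
    print_peer(&sparse);                /* -> peer host: unknown */
    print_peer(&filled);                /* -> peer host: node042 */
    return 0;
}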
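
The ompi_rte_db_fetch* wrappers in rte_orte_module.c all share one shape:
perform the requested fetch, then back-fill the peer's hostname exactly once.
A compilable sketch of that shape, with db_fetch_hostname() standing in
(hypothetically) for the opal_db.fetch_pointer(..., ORTE_DB_HOSTNAME, ...)
call against the local datastore:

#include <stddef.h>
#include <stdio.h>

/* reduced proc structure, as in the first sketch */
typedef struct { char *proc_hostname; } my_proc_t;

/* hypothetical stand-in for fetching the stored hostname for a peer */
static int db_fetch_hostname(my_proc_t *p)
{
    p->proc_hostname = "node007";
    return 0;
}

/* shape of the wrappers: after any successful fetch for a peer, lazily
 * back-fill that peer's hostname, so any proc whose modex info we
 * retrieve ends up with proc_hostname set */
static int fetch_with_hostname_backfill(my_proc_t *p)
{
    /* ... perform the requested fetch here, returning early on error ... */
    if (NULL == p->proc_hostname) {
        (void)db_fetch_hostname(p);   /* best effort: result deliberately ignored */
    }
    return 0;
}

int main(void)
{
    my_proc_t peer = { NULL };
    fetch_with_hostname_backfill(&peer);
    printf("%s\n", (NULL == peer.proc_hostname) ? "unknown" : peer.proc_hostname);
    return 0;
}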
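
The allocation-size probe added to ess/pmi reads the batch environment
because PMI itself does not expose the node count. This stand-alone sketch
mirrors its control flow; lookup_num_nodes() is a hypothetical wrapper, and
the real code stores into orte_process_info.num_daemons and warns through
orte_show_help() rather than fprintf():

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* probe the batch environment for the allocation size, falling back to
 * "assume the maximum" when no known environment matches */
static unsigned int lookup_num_nodes(int am_rank0)
{
    char *tmp;

    if (NULL != (tmp = getenv("SLURM_NNODES"))) {       /* SLURM */
        return (unsigned int)strtol(tmp, NULL, 10);
    }
    if (NULL != (tmp = getenv("PBS_NUM_NODES"))) {      /* PBS/Torque */
        return (unsigned int)strtol(tmp, NULL, 10);
    }
    if (am_rank0) {
        /* only rank0 warns, matching the patch */
        fprintf(stderr, "cannot determine the number of nodes; assuming the maximum\n");
    }
    /* >= any cutoff, so hostnames are left to on-demand retrieval */
    return UINT_MAX;
}

int main(void)
{
    printf("num nodes: %u\n", lookup_num_nodes(1));
    return 0;
}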
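
Finally, the nidmap changes rest on encode/decode symmetry: the daemon count
is packed first, and both sides must apply the identical cutoff test, or the
unpack stream desynchronizes - exactly the hang/infinite-loop class this
revision is meant to avoid. A toy illustration of that invariant (the wire
format and helper names are invented for this example and are not the
opal_dss API):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CUTOFF 4u   /* stands in for orte_hostname_cutoff */

/* toy wire format: [uint32 count][node name only if count < CUTOFF] */
static size_t encode(uint8_t *buf, uint32_t count, const char *name)
{
    size_t off = 0;
    memcpy(buf + off, &count, sizeof(count)); off += sizeof(count);
    if (count < CUTOFF) {                 /* same test the decoder applies */
        size_t len = strlen(name) + 1;
        memcpy(buf + off, name, len); off += len;
    }
    return off;
}

static void decode(const uint8_t *buf)
{
    uint32_t count;
    size_t off = 0;
    memcpy(&count, buf + off, sizeof(count)); off += sizeof(count);
    if (count < CUTOFF) {                 /* must mirror the encoder exactly */
        printf("count=%u name=%s\n", count, (const char *)(buf + off));
    } else {
        printf("count=%u (hostnames omitted)\n", count);
    }
}

int main(void)
{
    uint8_t buf[64];
    encode(buf, 2, "node001");  decode(buf);   /* below cutoff: name sent */
    encode(buf, 9, "node001");  decode(buf);   /* above cutoff: name skipped */
    return 0;
}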