/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "orte_config.h"
#include "orte/constants.h"

#include "opal/util/output.h"
#include "opal/mca/dstore/dstore.h"
#include "opal/util/argv.h"

#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/state/state.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/oob/base/base.h"
#if OPAL_ENABLE_FT_CR == 1
#include "orte/mca/state/base/base.h"
#endif

static void process_uri(char *uri);

/**
 * Hand a message to an OOB component for non-blocking delivery.
 *
 * The signature has the (fd, flags, cbdata) shape of an event callback;
 * fd and args are not used.
 *
 * @param fd     unused
 * @param args   unused
 * @param cbdata an orte_oob_send_t caddy; its msg field is the message to
 *               send. The caddy itself is released immediately.
 *
 * Steps:
 *  1. Look the destination up in orte_oob_base.peers. If it is missing,
 *     first try the URI stored in the dstore (process_uri() then creates
 *     the peer entry); if the dstore has nothing, ask every active
 *     component whether it can reach the destination and record the
 *     answers in a new peer entry.
 *  2. If the peer already has an assigned component, try that one.
 *  3. Otherwise (or if that send did not succeed) walk the active
 *     components and try each one marked addressable for this peer.
 *
 * On every failure path the message status is set and
 * ORTE_RML_SEND_COMPLETE(msg) is invoked; on success the component
 * that accepted the message is responsible for completing it.
 */
void orte_oob_base_send_nb(int fd, short args, void *cbdata)
{
    orte_oob_send_t *cd = (orte_oob_send_t*)cbdata;
    orte_rml_send_t *msg = cd->msg;
    mca_base_component_list_item_t *cli;
    orte_oob_base_peer_t *pr;
    int rc;
    uint64_t ui64;
    bool msg_sent;
    mca_oob_base_component_t *component;
    bool reachable;
    opal_list_t myvals;
    opal_value_t *kv;

    /* done with this. release it now */
    OBJ_RELEASE(cd);

    opal_output_verbose(5, orte_oob_base_framework.framework_output,
                        "%s oob:base:send to target %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&msg->dst));

    /* check if we have this peer in our hash table - the process
     * name itself is used as the 64-bit key */
    memcpy(&ui64, (char*)&msg->dst, sizeof(uint64_t));
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
                                                         ui64, (void**)&pr) ||
        NULL == pr) {
        opal_output_verbose(5, orte_oob_base_framework.framework_output,
                            "%s oob:base:send unknown peer %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&msg->dst));
        /* for direct launched procs, the URI might be in the database,
         * so check there next - if it is, the peer object will be added
         * to our hash table
         */
        OBJ_CONSTRUCT(&myvals, opal_list_t);
        if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, &msg->dst,
                                              OPAL_DSTORE_URI, &myvals)) {
            /* NOTE(review): confirm that opal_list_get_first() really
             * yields NULL for an empty list - if it does not, an empty
             * result would not be caught by the check below */
            kv = (opal_value_t*)opal_list_get_first(&myvals);
            if (NULL != kv) {
                /* process_uri modifies the string in place, which is
                 * fine as the list that owns it is destroyed next */
                process_uri(kv->data.string);
            }
            /* the fetched values are no longer needed on any path */
            OPAL_LIST_DESTRUCT(&myvals);
            if (NULL == kv ||
                OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
                                                                 ui64, (void**)&pr) ||
                NULL == pr) {
                /* that is just plain wrong */
                ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN);
                msg->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
                ORTE_RML_SEND_COMPLETE(msg);
                return;
            }
        } else {
            /* nothing usable came back - tear the list down here so it
             * is not leaked on the paths below */
            OPAL_LIST_DESTRUCT(&myvals);

            /* even though we don't know about this peer yet, we still might
             * be able to get to it via routing, so ask each component if
             * it can reach it
             */
            reachable = false;
            pr = NULL;
            OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) {
                component = (mca_oob_base_component_t*)cli->cli_component;
                if (NULL == component->is_reachable ||
                    !component->is_reachable(&msg->dst)) {
                    continue;
                }
                /* there is a way to reach this peer - record it
                 * so we don't waste this time again
                 */
                if (NULL == pr) {
                    pr = OBJ_NEW(orte_oob_base_peer_t);
                    if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
                                                                               ui64, (void*)pr))) {
                        ORTE_ERROR_LOG(rc);
                        /* the table did not take the object, so we
                         * still own it and must drop it */
                        OBJ_RELEASE(pr);
                        msg->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
                        ORTE_RML_SEND_COMPLETE(msg);
                        return;
                    }
                }
                /* mark that this component can reach the peer */
                opal_bitmap_set_bit(&pr->addressable, component->idx);
                /* flag that at least one component can reach this peer */
                reachable = true;
            }
            /* if nobody could reach it, then that's an error */
            if (!reachable) {
                msg->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
                ORTE_RML_SEND_COMPLETE(msg);
                return;
            }
        }
    }

    /* if we already have a connection to this peer, use it */
    if (NULL != pr->component) {
        /* post this msg for send by this transport - the component
         * runs on our event base, so we can just call their function
         */
        opal_output_verbose(5, orte_oob_base_framework.framework_output,
                            "%s oob:base:send known transport for peer %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&msg->dst));
        if (ORTE_SUCCESS == (rc = pr->component->send_nb(msg))) {
            return;
        }
        /* otherwise fall through and try the addressable components */
    }

    /* if we haven't identified a transport to this peer,
     * loop across all available components in priority order until
     * one replies that it has a module that can reach this peer.
     * Let it try to make the connection
     */
    msg_sent = false;
    OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) {
        component = (mca_oob_base_component_t*)cli->cli_component;
        /* is this peer addressable by this component? */
        if (!opal_bitmap_is_set_bit(&pr->addressable, component->idx)) {
            continue;
        }
        /* it is addressable, so attempt to send via that transport */
        if (ORTE_SUCCESS == (rc = component->send_nb(msg))) {
            /* the msg status will be set upon send completion/failure */
            msg_sent = true;
            /* point to this transport for any future messages */
            pr->component = component;
            break;
        } else if (ORTE_ERR_TAKE_NEXT_OPTION != rc) {
            /* components return "next option" if they can't connect
             * to this peer. anything else is a true error.
             */
            ORTE_ERROR_LOG(rc);
            msg->status = rc;
            ORTE_RML_SEND_COMPLETE(msg);
            return;
        }
    }

    /* if no component can reach this peer, that's an error - post
     * it back to the RML for handling
     */
    if (!msg_sent) {
        opal_output_verbose(5, orte_oob_base_framework.framework_output,
                            "%s oob:base:send no path to target %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&msg->dst));
        msg->status = ORTE_ERR_NO_PATH_TO_TARGET;
        ORTE_RML_SEND_COMPLETE(msg);
    }
}

/**
 * Obtain a uri for initial connection purposes
 *
 * During initial wireup, we can only transfer contact info on the daemon
 * command line. This limits what we can send to a string representation of
 * the actual contact info, which gets sent in a uri-like form. Not every
 * oob module can support this transaction, so this function will loop
 * across all oob components/modules, letting each add to the uri string if
 * it supports bootstrap operations. If NO component can successfully
 * provide a contact, the returned uri is NULL.
 *
 * Note: since there is a limit to what an OS will allow on a cmd line, we
 * impose a limit on the length of the resulting uri via an MCA param. The
 * default value of -1 implies unlimited - however, users with large numbers
 * of interfaces on their nodes may wish to restrict the size.
*/
void orte_oob_base_get_addr(char **uri)
{
    char *turi, *final=NULL, *tmp;
    size_t len = 0;
    int rc=ORTE_SUCCESS;
    bool one_added = false;
    mca_base_component_list_item_t *cli;
    mca_oob_base_component_t *component;

    /* start with our process name */
    if (ORTE_SUCCESS != (rc = orte_util_convert_process_name_to_string(&final, ORTE_PROC_MY_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto unblock;
    }
    len = strlen(final);

    /* loop across all available modules to get their input
     * up to the max length
     */
    OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) {
        component = (mca_oob_base_component_t*)cli->cli_component;
        /* ask the component for its input, obtained when it
         * opened its modules
         */
        if (NULL == component->get_addr) {
            /* doesn't support this ability */
            continue;
        }
        /* the components operate within our event base, so we
         * can directly call their get_addr function. The string
         * that comes back is released below once it has been
         * consumed, i.e. it is treated as ours to free.
         */
        turi = component->get_addr();
        if (NULL == turi) {
            continue;
        }
        /* check overall length for limits */
        if (0 < orte_oob_base.max_uri_length &&
            orte_oob_base.max_uri_length < (int)(len + strlen(turi))) {
            /* cannot accept the payload - drop it, as we do
             * with every contribution once we are done with it */
            free(turi);
            continue;
        }
        /* add new value to final one */
        if (0 > asprintf(&tmp, "%s;%s", final, turi)) {
            /* tmp is not usable after a failed asprintf; give up
             * and report "no uri" to the caller */
            free(turi);
            free(final);
            final = NULL;
            goto unblock;
        }
        free(turi);
        free(final);
        final = tmp;
        len = strlen(final);
        /* flag that at least one contributed */
        one_added = true;
    }

    if (!one_added) {
        /* nobody could contribute - a bare process name is
         * of no use, so return nothing */
        free(final);
        final = NULL;
    }

 unblock:
    /* NULL signals that no uri could be produced; otherwise the
     * caller receives a heap-allocated string */
    *uri = final;
}

/**
 * This function will loop
 * across all oob components, letting each look at the uri and extract
 * info from it if it can. An error is to be returned if NO component
 * can successfully extract a contact.
*/ static void req_cons(mca_oob_uri_req_t *ptr) { ptr->uri = NULL; } static void req_des(mca_oob_uri_req_t *ptr) { if (NULL != ptr->uri) { free(ptr->uri); } } OBJ_CLASS_INSTANCE(mca_oob_uri_req_t, opal_object_t, req_cons, req_des); void orte_oob_base_set_addr(int fd, short args, void *cbdata) { mca_oob_uri_req_t *req = (mca_oob_uri_req_t*)cbdata; char *uri = req->uri; opal_output_verbose(5, orte_oob_base_framework.framework_output, "%s: set_addr to uri %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == uri) ? "NULL" : uri); /* if the request doesn't contain a URI, then we * have an error */ if (NULL == uri) { opal_output(0, "%s: NULL URI", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_FORCED_TERMINATE(1); OBJ_RELEASE(req); return; } process_uri(uri); OBJ_RELEASE(req); } static void process_uri(char *uri) { orte_process_name_t peer; char *cptr; mca_base_component_list_item_t *cli; mca_oob_base_component_t *component; char **uris=NULL; int rc; uint64_t ui64; orte_oob_base_peer_t *pr; /* find the first semi-colon in the string */ cptr = strchr(uri, ';'); if (NULL == cptr) { /* got a problem - there must be at least two fields, * the first containing the process name of our peer * and all others containing the OOB contact info */ ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return; } *cptr = '\0'; cptr++; /* the first field is the process name, so convert it */ orte_util_convert_string_to_process_name(&peer, uri); /* if the peer is us, no need to go further as we already * know our own contact info */ if (peer.jobid == ORTE_PROC_MY_NAME->jobid && peer.vpid == ORTE_PROC_MY_NAME->vpid) { opal_output_verbose(5, orte_oob_base_framework.framework_output, "%s:set_addr peer %s is me", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer)); return; } /* split the rest of the uri into component parts */ uris = opal_argv_split(cptr, ';'); /* get the peer object for this process */ memcpy(&ui64, (char*)&peer, sizeof(uint64_t)); if (OPAL_SUCCESS != 
opal_hash_table_get_value_uint64(&orte_oob_base.peers, ui64, (void**)&pr) || NULL == pr) { pr = OBJ_NEW(orte_oob_base_peer_t); if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers, ui64, (void*)pr))) { ORTE_ERROR_LOG(rc); opal_argv_free(uris); return; } } /* loop across all available components and let them extract * whatever piece(s) of the uri they find relevant - they * are all operating on our event base, so we can just * directly call their functions */ rc = ORTE_ERR_UNREACH; OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) { component = (mca_oob_base_component_t*)cli->cli_component; opal_output_verbose(5, orte_oob_base_framework.framework_output, "%s:set_addr checking if peer %s is reachable via component %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer), component->oob_base.mca_component_name); if (NULL != component->set_addr) { if (ORTE_SUCCESS == component->set_addr(&peer, uris)) { /* this component found reachable addresses * in the uris */ opal_output_verbose(5, orte_oob_base_framework.framework_output, "%s: peer %s is reachable via component %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer), component->oob_base.mca_component_name); opal_bitmap_set_bit(&pr->addressable, component->idx); } else { opal_output_verbose(5, orte_oob_base_framework.framework_output, "%s: peer %s is NOT reachable via component %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer), component->oob_base.mca_component_name); } } } opal_argv_free(uris); } #if OPAL_ENABLE_FT_CR == 1 void orte_oob_base_ft_event(int sd, short argc, void *cbdata) { int rc; mca_base_component_list_item_t *cli; mca_oob_base_component_t *component; orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; opal_output_verbose(5, orte_oob_base_framework.framework_output, "%s oob:base:ft_event %s(%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_job_state_to_str(state->job_state), state->job_state); /* loop across all 
available modules in priority order * and call each one's ft_event handler */ OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) { component = (mca_oob_base_component_t*)cli->cli_component; if (NULL == component->ft_event) { /* doesn't support this ability */ continue; } if (ORTE_SUCCESS != (rc = component->ft_event(state->job_state))) { ORTE_ERROR_LOG(rc); } } OBJ_RELEASE(state); } #endif