openmpi/ompi/mca/btl/udapl/btl_udapl_proc.c

/*
 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006      Sandia National Laboratories. All rights
 *                         reserved.
 * Copyright (c) 2008      Sun Microsystems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "ompi_config.h"

#include "opal/class/opal_hash_table.h"
#include "orte/util/show_help.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "opal/util/net.h"
#include "btl_udapl.h"
#include "btl_udapl_endpoint.h"
#include "btl_udapl_proc.h"

/* Constructor and destructor for mca_btl_udapl_proc_t. Both have internal
 * linkage and are referenced by the class registration below. */
static void mca_btl_udapl_proc_construct(mca_btl_udapl_proc_t* proc);
static void mca_btl_udapl_proc_destruct(mca_btl_udapl_proc_t* proc);

/* Register the mca_btl_udapl_proc_t class, passing opal_list_item_t as its
 * parent type together with the constructor and destructor declared above. */
OBJ_CLASS_INSTANCE(mca_btl_udapl_proc_t,
        opal_list_item_t, mca_btl_udapl_proc_construct,
        mca_btl_udapl_proc_destruct);

/*
 * Initialize a uDAPL proc instance.
 *
 * Resets the peer reference, the address count and the endpoint bookkeeping,
 * constructs the per-proc lock, and appends the instance to the
 * component-wide list of procs while holding the component lock.
 *
 * Linkage is internal: the definition inherits "static" from the forward
 * declaration at the top of this file.
 */
void mca_btl_udapl_proc_construct(mca_btl_udapl_proc_t* proc)
{
    /* no peer, no published addresses and no endpoints yet */
    proc->proc_ompi = NULL;
    proc->proc_endpoints = NULL;
    proc->proc_addr_count = 0;
    proc->proc_endpoint_count = 0;

    OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);

    /* make the new instance visible on the list of all proc instances */
    OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock);
    opal_list_append(&mca_btl_udapl_component.udapl_procs, &proc->super);
    OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock);
}


/*
 * Clean up a uDAPL proc instance.
 *
 * Removes the instance from the component-wide list of procs (under the
 * component lock), frees the endpoint pointer array and destructs the
 * per-proc lock that the constructor created.
 *
 * Linkage is internal: the definition inherits "static" from the forward
 * declaration at the top of this file.
 */
void mca_btl_udapl_proc_destruct(mca_btl_udapl_proc_t* proc)
{
    /* remove from list of all proc instances */
    OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock);
    opal_list_remove_item(&mca_btl_udapl_component.udapl_procs, &proc->super);
    OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock);

    /* release the endpoint array; free(NULL) is a no-op so no guard needed */
    free(proc->proc_endpoints);
    proc->proc_endpoints = NULL;

    /* NOTE(review): proc_addrs (filled in by ompi_modex_recv in
     * mca_btl_udapl_proc_create) is not released here -- confirm who owns
     * that buffer before adding a free(). */

    /* pairs with the OBJ_CONSTRUCT of proc_lock in the constructor */
    OBJ_DESTRUCT(&proc->proc_lock);
}


/*
 * Look for an existing uDAPL process instance based on the associated
 * ompi_proc_t instance.
 *
 * The component-wide proc list is walked while holding the component lock.
 * Returns the instance whose proc_ompi equals ompi_proc, or NULL when no
 * such instance is on the list.
 */
static mca_btl_udapl_proc_t* mca_btl_udapl_proc_lookup_ompi(ompi_proc_t* ompi_proc)
{
    mca_btl_udapl_proc_t* found = NULL;
    opal_list_item_t* item;

    OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock);

    item = opal_list_get_first(&mca_btl_udapl_component.udapl_procs);
    while(item != opal_list_get_end(&mca_btl_udapl_component.udapl_procs)) {
        mca_btl_udapl_proc_t* candidate = (mca_btl_udapl_proc_t*)item;

        if(candidate->proc_ompi == ompi_proc) {
            found = candidate;
            break;
        }

        item = opal_list_get_next(item);
    }

    OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock);

    return found;
}

/*
 * Create a uDAPL process structure. There is a one-to-one correspondence
 * between an ompi_proc_t and a mca_btl_udapl_proc_t instance. We cache
 * additional data (specifically the list of mca_btl_udapl_endpoint_t
 * instances, and published addresses) associated with a given destination
 * on this data structure.
 *
 * @param ompi_proc (IN)  Peer process to create (or look up) the uDAPL
 *                        proc for.
 * @return                The existing instance if one is already on the
 *                        component list, a newly created instance
 *                        otherwise, or NULL when:
 *                        - the instance could not be allocated,
 *                        - ompi_modex_recv fails,
 *                        - the received size is not a multiple of
 *                          sizeof(mca_btl_udapl_addr_t),
 *                        - the peer published zero addresses, or
 *                        - the endpoint array could not be allocated.
 */

mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
{
    mca_btl_udapl_proc_t* udapl_proc = NULL;
    size_t size = 0;
    int rc;

    /* Check if we have already created a uDAPL proc
     * structure for this ompi process */
    udapl_proc = mca_btl_udapl_proc_lookup_ompi(ompi_proc);
    if(udapl_proc != NULL) {
        return udapl_proc;
    }

    /* create a new udapl proc out of the ompi_proc ... */
    udapl_proc = OBJ_NEW(mca_btl_udapl_proc_t);
    if(NULL == udapl_proc) {
        /* allocation failed: nothing to release, nothing to dereference */
        return NULL;
    }
    udapl_proc->proc_endpoint_count = 0;
    udapl_proc->proc_ompi = ompi_proc;
    udapl_proc->proc_guid = ompi_proc->proc_name;

    /* query for the peer address info */
    rc = ompi_modex_recv(
                 &mca_btl_udapl_component.super.btl_version,
                 ompi_proc,
                 (void*)&udapl_proc->proc_addrs,
                 &size);
    if(OMPI_SUCCESS != rc) {
        BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
            ("ompi_modex_recv failed for peer %s",
            ORTE_NAME_PRINT(&ompi_proc->proc_name)));
        OBJ_RELEASE(udapl_proc);
        return NULL;
    }

    /* the payload must be a whole number of address records */
    if((size % sizeof(mca_btl_udapl_addr_t)) != 0) {
        BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
            ("invalid udapl address for peer %s",
            ORTE_NAME_PRINT(&ompi_proc->proc_name)));
        OBJ_RELEASE(udapl_proc);
        return NULL;
    }

    udapl_proc->proc_addr_count = size/sizeof(mca_btl_udapl_addr_t);
    if(0 == udapl_proc->proc_addr_count) {
        /* peer published no addresses, so no endpoint can ever be inserted */
        OBJ_RELEASE(udapl_proc);
        return NULL;
    }

    /* one endpoint slot per published peer address; zeroed so unused
     * slots are never read as garbage pointers */
    udapl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
        calloc(udapl_proc->proc_addr_count, sizeof(mca_btl_base_endpoint_t*));
    if(NULL == udapl_proc->proc_endpoints) {
        OBJ_RELEASE(udapl_proc);
        return NULL;
    }
    return udapl_proc;
}


/*
 * Find an address on the peer process which matches stated criteria
 * to the udapl btl module address information. Return in peer_addr_idx
 * the index of the peer process address that matches the btl module
 * address. The match criteria are:
 * - the address is not already in use
 * - compare addresses using netmask, the netmask value can be modified with
 *   "--mca btl_udapl_if_mask"
 *
 * On a match the peer address is marked as in use.
 *
 * Note: since this is called from mca_btl_udapl_proc_insert() it
 * is assumed that the process lock is locked when entered.
 *
 * @param udapl_btl (IN)        BTL module
 * @param peer_proc (IN)        BTL peer process
 * @param peer_addr_idx(OUT)    Index of address on peer_proc
 *                              which matches the udapl_btl address data.
 *                              Set to MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX
 *                              on entry and left at that value when no
 *                              match is found.
 * @return                      OMPI_SUCCESS on a match, OMPI_ERROR if the
 *                              btl module address is not AF_INET,
 *                              OMPI_ERR_OUT_OF_RESOURCE if no peer address
 *                              matched.
 */
static int mca_btl_udapl_proc_address_match(
    mca_btl_udapl_module_t* udapl_btl,
    mca_btl_udapl_proc_t* peer_proc,
    int* peer_addr_idx)
{
    int i;
    struct sockaddr *saddr;
    struct sockaddr_in *btl_addr;
    struct sockaddr_in *peer_addr;
    /* Both buffers start out as empty strings so that every reporting
     * path below reads a valid, terminated string. */
    char btl_addr_string[INET_ADDRSTRLEN] = "";
    char peer_addr_string[INET_ADDRSTRLEN] = "";

    *peer_addr_idx = MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX;

    /* use generic address to find address family */
    saddr = (struct sockaddr *)&(udapl_btl->udapl_addr.addr);

    if (saddr->sa_family != AF_INET) {
        /* current uDAPL BTL only supports IPv4 */
        BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
            ("help-mpi-btl-udapl.txt", "IPv4 only",
            true, orte_process_info.nodename));
        return OMPI_ERROR;
    }

    btl_addr = (struct sockaddr_in *)saddr;

    /* The local address string is loop-invariant and is also needed by the
     * "no network match" message after the loop, which can fire when the
     * loop body never ran or when verbosity is below VERBOSE_INFORM.
     * Format it once, unconditionally. */
    if (NULL == inet_ntop(AF_INET, (void *) &btl_addr->sin_addr,
            btl_addr_string, INET_ADDRSTRLEN)) {
        btl_addr_string[0] = '\0';
    }

    /* Loop thru peer process addresses looking for match.
     * Match criteria:
     * - address should not be "inuse"
     * - both udapl btl module and peer address should be on
     *   the same subnet (compare with if_mask value)
     */
    for(i = 0; i < (int) peer_proc->proc_addr_count; i++) {

        peer_addr =
            (struct sockaddr_in *)&(peer_proc->proc_addrs[i].addr);

        if (VERBOSE_INFORM <=
            mca_btl_udapl_component.udapl_verbosity) {

            /* retrieve peer address string for reporting */
            if (NULL == inet_ntop(AF_INET, (void *) &peer_addr->sin_addr,
                    peer_addr_string, INET_ADDRSTRLEN)) {
                peer_addr_string[0] = '\0';
            }
        }

        if ((false == peer_proc->proc_addrs[i].inuse) &&
            (opal_net_samenetwork((struct sockaddr *)btl_addr,
                (struct sockaddr *)peer_addr, udapl_btl->udapl_if_mask))) {

            /* capture index of remote address where match found */
            *peer_addr_idx = i;

            /* mark this address as now being used */
            peer_proc->proc_addrs[i].inuse = true;

            /* report what address was found to match */
            BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM,
                ("uDAPL BTL module(%s) matched %s",
                btl_addr_string, peer_addr_string));
            break;
        } else {
            /* peer address already used by another udapl btl
             * module or netmask check not successful so skip
             */
            BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM,
                ("uDAPL BTL module(%s) either skipped because it "
                "is already in use or match criteria not successful "
                "for peer address %s",
                btl_addr_string, peer_addr_string));
        }
    }

    if (MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX == *peer_addr_idx) {
        BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
            ("help-mpi-btl-udapl.txt", "no network match",
            true, btl_addr_string, orte_process_info.nodename,
            peer_proc->proc_ompi->proc_hostname));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    return OMPI_SUCCESS;
}


/*
 * Insert a btl endpoint into the proc's endpoint array and assign it a
 * peer address.
 *
 * Note that this routine must be called with the lock on the process
 * already held.
 *
 * @param udapl_proc (IN)      Peer proc that will own the endpoint
 * @param udapl_endpoint (IN)  Endpoint to insert; on success its
 *                             endpoint_proc and endpoint_addr are set
 * @return                     OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if
 *                             every endpoint slot is already taken or no
 *                             peer address matched the endpoint's btl
 *                             module
 */
int mca_btl_udapl_proc_insert(
    mca_btl_udapl_proc_t* udapl_proc,
    mca_btl_base_endpoint_t* udapl_endpoint)
{
    int peer_address_idx;
    mca_btl_udapl_module_t* udapl_btl = udapl_endpoint->endpoint_btl;

    /* Check so as not to create more endpoints than addresses.
     * Example: If one node has 3 btl modules and another only has 2,
     * this check prevents the node with 3 btl modules from
     * overloading the other, i.e. only 2 possible connections will
     * be possible.
     *
     * proc_endpoints holds proc_addr_count slots (see
     * mca_btl_udapl_proc_create), so the array is full as soon as the
     * two counts are equal; ">=" keeps the store below in bounds without
     * relying on the address "inuse" flags.
     */
    if (udapl_proc->proc_endpoint_count >= udapl_proc->proc_addr_count) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* Find an endpoint on the udapl process of interest that matches
     * the endpoint information of the current udapl btl module
     */
    if (OMPI_SUCCESS !=
        mca_btl_udapl_proc_address_match(udapl_btl, udapl_proc,
            &peer_address_idx)) {
        /* no address on peer proc met criteria */
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* insert into endpoint array */
    udapl_endpoint->endpoint_proc = udapl_proc;
    udapl_endpoint->endpoint_addr =
        udapl_proc->proc_addrs[peer_address_idx];

    udapl_proc->proc_endpoints[udapl_proc->proc_endpoint_count] = udapl_endpoint;
    udapl_proc->proc_endpoint_count++;
    return OMPI_SUCCESS;
}